From 708c828f662cb4888237a12d7fd1722e3b3bf403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:43:53 -0600 Subject: [PATCH] refactor: Deprecated `Faker` class in stream maps (#2670) * refactor: Deprecated `Faker` class in stream maps * Update docs * Update singer_sdk/mapper.py --- docs/stream_maps.md | 12 +++--- singer_sdk/mapper.py | 8 ++++ tests/core/test_mapper.py | 37 +++++++++++++++++++ .../mapped_stream/fake_email_seed_class.jsonl | 6 +++ .../fake_email_seed_instance.jsonl | 6 +++ 5 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 tests/snapshots/mapped_stream/fake_email_seed_class.jsonl create mode 100644 tests/snapshots/mapped_stream/fake_email_seed_instance.jsonl diff --git a/docs/stream_maps.md b/docs/stream_maps.md index 94d9ac72a..e31a9b634 100644 --- a/docs/stream_maps.md +++ b/docs/stream_maps.md @@ -249,11 +249,9 @@ can be referenced directly by mapping expressions. - `fake` - a [`Faker`](inv:faker:std:doc#index) instance, configurable via `faker_config` (see previous example) - see the built-in [standard providers](inv:faker:std:doc#providers) for available methods -- `Faker` - the [`Faker`](inv:faker:std:doc#fakerclass) class. This was made available to enable consistent data - masking by allowing users to call `Faker.seed()`. ```{tip} - The `fake` object and `Faker` are only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly). + The `fake` object is only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly). ``` :::{versionadded} 0.35.0 @@ -264,6 +262,10 @@ The `faker` object. The `Faker` class. ::: +:::{versionchanged} TODO +The `Faker` class was deprecated in favor of instance methods on the `fake` object. +::: + #### Automatic Schema Detection For performance reasons, type detection is performed at runtime using text analysis @@ -475,9 +477,9 @@ To generate consistent masked values, you must provide the **same seed each time stream_maps: customers: # will always generate the same value for the same seed - first_name: Faker.seed(_['first_name']) or fake.first_name() + first_name: fake.seed_instance(_['first_name']) or fake.first_name() faker_config: - # IMPORTANT: `fake` and `Faker` names are only available if faker_config is defined. + # IMPORTANT: `fake` is only available if the `faker` extra is installed locale: en_US ``` diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index c222acc67..b613b78ec 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -14,7 +14,9 @@ import importlib.util import json import logging +import sys import typing as t +import warnings import simpleeval # type: ignore[import-untyped] @@ -515,6 +517,12 @@ def _init_functions_and_schema( # noqa: PLR0912, PLR0915, C901 self._eval_type(prop_def, default=default_type), ).to_dict(), ) + if "Faker" in prop_def: + warnings.warn( + "Class 'Faker' is deprecated in stream maps. Use instance methods, like 'fake.seed_instance.'", # noqa: E501 + DeprecationWarning, + stacklevel=2, + ) try: parsed_def: ast.Expr = ast.parse(prop_def).body[0] # type: ignore[assignment] stream_map_parsed.append((prop_key, prop_def, parsed_def)) diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index abec1c7ff..a776689a1 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -853,6 +853,43 @@ def discover_streams(self): "fake_credit_card_number.jsonl", id="fake_credit_card_number", ), + pytest.param( + { + "mystream": { + "email": "Faker.seed(email) or fake.email()", + "__else__": None, + }, + }, + { + "flattening_enabled": False, + "flattening_max_depth": 0, + "faker_config": { + "locale": "en_US", + }, + }, + "fake_email_seed_class.jsonl", + id="fake_email_seed_class", + marks=pytest.mark.filterwarnings( + "default:Class 'Faker' is deprecated:DeprecationWarning" + ), + ), + pytest.param( + { + "mystream": { + "email": "fake.seed_instance(email) or fake.email()", + "__else__": None, + }, + }, + { + "flattening_enabled": False, + "flattening_max_depth": 0, + "faker_config": { + "locale": "en_US", + }, + }, + "fake_email_seed_instance.jsonl", + id="fake_email_seed_instance", + ), ], ) def test_mapped_stream( diff --git a/tests/snapshots/mapped_stream/fake_email_seed_class.jsonl b/tests/snapshots/mapped_stream/fake_email_seed_class.jsonl new file mode 100644 index 000000000..e27c2e53f --- /dev/null +++ b/tests/snapshots/mapped_stream/fake_email_seed_class.jsonl @@ -0,0 +1,6 @@ +{"type":"STATE","value":{}} +{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email":{"type":["string","null"]}}},"key_properties":[]} +{"type":"RECORD","stream":"mystream","record":{"email":"zwells@example.org"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email":"josephcunningham@example.com"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email":"lydia62@example.net"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"STATE","value":{"bookmarks":{"mystream":{}}}} diff --git a/tests/snapshots/mapped_stream/fake_email_seed_instance.jsonl b/tests/snapshots/mapped_stream/fake_email_seed_instance.jsonl new file mode 100644 index 000000000..e27c2e53f --- /dev/null +++ b/tests/snapshots/mapped_stream/fake_email_seed_instance.jsonl @@ -0,0 +1,6 @@ +{"type":"STATE","value":{}} +{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email":{"type":["string","null"]}}},"key_properties":[]} +{"type":"RECORD","stream":"mystream","record":{"email":"zwells@example.org"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email":"josephcunningham@example.com"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email":"lydia62@example.net"},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"STATE","value":{"bookmarks":{"mystream":{}}}}