diff --git a/docs/stream_maps.md b/docs/stream_maps.md index 7c65b88c8..9df083e9b 100644 --- a/docs/stream_maps.md +++ b/docs/stream_maps.md @@ -230,11 +230,12 @@ can be referenced directly by mapping expressions. The following functions and namespaces are available for use in mapping expressions: -| Function | Description | -| :------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`md5()`](inv:python:py:module:#hashlib) | Returns an inline MD5 hash of any string, outputting the string representation of the hash's hex digest. This is defined by the SDK internally with native python: [`hashlib.md5(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). | -| [`datetime`](inv:python:py:module:#datetime) | This is the datetime module object from the Python standard library. You can access [`datetime.datetime`](inv:python:py:class:#datetime.datetime), [`datetime.timedelta`](inv:python:py:class:#datetime.timedelta), etc. | -| [`json`](inv:python:py:module:#json) | This is the json module object from the Python standard library. Primarily used for calling [`json.dumps()`](inv:python:py:function:#json.dumps) and [`json.loads()`](inv:python:py:function:#json.loads). | +| Function | Description | +| :--------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`md5()`](inv:python:py:function:#hashlib.md5) | Returns an inline MD5 hash of any string, outputting the string representation of the hash's hex digest. 
This is defined by the SDK internally with native python: [`hashlib.md5(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). | +| [`sha256()`](inv:python:py:function:#hashlib.sha256) | Returns an inline SHA256 hash of any string, outputting the string representation of the hash's hex digest. This is defined by the SDK internally with native python: [`hashlib.sha256(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). | +| [`datetime`](inv:python:py:module:#datetime) | This is the datetime module object from the Python standard library. You can access [`datetime.datetime`](inv:python:py:class:#datetime.datetime), [`datetime.timedelta`](inv:python:py:class:#datetime.timedelta), etc. | +| [`json`](inv:python:py:module:#json) | This is the json module object from the Python standard library. Primarily used for calling [`json.dumps()`](inv:python:py:function:#json.dumps) and [`json.loads()`](inv:python:py:function:#json.loads). | #### Built-in Variable Names diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index 004d0f60a..d528831a1 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -63,6 +63,18 @@ def md5(string: str) -> str: return hashlib.md5(string.encode("utf-8")).hexdigest() # noqa: S324 +def sha256(string: str) -> str: + """Digest a string using SHA256. This is a function for inline calculations. + + Args: + string: String to digest. + + Returns: + A string digested into SHA256. 
+ """ + return hashlib.sha256(string.encode("utf-8")).hexdigest() + + StreamMapsDict: TypeAlias = dict[str, t.Union[str, dict, None]] @@ -307,6 +319,7 @@ def functions(self) -> dict[str, t.Callable]: """ funcs: dict[str, t.Any] = simpleeval.DEFAULT_FUNCTIONS.copy() funcs["md5"] = md5 + funcs["sha256"] = sha256 funcs["datetime"] = datetime funcs["bool"] = bool funcs["json"] = json diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index 0ade78823..192fc2718 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -704,6 +704,7 @@ def discover_streams(self): { "mystream": { "email_hash": "md5(email)", + "email_hash_sha256": "sha256(email)", "fixed_count": "int(count-1)", "__else__": None, }, diff --git a/tests/snapshots/mapped_stream/only_mapped_fields.jsonl b/tests/snapshots/mapped_stream/only_mapped_fields.jsonl index ef70c9aac..c02e689c7 100644 --- a/tests/snapshots/mapped_stream/only_mapped_fields.jsonl +++ b/tests/snapshots/mapped_stream/only_mapped_fields.jsonl @@ -1,6 +1,6 @@ {"type":"STATE","value":{}} -{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]},"fixed_count":{"type":["integer","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]} -{"type":"RECORD","stream":"mystream","record":{"email_hash":"c160f8cc69a4f0bf2b0362752353d060","fixed_count":20},"time_extracted":"2022-01-01T00:00:00+00:00"} -{"type":"RECORD","stream":"mystream","record":{"email_hash":"4b9bb80620f03eb3719e0a061c14283d","fixed_count":12},"time_extracted":"2022-01-01T00:00:00+00:00"} -{"type":"RECORD","stream":"mystream","record":{"email_hash":"426b189df1e2f359efe6ee90f2d2030f","fixed_count":18},"time_extracted":"2022-01-01T00:00:00+00:00"} 
+{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]},"email_hash_sha256":{"type":["string","null"]},"fixed_count":{"type":["integer","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]} +{"type":"RECORD","stream":"mystream","record":{"email_hash":"c160f8cc69a4f0bf2b0362752353d060","email_hash_sha256":"ff8d9819fc0e12bf0d24892e45987e249a28dce836a85cad60e28eaaa8c6d976","fixed_count":20},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email_hash":"4b9bb80620f03eb3719e0a061c14283d","email_hash_sha256":"5ff860bf1190596c7188ab851db691f0f3169c453936e9e1eba2f9a47f7a0018","fixed_count":12},"time_extracted":"2022-01-01T00:00:00+00:00"} +{"type":"RECORD","stream":"mystream","record":{"email_hash":"426b189df1e2f359efe6ee90f2d2030f","email_hash_sha256":"add7232b65bb559f896cbcfa9a600170a7ca381a0366789dcf59ad986bdf4a98","fixed_count":18},"time_extracted":"2022-01-01T00:00:00+00:00"} {"type":"STATE","value":{"bookmarks":{"mystream":{}}}}