From 34db6999c4269e06d3333e4bde07938ff04fb9f4 Mon Sep 17 00:00:00 2001 From: Stephen Bailey Date: Fri, 12 Nov 2021 14:59:16 -0500 Subject: [PATCH 1/3] Handle `anyOf` types other than string, array, object --- target_snowflake/flattening.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target_snowflake/flattening.py b/target_snowflake/flattening.py index 40ab48ec..035353ce 100644 --- a/target_snowflake/flattening.py +++ b/target_snowflake/flattening.py @@ -48,11 +48,13 @@ def flatten_schema(d, parent_key=None, sep='__', level=0, max_level=0): for k, v in d['properties'].items(): new_key = flatten_key(k, parent_key, sep) if 'type' in v.keys(): + # This is probably skipping over values if 'object' in v['type'] and 'properties' in v and level < max_level: items.extend(flatten_schema(v, parent_key + [k], sep=sep, level=level + 1, max_level=max_level).items()) else: items.append((new_key, v)) else: + # handle situations where, e.g., an `anyOf` value is passed if len(v.values()) > 0: if list(v.values())[0][0]['type'] == 'string': list(v.values())[0][0]['type'] = ['null', 'string'] @@ -63,6 +65,9 @@ def flatten_schema(d, parent_key=None, sep='__', level=0, max_level=0): elif list(v.values())[0][0]['type'] == 'object': list(v.values())[0][0]['type'] = ['null', 'object'] items.append((new_key, list(v.values())[0][0])) + else: + list(v.values())[0][0]['type'] = ['null', 'string'] + items.append((new_key, list(v.values())[0][0])) key_func = lambda item: item[0] sorted_items = sorted(items, key=key_func) From 238a14ba2402a68e1b0db161d0ec40482bd4ac1b Mon Sep 17 00:00:00 2001 From: Stephen Bailey Date: Fri, 12 Nov 2021 15:06:18 -0500 Subject: [PATCH 2/3] Remove extra comment --- target_snowflake/flattening.py | 1 - 1 file changed, 1 deletion(-) diff --git a/target_snowflake/flattening.py b/target_snowflake/flattening.py index 035353ce..d86964e1 100644 --- a/target_snowflake/flattening.py +++ b/target_snowflake/flattening.py @@ -48,7 +48,6 @@ def flatten_schema(d, parent_key=None, sep='__', level=0, max_level=0): for k, v in d['properties'].items(): new_key = flatten_key(k, parent_key, sep) if 'type' in v.keys(): - # This is probably skipping over values if 'object' in v['type'] and 'properties' in v and level < max_level: items.extend(flatten_schema(v, parent_key + [k], sep=sep, level=level + 1, max_level=max_level).items()) else: From fa98535db347fa1fa3377e6ce1b32918600bd52e Mon Sep 17 00:00:00 2001 From: Stephen Bailey Date: Fri, 12 Nov 2021 15:20:57 -0500 Subject: [PATCH 3/3] Add unit test for `anyOf` handling --- tests/unit/test_flattening.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/unit/test_flattening.py b/tests/unit/test_flattening.py index 99f47d4c..5b04204b 100644 --- a/tests/unit/test_flattening.py +++ b/tests/unit/test_flattening.py @@ -61,6 +61,24 @@ def test_flatten_schema(self): } } + not_nested_schema_with_anyof_property_type = { + "type": "object", + "properties": { + "object_col": {"anyOf": [{"type": "object"}, {"type": ["null", "string"]}]}, + "array_col": {"anyOf": [{"type": "array"}, {"type": ["null", "string"]}]}, + "bool_col": {"anyOf": [{"type": ["boolean", "null"]}, {"type": ["null", "string"]}]} + } + } + flattened_schema_with_anyof_property_type = { + "object_col": {"type": ["null", "object"]}, + "array_col": {"type": ["null", "array"]}, + "bool_col": {"type": ["null", "string"]} + } + + # NO FLATTENING - Schema with anyOf properties should be cast to a single data type + self.assertEqual(flatten_schema(not_nested_schema_with_anyof_property_type), + flattened_schema_with_anyof_property_type) + # NO FLATTENING - Schema with object type property but without further properties should be a plain dictionary # No flattening (default) self.assertEqual(flatten_schema(nested_schema_with_properties), nested_schema_with_properties['properties'])