From 47ef1741f75c9c28d1e4f39b17d8b3b4e9a52500 Mon Sep 17 00:00:00 2001
From: mjsqu
Date: Tue, 29 Aug 2023 11:46:11 +1200
Subject: [PATCH 1/2] Force DataFrame datatypes to 'object'

---
NOTE(review): this series was restored from a copy in which all line breaks
and leading indentation had been collapsed. Hunk line counts match the @@
headers, but the indentation of context lines is reconstructed -- confirm it
against the tree, or apply with `git apply --ignore-whitespace`.

 target_snowflake/file_formats/parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target_snowflake/file_formats/parquet.py b/target_snowflake/file_formats/parquet.py
index ad02e6a5..a0ef3c6c 100644
--- a/target_snowflake/file_formats/parquet.py
+++ b/target_snowflake/file_formats/parquet.py
@@ -66,7 +66,7 @@ def records_to_dataframe(records: Dict,
         flatten_record = flattening.flatten_record(record, schema, max_level=data_flattening_max_level)
         flattened_records.append(flatten_record)
 
-    return pandas.DataFrame(data=flattened_records)
+    return pandas.DataFrame(data=flattened_records, dtype='object',)
 
 
 def records_to_file(records: Dict,

From 3c6b354c803978b843361d0d7f908a3f2ac45d32 Mon Sep 17 00:00:00 2001
From: "Mark.Johnston1"
Date: Wed, 30 Aug 2023 12:27:18 +1200
Subject: [PATCH 2/2] Add test for large integer value

---
NOTE(review): test_large_integer asserts that the stored value is *not equal*
to large_integer - 1 instead of asserting equality with large_integer.
Presumably this is deliberate (an equality check against a float64-rounded
value might still pass) -- confirm, and consider additionally asserting that
the value is a Python int so the intent is explicit.

 target_snowflake/file_formats/parquet.py |  5 ++++-
 tests/unit/file_formats/test_parquet.py  | 28 +++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/target_snowflake/file_formats/parquet.py b/target_snowflake/file_formats/parquet.py
index a0ef3c6c..7774a824 100644
--- a/target_snowflake/file_formats/parquet.py
+++ b/target_snowflake/file_formats/parquet.py
@@ -66,7 +66,10 @@ def records_to_dataframe(records: Dict,
         flatten_record = flattening.flatten_record(record, schema, max_level=data_flattening_max_level)
         flattened_records.append(flatten_record)
 
-    return pandas.DataFrame(data=flattened_records, dtype='object',)
+    return pandas.DataFrame(
+        data=flattened_records,
+        dtype='object',
+    )
 
 
 def records_to_file(records: Dict,
diff --git a/tests/unit/file_formats/test_parquet.py b/tests/unit/file_formats/test_parquet.py
index c20aa44e..1a6a2cc6 100644
--- a/tests/unit/file_formats/test_parquet.py
+++ b/tests/unit/file_formats/test_parquet.py
@@ -48,7 +48,33 @@ def test_records_to_dataframe(self):
                                              'key3': ['10000-01-22 12:04:22', '10000-01-22 12:04:22', '10000-01-22 12:04:22'],
                                              'key4': ['12:01:01', '13:01:01', '14:01:01'],
                                              'key5': ['I\'m good', 'I\'m good too', 'I want to be good'],
-                                             'key6': [None, None, None]}))
+                                             'key6': [None, None, None]},
+                                            dtype='object',
+                           ),
+        )
+
+    def test_large_integer(self):
+        """Specific test for dataframes checking that integer values are reproduced exactly."""
+
+        # Create a test record of a large integer and a null in the same key
+        large_integer = 9223372036854775807
+
+        test_records = {
+            '1':
+            {
+                'key1':large_integer
+            },
+            '2':
+            {
+                'key1':None
+            },
+        }
+
+        # Ensure that the large integer is not equal to itself minus 1
+        self.assertNotEqual(
+            large_integer-1
+            ,parquet.records_to_dataframe(records=test_records, schema={})['key1'][0]
+        )
 
     def test_create_copy_sql(self):
         self.assertEqual(parquet.create_copy_sql(table_name='foo_table',