NOTE(review): line breaks and indentation in this patch were reconstructed from
a whitespace-collapsed copy, so leading whitespace may differ from the real
files (apply with `git apply --ignore-whitespace` if context does not match).
The test_large_integer hunk was revised during review, so the post-image blob
id on the second `index` line no longer reflects the content below.

diff --git a/target_snowflake/file_formats/parquet.py b/target_snowflake/file_formats/parquet.py
index ad02e6a5..7774a824 100644
--- a/target_snowflake/file_formats/parquet.py
+++ b/target_snowflake/file_formats/parquet.py
@@ -66,7 +66,10 @@ def records_to_dataframe(records: Dict,
         flatten_record = flattening.flatten_record(record, schema, max_level=data_flattening_max_level)
         flattened_records.append(flatten_record)
 
-    return pandas.DataFrame(data=flattened_records)
+    return pandas.DataFrame(
+        data=flattened_records,
+        dtype='object',
+    )
 
 
 def records_to_file(records: Dict,
diff --git a/tests/unit/file_formats/test_parquet.py b/tests/unit/file_formats/test_parquet.py
index c20aa44e..1a6a2cc6 100644
--- a/tests/unit/file_formats/test_parquet.py
+++ b/tests/unit/file_formats/test_parquet.py
@@ -48,7 +48,29 @@ def test_records_to_dataframe(self):
                 'key3': ['10000-01-22 12:04:22', '10000-01-22 12:04:22', '10000-01-22 12:04:22'],
                 'key4': ['12:01:01', '13:01:01', '14:01:01'],
                 'key5': ['I\'m good', 'I\'m good too', 'I want to be good'],
-                'key6': [None, None, None]}))
+                'key6': [None, None, None]},
+                dtype='object',
+            ),
+        )
+
+    def test_large_integer(self):
+        """Check that a large integer sharing a column with a null is reproduced exactly."""
+        # 2**63 - 1 has no exact float64 representation, so any float coercion of
+        # the column (the null makes that tempting for pandas) would round it.
+        large_integer = 9223372036854775807
+
+        test_records = {
+            '1': {'key1': large_integer},
+            '2': {'key1': None},
+        }
+
+        value = parquet.records_to_dataframe(records=test_records, schema={})['key1'][0]
+
+        # Equality alone is not enough: a float-rounded value can still compare
+        # equal to neighbouring integers, so also rule out a float result.
+        self.assertNotIsInstance(value, float)
+        self.assertEqual(large_integer, value)
+        self.assertNotEqual(large_integer - 1, value)
 
     def test_create_copy_sql(self):
         self.assertEqual(parquet.create_copy_sql(table_name='foo_table',