From 6db9761a46d66ba3c98f62d65108d3dde43d2bfc Mon Sep 17 00:00:00 2001
From: Neil McCallum <neil.mccallum@telefab.co.nz>
Date: Mon, 12 Feb 2024 10:25:15 +1300
Subject: [PATCH 1/2] Add tests and proposed fix for tab-character data corruption

---
 target_snowflake/file_formats/csv.py    | 2 +-
 tests/unit/file_formats/test_csv.py     | 6 +++++-
 tests/unit/file_formats/test_parquet.py | 6 +++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/target_snowflake/file_formats/csv.py b/target_snowflake/file_formats/csv.py
index 42d76ee4..a0748808 100644
--- a/target_snowflake/file_formats/csv.py
+++ b/target_snowflake/file_formats/csv.py
@@ -63,7 +63,7 @@ def record_to_csv_line(record: dict,
 
     return ','.join(
         [
-            json.dumps(flatten_record[column], ensure_ascii=False) if column in flatten_record and (
+            '"{}"'.format(flatten_record[column].replace('"','""')) if column in flatten_record and (
                     flatten_record[column] == 0 or flatten_record[column]) else ''
             for column in schema
         ]
diff --git a/tests/unit/file_formats/test_csv.py b/tests/unit/file_formats/test_csv.py
index 900ba9f1..ed45b8db 100644
--- a/tests/unit/file_formats/test_csv.py
+++ b/tests/unit/file_formats/test_csv.py
@@ -62,6 +62,7 @@ def test_record_to_csv_line(self):
             'key4': '25:01:01',
             'key5': 'I\'m good',
             'key6': None,
+            'key7': 'tab\tin\tvalue\n\r\0"ὠ',
         }
 
         schema = {
@@ -89,10 +90,13 @@ def test_record_to_csv_line(self):
             'key6': {
                 'type': ['null', 'string'], 'format': 'time',
             },
+            'key7':{
+                'type':['null', 'string'],
+            },
         }
 
         self.assertEqual(csv.record_to_csv_line(record, schema),
-                         '"1","2030-01-22","10000-01-22 12:04:22","25:01:01","I\'m good",')
+                         '"1","2030-01-22","10000-01-22 12:04:22","25:01:01","I\'m good",,"tab\tin\tvalue\n\r\0""ὠ"')
 
     def test_create_copy_sql(self):
         self.assertEqual(csv.create_copy_sql(table_name='foo_table',
diff --git a/tests/unit/file_formats/test_parquet.py b/tests/unit/file_formats/test_parquet.py
index c20aa44e..0476b20c 100644
--- a/tests/unit/file_formats/test_parquet.py
+++ b/tests/unit/file_formats/test_parquet.py
@@ -21,6 +21,7 @@ def test_records_to_dataframe(self):
                 'key4': '12:01:01',
                 'key5': 'I\'m good',
                 'key6': None,
+                'key7': 'A tab is a char too',
             },
             '2': {
                 'key1': 2,
@@ -29,6 +30,7 @@ def test_records_to_dataframe(self):
                 'key4': '13:01:01',
                 'key5': 'I\'m good too',
                 'key6': None,
+                'key7': 'A\tis a char too',
             },
             '3': {
                 'key1': 3,
@@ -37,6 +39,7 @@ def test_records_to_dataframe(self):
                 'key4': '14:01:01',
                 'key5': 'I want to be good',
                 'key6': None,
+                'key7': 'A\t\tis a char too',
             }
         }
 
@@ -48,7 +51,8 @@ def test_records_to_dataframe(self):
                                'key3': ['10000-01-22 12:04:22', '10000-01-22 12:04:22', '10000-01-22 12:04:22'],
                                'key4': ['12:01:01', '13:01:01', '14:01:01'],
                                'key5': ['I\'m good', 'I\'m good too', 'I want to be good'],
-                               'key6': [None, None, None]}))
+                               'key6': [None, None, None],
+                               'key7': ['A tab is a char too', 'A\tis a char too', 'A\t\tis a char too'] }))
 
     def test_create_copy_sql(self):
         self.assertEqual(parquet.create_copy_sql(table_name='foo_table',

From d0577570c2c84ca693bba3efedd60df91d42dc2c Mon Sep 17 00:00:00 2001
From: Neil McCallum <neilm@telefab.co.nz>
Date: Wed, 14 Feb 2024 15:20:00 +1300
Subject: [PATCH 2/2] Cast values to str explicitly before CSV quoting (bool, etc.)

---
 target_snowflake/file_formats/csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target_snowflake/file_formats/csv.py b/target_snowflake/file_formats/csv.py
index a0748808..02a67ecf 100644
--- a/target_snowflake/file_formats/csv.py
+++ b/target_snowflake/file_formats/csv.py
@@ -63,7 +63,7 @@ def record_to_csv_line(record: dict,
 
     return ','.join(
         [
-            '"{}"'.format(flatten_record[column].replace('"','""')) if column in flatten_record and (
+            '"{}"'.format(str(flatten_record[column]).replace('"','""')) if column in flatten_record and (
                     flatten_record[column] == 0 or flatten_record[column]) else ''
             for column in schema
         ]