diff --git a/benchmark/local_infinity/CMakeLists.txt b/benchmark/local_infinity/CMakeLists.txt
index 5f1aa0c22b..48e0389fa7 100644
--- a/benchmark/local_infinity/CMakeLists.txt
+++ b/benchmark/local_infinity/CMakeLists.txt
@@ -26,11 +26,13 @@ target_link_libraries(
     event.a
     c++.a
     c++abi.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
 target_link_directories(infinity_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(infinity_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(infinity_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 # ########################################
 # knn
@@ -62,11 +64,13 @@ target_link_libraries(
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(knn_import_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(knn_import_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(knn_import_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(knn_import_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 # query benchmark
 add_executable(knn_query_benchmark
@@ -94,11 +98,13 @@ target_link_libraries(
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(knn_query_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(knn_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(knn_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(knn_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 # ########################################
 # fulltext
@@ -128,11 +134,13 @@ target_link_libraries(
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(fulltext_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(fulltext_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(fulltext_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(fulltext_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 # ########################################
 add_executable(sparse_benchmark
@@ -160,11 +168,13 @@ target_link_libraries(
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(sparse_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(sparse_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(sparse_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(sparse_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 add_executable(bmp_benchmark
     ./sparse/bmp_benchmark.cpp
@@ -191,11 +201,13 @@ target_link_libraries(
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(bmp_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(bmp_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(bmp_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(bmp_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 add_executable(hnsw_benchmark
     ./knn/hnsw_benchmark.cpp
@@ -216,18 +228,19 @@ target_link_libraries(
     dl
     lz4.a
     atomic.a
-
     c++.a
     c++abi.a
     parquet.a
     arrow.a
     thrift.a
     thriftnb.a
+    snappy.a
     ${JEMALLOC_STATIC_LIB}
 )
 
-target_link_directories(hnsw_benchmark BEFORE PUBLIC "${CMAKE_BINARY_DIR}/lib")
+target_link_directories(hnsw_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(hnsw_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(hnsw_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 # add_definitions(-march=native)
 # add_definitions(-msse4.2 -mfma)
diff --git a/benchmark/remote_infinity/CMakeLists.txt b/benchmark/remote_infinity/CMakeLists.txt
index 022f2a7bd7..d5d2e1c1c1 100644
--- a/benchmark/remote_infinity/CMakeLists.txt
+++ b/benchmark/remote_infinity/CMakeLists.txt
@@ -12,6 +12,7 @@ target_include_directories(remote_query_benchmark PUBLIC "${CMAKE_SOURCE_DIR}/th
 target_include_directories(remote_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/thrift/")
 target_link_directories(remote_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(remote_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(remote_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
 target_link_libraries(
         remote_query_benchmark
@@ -32,6 +33,7 @@ target_link_libraries(
         c++abi.a
         parquet.a
         arrow.a
+        snappy.a
         ${JEMALLOC_STATIC_LIB}
 )
 
diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md
index 0e5143abb2..8142000b14 100644
--- a/docs/references/pysdk_api_reference.md
+++ b/docs/references/pysdk_api_reference.md
@@ -169,7 +169,7 @@ metadata.table_count  #0
 
 ## create_table
 
-**RemoteDatabase.create_table(*table_name, columns_definition, conflict_type = ConflictType.Error*)**
+**Database.create_table(*table_name, columns_definition, conflict_type = ConflictType.Error*)**
 
 Create a table with a given name, defining each column in it.
 
@@ -242,7 +242,7 @@ db_obj.create_table("test_create_embedding_table",
 
 ## drop_table
 
-**RemoteDatabase.drop_table(*table_name, conflict_type = ConflictType.Error*)**
+**Database.drop_table(*table_name, conflict_type = ConflictType.Error*)**
 
 Drop a table by name.
 
@@ -266,7 +266,7 @@ db_obj.drop_table("my_table", ConflictType.Error)
 
 ## get_table
 
-**RemoteDatabase.get_table(*table_name*)**
+**Database.get_table(*table_name*)**
 
 Retrieve a table object by name.
 
@@ -290,7 +290,7 @@ except Exception as e:
 
 ## list_tables
 
-**RemoteDatabase.list_tables()**
+**Database.list_tables()**
 
 List all tables in the current database.
 
@@ -308,7 +308,7 @@ res.table_names #["my_table"]
 
 ## show_table
 
-**RemoteDatabase.show_tables()**
+**Database.show_tables()**
 
 Get the information of all tables in the database.
 
@@ -346,7 +346,7 @@ res
 
 ## create_index
 
-**RemoteTable.create_index(*index_name, index_infos, conflict_type = ConflictType.Error*)**
+**Table.create_index(*index_name, index_infos, conflict_type = ConflictType.Error*)**
 
 Create an index by `IndexInfo` list.
 
@@ -443,7 +443,7 @@ table_obj.create_index("my_index",
 
 ## drop_index
 
-**RemoteTable.drop_index(*index_name, conflict_type = ConflictType.Error*)**
+**Table.drop_index(*index_name, conflict_type = ConflictType.Error*)**
 
 Drop an index by name.
 
@@ -467,7 +467,7 @@ table_obj.drop_index("my_index")
 
 ## show_index
 
-**RemoteTable.show_index(*index_name*)**
+**Table.show_index(*index_name*)**
 
 Retrieve the metadata of an index by name.
 
@@ -506,7 +506,7 @@ print(res)
 
 ## list_indexes
 
-**RemoteTable.list_indexes(*index_name*)**
+**Table.list_indexes(*index_name*)**
 
 List the indexes built on the table.
 
@@ -524,7 +524,7 @@ res.index_names #['my_index']
 
 ## insert
 
-**RemoteTable.insert(*data*)**
+**Table.insert(*data*)**
 
 Insert records into the current table. 
 
@@ -550,9 +550,9 @@ table_obj.insert([{"c1": [1.1, 2.2, 3.3]}, {"c1": [4.4, 5.5, 6.6]}, {"c1": [7.7,
 
 ## import_data
 
-**RemoteTable.import_data(*filpath, import_options = None*)**
+**Table.import_data(*filepath, import_options = None*)**
 
-Import data from a file into the table. 
+Imports data from a file into the table. 
 
 ### Parameters
 
@@ -579,9 +579,66 @@ Import data from a file into the table.
 table_obj.import_data(test_csv_dir, None)
 ```
 
+## export_data
+
+```python
+Table.export_data(filepath, export_options = None, columns = None)
+```
+
+Exports the current table to a specified file. 
+
+### Parameters
+
+#### filepath: `str` *Required*
+
+Absolute path to the file for export. Supported file types include: 
+
+- `csv`
+- `jsonl`
+  
+#### export_options: `json`
+
+- **header**: `bool` *Optional*
+  Whether to display table header or not. Works with **.csv** files only:
+  - `True`: Display table header. 
+  - `False`: (Default) Do not display table header. 
+
+- **delimiter**: `str` *Optional* Defaults to ","
+  Delimiter to separate columns. Works with **.csv** files only.
+
+- **file_type**: `str` *Required*
+  The type of the exported file. Supported file types include:
+  - `csv`
+  - `jsonl`
+  
+- **offset**: `int` *Optional*
+  Index specifying the starting row for export. Usually used in conjunction with `limit`. If not specified, the file export starts from the first row. 
+
+- **limit**: `int` *Optional*
+  The maximum number of rows to export. Usually used in conjunction with `offset`. If the table's row count exceeds `offset` + `limit`, the excess rows are excluded from the export.
+
+- **row_limit**: `int` *Optional*
+  Used when you have a large table and need to break the output file into multiple parts. This argument sets the row limit for each part. If you specify **test_export_file.csv** as the file name, the exported files will be named **test_export_file.csv**, **test_export_file.csv.part1**, **test_export_file.csv.part2**, and so on.
+
+#### columns: `[str]` *Optional*
+
+Columns to export to the output file, for example, `["num", "name", "score"]`. If not specified, the entire table is exported. 
+
+### Returns
+
+- Success: `True`
+- Failure: `Exception`
+
+### Examples
+
+```python
+    table_instance.export_data(os.getcwd() + "/export_data.jsonl",
+                               {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"])
+```
+
 ## delete
 
-**RemoteTable.delete(*cond = None*)**
+**Table.delete(*cond = None*)**
 
 Delete rows by condition.The condition is similar to the WHERE conditions in SQL. If  `cond` is not specified, all the data will be removed in the table object.
 
@@ -603,7 +660,7 @@ table_obj.delete()
 
 ## update
 
-**RemoteTable.update(*cond = None*)**
+**Table.update(*cond = None*)**
 
 Search for rows that match the specified condition and update them accordingly.
 
@@ -628,7 +685,7 @@ table_obj.update("c1 > 2", [{"c2": 100, "c3": 1000}])
 
 ## output
 
-**RemoteTable.output(*columns*)**
+**Table.output(*columns*)**
 Specify the columns to display in the search output, or perform aggregation operations or arithmetic calculations. 
 
 ```python
@@ -651,12 +708,12 @@ table_obj.output(["c1+5"])
   
 ### Returns
 
-- Success: self `RemoteTable`
+- Success: self `Table`
 - Failure: `Exception`
 
 ## filter
 
-**RemoteTable.filter(*cond*)**
+**Table.filter(*cond*)**
 
 Create a filtering condition expression.
 
@@ -667,7 +724,7 @@ Create a filtering condition expression.
 
 ### Returns
 
-- Success: self `RemoteTable`
+- Success: self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -678,7 +735,7 @@ table_obj.filter("(-7 < c1 or 9 >= c1) and (c2 = 3)")
 
 ## knn
 
-**RemoteTable.knn(*vector_column_name, embedding_data, embedding_data_type, distance_type, topn, knn_params = None*)**
+**Table.knn(*vector_column_name, embedding_data, embedding_data_type, distance_type, topn, knn_params = None*)**
 
 Build a KNN search expression. Find the top n closet records to the given vector.
 
@@ -698,7 +755,7 @@ Build a KNN search expression. Find the top n closet records to the given vector
 
 ### Returns
 
-- Success: Self `RemoteTable`
+- Success: Self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -710,7 +767,7 @@ table_obj.knn('vec', [3.0] * 5, 'float', 'ip', 2)
 
 ## match sparse
 
-**RemoteTable.match_sparse(*vector_column_name, sparse_data, distance_type, topn, opt_params = None*)**
+**Table.match_sparse(*vector_column_name, sparse_data, distance_type, topn, opt_params = None*)**
 
 ### Parameters
 
@@ -725,7 +782,7 @@ table_obj.knn('vec', [3.0] * 5, 'float', 'ip', 2)
       - 'beta=0.0~1.0'(default: 1.0): A "Query Term Pruning" parameter. The smaller the value, the more aggressive the pruning.
 
 ### Returns
-- Success: Self `RemoteTable`
+- Success: Self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -749,7 +806,7 @@ Create a full-text search expression.
 
 ### Returns
 
-- Success: Self `RemoteTable`
+- Success: Self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -769,7 +826,7 @@ for question in questions:
 
 ## match tensor
 
-**RemoteTable.match_tensor(*vector_column_name, tensor_data, tensor_data_type, method_type, topn, extra_option)**
+**Table.match_tensor(*vector_column_name, tensor_data, tensor_data_type, method_type, topn, extra_option*)**
 
 Build a KNN tensor search expression. Find the top n closet records to the given tensor according to chosen method.
 
@@ -794,7 +851,7 @@ For example, find k most match tensors generated by ColBERT.
 
 ### Returns
 
-- Success: Self `RemoteTable`
+- Success: Self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -806,7 +863,7 @@ match_tensor('t', [[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]], 'float', 'maxsim
 
 ## fusion
 
-**RemoteTable.fusion(*method, options_text = ''*)**
+**Table.fusion(*method, options_text = ''*)**
 
 Build a fusion expression.
 
@@ -830,7 +887,7 @@ Build a fusion expression.
 
 ### Returns
 
-- Success: Self `RemoteTable`
+- Success: Self `Table`
 - Failure: `Exception`
 
 ### Examples
@@ -858,7 +915,7 @@ table_obj.fusion('match_tensor', 'topn=2', make_match_tensor_expr('t', [[0.0, -1
 
 ## optimize
 
-**RemoteTable.optimize(*index_name, opt_params*)**
+**Table.optimize(*index_name, opt_params*)**
 
 ### Parameters
 
@@ -880,10 +937,10 @@ table_obj.optimize('bmp_index_name', {'topk': '10'})
 
 ## get result
 
-**RemoteTable.to_result()**
-**RemoteTable.to_df()**
-**RemoteTable.to_pl()**
-**RemoteTable.to_arrow()**
+**Table.to_result()**
+**Table.to_df()**
+**Table.to_pl()**
+**Table.to_arrow()**
 
 After querying, these four methods above can get result into specific type. 
 `Note: output method must be executed before get result`
diff --git a/example/export_data.py b/example/export_data.py
new file mode 100644
index 0000000000..f06094796d
--- /dev/null
+++ b/example/export_data.py
@@ -0,0 +1,120 @@
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+This example connects to a local Infinity instance, creates a table, inserts data into it, and exports the table's data to a file.
+'''
+
+import infinity
+import os
+
+current_path = os.path.abspath(__file__)
+project_directory = os.path.dirname(current_path)
+
+try:
+    # Open a local directory to store the data
+    infinity_instance = infinity.connect("/var/infinity")
+
+    # Alternatively, connect to a server with 127.0.0.1
+    # infinity_instance = infinity.connect(infinity.common.LOCAL_HOST)
+
+    # 'default_db' is the default database
+    db_instance = infinity_instance.get_database("default_db")
+
+    # Drop my_table if it already exists, so the example can be re-run
+    db_instance.drop_table("my_table", infinity.common.ConflictType.Ignore)
+
+    # Create a table named "my_table" with four columns
+    table_instance = db_instance.create_table("my_table", {
+        "num": {"type": "integer"},
+        "name": {"type": "varchar"},
+        "age": {"type": "integer"},
+        "score": {"type": "float"},
+    })
+
+    # Insert 10 rows of data into 'my_table'; every row uses the four columns defined above
+    table_instance.insert(
+        [
+            {
+                "num": 1,
+                "name": "Tom",
+                "age": 19,
+                "score": 90.5,
+            },
+            {
+                "num": 2,
+                "name": "Henry",
+                "age": 20,
+                "score": 70.0,
+            },
+            {
+                "num": 3,
+                "name": "James",
+                "age": 20,
+                "score": 75.0,
+            },
+            {
+                "num": 4,
+                "name": "Toby",
+                "age": 24,
+                "score": 92.0,
+            },
+            {
+                "num": 5,
+                "name": "Thomas",
+                "age": 20,
+                "score": 72.5,
+            },
+            {
+                "num": 6,
+                "name": "Charlie",
+                "age": 20,
+                "score": 69.0,
+            },
+            {
+                "num": 7,
+                "name": "Chris",
+                "age": 21,
+                "score": 88.0,
+            },
+            {
+                "num": 8,
+                "name": "Bill",
+                "age": 21,
+                "score": 90.0,
+            },
+            {
+                "num": 9,
+                "name": "Stefan",
+                "age": 25,
+                "score": 86.5,
+            },
+            {
+                "num": 10,
+                "name": "Steven",
+                "age": 20,
+                "score": 86.0,
+            },
+        ]
+    )
+
+    # Export the "num", "name" and "score" columns to a JSONL file in the current working directory
+    table_instance.export_data(os.getcwd() + "/export_data.jsonl",
+                               {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"])
+    # TODO also show how to export other type of file
+
+    infinity_instance.disconnect()
+
+except Exception as e:
+    print(str(e))
\ No newline at end of file
diff --git a/example/simple_example.py b/example/simple_example.py
index d07a26a8d6..f5b2d40bbb 100644
--- a/example/simple_example.py
+++ b/example/simple_example.py
@@ -33,7 +33,7 @@
 
     # Create a table named "my_table"
     table_instance = db_instance.create_table("my_table", {
-        "num": {"type": "integer"},
+        "num": {"type": "integer", "constraints": ["PRIMARY KEY"]},
         "body": {"type": "varchar"},
         "vec": {"type": "vector, 4, float"},
     })
diff --git a/python/benchmark/legacy_benchmark/remote_benchmark_knn.py b/python/benchmark/legacy_benchmark/remote_benchmark_knn.py
index b71783819d..e17f66f41c 100644
--- a/python/benchmark/legacy_benchmark/remote_benchmark_knn.py
+++ b/python/benchmark/legacy_benchmark/remote_benchmark_knn.py
@@ -234,7 +234,7 @@ def one_thread(rounds, query_path, ground_truth_path, ef: int, remote: bool, tab
     results.append(f"Avg total dur: {dur_sum:.2f} s")
     results.append(f"Avg QPS: {(len(queries) / dur_sum):.2f}")
 
-    conn.disconnect()
+    infinity_obj.disconnect()
 
     for result in results:
         print(result)
@@ -308,7 +308,7 @@ def str2bool(value):
     parser.add_argument(
         "--ef",
         type=int,
-        default=100,
+        default=200,
         dest="ef"
     )
     parser.add_argument(
diff --git a/python/infinity/local_infinity/db.py b/python/infinity/local_infinity/db.py
index 5de00838a9..d191fc74e7 100644
--- a/python/infinity/local_infinity/db.py
+++ b/python/infinity/local_infinity/db.py
@@ -53,46 +53,77 @@ def get_ordinary_info(column_info, column_defs, column_name, index):
     proto_column_def = WrapColumnDef()
     proto_column_def.id = index
     proto_column_def.column_name = column_name
-
-    proto_column_type = WrapDataType()
-    datatype = column_info["type"]
-    if datatype == "int8":
-        proto_column_type.logical_type = LogicalType.kTinyInt
-    elif datatype == "int16":
-        proto_column_type.logical_type = LogicalType.kSmallInt
-    elif datatype == "int32" or datatype == "int" or datatype == "integer":
-        proto_column_type.logical_type = LogicalType.kInteger
-    elif datatype == "int64":
-        proto_column_type.logical_type = LogicalType.kBigInt
-    elif datatype == "int128":
-        proto_column_type.logical_type = LogicalType.kHugeInt
-    elif datatype == "float" or datatype == "float32":
-        proto_column_type.logical_type = LogicalType.kFloat
-    elif datatype == "double" or datatype == "float64":
-        proto_column_type.logical_type = LogicalType.kDouble
-    elif datatype == "varchar":
-        proto_column_type.logical_type = LogicalType.kVarchar
-        # proto_column_type.physical_type = ttypes.VarcharType()
-    elif datatype == "bool":
-        proto_column_type.logical_type = LogicalType.kBoolean
-    else:
-        raise InfinityException(ErrorCode.INVALID_DATA_TYPE, f"Unknown datatype: {datatype}")
-
-    # process constraints
-    proto_column_def.column_type = proto_column_type
-    if "constraints" in column_info:
-        constraints = column_info["constraints"]
-        for constraint in constraints:
-            if constraint == "null":
-                proto_column_def.constraints.add(ConstraintType.kNull)
-            elif constraint == "not null":
-                proto_column_def.constraints.add(ConstraintType.kNotNull)
-            elif constraint == "primary key":
-                proto_column_def.constraints.add(ConstraintType.kPrimaryKey)
-            elif constraint == "unique":
-                proto_column_def.constraints.add(ConstraintType.kUnique)
-            else:
-                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Unknown constraint: {constraint}")
+    proto_column_def.column_type.logical_type = LogicalType.kInvalid
+
+    for key, value in column_info.items():
+        lower_key = key.lower()
+        match lower_key:
+            case "type":
+                datatype = value.lower()
+                column_big_info = [item.strip() for item in datatype.split(",")]
+                column_big_info_first_str = column_big_info[0].lower()
+                if column_big_info_first_str == "vector" or column_big_info_first_str == "tensor" or column_big_info_first_str == "tensorarray":
+                    return get_embedding_info(column_info, column_defs, column_name, index)
+                elif column_big_info_first_str == "sparse":
+                    return get_sparse_info(column_info, column_defs, column_name, index)
+                else:
+                    pass
+
+                proto_column_type = WrapDataType()
+                match datatype:
+                    case "int8":
+                        proto_column_type.logical_type = LogicalType.kTinyInt
+                    case "int16":
+                        proto_column_type.logical_type = LogicalType.kSmallInt
+                    case "int32" | "int" | "integer":
+                        proto_column_type.logical_type = LogicalType.kInteger
+                    case "int64":
+                        proto_column_type.logical_type = LogicalType.kBigInt
+                    case "int128":
+                        proto_column_type.logical_type = LogicalType.kHugeInt
+                    case "float" | "float32":
+                        proto_column_type.logical_type = LogicalType.kFloat
+                    case "double" | "float64":
+                        proto_column_type.logical_type = LogicalType.kDouble
+                    case "varchar":
+                        proto_column_type.logical_type = LogicalType.kVarchar
+                    case "bool":
+                        proto_column_type.logical_type = LogicalType.kBoolean
+                    case _:
+                        raise InfinityException(ErrorCode.INVALID_DATA_TYPE, f"Unknown datatype: {datatype}")
+                proto_column_def.column_type = proto_column_type
+
+            case "constraints":
+                # process constraints
+                constraints = value
+                for constraint in constraints:
+                    constraint = constraint.lower()
+                    match constraint:
+                        case "null":
+                            if ConstraintType.kNull not in proto_column_def.constraints:
+                                proto_column_def.constraints.add(ConstraintType.kNull)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "not null":
+                            if ConstraintType.kNotNull not in proto_column_def.constraints:
+                                proto_column_def.constraints.add(ConstraintType.kNotNull)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "primary key":
+                            if ConstraintType.kPrimaryKey not in proto_column_def.constraints:
+                                proto_column_def.constraints.add(ConstraintType.kPrimaryKey)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "unique":
+                            if ConstraintType.kUnique not in proto_column_def.constraints:
+                                proto_column_def.constraints.add(ConstraintType.kUnique)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case _:
+                            raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Unknown constraint: {constraint}")
+
+    if proto_column_def.column_type.logical_type is None:
+        raise InfinityException(ErrorCode.NO_COLUMN_DEFINED, f"Column definition without data type")
 
     proto_column_def.constant_expr = get_constant_expr(column_info)
 
@@ -229,13 +260,7 @@ def create_table(self, table_name: str, columns_definition,
         column_defs = []
         for index, (column_name, column_info) in enumerate(columns_definition.items()):
             check_valid_name(column_name, "Column")
-            column_big_info = [item.strip() for item in column_info["type"].split(",")]
-            if column_big_info[0] == "vector" or column_big_info[0] == "tensor" or column_big_info[0] == "tensorarray":
-                get_embedding_info(column_info, column_defs, column_name, index)
-            elif column_big_info[0] == "sparse":
-                get_sparse_info(column_info, column_defs, column_name, index)
-            else:  # numeric or varchar
-                get_ordinary_info(column_info, column_defs, column_name, index)
+            get_ordinary_info(column_info, column_defs, column_name, index)
 
         create_table_conflict: LocalConflictType
         if conflict_type == ConflictType.Error:
diff --git a/python/infinity/remote_thrift/db.py b/python/infinity/remote_thrift/db.py
index f82c8bc812..d37f49e670 100644
--- a/python/infinity/remote_thrift/db.py
+++ b/python/infinity/remote_thrift/db.py
@@ -64,45 +64,76 @@ def get_ordinary_info(column_info, column_defs, column_name, index):
     proto_column_def.id = index
     proto_column_def.name = column_name
 
-    proto_column_type = ttypes.DataType()
-    datatype = column_info["type"]
-    if datatype == "int8":
-        proto_column_type.logic_type = ttypes.LogicType.TinyInt
-    elif datatype == "int16":
-        proto_column_type.logic_type = ttypes.LogicType.SmallInt
-    elif datatype == "int32" or datatype == "int" or datatype == "integer":
-        proto_column_type.logic_type = ttypes.LogicType.Integer
-    elif datatype == "int64":
-        proto_column_type.logic_type = ttypes.LogicType.BigInt
-    elif datatype == "int128":
-        proto_column_type.logic_type = ttypes.LogicType.HugeInt
-    elif datatype == "float" or datatype == "float32":
-        proto_column_type.logic_type = ttypes.LogicType.Float
-    elif datatype == "double" or datatype == "float64":
-        proto_column_type.logic_type = ttypes.LogicType.Double
-    elif datatype == "varchar":
-        proto_column_type.logic_type = ttypes.LogicType.Varchar
-        proto_column_type.physical_type = ttypes.VarcharType()
-    elif datatype == "bool":
-        proto_column_type.logic_type = ttypes.LogicType.Boolean
-    else:
-        raise InfinityException(ErrorCode.INVALID_DATA_TYPE, f"Unknown datatype: {datatype}")
-
-    # process constraints
-    proto_column_def.data_type = proto_column_type
-    if "constraints" in column_info:
-        constraints = column_info["constraints"]
-        for constraint in constraints:
-            if constraint == "null":
-                proto_column_def.constraints.append(ttypes.Constraint.Null)
-            elif constraint == "not null":
-                proto_column_def.constraints.append(ttypes.Constraint.NotNull)
-            elif constraint == "primary key":
-                proto_column_def.constraints.append(ttypes.Constraint.PrimaryKey)
-            elif constraint == "unique":
-                proto_column_def.constraints.append(ttypes.Constraint.Unique)
-            else:
-                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Unknown constraint: {constraint}")
+    for key, value in column_info.items():
+        lower_key = key.lower()
+        match lower_key:
+            case "type":
+                datatype = value.lower()
+                column_big_info = [item.strip() for item in datatype.split(",")]
+                column_big_info_first_str = column_big_info[0].lower()
+                if column_big_info_first_str == "vector" or column_big_info_first_str == "tensor" or column_big_info_first_str == "tensorarray":
+                    return get_embedding_info(column_info, column_defs, column_name, index)
+                elif column_big_info_first_str == "sparse":
+                    return get_sparse_info(column_info, column_defs, column_name, index)
+                else:
+                    pass
+
+                proto_column_type = ttypes.DataType()
+                match datatype:
+                    case "int8":
+                        proto_column_type.logic_type = ttypes.LogicType.TinyInt
+                    case "int16":
+                        proto_column_type.logic_type = ttypes.LogicType.SmallInt
+                    case "int32" | "int" | "integer":
+                        proto_column_type.logic_type = ttypes.LogicType.Integer
+                    case "int64":
+                        proto_column_type.logic_type = ttypes.LogicType.BigInt
+                    case "int128":
+                        proto_column_type.logic_type = ttypes.LogicType.HugeInt
+                    case "float" | "float32":
+                        proto_column_type.logic_type = ttypes.LogicType.Float
+                    case "double" | "float64":
+                        proto_column_type.logic_type = ttypes.LogicType.Double
+                    case "varchar":
+                        proto_column_type.logic_type = ttypes.LogicType.Varchar
+                        proto_column_type.physical_type = ttypes.VarcharType()
+                    case "bool":
+                        proto_column_type.logic_type = ttypes.LogicType.Boolean
+                    case _:
+                        raise InfinityException(ErrorCode.INVALID_DATA_TYPE, f"Unknown datatype: {datatype}")
+                proto_column_def.data_type = proto_column_type
+
+            case "constraints":
+                # process constraints
+                constraints = value
+                for constraint in constraints:
+                    constraint = constraint.lower()
+                    match constraint:
+                        case "null":
+                            if ttypes.Constraint.Null not in proto_column_def.constraints:
+                                proto_column_def.constraints.append(ttypes.Constraint.Null)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "not null":
+                            if ttypes.Constraint.NotNull not in proto_column_def.constraints:
+                                proto_column_def.constraints.append(ttypes.Constraint.NotNull)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "primary key":
+                            if ttypes.Constraint.PrimaryKey not in proto_column_def.constraints:
+                                proto_column_def.constraints.append(ttypes.Constraint.PrimaryKey)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case "unique":
+                            if ttypes.Constraint.Unique not in proto_column_def.constraints:
+                                proto_column_def.constraints.append(ttypes.Constraint.Unique)
+                            else:
+                                raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Duplicated constraint: {constraint}")
+                        case _:
+                            raise InfinityException(ErrorCode.INVALID_CONSTRAINT_TYPE, f"Unknown constraint: {constraint}")
+
+    if proto_column_def.data_type is None:
+        raise InfinityException(ErrorCode.NO_COLUMN_DEFINED, f"Column definition without data type")
 
     proto_column_def.constant_expr = get_constant_expr(column_info)
     column_defs.append(proto_column_def)
@@ -238,13 +269,7 @@ def create_table(self, table_name: str, columns_definition,
         column_defs = []
         for index, (column_name, column_info) in enumerate(columns_definition.items()):
             check_valid_name(column_name, "Column")
-            column_big_info = [item.strip() for item in column_info["type"].split(",")]
-            if column_big_info[0] == "vector" or column_big_info[0] == "tensor" or column_big_info[0] == "tensorarray":
-                get_embedding_info(column_info, column_defs, column_name, index)
-            elif column_big_info[0] == "sparse":
-                get_sparse_info(column_info, column_defs, column_name, index)
-            else:  # numeric or varchar
-                get_ordinary_info(column_info, column_defs, column_name, index)
+            get_ordinary_info(column_info, column_defs, column_name, index)
 
         create_table_conflict: ttypes.CreateConflict
         if conflict_type == ConflictType.Error:
diff --git a/python/test/cases/test_database.py b/python/test/cases/test_database.py
index 73336258f1..44ec86f244 100644
--- a/python/test/cases/test_database.py
+++ b/python/test/cases/test_database.py
@@ -112,4 +112,7 @@ def test_show_table_columns_with_invalid_name(self, column_name):
 
     @pytest.mark.slow
     def test_create_drop_show_1M_databases(self):
-        self.test_infinity_obj._test_create_drop_show_1M_databases()
\ No newline at end of file
+        self.test_infinity_obj._test_create_drop_show_1M_databases()
+
+    def test_create_upper_database_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_upper_database_name()
diff --git a/python/test/cases/test_index.py b/python/test/cases/test_index.py
index 26b99bd10a..0af2ec44fc 100644
--- a/python/test/cases/test_index.py
+++ b/python/test/cases/test_index.py
@@ -241,3 +241,30 @@ def test_supported_vector_index(self, index_distance_type):
     @pytest.mark.parametrize("index_distance_type", ["cosine", "hamming"])
     def test_unsupported_vector_index(self, index_distance_type):
         self.test_infinity_obj._test_unsupported_vector_index(index_distance_type)
+
+
+    def test_create_upper_name_index(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_upper_name_index()
+
+    @pytest.mark.parametrize("index_type", [  # the test runs once per index type listed here
+        index.IndexType.IVFFlat,
+        index.IndexType.Hnsw,
+        index.IndexType.BMP,
+        index.IndexType.FullText,
+        index.IndexType.EMVB,
+        index.IndexType.Secondary,
+    ])
+    def test_create_index_with_converse_param_name(self, index_type):  # delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_index_with_converse_param_name(index_type)
+
+    @pytest.mark.parametrize("index_type", [  # the test runs once per index type listed here
+        index.IndexType.IVFFlat,
+        index.IndexType.Hnsw,
+        index.IndexType.BMP,
+        index.IndexType.FullText,
+        index.IndexType.EMVB,
+        index.IndexType.Secondary,
+    ])
+    def test_create_index_with_converse_param_value(self, index_type):  # delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_index_with_converse_param_value(index_type)
+
diff --git a/python/test/cases/test_table.py b/python/test/cases/test_table.py
index 32788a7a65..844676b78a 100644
--- a/python/test/cases/test_table.py
+++ b/python/test/cases/test_table.py
@@ -149,6 +149,21 @@ def test_create_duplicated_table_with_error_option(self):
     def test_create_duplicated_table_with_replace_option(self):
         self.test_infinity_obj._test_create_duplicated_table_with_replace_option()
 
+    def test_create_upper_table_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_upper_table_name()
+
+    def test_create_table_with_upper_column_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_table_with_upper_column_name()
+
+    def test_create_table_with_upper_param_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_table_with_upper_param_name()
+
+    def test_create_table_with_upper_data_type_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_table_with_upper_data_type_name()
+
+    def test_create_table_with_upper_constraint_name(self):  # pytest entry point; delegates to the implementation on self.test_infinity_obj
+        self.test_infinity_obj._test_create_table_with_upper_constraint_name()
+
     def test_table(self):
         # self.test_infinity_obj._test_version()
         self.test_infinity_obj._test_table()
@@ -180,6 +195,3 @@ def test_create_10k_table(self):
 
     def test_create_1K_table(self):
         self.test_infinity_obj._test_create_1K_table()
-
-
-
diff --git a/python/test/internal/test_database.py b/python/test/internal/test_database.py
index be925e3c1d..eca05fb502 100755
--- a/python/test/internal/test_database.py
+++ b/python/test/internal/test_database.py
@@ -554,4 +554,14 @@ def _test_show_table_columns_with_invalid_name(self, column_name):
         assert e.type == infinity.common.InfinityException
         assert e.value.args[0] == ErrorCode.TABLE_NOT_EXIST or e.value.args[0] == ErrorCode.INVALID_IDENTIFIER_NAME
 
-        db_obj.drop_table("test_show_table_columns", ConflictType.Error)
\ No newline at end of file
+        db_obj.drop_table("test_show_table_columns", ConflictType.Error)
+
+    def _test_create_upper_database_name(self):  # create a database as "MY_DATABASE", then resolve it by both spellings
+        db_upper_name = "MY_DATABASE"
+        db_lower_name = "my_database"
+        self.infinity_obj.drop_database(db_lower_name, ConflictType.Ignore)  # drop any leftover from an earlier run first
+        assert self.infinity_obj.create_database(db_upper_name, ConflictType.Error) is not None
+        assert self.infinity_obj.get_database(db_lower_name) is not None  # lookup by the lower-case spelling
+        assert self.infinity_obj.get_database(db_upper_name) is not None  # lookup by the spelling used at creation
+        res = self.infinity_obj.drop_database(db_lower_name, ConflictType.Error)  # clean up so the database does not leak into other tests
+        assert res.error_code == ErrorCode.OK
\ No newline at end of file
diff --git a/python/test/internal/test_index.py b/python/test/internal/test_index.py
index 41d10923a2..12fc0dfbde 100755
--- a/python/test/internal/test_index.py
+++ b/python/test/internal/test_index.py
@@ -1077,3 +1077,224 @@ def _test_unsupported_vector_index(self, index_distance_type):
         res = db_obj.drop_table(
             "test_unsupported_vector_index", ConflictType.Error)
         assert res.error_code == ErrorCode.OK
+
+    def _test_create_upper_name_index(self):  # create an index as "MY_INDEX", then show it by both spellings and drop it as "my_index"
+        db_obj = self.infinity_obj.get_database("default_db")
+        res = db_obj.drop_table("test_upper_name_index", ConflictType.Ignore)  # drop any leftover table from an earlier run
+        assert res.error_code == ErrorCode.OK
+        table_obj = db_obj.create_table("test_upper_name_index", {
+            "c1": {"type": "vector,1024,float"}}, ConflictType.Error)
+        assert table_obj is not None
+
+        upper_name_index = "MY_INDEX"
+        lower_name_index = "my_index"
+        res = table_obj.create_index(upper_name_index,
+                                     [index.IndexInfo("c1",
+                                                      index.IndexType.IVFFlat,
+                                                      [index.InitParameter("centroids_count", "128"),
+                                                       index.InitParameter("metric", "l2")])], ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
+
+        res = table_obj.show_index(lower_name_index)  # lookup by the lower-case spelling
+        assert res.error_code == ErrorCode.OK
+        res = table_obj.show_index(upper_name_index)  # lookup by the spelling used at creation
+        assert res.error_code == ErrorCode.OK
+
+        res = table_obj.drop_index(lower_name_index, ConflictType.Error)  # the drop also goes through the lower-case spelling
+        assert res.error_code == ErrorCode.OK
+        res = db_obj.drop_table("test_upper_name_index", ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
+
+    def _test_create_index_with_converse_param_name(self, index_type):  # per index type: build "test_index", create "my_index" with case-flipped parameter names, then show and drop it
+        db_obj = self.infinity_obj.get_database("default_db")
+        res = db_obj.drop_table("test_index", ConflictType.Ignore)  # drop any leftover table from an earlier run
+        assert res.error_code == ErrorCode.OK
+
+        if index_type == index.IndexType.IVFFlat:  # upper-case parameter names, lower-case value "l2"
+            table_obj = db_obj.create_table("test_index", {
+                "c1": {"type": "vector,1024,float"}}, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.IVFFlat,
+                                                          [index.InitParameter("CENTROIDS_COUNT", "128"),
+                                                           index.InitParameter("METRIC", "l2")])], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.Hnsw:  # "m" is lower-case here, the other names are upper-case
+            table_obj = db_obj.create_table(
+                "test_index", {"c1": {"type": "vector,1024,float"}}, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.Hnsw,
+                                                          [
+                                                              index.InitParameter(
+                                                                  "m", "16"),
+                                                              index.InitParameter(
+                                                                  "EF_CONSTRUCTION", "50"),
+                                                              index.InitParameter(
+                                                                  "EF", "50"),
+                                                              index.InitParameter(
+                                                                  "METRIC", "l2")
+                                                          ])], ConflictType.Error)
+
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.BMP:  # upper-case parameter names on a sparse column
+            table_obj = db_obj.create_table(
+                "test_index", {"col1": {"type": "int"}, "col2": {"type": "sparse,30000,float,int16"}}, ConflictType.Error)
+            assert table_obj is not None
+
+            # Roughly: CREATE INDEX my_index ON test_index (col2) USING Bmp WITH (BLOCK_SIZE = 8, COMPRESS_TYPE = compress);
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("col2",
+                                                          index.IndexType.BMP,
+                                                          [index.InitParameter("BLOCK_SIZE", "8"),
+                                                           index.InitParameter("COMPRESS_TYPE", "compress")])],
+                                         ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.FullText:  # lower-case parameter name 'analyzer'
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "doctitle": {"type": "varchar"}, "docdate": {"type": "varchar"}, "body": {"type": "varchar"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("body",
+                                                          index.IndexType.FullText,
+                                                          [index.InitParameter('analyzer', 'standard')]),
+                                          ], ConflictType.Error)
+
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.EMVB:  # upper-case parameter names on a tensor column
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "c1": {"type": "int"}, "c2": {"type": "tensor, 128, float"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c2",
+                                                          index.IndexType.EMVB,
+                                                          [index.InitParameter("PQ_SUBSPACE_NUM", "32"),
+                                                           index.InitParameter("PQ_SUBSPACE_BITS", "8")]),
+                                          ], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.Secondary:  # empty parameter list, so there is no name to vary
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "c1": {"type": "int"}, "body": {"type": "varchar"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.Secondary,
+                                                          []),
+                                          ], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+
+        res = table_obj.show_index("my_index")  # table_obj is only bound if one of the branches above matched
+        assert res.error_code == ErrorCode.OK
+
+        res = table_obj.drop_index("my_index", ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
+        res = db_obj.drop_table("test_index", ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
+
+    def _test_create_index_with_converse_param_value(self, index_type):  # per index type: build "test_index", create "my_index" with case-flipped parameter values, then show and drop it
+        db_obj = self.infinity_obj.get_database("default_db")
+        res = db_obj.drop_table("test_index", ConflictType.Ignore)  # drop any leftover table from an earlier run
+        assert res.error_code == ErrorCode.OK
+        if index_type == index.IndexType.IVFFlat:  # lower-case parameter names, upper-case value "L2"
+            table_obj = db_obj.create_table("test_index", {
+                "c1": {"type": "vector,1024,float"}}, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.IVFFlat,
+                                                          [index.InitParameter("centroids_count", "128"),
+                                                           index.InitParameter("metric", "L2")])], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.Hnsw:  # name "M" is upper-case here; the varied value is "L2"
+            table_obj = db_obj.create_table(
+                "test_index", {"c1": {"type": "vector,1024,float"}}, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.Hnsw,
+                                                          [
+                                                              index.InitParameter(
+                                                                  "M", "16"),
+                                                              index.InitParameter(
+                                                                  "ef_construction", "50"),
+                                                              index.InitParameter(
+                                                                  "ef", "50"),
+                                                              index.InitParameter(
+                                                                  "metric", "L2")
+                                                          ])], ConflictType.Error)
+
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.BMP:  # lower-case parameter names, upper-case value "COMPRESS"
+            table_obj = db_obj.create_table(
+                "test_index", {"col1": {"type": "int"}, "col2": {"type": "sparse,30000,float,int16"}},
+                ConflictType.Error)
+            assert table_obj is not None
+
+            # Roughly: CREATE INDEX my_index ON test_index (col2) USING Bmp WITH (block_size = 8, compress_type = COMPRESS);
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("col2",
+                                                          index.IndexType.BMP,
+                                                          [index.InitParameter("block_size", "8"),
+                                                           index.InitParameter("compress_type", "COMPRESS")])],
+                                         ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.FullText:  # both the name 'ANALYZER' and the value 'STANDARD' are upper-case
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "doctitle": {"type": "varchar"}, "docdate": {"type": "varchar"}, "body": {"type": "varchar"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("body",
+                                                          index.IndexType.FullText,
+                                                          [index.InitParameter('ANALYZER', 'STANDARD')]),
+                                          ], ConflictType.Error)
+
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.EMVB:  # values are numeric strings, so there is no letter case to vary
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "c1": {"type": "int"}, "c2": {"type": "tensor, 128, float"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c2",
+                                                          index.IndexType.EMVB,
+                                                          [index.InitParameter("pq_subspace_num", "32"),
+                                                           index.InitParameter("pq_subspace_bits", "8")]),
+                                          ], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+        elif index_type == index.IndexType.Secondary:  # empty parameter list, so there is no value to vary
+            table_obj = db_obj.create_table(
+                "test_index", {
+                    "c1": {"type": "int"}, "body": {"type": "varchar"}
+                }, ConflictType.Error)
+            assert table_obj is not None
+            res = table_obj.create_index("my_index",
+                                         [index.IndexInfo("c1",
+                                                          index.IndexType.Secondary,
+                                                          []),
+                                          ], ConflictType.Error)
+            assert res.error_code == ErrorCode.OK
+
+        res = table_obj.show_index("my_index")  # table_obj is only bound if one of the branches above matched
+        assert res.error_code == ErrorCode.OK
+
+        res = table_obj.drop_index("my_index", ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
+        res = db_obj.drop_table("test_index", ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
\ No newline at end of file
diff --git a/python/test/internal/test_table.py b/python/test/internal/test_table.py
index 4fb242dcdf..70d96c33cb 100755
--- a/python/test/internal/test_table.py
+++ b/python/test/internal/test_table.py
@@ -848,3 +848,89 @@ def _test_create_duplicated_table_with_replace_option(self):
 
         res = db_obj.drop_table("test_create_duplicated_table_with_replace_option", ConflictType.Error)
         assert res.error_code == ErrorCode.OK
+
+    def _test_create_upper_table_name(self):  # create a table as "TEST_TABLE_MY_TABLE", then fetch it by both spellings
+        db_obj = self.infinity_obj.get_database("default_db")
+        table_lower_name = "test_table_my_table"
+        db_obj.drop_table(table_lower_name, ConflictType.Ignore)  # drop any leftover table from an earlier run
+
+        table_upper_name = "TEST_TABLE_MY_TABLE"
+        # create the table under the upper-case spelling
+        tb = db_obj.create_table(
+            table_upper_name, {"c1": {"type": "int", "constraints": ["primary key"]}, "c2": {"type": "float"}},
+            ConflictType.Error)
+        assert tb
+
+        # fetch by both spellings; NOTE(review): results are not asserted on, presumably relying on get_table raising on failure - confirm
+        res = db_obj.get_table(table_lower_name)
+        res = db_obj.get_table(table_upper_name)
+
+    def _test_create_table_with_upper_column_name(self):  # declare columns "C1"/"C2" and check show_columns reports "c1"/"c2"
+        db_obj = self.infinity_obj.get_database("default_db")
+        table_name = "test_table_my_table"
+        db_obj.drop_table(table_name, ConflictType.Ignore)  # drop any leftover table from an earlier run
+        # create the table with upper-case column names
+        tb = db_obj.create_table(
+            table_name, {"C1": {"type": "int", "constraints": ["primary key"]}, "C2": {"type": "float"}},
+            ConflictType.Error)
+        assert tb
+
+        # fetch the table; the result is overwritten by show_columns below
+        res = db_obj.get_table(table_name)
+
+        res = db_obj.show_columns(table_name)
+        print("\n")
+        print(res)
+        assert res["column_name"][0] == "c1"  # names come back lower-cased, in declaration order
+        assert res["column_name"][1] == "c2"
+
+    def _test_create_table_with_upper_param_name(self):  # column definitions use the keys "TYPE"/"CONSTRAINTS" instead of "type"/"constraints"
+        db_obj = self.infinity_obj.get_database("default_db")
+        table_name = "test_table_my_table"
+        db_obj.drop_table(table_name, ConflictType.Ignore)  # drop any leftover table from an earlier run
+        # create the table with upper-case definition keys (the second column name "C2" is upper-case as well)
+        tb = db_obj.create_table(
+            table_name, {"c1": {"TYPE": "int", "CONSTRAINTS": ["primary key"]}, "C2": {"TYPE": "float"}},
+            ConflictType.Error)
+        assert tb
+
+        # fetch the table; the result is overwritten by show_columns below
+        res = db_obj.get_table(table_name)
+
+        res = db_obj.show_columns(table_name)  # printed only; the columns are not asserted on
+        print("\n")
+        print(res)
+
+    def _test_create_table_with_upper_data_type_name(self):  # column types are spelled "INT"/"FLOAT" instead of "int"/"float"
+        db_obj = self.infinity_obj.get_database("default_db")
+        table_name = "test_table_my_table"
+        db_obj.drop_table(table_name, ConflictType.Ignore)  # drop any leftover table from an earlier run
+        # create the table with upper-case data type names
+        tb = db_obj.create_table(
+            table_name, {"c1": {"type": "INT", "constraints": ["primary key"]}, "c2": {"type": "FLOAT"}},
+            ConflictType.Error)
+        assert tb
+
+        # fetch the table; the result is overwritten by show_columns below
+        res = db_obj.get_table(table_name)
+
+        res = db_obj.show_columns(table_name)  # printed only; the columns are not asserted on
+        print("\n")
+        print(res)
+
+    def _test_create_table_with_upper_constraint_name(self):  # the constraint is spelled "PRIMARY KEY" instead of "primary key"
+        db_obj = self.infinity_obj.get_database("default_db")
+        table_name = "test_table_my_table"
+        db_obj.drop_table(table_name, ConflictType.Ignore)  # drop any leftover table from an earlier run
+        # create the table with an upper-case constraint name
+        tb = db_obj.create_table(
+            table_name, {"c1": {"type": "int", "constraints": ["PRIMARY KEY"]}, "c2": {"type": "float"}},
+            ConflictType.Error)
+        assert tb
+
+        # fetch the table; the result is overwritten by show_columns below
+        res = db_obj.get_table(table_name)
+
+        res = db_obj.show_columns(table_name)  # printed only; the columns are not asserted on
+        print("\n")
+        print(res)
\ No newline at end of file
diff --git a/scripts/Dockerfile_infinity_builder_centos7 b/scripts/Dockerfile_infinity_builder_centos7
index 24aa95866e..37f07e7d65 100644
--- a/scripts/Dockerfile_infinity_builder_centos7
+++ b/scripts/Dockerfile_infinity_builder_centos7
@@ -1,20 +1,5 @@
 # NOTICE: This Dockerfile depends on BuildKit
-# NOTICE: You should prepare the following files
-# NOTICE: You can use the download_deps_infinity_builder_centos7.sh script to download them
-# bison-3.8.2.tar.xz
-# binutils-2.41.tar.xz
-# gcc-13.2.0.tar.xz
-# cmake-3.29.3-linux-x86_64.tar.gz
-# ninja-linux.zip
-# llvm-project-18.1.8.src.tar.xz
-# boost_1_81_0.tar.bz2
-# flex-2.6.4.tar.gz
-# libevent-2.1.12-stable.tar.gz
-# lz4-1.9.4.tar.gz
-# jemalloc-5.3.0.tar.bz2
-# gperftools-2.15.tar.gz
-# openssl-1.1.1w.tar.gz
-# Python-3.12.4.tar.xz
+# NOTICE: You can use the download_deps_infinity_builder_centos7.sh script to download dependencies.
 
 FROM centos:7.9.2009
 
@@ -36,6 +21,7 @@ RUN --mount=type=bind,source=bison-3.8.2.tar.xz,target=/root/bison-3.8.2.tar.xz
     && ldconfig && cd /root && rm -rf bison-3.8.2
 
 # Install binutils-2.41
+# NOTE: stay on binutils-2.41; an attempt to build binutils-2.42 here failed.
 RUN --mount=type=bind,source=binutils-2.41.tar.xz,target=/root/binutils-2.41.tar.xz \
     cd /root && tar xf binutils-2.41.tar.xz && cd binutils-2.41 \
     && ./configure --enable-gold \
@@ -55,12 +41,12 @@ RUN --mount=type=bind,source=gcc-13.2.0.tar.xz,target=/root/gcc-13.2.0.tar.xz \
 
 ENV LIBRARY_PATH=/usr/local/lib:/usr/local/lib64
 
-# Install cmake-3.29.3
-RUN --mount=type=bind,source=cmake-3.29.3-linux-x86_64.tar.gz,target=/root/cmake-3.29.3-linux-x86_64.tar.gz \
-    cd /root && tar xf cmake-3.29.3-linux-x86_64.tar.gz \
-    && cp -rf cmake-3.29.3-linux-x86_64/bin/* /usr/local/bin \
-    && cp -rf cmake-3.29.3-linux-x86_64/share/* /usr/local/share \
-    && rm -rf cmake-3.29.3-linux-x86_64
+# Install cmake-3.30.1 from the prebuilt linux-x86_64 tarball by copying its bin/ and share/ into /usr/local
+RUN --mount=type=bind,source=cmake-3.30.1-linux-x86_64.tar.gz,target=/root/cmake-3.30.1-linux-x86_64.tar.gz \
+    cd /root && tar xf cmake-3.30.1-linux-x86_64.tar.gz \
+    && cp -rf cmake-3.30.1-linux-x86_64/bin/* /usr/local/bin \
+    && cp -rf cmake-3.30.1-linux-x86_64/share/* /usr/local/share \
+    && rm -rf cmake-3.30.1-linux-x86_64
 
 # Install ninja-1.12.1
 RUN --mount=type=bind,source=ninja-linux.zip,target=/root/ninja-linux.zip \
@@ -107,11 +93,11 @@ RUN --mount=type=bind,source=libevent-2.1.12-stable.tar.gz,target=/root/libevent
     && ninja install \
     && ldconfig && cd /root && rm -rf libevent-2.1.12-stable
 
-# Install lz4-1.9.4
-RUN --mount=type=bind,source=lz4-1.9.4.tar.gz,target=/root/lz4-1.9.4.tar.gz  \
-    cd /root && tar xf lz4-1.9.4.tar.gz \
-    && cd lz4-1.9.4 && CFLAGS="-fPIC" make -j install \
-    && ldconfig && cd /root && rm -rf lz4-1.9.4
+# Install lz4-1.10.0 from the bind-mounted source tarball, compiled with -fPIC
+RUN --mount=type=bind,source=lz4-1.10.0.tar.gz,target=/root/lz4-1.10.0.tar.gz  \
+    cd /root && tar xf lz4-1.10.0.tar.gz \
+    && cd lz4-1.10.0 && CFLAGS="-fPIC" make -j install \
+    && ldconfig && cd /root && rm -rf lz4-1.10.0
 
 # Install zlib-1.3.1
 RUN --mount=type=bind,source=zlib-1.3.1.tar.gz,target=/root/zlib-1.3.1.tar.gz  \
@@ -119,6 +105,24 @@ RUN --mount=type=bind,source=zlib-1.3.1.tar.gz,target=/root/zlib-1.3.1.tar.gz  \
     && cd zlib-1.3.1 && ./configure && CFLAGS="-fPIC" make -j install \
     && ldconfig && cd /root && rm -rf zlib-1.3.1
 
+# Install zstd-1.5.5 from the bind-mounted source tarball: build the lib-mt target, then run make install
+RUN --mount=type=bind,source=zstd-1.5.5.tar.gz,target=/root/zstd-1.5.5.tar.gz  \
+    cd /root && tar xf zstd-1.5.5.tar.gz \
+    && cd zstd-1.5.5 && make -j lib-mt && make install \
+    && ldconfig && cd /root && rm -rf zstd-1.5.5
+
+# Install bzip2-1.0.8 from the bind-mounted source tarball with a plain make install (no configure step)
+RUN --mount=type=bind,source=bzip2-1.0.8.tar.gz,target=/root/bzip2-1.0.8.tar.gz  \
+    cd /root && tar xf bzip2-1.0.8.tar.gz \
+    && cd bzip2-1.0.8 && make install \
+    && ldconfig && cd /root && rm -rf bzip2-1.0.8
+
+# Install brotli-1.1.0 (tarball is named v1.1.0.tar.gz, unpacks to brotli-1.1.0) via an out-of-source CMake Release build
+RUN --mount=type=bind,source=v1.1.0.tar.gz,target=/root/v1.1.0.tar.gz  \
+    cd /root && tar xf v1.1.0.tar.gz \
+    && cd brotli-1.1.0 && mkdir out && cd out && cmake -DCMAKE_BUILD_TYPE=Release .. && cmake --build . --config Release --target install \
+    && ldconfig && cd /root && rm -rf brotli-1.1.0
+
 # Install jemalloc-5.3.0
 # Known issue: Composition of `-fsanitize=address`, staticly linked jemalloc and `mallctl` cause crash at initialization.
 # Refers to https://github.com/jemalloc/jemalloc/issues/2454
diff --git a/scripts/download_deps_infinity_builder_centos7.sh b/scripts/download_deps_infinity_builder_centos7.sh
index 4074846c63..48857a956e 100644
--- a/scripts/download_deps_infinity_builder_centos7.sh
+++ b/scripts/download_deps_infinity_builder_centos7.sh
@@ -1,18 +1,5 @@
 #!/usr/bin/env bash
 
-# This script will download the following files:
-# bison-3.8.2.tar.xz
-# binutils-2.41.tar.xz
-# gcc-13.2.0.tar.xz
-# cmake-3.28.3-linux-x86_64.tar.gz
-# ninja-linux.zip
-# llvm-project-17.0.6.src.tar.xz
-# boost_1_81_0.tar.bz2
-# flex-2.6.4.tar.gz
-# liburing-2.5.tar.gz
-# libevent-2.1.12-stable.tar.gz
-# lz4-1.9.4.tar.gz
-
 download()
 {
     echo "download $1"
@@ -27,14 +14,17 @@ download()
 names="https://ftp.gnu.org/gnu/bison/bison-3.8.2.tar.xz
 https://ftp.gnu.org/gnu/binutils/binutils-2.41.tar.xz
 https://ftp.gnu.org/gnu/gcc/gcc-13.2.0/gcc-13.2.0.tar.xz
-https://github.com/Kitware/CMake/releases/download/v3.29.3/cmake-3.29.3-linux-x86_64.tar.gz
+https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.tar.gz
 https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip
 https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/llvm-project-18.1.8.src.tar.xz
 https://boostorg.jfrog.io/artifactory/main/release/1.81.0/source/boost_1_81_0.tar.bz2
 https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz
 https://github.com/libevent/libevent/releases/download/release-2.1.12-stable/libevent-2.1.12-stable.tar.gz
-https://github.com/lz4/lz4/releases/download/v1.9.4/lz4-1.9.4.tar.gz
+https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz
 https://github.com/madler/zlib/releases/download/v1.3.1/zlib-1.3.1.tar.gz
+https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz
+https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz
+https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz
 https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2
 https://github.com/gperftools/gperftools/releases/download/gperftools-2.15/gperftools-2.15.tar.gz
 https://github.com/openssl/openssl/releases/download/OpenSSL_1_1_1w/openssl-1.1.1w.tar.gz
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f87f1ee4f9..3952380971 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -209,7 +209,7 @@ target_sources(infinity_core
         ${metrics_cppm}
 )
 
-add_dependencies(infinity_core thrift thriftnb parquet_static)
+add_dependencies(infinity_core thrift thriftnb parquet_static snappy) # build-order dependency only; snappy.a itself is listed on the targets that link it
 
 target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/parser")
@@ -236,6 +236,7 @@ target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party
 target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/eigen-3.4.0")
 target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/opencc")
 target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/arrow/src")
+target_include_directories(infinity_core PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/") # build tree, not source tree - presumably for headers snappy generates at configure time; TODO confirm
 
 if (NOT SUPPORT_FMA EQUAL 0)
     message(FATAL_ERROR "This project requires the processor support fused multiply-add (FMA) instructions.")
@@ -296,12 +297,14 @@ target_link_libraries(infinity
         oatpp.a
         parquet.a
         arrow.a
+        snappy.a
         ${JEMALLOC_STATIC_LIB}
 )
 
 target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/oatpp/src/")
 target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/") # linker search path for the snappy.a entry linked into infinity
 
 target_include_directories(infinity PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_include_directories(infinity PUBLIC "${CMAKE_SOURCE_DIR}/third_party/thrift/lib/cpp/src")
@@ -348,6 +351,7 @@ target_link_libraries(embedded_infinity_ext PRIVATE
         arrow.a
         thrift.a
         thriftnb.a
+        snappy.a
 )
 
 # WARN: python modules shall not link to static libstdc++!!!
@@ -357,6 +361,7 @@ set_target_properties(embedded_infinity_ext PROPERTIES CXX_VISIBILITY_PRESET hid
 target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/oatpp/src/")
 target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/") # linker search path for the snappy.a entry linked into embedded_infinity_ext
 nanobind_disable_stack_protector(embedded_infinity_ext)
 nanobind_opt_size(embedded_infinity_ext)
 nanobind_strip(embedded_infinity_ext)
@@ -478,11 +483,13 @@ target_link_libraries(unit_test
         arrow.a
         thrift.a
         thriftnb.a
+        snappy.a
         ${JEMALLOC_STATIC_LIB}
 )
 
 target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/lib")
 target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
+target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/") # linker search path for the snappy.a entry linked into unit_test
 
 target_sources(unit_test
         PRIVATE
diff --git a/src/executor/operator/physical_export.cppm b/src/executor/operator/physical_export.cppm
index a355c880bb..6b88dc6720 100644
--- a/src/executor/operator/physical_export.cppm
+++ b/src/executor/operator/physical_export.cppm
@@ -101,7 +101,7 @@ private:
     SharedPtr<Vector<SharedPtr<DataType>>> output_types_{};
 
     TableEntry *table_entry_{};
-    CopyFileType file_type_{CopyFileType::kCSV};
+    CopyFileType file_type_{CopyFileType::kInvalid};
     String file_path_{};
     String table_name_{};
     String schema_name_{"default_db"};
diff --git a/src/executor/operator/physical_import.cpp b/src/executor/operator/physical_import.cpp
index edf97c3037..cbbeb8b539 100644
--- a/src/executor/operator/physical_import.cpp
+++ b/src/executor/operator/physical_import.cpp
@@ -107,8 +107,8 @@ bool PhysicalImport::Execute(QueryContext *query_context, OperatorState *operato
             break;
         }
         case CopyFileType::kInvalid: {
-            String error_message = "Invalid file type";
-            UnrecoverableError(error_message);
+            Status status = Status::ImportFileFormatError("Invalid import file type");
+            RecoverableError(status);
         }
     }
     import_op_state->SetComplete();
diff --git a/src/executor/operator/physical_import.cppm b/src/executor/operator/physical_import.cppm
index dabc8eb9ba..b1c8763412 100644
--- a/src/executor/operator/physical_import.cppm
+++ b/src/executor/operator/physical_import.cppm
@@ -133,7 +133,7 @@ private:
     SharedPtr<Vector<SharedPtr<DataType>>> output_types_{};
 
     TableEntry *table_entry_{};
-    CopyFileType file_type_{CopyFileType::kCSV};
+    CopyFileType file_type_{CopyFileType::kInvalid};
     String file_path_{};
     bool header_{false};
     char delimiter_{','};
diff --git a/src/main/config.cpp b/src/main/config.cpp
index 5c279ebfb0..2261c24724 100644
--- a/src/main/config.cpp
+++ b/src/main/config.cpp
@@ -124,7 +124,7 @@ Status Config::Init(const SharedPtr<String> &config_path, DefaultConfig* default
     toml::table config_toml{};
     if (config_path.get() == nullptr || !fs.Exists(*config_path)) {
         if (config_path.get() == nullptr) {
-            fmt::print("No config file is given, use default configs.\n");
+            // No config file is given: use the default configs without printing a notice.
         } else {
             if (!fs.Exists(*config_path)) {
                 fmt::print("Config file: {} is not existent.\n", *config_path);
diff --git a/src/main/infinity.cpp b/src/main/infinity.cpp
index 2a8bc3698b..637e70715e 100644
--- a/src/main/infinity.cpp
+++ b/src/main/infinity.cpp
@@ -400,9 +400,16 @@ QueryResult Infinity::CreateTable(const String &db_name,
     ToLower(create_table_info->table_name_);
 
     create_table_info->column_defs_ = std::move(column_defs);
+    for(ColumnDef* column_def_ptr: create_table_info->column_defs_) {
+        ToLower(column_def_ptr->name_);
+    }
     create_table_info->constraints_ = std::move(constraints);
     create_table_info->conflict_type_ = create_table_options.conflict_type_;
     create_table_info->properties_ = std::move(create_table_options.properties_);
+    for(InitParameter* parameter_ptr: create_table_info->properties_) {
+        ToLower(parameter_ptr->param_name_);
+        ToLower(parameter_ptr->param_value_);
+    }
     create_statement->create_info_ = std::move(create_table_info);
     QueryResult result = query_context_ptr->QueryStatement(create_statement.get());
     return result;
@@ -431,13 +438,14 @@ QueryResult Infinity::DropTable(const String &db_name, const String &table_name,
 
 QueryResult Infinity::ListTables(const String &db_name) {
     UniquePtr<QueryContext> query_context_ptr = MakeUnique<QueryContext>(session_.get());
-    query_context_ptr->set_current_schema(db_name);
     query_context_ptr->Init(InfinityContext::instance().config(),
                             InfinityContext::instance().task_scheduler(),
                             InfinityContext::instance().storage(),
                             InfinityContext::instance().resource_manager(),
                             InfinityContext::instance().session_manager());
     UniquePtr<ShowStatement> show_statement = MakeUnique<ShowStatement>();
+    show_statement->schema_name_ = db_name;
+    ToLower(show_statement->schema_name_);
     show_statement->show_type_ = ShowStmtType::kTables;
     QueryResult result = query_context_ptr->QueryStatement(show_statement.get());
     return result;
@@ -558,6 +566,13 @@ QueryResult Infinity::CreateIndex(const String &db_name,
     create_index_info->index_name_ = index_name;
     ToLower(create_index_info->index_name_);
 
+    for(IndexInfo* index_info_ptr: *index_info_list) {
+        ToLower(index_info_ptr->column_name_);
+        for(InitParameter* init_param_ptr: *index_info_ptr->index_param_list_) {
+            ToLower(init_param_ptr->param_name_);
+            ToLower(init_param_ptr->param_value_);
+        }
+    }
     create_index_info->index_info_list_ = index_info_list;
 
     create_statement->create_info_ = create_index_info;
@@ -581,7 +596,6 @@ Infinity::DropIndex(const String &db_name, const String &table_name, const Strin
     drop_index_info->schema_name_ = db_name;
     ToLower(drop_index_info->schema_name_);
 
-
     drop_index_info->table_name_ = table_name;
     ToLower(drop_index_info->table_name_);
 
@@ -795,6 +809,9 @@ QueryResult Infinity::Insert(const String &db_name, const String &table_name, Ve
     ToLower(insert_statement->table_name_);
 
     insert_statement->columns_ = columns;
+    for(String& column_name: *insert_statement->columns_) {
+        ToLower(column_name);
+    }
     insert_statement->values_ = values;
     QueryResult result = query_context_ptr->QueryStatement(insert_statement.get());
     return result;
@@ -889,14 +906,18 @@ QueryResult Infinity::Update(const String &db_name, const String &table_name, Pa
     UniquePtr<UpdateStatement> update_statement = MakeUnique<UpdateStatement>();
 
     update_statement->schema_name_ = db_name;
     ToLower(update_statement->schema_name_);
 
     update_statement->table_name_ = table_name;
     ToLower(update_statement->table_name_);
 
     // TODO: to lower expression identifier string
     update_statement->where_expr_ = filter;
     update_statement->update_expr_array_ = update_list;
+    // Lower-case the updated column names as well, like the schema and table names above.
+    for(UpdateExpr* update_expr_ptr: *update_statement->update_expr_array_) {
+        ToLower(update_expr_ptr->column_name);
+    }
     QueryResult result = query_context_ptr->QueryStatement(update_statement.get());
     return result;
 }
diff --git a/src/main/query_options.cppm b/src/main/query_options.cppm
index 4cd06c47b3..45877c8106 100644
--- a/src/main/query_options.cppm
+++ b/src/main/query_options.cppm
@@ -55,7 +55,7 @@ export class ImportOptions {
 public:
     char delimiter_{','};
     bool header_{false};
-    CopyFileType copy_file_type_{CopyFileType::kCSV};
+    CopyFileType copy_file_type_{CopyFileType::kInvalid};
 };
 
 export class ExportOptions {
@@ -65,7 +65,7 @@ public:
     SizeT offset_{0};
     SizeT limit_{0};
     SizeT row_limit_{0};
-    CopyFileType copy_file_type_{CopyFileType::kCSV};
+    CopyFileType copy_file_type_{CopyFileType::kInvalid};
 };
 
 export class OptimizeOptions {
diff --git a/src/network/infinity_thrift_service.cpp b/src/network/infinity_thrift_service.cpp
index 4f7c3f5b7a..88a96cac7b 100644
--- a/src/network/infinity_thrift_service.cpp
+++ b/src/network/infinity_thrift_service.cpp
@@ -157,9 +157,7 @@ void InfinityThriftService::CreateDatabase(infinity_thrift_rpc::CommonResponse &
 
     auto [infinity, status] = GetInfinityBySessionID(request.session_id);
     if (status.ok()) {
-        String db_name = request.db_name;
-        ToLower(db_name);
-        auto result = infinity->CreateDatabase(db_name, create_database_opts);
+        auto result = infinity->CreateDatabase(request.db_name, create_database_opts);
         ProcessQueryResult(response, result);
     } else {
         ProcessStatus(response, status);
@@ -186,9 +184,7 @@ void InfinityThriftService::DropDatabase(infinity_thrift_rpc::CommonResponse &re
 
     auto [infinity, status] = GetInfinityBySessionID(request.session_id);
     if (status.ok()) {
-        String db_name = request.db_name;
-        ToLower(db_name);
-        auto result = infinity->DropDatabase(db_name, drop_database_opts);
+        auto result = infinity->DropDatabase(request.db_name, drop_database_opts);
         ProcessQueryResult(response, result);
     } else {
         ProcessStatus(response, status);
@@ -232,9 +228,7 @@ void InfinityThriftService::CreateTable(infinity_thrift_rpc::CommonResponse &res
     for (SizeT idx = 0; idx < properties_count; ++idx) {
         InitParameter *property = new InitParameter();
         property->param_name_ = request.create_option.properties[idx].key;
-        ToLower(property->param_name_);
         property->param_value_ = request.create_option.properties[idx].value;
-        ToLower(property->param_value_);
         create_table_opts.properties_.emplace_back(property);
     }
 
@@ -244,11 +238,7 @@ void InfinityThriftService::CreateTable(infinity_thrift_rpc::CommonResponse &res
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    auto result = infinity->CreateTable(db_name, table_name, column_defs, Vector<TableConstraint *>(), create_table_opts);
+    auto result = infinity->CreateTable(request.db_name, request.table_name, column_defs, Vector<TableConstraint *>(), create_table_opts);
     ProcessQueryResult(response, result);
 }
 
@@ -275,11 +265,7 @@ void InfinityThriftService::DropTable(infinity_thrift_rpc::CommonResponse &respo
         }
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    auto result = infinity->DropTable(db_name, table_name, drop_table_opts);
+    auto result = infinity->DropTable(request.db_name, request.table_name, drop_table_opts);
     ProcessQueryResult(response, result);
 }
 
@@ -347,11 +333,7 @@ void InfinityThriftService::Insert(infinity_thrift_rpc::CommonResponse &response
         values->emplace_back(value_list);
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    auto result = infinity->Insert(db_name, table_name, columns, values);
+    auto result = infinity->Insert(request.db_name, request.table_name, columns, values);
     ProcessQueryResult(response, result);
 }
 
@@ -393,11 +375,7 @@ void InfinityThriftService::Import(infinity_thrift_rpc::CommonResponse &response
     }
     import_options.delimiter_ = delimiter_string[0];
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Import(db_name, table_name, request.file_name.c_str(), import_options);
+    const QueryResult result = infinity->Import(request.db_name, request.table_name, request.file_name.c_str(), import_options);
     ProcessQueryResult(response, result);
 }
 
@@ -448,11 +426,7 @@ void InfinityThriftService::Export(infinity_thrift_rpc::CommonResponse &response
     export_options.limit_ = request.export_option.limit;
     export_options.row_limit_ = request.export_option.row_limit;
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Export(db_name, table_name, export_columns, request.file_name.c_str(), export_options);
+    const QueryResult result = infinity->Export(request.db_name, request.table_name, export_columns, request.file_name.c_str(), export_options);
     ProcessQueryResult(response, result);
 }
 
@@ -594,11 +568,7 @@ void InfinityThriftService::Select(infinity_thrift_rpc::SelectResponse &response
     //
     // auto start3 = std::chrono::steady_clock::now();
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Search(db_name, table_name, search_expr, filter, output_columns);
+    const QueryResult result = infinity->Search(request.db_name, request.table_name, search_expr, filter, output_columns);
 
     // auto end3 = std::chrono::steady_clock::now();
     //
@@ -765,12 +735,7 @@ void InfinityThriftService::Explain(infinity_thrift_rpc::SelectResponse &respons
 
     // Explain type
     auto explain_type = GetExplainTypeFromProto(request.explain_type);
-
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Explain(db_name, table_name, explain_type, search_expr, filter, output_columns);
+    const QueryResult result = infinity->Explain(request.db_name, request.table_name, explain_type, search_expr, filter, output_columns);
 
     if (result.IsOk()) {
         auto &columns = response.column_fields;
@@ -798,11 +763,7 @@ void InfinityThriftService::Delete(infinity_thrift_rpc::CommonResponse &response
         }
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Delete(db_name, table_name, filter);
+    const QueryResult result = infinity->Delete(request.db_name, request.table_name, filter);
     ProcessQueryResult(response, result);
 };
 
@@ -852,11 +813,7 @@ void InfinityThriftService::Update(infinity_thrift_rpc::CommonResponse &response
         }
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Update(db_name, table_name, filter, update_expr_array);
+    const QueryResult result = infinity->Update(request.db_name, request.table_name, filter, update_expr_array);
     ProcessQueryResult(response, result);
 }
 
@@ -869,11 +826,7 @@ void InfinityThriftService::Optimize(infinity_thrift_rpc::CommonResponse& respon
 
     auto optimize_options = GetParsedOptimizeOptionFromProto(request.optimize_options);
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->Optimize(db_name, table_name, std::move(optimize_options));
+    const QueryResult result = infinity->Optimize(request.db_name, request.table_name, std::move(optimize_options));
     ProcessQueryResult(response, result);
 }
 
@@ -907,9 +860,7 @@ void InfinityThriftService::ListTable(infinity_thrift_rpc::ListTableResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    auto result = infinity->ListTables(db_name);
+    auto result = infinity->ListTables(request.db_name);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -931,9 +882,7 @@ void InfinityThriftService::ShowDatabase(infinity_thrift_rpc::ShowDatabaseRespon
         ProcessStatus(response, infinity_status);
         return;
     }
-    String db_name = request.db_name;
-    ToLower(db_name);
-    const QueryResult result = infinity->ShowDatabase(db_name);
+    const QueryResult result = infinity->ShowDatabase(request.db_name);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -970,11 +919,7 @@ void InfinityThriftService::ShowTable(infinity_thrift_rpc::ShowTableResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowTable(db_name, table_name);
+    const QueryResult result = infinity->ShowTable(request.db_name, request.table_name);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -1026,11 +971,7 @@ void InfinityThriftService::ShowColumns(infinity_thrift_rpc::SelectResponse &res
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowColumns(db_name, table_name);
+    const QueryResult result = infinity->ShowColumns(request.db_name, request.table_name);
     if (result.IsOk()) {
         auto &columns = response.column_fields;
         columns.resize(result.result_table_->ColumnCount());
@@ -1047,9 +988,7 @@ void InfinityThriftService::ShowTables(infinity_thrift_rpc::SelectResponse &resp
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    const QueryResult result = infinity->ShowTables(db_name);
+    const QueryResult result = infinity->ShowTables(request.db_name);
     if (result.IsOk()) {
         auto &columns = response.column_fields;
         columns.resize(result.result_table_->ColumnCount());
@@ -1066,9 +1005,7 @@ void InfinityThriftService::GetDatabase(infinity_thrift_rpc::CommonResponse &res
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    QueryResult result = infinity->GetDatabase(db_name);
+    QueryResult result = infinity->GetDatabase(request.db_name);
     ProcessQueryResult(response, result);
 }
 
@@ -1079,11 +1016,7 @@ void InfinityThriftService::GetTable(infinity_thrift_rpc::CommonResponse &respon
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    QueryResult result = infinity->GetTable(db_name, table_name);
+    QueryResult result = infinity->GetTable(request.db_name, request.table_name);
     ProcessQueryResult(response, result);
 }
 
@@ -1136,17 +1069,13 @@ void InfinityThriftService::CreateIndex(infinity_thrift_rpc::CommonResponse &res
             return;
         }
 
-        String column_name = index_info.column_name;
-        ToLower(column_name);
-        index_info_to_use->column_name_ = column_name;
+        index_info_to_use->column_name_ = index_info.column_name;
 
         auto *index_param_list = new Vector<InitParameter *>();
         for (auto &index_param : index_info.index_param_list) {
             auto init_parameter = new InitParameter();
             init_parameter->param_name_ = index_param.param_name;
-            ToLower(init_parameter->param_name_);
             init_parameter->param_value_ = index_param.param_value;
-            ToLower(init_parameter->param_value_);
             index_param_list->emplace_back(init_parameter);
         }
         index_info_to_use->index_param_list_ = index_param_list;
@@ -1154,13 +1083,7 @@ void InfinityThriftService::CreateIndex(infinity_thrift_rpc::CommonResponse &res
         index_info_list_to_use->emplace_back(index_info_to_use);
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    String index_name = request.index_name;
-    ToLower(index_name);
-    QueryResult result = infinity->CreateIndex(db_name, table_name, index_name, index_info_list_to_use, create_index_opts);
+    QueryResult result = infinity->CreateIndex(request.db_name, request.table_name, request.index_name, index_info_list_to_use, create_index_opts);
     ProcessQueryResult(response, result);
 }
 
@@ -1187,13 +1110,7 @@ void InfinityThriftService::DropIndex(infinity_thrift_rpc::CommonResponse &respo
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    String index_name = request.index_name;
-    ToLower(index_name);
-    QueryResult result = infinity->DropIndex(db_name, table_name, index_name, drop_index_opts);
+    QueryResult result = infinity->DropIndex(request.db_name, request.table_name, request.index_name, drop_index_opts);
     ProcessQueryResult(response, result);
 }
 
@@ -1204,11 +1121,7 @@ void InfinityThriftService::ListIndex(infinity_thrift_rpc::ListIndexResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    auto result = infinity->ListTableIndexes(db_name, table_name);
+    auto result = infinity->ListTableIndexes(request.db_name, request.table_name);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -1230,13 +1143,7 @@ void InfinityThriftService::ShowIndex(infinity_thrift_rpc::ShowIndexResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    String index_name = request.index_name;
-    ToLower(index_name);
-    auto result = infinity->ShowIndex(db_name, table_name, index_name);
+    auto result = infinity->ShowIndex(request.db_name, request.table_name, request.index_name);
 
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
@@ -1309,11 +1216,7 @@ void InfinityThriftService::ShowSegments(infinity_thrift_rpc::SelectResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowSegments(db_name, table_name);
+    const QueryResult result = infinity->ShowSegments(request.db_name, request.table_name);
     if (result.IsOk()) {
         auto &columns = response.column_fields;
         columns.resize(result.result_table_->ColumnCount());
@@ -1330,11 +1233,7 @@ void InfinityThriftService::ShowSegment(infinity_thrift_rpc::ShowSegmentResponse
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowSegment(db_name, table_name, request.segment_id);
+    const QueryResult result = infinity->ShowSegment(request.db_name, request.table_name, request.segment_id);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -1401,11 +1300,7 @@ void InfinityThriftService::ShowBlocks(infinity_thrift_rpc::SelectResponse &resp
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowBlocks(db_name, table_name, request.segment_id);
+    const QueryResult result = infinity->ShowBlocks(request.db_name, request.table_name, request.segment_id);
     if (result.IsOk()) {
         auto &columns = response.column_fields;
         columns.resize(result.result_table_->ColumnCount());
@@ -1422,11 +1317,7 @@ void InfinityThriftService::ShowBlock(infinity_thrift_rpc::ShowBlockResponse &re
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    const QueryResult result = infinity->ShowBlock(db_name, table_name, request.segment_id, request.block_id);
+    const QueryResult result = infinity->ShowBlock(request.db_name, request.table_name, request.segment_id, request.block_id);
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
         auto row_count = data_block->row_count();
@@ -1484,11 +1375,7 @@ void InfinityThriftService::ShowBlockColumn(infinity_thrift_rpc::ShowBlockColumn
         return;
     }
 
-    String db_name = request.db_name;
-    ToLower(db_name);
-    String table_name = request.table_name;
-    ToLower(table_name);
-    auto result = infinity->ShowBlockColumn(db_name, table_name, request.segment_id, request.block_id, request.column_id);
+    auto result = infinity->ShowBlockColumn(request.db_name, request.table_name, request.segment_id, request.block_id, request.column_id);
 
     if (result.IsOk()) {
         SharedPtr<DataBlock> data_block = result.result_table_->GetDataBlockById(0);
@@ -1577,8 +1464,7 @@ Tuple<ColumnDef *, Status> InfinityThriftService::GetColumnDefFromProto(const in
         return {nullptr, status};
     }
 
-    const auto &column_def_name = column_def.name;
-    auto col_def = new ColumnDef(column_def.id, column_def_data_type_ptr, column_def_name, constraints, const_expr);
+    auto col_def = new ColumnDef(column_def.id, column_def_data_type_ptr, column_def.name, constraints, const_expr);
     return {col_def, Status::OK()};
 }
 
@@ -1819,7 +1705,6 @@ ColumnExpr *InfinityThriftService::GetColumnExprFromProto(const infinity_thrift_
     auto parsed_expr = new ColumnExpr();
 
     for (auto column_name : column_expr.column_name) {
-        ToLower(column_name);
         parsed_expr->names_.emplace_back(column_name);
     }
 
@@ -1829,9 +1714,7 @@ ColumnExpr *InfinityThriftService::GetColumnExprFromProto(const infinity_thrift_
 
 FunctionExpr *InfinityThriftService::GetFunctionExprFromProto(Status &status, const infinity_thrift_rpc::FunctionExpr &function_expr) {
     auto *parsed_expr = new FunctionExpr();
-    String function_name = function_expr.function_name;
-    ToLower(function_name);
-    parsed_expr->func_name_ = function_name;
+    parsed_expr->func_name_ = function_expr.function_name;
     Vector<ParsedExpr *> *arguments;
     arguments = new Vector<ParsedExpr *>();
     arguments->reserve(function_expr.arguments.size());
@@ -1903,9 +1786,7 @@ KnnExpr *InfinityThriftService::GetKnnExprFromProto(Status &status, const infini
     for (auto &param : expr.opt_params) {
         auto init_parameter = new InitParameter();
         init_parameter->param_name_ = param.param_name;
-        ToLower(init_parameter->param_name_);
         init_parameter->param_value_ = param.param_value;
-        ToLower(init_parameter->param_value_);
         knn_expr->opt_params_->emplace_back(init_parameter);
     }
     status = Status::OK();
@@ -1931,9 +1812,7 @@ MatchSparseExpr *InfinityThriftService::GetMatchSparseExprFromProto(Status &stat
     for (auto &param : expr.opt_params) {
         auto *init_parameter = new InitParameter();
         init_parameter->param_name_ = param.param_name;
-        ToLower(init_parameter->param_name_);
         init_parameter->param_value_ = param.param_value;
-        ToLower(init_parameter->param_value_);
         opt_params_ptr->emplace_back(init_parameter);
     }
     match_sparse_expr->SetOptParams(expr.topn, opt_params_ptr);
@@ -1957,10 +1836,7 @@ MatchTensorExpr *InfinityThriftService::GetMatchTensorExprFromProto(Status &stat
     const auto copy_bytes = EmbeddingT::EmbeddingSize(match_tensor_expr->embedding_data_type_, match_tensor_expr->dimension_);
     match_tensor_expr->query_tensor_data_ptr_ = MakeUniqueForOverwrite<char[]>(copy_bytes);
     std::memcpy(match_tensor_expr->query_tensor_data_ptr_.get(), embedding_data_ptr, copy_bytes);
-
-    String options_text = expr.extra_options;
-    ToLower(options_text);
-    match_tensor_expr->options_text_ = options_text;
+    match_tensor_expr->options_text_ = expr.extra_options;
     status = Status::OK();
     return match_tensor_expr.release();
 }
@@ -1969,9 +1845,6 @@ MatchExpr *InfinityThriftService::GetMatchExprFromProto(const infinity_thrift_rp
     auto match_expr = new MatchExpr();
     match_expr->fields_ = expr.fields;
     match_expr->matching_text_ = expr.matching_text;
-
-    String options_text = expr.options_text;
-    ToLower(options_text);
     match_expr->options_text_ = expr.options_text;
     return match_expr;
 }
@@ -1998,15 +1871,8 @@ ParsedExpr *InfinityThriftService::GetGenericMatchExprFromProto(Status &status,
 
 FusionExpr *InfinityThriftService::GetFusionExprFromProto(const infinity_thrift_rpc::FusionExpr &expr) {
     auto fusion_expr = MakeUnique<FusionExpr>();
-
-    String fusion_method = expr.method;
-    ToLower(fusion_method);
-    fusion_expr->method_ = fusion_method;
-
-    String options_text = expr.options_text;
-    ToLower(options_text);
-    fusion_expr->SetOptions(options_text);
-
+    fusion_expr->method_ = expr.method;
+    fusion_expr->SetOptions(expr.options_text);
     if (expr.__isset.optional_match_tensor_expr) {
         Status status;
         const auto result_ptr = GetMatchTensorExprFromProto(status, expr.optional_match_tensor_expr);
@@ -2106,7 +1972,6 @@ Tuple<UpdateExpr *, Status> InfinityThriftService::GetUpdateExprFromProto(const
     Status status;
     auto up_expr = new UpdateExpr();
     up_expr->column_name = update_expr.column_name;
-    ToLower(up_expr->column_name);
     up_expr->value = GetParsedExprFromProto(status, update_expr.value);
     return {up_expr, status};
 }
@@ -2114,13 +1979,10 @@ Tuple<UpdateExpr *, Status> InfinityThriftService::GetUpdateExprFromProto(const
 OptimizeOptions InfinityThriftService::GetParsedOptimizeOptionFromProto(const infinity_thrift_rpc::OptimizeOptions &options) {
     OptimizeOptions opt;
     opt.index_name_ = options.index_name;
-    ToLower(opt.index_name_);
     for (const auto &param : options.opt_params) {
         auto *init_param = new InitParameter();
         init_param->param_name_ = param.param_name;
-        ToLower(init_param->param_name_);
         init_param->param_value_ = param.param_value;
-        ToLower(init_param->param_value_);
         opt.opt_params_.emplace_back(init_param);
     }
     return opt;
diff --git a/src/parser/statement/copy_statement.h b/src/parser/statement/copy_statement.h
index 62c1f79429..e2102a415b 100644
--- a/src/parser/statement/copy_statement.h
+++ b/src/parser/statement/copy_statement.h
@@ -32,7 +32,7 @@ enum class CopyOptionType {
 struct CopyOption {
     CopyOptionType option_type_{CopyOptionType::kFormat};
     bool header_{false};
-    CopyFileType file_type_{CopyFileType::kCSV};
+    CopyFileType file_type_{CopyFileType::kInvalid};
     char delimiter_{','};
     size_t offset_{0};
     size_t limit_{0};
@@ -52,7 +52,7 @@ class CopyStatement final : public BaseStatement {
     std::string table_name_{};
     std::string schema_name_{};
     bool header_{false};
-    CopyFileType copy_file_type_{CopyFileType::kCSV};
+    CopyFileType copy_file_type_{CopyFileType::kInvalid};
     char delimiter_{','};
     size_t offset_{0};
     size_t limit_{0};
diff --git a/src/planner/node/logical_export.cppm b/src/planner/node/logical_export.cppm
index 61c1d580e2..9901481bf2 100644
--- a/src/planner/node/logical_export.cppm
+++ b/src/planner/node/logical_export.cppm
@@ -78,7 +78,7 @@ private:
     String file_path_{};
     bool header_{false};
     char delimiter_{','};
-    CopyFileType file_type_{CopyFileType::kCSV};
+    CopyFileType file_type_{CopyFileType::kInvalid};
     SizeT offset_{};
     SizeT limit_{};
     SizeT row_limit_{};
diff --git a/src/planner/node/logical_import.cppm b/src/planner/node/logical_import.cppm
index 3889404543..da0347e08d 100644
--- a/src/planner/node/logical_import.cppm
+++ b/src/planner/node/logical_import.cppm
@@ -64,7 +64,7 @@ public:
 
 private:
     TableEntry *table_entry_{};
-    CopyFileType file_type_{CopyFileType::kCSV};
+    CopyFileType file_type_{CopyFileType::kInvalid};
     String file_path_{};
     bool header_{false};
     char delimiter_{','};
diff --git a/test/sql/basic.slt b/test/sql/basic.slt
index a6630c0073..d9d6a4a704 100644
--- a/test/sql/basic.slt
+++ b/test/sql/basic.slt
@@ -25,7 +25,7 @@ CREATE TABLE NATION (N_NATIONKEY  INT, N_REGIONKEY INT );
 
 # copy data from csv file
 query I
-COPY NATION FROM '/var/infinity/test_data/nation.csv' WITH ( DELIMITER ',' );
+COPY NATION FROM '/var/infinity/test_data/nation.csv' WITH ( DELIMITER ',', FORMAT CSV );
 ----
 
 query I
diff --git a/test/sql/ddl/index/test_secondary_index.slt b/test/sql/ddl/index/test_secondary_index.slt
index c74ea4010f..2ce4c7b96b 100644
--- a/test/sql/ddl/index/test_secondary_index.slt
+++ b/test/sql/ddl/index/test_secondary_index.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_secondary_index (c1 integer, c2 boolean);
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 CREATE INDEX idx_c1 ON test_secondary_index (c1);
diff --git a/test/sql/ddl/type/test_sparse_default.slt b/test/sql/ddl/type/test_sparse_default.slt
index c54e8433d7..65ca6d11f4 100644
--- a/test/sql/ddl/type/test_sparse_default.slt
+++ b/test/sql/ddl/type/test_sparse_default.slt
@@ -12,7 +12,7 @@ col2 Sparse(float,int16,30000) (empty) (empty)
 col3 Sparse(bit,int16,30000) (empty) (empty)
 
 statement ok
-COPY test_sparse_default FROM '/var/infinity/test_data/sparse_default.csv' WITH ( DELIMITER ',' );
+COPY test_sparse_default FROM '/var/infinity/test_data/sparse_default.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query I
 SELECT * FROM test_sparse_default;
@@ -37,7 +37,7 @@ col2 Sparse(float,int16,30000) (empty) 0.000000: 0
 col3 Sparse(bit,int16,30000) (empty) 0
 
 statement ok
-COPY test_sparse_default FROM '/var/infinity/test_data/sparse_default.csv' WITH ( DELIMITER ',' );
+COPY test_sparse_default FROM '/var/infinity/test_data/sparse_default.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query I
 SELECT * FROM test_sparse_default;
diff --git a/test/sql/dml/cleanup/test_cleanup.slt b/test/sql/dml/cleanup/test_cleanup.slt
index ba4fb34070..bf28575c7a 100644
--- a/test/sql/dml/cleanup/test_cleanup.slt
+++ b/test/sql/dml/cleanup/test_cleanup.slt
@@ -11,10 +11,10 @@ statement ok
 CREATE TABLE test_secondary_index (c1 integer, c2 boolean);
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 CREATE INDEX idx_c1 ON test_secondary_index (c1);
@@ -46,10 +46,10 @@ statement ok
 CREATE TABLE test_secondary_index (c1 integer, c2 boolean);
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 CREATE INDEX idx_c1 ON test_secondary_index (c1);
@@ -81,10 +81,10 @@ statement ok
 CREATE TABLE test_secondary_index (c1 integer, c2 boolean);
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
-COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',' );
+COPY test_secondary_index FROM '/var/infinity/test_data/test_big_top.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 CREATE INDEX idx_c1 ON test_secondary_index (c1);
diff --git a/test/sql/dml/compact/test_compact.slt b/test/sql/dml/compact/test_compact.slt
index ce3a5f6e34..2d22ce8e74 100644
--- a/test/sql/dml/compact/test_compact.slt
+++ b/test/sql/dml/compact/test_compact.slt
@@ -5,15 +5,15 @@ statement ok
 CREATE TABLE test_compact (c1 INT, c2 EMBEDDING(int, 3));
 
 query I
-COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 
diff --git a/test/sql/dml/compact/test_compact_import_insert.slt b/test/sql/dml/compact/test_compact_import_insert.slt
index 7d7dcd42a9..a55cdce938 100644
--- a/test/sql/dml/compact/test_compact_import_insert.slt
+++ b/test/sql/dml/compact/test_compact_import_insert.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_compact_import_delete (c1 INT, c2 EMBEDDING(int, 3));
 
 query I
-COPY test_compact_import_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact_import_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
@@ -13,7 +13,7 @@ INSERT INTO test_compact_import_delete VALUES (13, [14,15,16]), (17, [18,19,20])
 ----
 
 query I
-COPY test_compact_import_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact_import_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
diff --git a/test/sql/dml/compact/test_compact_many_index.slt b/test/sql/dml/compact/test_compact_many_index.slt
index 8e441a83c0..964bbeae0c 100644
--- a/test/sql/dml/compact/test_compact_many_index.slt
+++ b/test/sql/dml/compact/test_compact_many_index.slt
@@ -5,10 +5,10 @@ statement ok
 CREATE TABLE tbl1(c1 EMBEDDING(FLOAT,2), c2 EMBEDDING(FLOAT, 4), c3 EMBEDDING(FLOAT, 4));
 
 statement ok
-COPY tbl1 FROM '/var/infinity/test_data/embedding_2.csv' WITH (DELIMITER ',');
+COPY tbl1 FROM '/var/infinity/test_data/embedding_2.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY tbl1 FROM '/var/infinity/test_data/embedding_2.csv' WITH (DELIMITER ',');
+COPY tbl1 FROM '/var/infinity/test_data/embedding_2.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
 CREATE INDEX idx1 ON tbl1 (c1) USING Hnsw WITH (M = 16, ef_construction = 200, metric = l2);
@@ -46,10 +46,10 @@ statement ok
 CREATE TABLE tbl2(c1 INT, c2 EMBEDDING(FLOAT, 4), c3 EMBEDDING(FLOAT, 4));
 
 statement ok
-COPY tbl2 FROM '/var/infinity/test_data/embedding_3.csv' WITH (DELIMITER ',');
+COPY tbl2 FROM '/var/infinity/test_data/embedding_3.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY tbl2 FROM '/var/infinity/test_data/embedding_3.csv' WITH (DELIMITER ',');
+COPY tbl2 FROM '/var/infinity/test_data/embedding_3.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
 CREATE INDEX idx1 ON tbl2 (c1);
diff --git a/test/sql/dml/compact/test_compact_with_delete.slt b/test/sql/dml/compact/test_compact_with_delete.slt
index 9ef11a165b..450635255a 100644
--- a/test/sql/dml/compact/test_compact_with_delete.slt
+++ b/test/sql/dml/compact/test_compact_with_delete.slt
@@ -5,15 +5,15 @@ statement ok
 CREATE TABLE test_compact_with_delete (c1 INT, c2 EMBEDDING(int, 3));
 
 query I
-COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',');
+COPY test_compact_with_delete FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 statement ok
diff --git a/test/sql/dml/compact/test_compact_with_index.slt b/test/sql/dml/compact/test_compact_with_index.slt
index 4419d2a6a4..aa20c3dbe6 100644
--- a/test/sql/dml/compact/test_compact_with_index.slt
+++ b/test/sql/dml/compact/test_compact_with_index.slt
@@ -5,11 +5,11 @@ statement ok
 CREATE TABLE test_compact_with_index (c1 INT, c2 EMBEDDING(FLOAT, 4));
 
 query I
-COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 statement ok
@@ -38,7 +38,7 @@ SELECT c1 FROM test_compact_with_index SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2,
 6
 
 query I
-COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_compact_with_index FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
diff --git a/test/sql/dml/delete/test_delete_with_annivfflat.slt b/test/sql/dml/delete/test_delete_with_annivfflat.slt
index ec717b191e..f9c9ee6335 100644
--- a/test/sql/dml/delete/test_delete_with_annivfflat.slt
+++ b/test/sql/dml/delete/test_delete_with_annivfflat.slt
@@ -9,11 +9,11 @@ CREATE TABLE test_delete_with_annivfflat (c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 6, dist: 0.06
 # 8, dist: 0.02
 query I
-COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 statement ok
@@ -47,7 +47,7 @@ SELECT c1 FROM test_delete_with_annivfflat SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0
 2
 
 query I
-COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_annivfflat FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
diff --git a/test/sql/dml/delete/test_delete_with_hnsw.slt b/test/sql/dml/delete/test_delete_with_hnsw.slt
index 4f39439d2b..531a029245 100644
--- a/test/sql/dml/delete/test_delete_with_hnsw.slt
+++ b/test/sql/dml/delete/test_delete_with_hnsw.slt
@@ -9,11 +9,11 @@ CREATE TABLE test_delete_with_hnsw (c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 6, dist: 0.06
 # 8, dist: 0.02
 query I
-COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
-COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 statement ok
@@ -47,7 +47,7 @@ SELECT c1 FROM test_delete_with_hnsw SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.
 2
 
 query I
-COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_delete_with_hnsw FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 ----
 
 query I
diff --git a/test/sql/dml/import/test_embedding.slt b/test/sql/dml/import/test_embedding.slt
index ab35614150..38fce6c2bc 100644
--- a/test/sql/dml/import/test_embedding.slt
+++ b/test/sql/dml/import/test_embedding.slt
@@ -12,7 +12,7 @@ CREATE TABLE test_embedding_type ( c1 int, c2 embedding(int,3));
 
 # copy data from tbl file
 query I
-COPY test_embedding_type FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',' );
+COPY test_embedding_type FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',', FORMAT CSV );
 ----
 
 query II
diff --git a/test/sql/dml/import/test_import_default.slt b/test/sql/dml/import/test_import_default.slt
index d6de2578f6..93a7fc9012 100644
--- a/test/sql/dml/import/test_import_default.slt
+++ b/test/sql/dml/import/test_import_default.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE test_import_default (c1 integer default 1, c2 integer default 4, c3 embedding(float, 3) default [1,2,3], c4 TensorArray(float, 3) default [[[1,2,3],[5,7,8]],[[9,9,9]]], c5 embedding(int, 3) default [1.3, 4.1, 33.7]);
 
 statement ok
-COPY test_import_default FROM '/var/infinity/test_data/pysdk_test_import_default.csv' WITH ( DELIMITER ',' );
+COPY test_import_default FROM '/var/infinity/test_data/pysdk_test_import_default.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query I
 SELECT * FROM test_import_default;
diff --git a/test/sql/dml/import/test_import_tensor.slt b/test/sql/dml/import/test_import_tensor.slt
index aa1b1e899d..bcab61d50a 100644
--- a/test/sql/dml/import/test_import_tensor.slt
+++ b/test/sql/dml/import/test_import_tensor.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_tensor_type ( c1 int, c2 tensor(float,4));
 
 statement ok
-COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim4.csv' WITH ( DELIMITER ',' );
+COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim4.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query II
 SELECT c1, c2 FROM test_tensor_type;
@@ -21,7 +21,7 @@ SELECT count(*) FROM test_tensor_type;
 4
 
 statement error
-COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim4_invalid.csv' WITH ( DELIMITER ',' );
+COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim4_invalid.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 # Clean up
 statement ok
@@ -31,7 +31,7 @@ statement ok
 CREATE TABLE test_tensor_type ( c1 int, c2 tensor(bit,8));
 
 statement ok
-COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim8.csv' WITH ( DELIMITER ',' );
+COPY test_tensor_type FROM '/var/infinity/test_data/tensor_float_dim8.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query II
 SELECT c1, c2 FROM test_tensor_type;
diff --git a/test/sql/dml/import/test_import_tensor_array.slt b/test/sql/dml/import/test_import_tensor_array.slt
index a644de23b6..c5afa15442 100644
--- a/test/sql/dml/import/test_import_tensor_array.slt
+++ b/test/sql/dml/import/test_import_tensor_array.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_import_tensor_array (c1 int, c2 TensorArray(float,4));
 
 statement ok
-COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim4.csv' WITH ( DELIMITER ',' );
+COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim4.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query II
 SELECT c1, c2 FROM test_import_tensor_array;
@@ -21,7 +21,7 @@ SELECT count(*) FROM test_import_tensor_array;
 4
 
 statement error
-COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim4_invalid.csv' WITH ( DELIMITER ',' );
+COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim4_invalid.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 # Clean up
 statement ok
@@ -31,7 +31,7 @@ statement ok
 CREATE TABLE test_import_tensor_array (c1 int, c2 tensorarray(bit,8));
 
 statement ok
-COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim8.csv' WITH ( DELIMITER ',' );
+COPY test_import_tensor_array FROM '/var/infinity/test_data/tensor_array_float_dim8.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query II
 SELECT c1, c2 FROM test_import_tensor_array;
diff --git a/test/sql/dml/import/test_import_time.slt b/test/sql/dml/import/test_import_time.slt
index daa5b47e5a..49ef0c1711 100644
--- a/test/sql/dml/import/test_import_time.slt
+++ b/test/sql/dml/import/test_import_time.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE sqllogic_d (d date, t time, dt datetime, ts timestamp);
 
 statement ok
-COPY sqllogic_d FROM '/var/infinity/test_data/test_import_time.csv' WITH ( DELIMITER ',' );
+COPY sqllogic_d FROM '/var/infinity/test_data/test_import_time.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query I
 SELECT * FROM sqllogic_d;
diff --git a/test/sql/dml/import/test_varchar.slt b/test/sql/dml/import/test_varchar.slt
index 3b3eae1574..bf5648198b 100644
--- a/test/sql/dml/import/test_varchar.slt
+++ b/test/sql/dml/import/test_varchar.slt
@@ -12,7 +12,7 @@ CREATE TABLE test_varchar_type (c1 int, c2 varchar);
 
 # copy data from tbl file
 query I
-COPY test_varchar_type FROM '/var/infinity/test_data/varchar.csv' WITH ( DELIMITER ',' );
+COPY test_varchar_type FROM '/var/infinity/test_data/varchar.csv' WITH ( DELIMITER ',', FORMAT CSV );
 ----
 
 query II
diff --git a/test/sql/dql/aggregate/test_agg_load_meta.slt b/test/sql/dql/aggregate/test_agg_load_meta.slt
index a088d2faae..b2f9aeef02 100644
--- a/test/sql/dql/aggregate/test_agg_load_meta.slt
+++ b/test/sql/dql/aggregate/test_agg_load_meta.slt
@@ -29,7 +29,7 @@ SELECT COUNT(*) FROM test_agg_load_meta WHERE c2 = 5;
 0
 
 statement ok
-COPY test_agg_load_meta FROM '/var/infinity/test_data/basic.csv' WITH ( DELIMITER ',' );
+COPY test_agg_load_meta FROM '/var/infinity/test_data/basic.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 query I
 EXPLAIN SELECT COUNT(*) FROM test_agg_load_meta WHERE c2 = 5;
@@ -54,7 +54,7 @@ SELECT COUNT(*) FROM test_agg_load_meta WHERE c2 = 5;
 2
 
 statement ok
-COPY test_agg_load_meta FROM '/var/infinity/test_data/basic.csv' WITH ( DELIMITER ',' );
+COPY test_agg_load_meta FROM '/var/infinity/test_data/basic.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 EXPLAIN SELECT COUNT(*) FROM test_agg_load_meta WHERE c2 = 5;
diff --git a/test/sql/dql/fulltext/fulltext.slt b/test/sql/dql/fulltext/fulltext.slt
index 5593e60dd1..09ea3deec1 100644
--- a/test/sql/dql/fulltext/fulltext.slt
+++ b/test/sql/dql/fulltext/fulltext.slt
@@ -10,7 +10,7 @@ CREATE TABLE sqllogic_test_enwiki(doctitle varchar, docdate varchar, body varcha
 
 # copy data from csv file
 query I
-COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t' );
+COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t', FORMAT CSV );
 ----
 
 statement error
@@ -42,7 +42,7 @@ Anarchism 30-APR-2012 03:25:17.000 6 20.881144
 
 # copy data from csv file
 query I
-COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t' );
+COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t', FORMAT CSV );
 ----
 
 query TTI rowsort
@@ -53,7 +53,7 @@ Anarchism 30-APR-2012 03:25:17.000 4294967296 22.533094
 
 # copy data from csv file
 query I
-COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t' );
+COPY sqllogic_test_enwiki FROM '/var/infinity/test_data/enwiki_99.csv' WITH ( DELIMITER '\t', FORMAT CSV );
 ----
 
 query TTI rowsort
diff --git a/test/sql/dql/fulltext/fulltext_delete.slt b/test/sql/dql/fulltext/fulltext_delete.slt
index 46f09b53a9..feb1bbd1d4 100644
--- a/test/sql/dql/fulltext/fulltext_delete.slt
+++ b/test/sql/dql/fulltext/fulltext_delete.slt
@@ -7,7 +7,7 @@ CREATE TABLE ft_delete(num int, doc varchar);
 
 # copy data from csv file
 statement ok
-COPY ft_delete FROM '/var/infinity/test_data/fulltext_delete.csv' WITH ( DELIMITER '\t' );
+COPY ft_delete FROM '/var/infinity/test_data/fulltext_delete.csv' WITH ( DELIMITER '\t', FORMAT CSV );
 
 statement ok
 CREATE INDEX ft_index ON ft_delete(doc) USING FULLTEXT;
diff --git a/test/sql/dql/fusion.slt b/test/sql/dql/fusion.slt
index 0f6c52649e..52af3e7309 100644
--- a/test/sql/dql/fusion.slt
+++ b/test/sql/dql/fusion.slt
@@ -11,7 +11,7 @@ CREATE TABLE enwiki_embedding(doctitle varchar, docdate varchar, body varchar, n
 
 # copy data from csv file
 query I
-COPY enwiki_embedding FROM '/var/infinity/test_data/enwiki_embedding_9999.csv' WITH ( DELIMITER '\t' );
+COPY enwiki_embedding FROM '/var/infinity/test_data/enwiki_embedding_9999.csv' WITH ( DELIMITER '\t', FORMAT CSV );
 ----
 
 statement ok
diff --git a/test/sql/dql/index_scan/index_scan_delete.slt b/test/sql/dql/index_scan/index_scan_delete.slt
index 05cf370b4d..84c897df95 100644
--- a/test/sql/dql/index_scan/index_scan_delete.slt
+++ b/test/sql/dql/index_scan/index_scan_delete.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_index_scan_delete (c1 integer, mod_256_min_128 tinyint, mod_7 tinyint);
 
 statement ok
-COPY test_index_scan_delete FROM '/var/infinity/test_data/test_big_index_scan.csv' WITH ( DELIMITER ',' );
+COPY test_index_scan_delete FROM '/var/infinity/test_data/test_big_index_scan.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
 DELETE FROM test_index_scan_delete WHERE mod_7 = 1;
diff --git a/test/sql/dql/index_scan/index_scan_explain.slt b/test/sql/dql/index_scan/index_scan_explain.slt
index 315096e539..4414027241 100644
--- a/test/sql/dql/index_scan/index_scan_explain.slt
+++ b/test/sql/dql/index_scan/index_scan_explain.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_explain_index_scan (c1 integer, mod_256_min_128 tinyint, mod_7 tinyint);
 
 statement ok
-COPY test_explain_index_scan FROM '/var/infinity/test_data/test_big_index_scan.csv' WITH ( DELIMITER ',' );
+COPY test_explain_index_scan FROM '/var/infinity/test_data/test_big_index_scan.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # create index on c1
 statement ok
diff --git a/test/sql/dql/knn/embedding/test_knn_annivfflat_ip.slt b/test/sql/dql/knn/embedding/test_knn_annivfflat_ip.slt
index b97d0f1aa7..82792f9b60 100644
--- a/test/sql/dql/knn/embedding/test_knn_annivfflat_ip.slt
+++ b/test/sql/dql/knn/embedding/test_knn_annivfflat_ip.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_annivfflat_ip(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_annivfflat_ip SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -47,7 +47,7 @@ SELECT c1 FROM test_knn_annivfflat_ip SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0
 
 # copy to create another new block
 statement ok
-COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_annivfflat_ip_filter.slt b/test/sql/dql/knn/embedding/test_knn_annivfflat_ip_filter.slt
index 9419db68ac..7ec95f5ba0 100644
--- a/test/sql/dql/knn/embedding/test_knn_annivfflat_ip_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_annivfflat_ip_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_annivfflat_ip_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_annivfflat_ip_filter SEARCH MATCH VECTOR  (c2, [0.3, 0.3
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -64,7 +64,7 @@ SELECT c1 FROM test_knn_annivfflat_ip_filter SEARCH MATCH VECTOR  (c2, [0.3, 0.3
 
 # copy to create another new block
 statement ok
-COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_annivfflat_l2.slt b/test/sql/dql/knn/embedding/test_knn_annivfflat_l2.slt
index aa93f36c07..06423f73b9 100644
--- a/test/sql/dql/knn/embedding/test_knn_annivfflat_l2.slt
+++ b/test/sql/dql/knn/embedding/test_knn_annivfflat_l2.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_annivfflat_l2(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_annivfflat_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -47,7 +47,7 @@ SELECT c1 FROM test_knn_annivfflat_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0
 
 # copy to create another new block
 statement ok
-COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_annivfflat_l2_filter.slt b/test/sql/dql/knn/embedding/test_knn_annivfflat_l2_filter.slt
index 553f888b87..9a28a43df5 100644
--- a/test/sql/dql/knn/embedding/test_knn_annivfflat_l2_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_annivfflat_l2_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_annivfflat_l2_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_annivfflat_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3,
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -64,7 +64,7 @@ SELECT c1 FROM test_knn_annivfflat_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3,
 
 # copy to create another new block
 statement ok
-COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_annivfflat_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_cos.slt b/test/sql/dql/knn/embedding/test_knn_cos.slt
index 40fd48de5f..7499c2a92c 100644
--- a/test/sql/dql/knn/embedding/test_knn_cos.slt
+++ b/test/sql/dql/knn/embedding/test_knn_cos.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_cos(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. (0.3*0.2+0.3*0.1+0.2*0.3+0.2*0.4) / sqrt((0.3^2+0.3^2+0.2^2+0.2^2) * (0.2^2+0.1^2+0.3^2+0.4^2)) = 0.823532105
 # 4. (0.3*0.1+0.3*0.2+0.2*0.3-0.2*0.2) / sqrt((0.3^2+0.3^2+0.2^2+0.2^2) * (0.1^2+0.2^2+0.3^2+(-0.2)^2)) = 0.50847518
 statement ok
-COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # metric cos will order descendingly. The query will return row 1, 2, 3
 query I
@@ -41,7 +41,7 @@ SELECT c1, ROW_ID() DISTANCE() FROM test_knn_cos SEARCH MATCH VECTOR (c2, [0.3,
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -53,7 +53,7 @@ SELECT c1 FROM test_knn_cos SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'floa
 
 # copy to create another new block
 statement ok
-COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_cos.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_cos.slt
index 094c270345..1cc4b49d7e 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_cos.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_cos.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_hnsw_cos(c1 INT,  c2 EMBEDDING(FLOAT, 4));
 # 3. (0.3*0.2+0.3*0.1+0.2*0.3+0.2*0.4) / sqrt((0.3^2+0.3^2+0.2^2+0.2^2) * (0.2^2+0.1^2+0.3^2+0.4^2)) = 0.823532105
 # 4. (0.3*0.1+0.3*0.2+0.2*0.3-0.2*0.2) / sqrt((0.3^2+0.3^2+0.2^2+0.2^2) * (0.1^2+0.2^2+0.3^2+(-0.2)^2)) = 0.50847518
 statement ok
-COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic cos will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_cos SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4, block 1 row 3
 query I
@@ -49,7 +49,7 @@ SELECT c1 FROM test_knn_hnsw_cos SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 
 # copy to create another new block with no index
 statement ok
-COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_cos FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 # select with 2 index segment and 1 non-index segment
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_ip.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_ip.slt
index fd2470fe00..041fda5768 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_ip.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_ip.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_hnsw_ip(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_ip SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -49,7 +49,7 @@ SELECT c1 FROM test_knn_hnsw_ip SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 
 # copy to create another new block with no index
 statement ok
-COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 # select with 2 index segment and 1 non-index segment
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_ip_filter.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_ip_filter.slt
index 23dfe8deed..2c4a7aae8f 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_ip_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_ip_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_hnsw_ip_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_ip_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2,
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -66,7 +66,7 @@ SELECT c1 FROM test_knn_hnsw_ip_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2,
 
 # copy to create another new block
 statement ok
-COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_l2.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_l2.slt
index 2d059201ef..077c012b08 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_l2.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_l2.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_hnsw_l2(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -49,7 +49,7 @@ SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 
 # copy to create another new block with no index
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 # select with 2 index segment and 1 non-index segment
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_filter.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_filter.slt
index 5dd3f7b40f..33daaec42a 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_hnsw_l2_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2,
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -66,7 +66,7 @@ SELECT c1 FROM test_knn_hnsw_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2,
 
 # copy to create another new block
 statement ok
-COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq.slt
index d8a1b1d81a..37b2a1a051 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_knn_hnsw_l2(c1 INT, c2 EMBEDDING(FLOAT, 4));
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float', 'l2', 3);
@@ -25,7 +25,7 @@ SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 4
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float', 'l2', 3) WITH (ef = 6, rerank);
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq2.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq2.slt
index 04ac776063..20112d82ce 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq2.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_lvq2.slt
@@ -50,7 +50,7 @@ SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 6
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float', 'l2', 3) WITH (ef = 12, rerank);
diff --git a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_realtime.slt b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_realtime.slt
index 6f31a81c99..94d1896fe6 100644
--- a/test/sql/dql/knn/embedding/test_knn_hnsw_l2_realtime.slt
+++ b/test/sql/dql/knn/embedding/test_knn_hnsw_l2_realtime.slt
@@ -24,7 +24,7 @@ SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], '
 4
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT c1 FROM test_knn_hnsw_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float', 'l2', 3);
diff --git a/test/sql/dql/knn/embedding/test_knn_ip.slt b/test/sql/dql/knn/embedding/test_knn_ip.slt
index 1062bdbdf7..78aa5611dc 100644
--- a/test/sql/dql/knn/embedding/test_knn_ip.slt
+++ b/test/sql/dql/knn/embedding/test_knn_ip.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_ip(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -41,7 +41,7 @@ SELECT c1, ROW_ID(), DISTANCE() FROM test_knn_l2 SEARCH MATCH VECTOR (c2, [0.3,
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -53,7 +53,7 @@ SELECT c1 FROM test_knn_ip SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float
 
 # copy to create another new block
 statement ok
-COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_ip_filter.slt b/test/sql/dql/knn/embedding/test_knn_ip_filter.slt
index 11941ead49..7ffe65f87a 100644
--- a/test/sql/dql/knn/embedding/test_knn_ip_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_ip_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_ip_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0.3*0.3 + 0.3*0.2 + 0.2*0.1 + 0.2*0.4 = 0.25
 # 4. 0.3*0.4 + 0.3*0.3 + 0.2*0.2 + 0.2*0.1 = 0.27
 statement ok
-COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 3, 2
 query I
@@ -43,7 +43,7 @@ SELECT c1 FROM test_knn_ip_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -72,7 +72,7 @@ SELECT c1 FROM test_knn_ip_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 
 # copy to create another new block
 statement ok
-COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_ip_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_l2.slt b/test/sql/dql/knn/embedding/test_knn_l2.slt
index bcb1fbeb1a..65e42d7268 100644
--- a/test/sql/dql/knn/embedding/test_knn_l2.slt
+++ b/test/sql/dql/knn/embedding/test_knn_l2.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_l2(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -41,7 +41,7 @@ SELECT c1, ROW_ID(), SIMILARITY() FROM test_knn_l2 SEARCH MATCH VECTOR (c2, [0.3
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -53,7 +53,7 @@ SELECT c1 FROM test_knn_l2 SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2], 'float
 
 # copy to create another new block
 statement ok
-COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_knn_l2_filter.slt b/test/sql/dql/knn/embedding/test_knn_l2_filter.slt
index 843e9057ca..19255b6d9e 100644
--- a/test/sql/dql/knn/embedding/test_knn_l2_filter.slt
+++ b/test/sql/dql/knn/embedding/test_knn_l2_filter.slt
@@ -11,7 +11,7 @@ CREATE TABLE test_knn_l2_filter(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 3. 0 + 0.1^2 + 0.1^2 + 0.2^2 = 0.06
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 statement ok
-COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic l2 will order ascendingly. The query will return row 4, 3, 2
 query I
@@ -45,7 +45,7 @@ SELECT c1 FROM test_knn_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -73,7 +73,7 @@ SELECT c1 FROM test_knn_l2_filter SEARCH MATCH VECTOR (c2, [0.3, 0.3, 0.2, 0.2],
 
 # copy to create another new block
 statement ok
-COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_l2_filter FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/embedding/test_multi_thread.slt b/test/sql/dql/knn/embedding/test_multi_thread.slt
index 1dc9cbe9cb..edb350b776 100644
--- a/test/sql/dql/knn/embedding/test_multi_thread.slt
+++ b/test/sql/dql/knn/embedding/test_multi_thread.slt
@@ -12,19 +12,19 @@ CREATE TABLE test_knn_hnsw_l2(c1 INT, c2 EMBEDDING(FLOAT, 4));
 # 4. 0.1^2 + 0 + 0 + 0.1^2 = 0.02
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # create hnsw index on existing 5 segments
 statement ok
@@ -32,34 +32,34 @@ CREATE INDEX idx1 ON test_knn_hnsw_l2 (c2) USING Hnsw WITH (M = 16, ef_construct
 
 # create another 10 blocks with no index
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 statement ok
-COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',');
+COPY test_knn_hnsw_l2 FROM '/var/infinity/test_data/embedding_float_dim4.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # select with 5 index segment and 10 non-index segment
 query I
diff --git a/test/sql/dql/knn/sparse/test_knn_sparse.slt b/test/sql/dql/knn/sparse/test_knn_sparse.slt
index 7e31402e6e..1858d04809 100644
--- a/test/sql/dql/knn/sparse/test_knn_sparse.slt
+++ b/test/sql/dql/knn/sparse/test_knn_sparse.slt
@@ -12,7 +12,7 @@ CREATE TABLE test_knn_sparse(c1 INT, c2 SPARSE(FLOAT, 100));
 # 4. 4.0*1.0 + 4.0*3.0 = 16.0
 # 5. 0
 statement ok
-COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 4, 2, 1
 query I
@@ -90,7 +90,7 @@ SELECT c1, ROW_ID(), DISTANCE() FROM test_knn_sparse SEARCH MATCH SPARSE (c2, [0
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -102,7 +102,7 @@ SELECT c1 FROM test_knn_sparse SEARCH MATCH SPARSE (c2, [0:1.0,20:2.0,80:3.0], '
 
 # copy to create another new block
 statement ok
-COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse FROM '/var/infinity/test_data/sparse_knn.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/sparse/test_knn_sparse_bit.slt b/test/sql/dql/knn/sparse/test_knn_sparse_bit.slt
index ecba05d382..15f29f108f 100644
--- a/test/sql/dql/knn/sparse/test_knn_sparse_bit.slt
+++ b/test/sql/dql/knn/sparse/test_knn_sparse_bit.slt
@@ -12,7 +12,7 @@ CREATE TABLE test_knn_sparse_bit(c1 INT, c2 SPARSE(BIT, 100));
 # 4. 1
 # 5. 0
 statement ok
-COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # mertic ip will order descendingly. The query will return row 1, 2, 3
 query I
@@ -52,7 +52,7 @@ SELECT c1, ROW_ID(), DISTANCE() FROM test_knn_sparse_bit SEARCH MATCH SPARSE (c2
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 1, block 2 row 1 and a row 2
 query I
@@ -64,7 +64,7 @@ SELECT c1 FROM test_knn_sparse_bit SEARCH MATCH SPARSE (c2, [20,30,40,60], 'ip',
 
 # copy to create another new block
 statement ok
-COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_bit FROM '/var/infinity/test_data/sparse_knn_bit.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 1 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/knn/sparse/test_knn_sparse_integer.slt b/test/sql/dql/knn/sparse/test_knn_sparse_integer.slt
index ad12056f73..9fa644e5d7 100644
--- a/test/sql/dql/knn/sparse/test_knn_sparse_integer.slt
+++ b/test/sql/dql/knn/sparse/test_knn_sparse_integer.slt
@@ -12,7 +12,7 @@ CREATE TABLE test_knn_sparse_integer(c1 INT, c2 SPARSE(INT, 100));
 # 4. 4.0*1.0 + 4.0*3.0 = 16.0
 # 5. 0
 statement ok
-COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # metric ip will be in descending order. The query will return row 4, 2, 1
 query I
@@ -52,7 +52,7 @@ SELECT c1, ROW_ID(), DISTANCE() FROM test_knn_sparse_integer SEARCH MATCH SPARSE
 # copy to create another new block
 # there will has 2 knn_scan operator to scan the blocks, and one merge_knn to merge
 statement ok
-COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return block 1 row 4, block 2 row 4 and a row 3
 query I
@@ -64,7 +64,7 @@ SELECT c1 FROM test_knn_sparse_integer SEARCH MATCH SPARSE (c2, [0:1.0,20:2.0,80
 
 # copy to create another new block
 statement ok
-COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',');
+COPY test_knn_sparse_integer FROM '/var/infinity/test_data/sparse_knn_integer.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # the query will return row 4 from block 1, 2 and 3
 query I
diff --git a/test/sql/dql/projection/test_table_star.slt b/test/sql/dql/projection/test_table_star.slt
index f49fb0e6e8..239bb26d20 100644
--- a/test/sql/dql/projection/test_table_star.slt
+++ b/test/sql/dql/projection/test_table_star.slt
@@ -8,7 +8,7 @@ CREATE TABLE test_table_star(a INTEGER, b INTEGER, c INTEGER);
 
 # copy data from tbl file
 statement ok
-COPY test_table_star FROM '/var/infinity/test_data/integer.csv' WITH ( DELIMITER ',' );
+COPY test_table_star FROM '/var/infinity/test_data/integer.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 
 query I
diff --git a/test/sql/dql/select.slt b/test/sql/dql/select.slt
index d60a3ac960..2403def743 100644
--- a/test/sql/dql/select.slt
+++ b/test/sql/dql/select.slt
@@ -18,7 +18,7 @@ CREATE TABLE select3 (c1 INTEGER, c2 INTEGER, c3 INTEGER);
 
 # copy data from csv file
 query I
-COPY select2 FROM '/var/infinity/test_data/nation.csv' WITH ( DELIMITER ',' );
+COPY select2 FROM '/var/infinity/test_data/nation.csv' WITH ( DELIMITER ',', FORMAT CSV );
 ----
 
 statement ok
diff --git a/test/sql/dql/select_ts.slt b/test/sql/dql/select_ts.slt
index 7d98b3dfe6..acfe198a8d 100644
--- a/test/sql/dql/select_ts.slt
+++ b/test/sql/dql/select_ts.slt
@@ -5,7 +5,7 @@ statement ok
 CREATE TABLE test_select_ts ( c1 int, c2 embedding(int,3));
 
 statement ok
-COPY test_select_ts FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',' );
+COPY test_select_ts FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 SELECT ROW_ID(), CREATE_TIMESTAMP() FROM test_select_ts;
@@ -14,7 +14,7 @@ statement ok
 SELECT ROW_ID(), DELETE_TIMESTAMP() FROM test_select_ts;
 
 statement ok
-COPY test_select_ts FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',' );
+COPY test_select_ts FROM '/var/infinity/test_data/embedding_int_dim3.csv' WITH ( DELIMITER ',', FORMAT CSV );
 
 statement ok
 SELECT ROW_ID(), CREATE_TIMESTAMP() FROM test_select_ts;
diff --git a/test/sql/dql/tensor/fusion_rerank_maxsim.slt b/test/sql/dql/tensor/fusion_rerank_maxsim.slt
index 47b905c807..dc5b4b6cac 100644
--- a/test/sql/dql/tensor/fusion_rerank_maxsim.slt
+++ b/test/sql/dql/tensor/fusion_rerank_maxsim.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE sqllogic_fusion_rerank_maxsim (title VARCHAR, num INT, t TENSOR(FLOAT, 4), body VARCHAR);
 
 statement ok
-COPY sqllogic_fusion_rerank_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH ( DELIMITER ',' );
+COPY sqllogic_fusion_rerank_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT * FROM sqllogic_fusion_rerank_maxsim;
diff --git a/test/sql/dql/tensor/fusion_rerank_tensorarray_maxsim.slt b/test/sql/dql/tensor/fusion_rerank_tensorarray_maxsim.slt
index bc9f322b1c..f2e62671f5 100644
--- a/test/sql/dql/tensor/fusion_rerank_tensorarray_maxsim.slt
+++ b/test/sql/dql/tensor/fusion_rerank_tensorarray_maxsim.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE fusion_rerank_tensor_array_maxsim (title VARCHAR, num INT, t TENSORARRAY(BIT, 8), body VARCHAR);
 
 statement ok
-COPY fusion_rerank_tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_maxsim.csv' WITH ( DELIMITER ',' );
+COPY fusion_rerank_tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_maxsim.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT * FROM fusion_rerank_tensor_array_maxsim;
diff --git a/test/sql/dql/tensor/tensor_array_bit_maxsim.slt b/test/sql/dql/tensor/tensor_array_bit_maxsim.slt
index ca8118dc5b..d6fca8a7e1 100644
--- a/test/sql/dql/tensor/tensor_array_bit_maxsim.slt
+++ b/test/sql/dql/tensor/tensor_array_bit_maxsim.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE tensor_array_maxsim (c1 int, c2 TensorArray(float,8));
 
 statement ok
-COPY tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_bit_dim8.csv' WITH ( DELIMITER ',' );
+COPY tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_bit_dim8.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # default top 10
 query I
diff --git a/test/sql/dql/tensor/tensor_array_maxsim.slt b/test/sql/dql/tensor/tensor_array_maxsim.slt
index f6a5ef0d0e..9e956cd50a 100644
--- a/test/sql/dql/tensor/tensor_array_maxsim.slt
+++ b/test/sql/dql/tensor/tensor_array_maxsim.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE tensor_array_maxsim (c1 int, c2 TensorArray(float,8));
 
 statement ok
-COPY tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_float_dim8.csv' WITH ( DELIMITER ',' );
+COPY tensor_array_maxsim FROM '/var/infinity/test_data/tensor_array_float_dim8.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 # default top 10
 query I
diff --git a/test/sql/dql/tensor/tensor_maxsim.slt b/test/sql/dql/tensor/tensor_maxsim.slt
index 26bdbb97d4..e7a6ccfee4 100644
--- a/test/sql/dql/tensor/tensor_maxsim.slt
+++ b/test/sql/dql/tensor/tensor_maxsim.slt
@@ -6,7 +6,7 @@ statement ok
 CREATE TABLE sqllogic_tensor_maxsim (title VARCHAR, num INT, t TENSOR(FLOAT, 4), body VARCHAR);
 
 statement ok
-COPY sqllogic_tensor_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH ( DELIMITER ',' );
+COPY sqllogic_tensor_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 SELECT * FROM sqllogic_tensor_maxsim;
@@ -113,7 +113,7 @@ test11 -9.400001
 
 # two blocks
 statement ok
-COPY sqllogic_tensor_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH ( DELIMITER ',' );
+COPY sqllogic_tensor_maxsim FROM '/var/infinity/test_data/tensor_maxsim.csv' WITH (DELIMITER ',', FORMAT CSV);
 
 query I
 EXPLAIN SELECT title, SCORE() FROM sqllogic_tensor_maxsim SEARCH MATCH TENSOR (t, [0.0, -10.0, 0.0, 0.7, 9.2, 45.6, -55.8, 3.5], 'float', 'maxsim') WHERE 10 > num;
diff --git a/test/sql/explain/explain.slt b/test/sql/explain/explain.slt
index da17794b86..553e1167ef 100644
--- a/test/sql/explain/explain.slt
+++ b/test/sql/explain/explain.slt
@@ -17,7 +17,7 @@ statement ok
 explain SELECT * FROM explain1;
 
 query I
-COPY explain1 FROM '/var/infinity/test_data/one.csv' WITH ( DELIMITER ',');
+COPY explain1 FROM '/var/infinity/test_data/one.csv' WITH ( DELIMITER ',', FORMAT CSV);
 ----
 
 query I
diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt
index 90b8b4f64d..eb50b889ea 100644
--- a/third_party/CMakeLists.txt
+++ b/third_party/CMakeLists.txt
@@ -1,5 +1,10 @@
 add_compile_options(-Wno-everything)
 
+################################################################################
+### snappy
+################################################################################
+add_subdirectory(snappy)
+
 ################################################################################
 ### arrow
 ################################################################################
diff --git a/third_party/arrow/cmake_modules/DefineOptions.cmake b/third_party/arrow/cmake_modules/DefineOptions.cmake
index 2c16b562e0..45b2c6703f 100644
--- a/third_party/arrow/cmake_modules/DefineOptions.cmake
+++ b/third_party/arrow/cmake_modules/DefineOptions.cmake
@@ -541,7 +541,7 @@ takes precedence over ccache if a storage backend is configured" ON)
   define_option(ARROW_WITH_BROTLI "Build with Brotli compression" OFF)
   define_option(ARROW_WITH_BZ2 "Build with BZ2 compression" OFF)
   define_option(ARROW_WITH_LZ4 "Build with lz4 compression" OFF)
-  define_option(ARROW_WITH_SNAPPY "Build with Snappy compression" OFF)
+  define_option(ARROW_WITH_SNAPPY "Build with Snappy compression" ON)
   define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF)
   define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF)
 
diff --git a/third_party/arrow/src/arrow/CMakeLists.txt b/third_party/arrow/src/arrow/CMakeLists.txt
index 75fe769a12..e76354574e 100644
--- a/third_party/arrow/src/arrow/CMakeLists.txt
+++ b/third_party/arrow/src/arrow/CMakeLists.txt
@@ -572,61 +572,64 @@ endif()
 
 arrow_add_object_library(ARROW_UTIL ${ARROW_UTIL_SRCS})
 
-# Disable DLL exports in vendored uriparser library
-foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-  target_compile_definitions(${ARROW_UTIL_TARGET} PRIVATE URI_STATIC_BUILD)
-endforeach()
+target_include_directories(arrow_util PRIVATE "${CMAKE_SOURCE_DIR}/third_party/snappy/")
+target_include_directories(arrow_util PRIVATE "${CMAKE_BINARY_DIR}/third_party/snappy/")
 
-if(ARROW_USE_BOOST)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE Boost::headers)
-  endforeach()
-endif()
-if(ARROW_USE_GLOG)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog)
-  endforeach()
-endif()
-if(ARROW_USE_XSIMD)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD})
-  endforeach()
-endif()
-if(ARROW_WITH_BROTLI)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_BROTLI_LIBS})
-  endforeach()
-endif()
-if(ARROW_WITH_BZ2)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE BZip2::BZip2)
-  endforeach()
-endif()
-if(ARROW_WITH_LZ4)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE LZ4::lz4)
-  endforeach()
-endif()
-if(ARROW_WITH_SNAPPY)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${Snappy_TARGET})
-  endforeach()
-endif()
-if(ARROW_WITH_OPENTELEMETRY)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS})
-  endforeach()
-endif()
-if(ARROW_WITH_ZLIB)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ZLIB::ZLIB)
-  endforeach()
-endif()
-if(ARROW_WITH_ZSTD)
-  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
-    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_ZSTD_LIBZSTD})
-  endforeach()
-endif()
+# NOTE: upstream per-target setup is commented out below (URI_STATIC_BUILD for the vendored uriparser, optional dependency linking).
+#foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#  target_compile_definitions(${ARROW_UTIL_TARGET} PRIVATE URI_STATIC_BUILD)
+#endforeach()
+#
+#if(ARROW_USE_BOOST)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE Boost::headers)
+#  endforeach()
+#endif()
+#if(ARROW_USE_GLOG)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog)
+#  endforeach()
+#endif()
+#if(ARROW_USE_XSIMD)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD})
+#  endforeach()
+#endif()
+#if(ARROW_WITH_BROTLI)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_BROTLI_LIBS})
+#  endforeach()
+#endif()
+#if(ARROW_WITH_BZ2)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE BZip2::BZip2)
+#  endforeach()
+#endif()
+#if(ARROW_WITH_LZ4)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE LZ4::lz4)
+#  endforeach()
+#endif()
+#if(ARROW_WITH_SNAPPY)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${Snappy_TARGET})
+#  endforeach()
+#endif()
+#if(ARROW_WITH_OPENTELEMETRY)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS})
+#  endforeach()
+#endif()
+#if(ARROW_WITH_ZLIB)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ZLIB::ZLIB)
+#  endforeach()
+#endif()
+#if(ARROW_WITH_ZSTD)
+#  foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+#    target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_ZSTD_LIBZSTD})
+#  endforeach()
+#endif()
 
 if(ARROW_WITH_OPENTELEMETRY)
   arrow_add_object_library(ARROW_TELEMETRY telemetry/logging.cc)
@@ -886,11 +889,9 @@ if(ARROW_IPC)
   endif()
   arrow_add_object_library(ARROW_IPC ${ARROW_IPC_SRCS})
   foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS})
-    message(STATUS "bbbbb flatbuffers")
     target_link_libraries(${ARROW_IPC_TARGET} PRIVATE flatbuffers.a)
   endforeach()
   if(ARROW_JSON)
-    message(STATUS "bbbbb rapidjson")
     foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS})
       target_link_libraries(${ARROW_IPC_TARGET} PRIVATE RapidJSON)
     endforeach()
diff --git a/third_party/snappy/CMakeLists.txt b/third_party/snappy/CMakeLists.txt
new file mode 100644
index 0000000000..4afda04c96
--- /dev/null
+++ b/third_party/snappy/CMakeLists.txt
@@ -0,0 +1,420 @@
+# Copyright 2019 Google Inc. All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+cmake_minimum_required(VERSION 3.1...3.10)  # policy max >= 3.5: CMake 4.x rejects older policy versions
+project(Snappy VERSION 1.1.10 LANGUAGES C CXX)
+
+# Pick a default C++ standard only when the including project has not set one.
+if(NOT CMAKE_CXX_STANDARD)
+  # Snappy needs C++11: make it mandatory and turn compiler extensions off.
+  set(CMAKE_CXX_EXTENSIONS OFF)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake
+if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+  # Use the highest warning level for Visual Studio.
+  set(CMAKE_CXX_WARNING_LEVEL 4)
+  if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
+    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
+  endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
+
+  # Disable C++ exceptions.
+  string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-")
+  add_definitions(-D_HAS_EXCEPTIONS=0)
+
+  # Disable RTTI.
+  string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-")
+else(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+  # Use -Wall for clang and gcc.
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall")
+
+  # Use -Wextra for clang and gcc.
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra")
+
+  # Use -Werror for clang only.
+  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+    endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror")
+  endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+
+  # Disable C++ exceptions.
+  string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
+
+  # Disable RTTI.
+  string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+endif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+
+# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make
+# it prominent in the GUI.
+option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF)
+
+option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." OFF)
+
+option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" OFF)
+
+option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF)
+
+option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF)
+
+option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." OFF)
+
+option(SNAPPY_INSTALL "Install Snappy's header and library" ON)
+
+include(TestBigEndian)
+test_big_endian(SNAPPY_IS_BIG_ENDIAN)
+
+include(CheckIncludeFile)
+check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
+check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H)
+check_include_file("sys/time.h" HAVE_SYS_TIME_H)
+check_include_file("sys/uio.h" HAVE_SYS_UIO_H)
+check_include_file("unistd.h" HAVE_UNISTD_H)
+check_include_file("windows.h" HAVE_WINDOWS_H)
+
+include(CheckLibraryExists)
+check_library_exists(z zlibVersion "" HAVE_LIBZ)
+check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
+check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4)
+
+include(CheckCXXCompilerFlag)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
+CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
+CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
+if(SNAPPY_REQUIRE_AVX2)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+  if(HAVE_CLANG_MBMI2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2")
+  endif(HAVE_CLANG_MBMI2)
+elseif (SNAPPY_REQUIRE_AVX)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+endif(SNAPPY_REQUIRE_AVX2)
+
+# Used by googletest.
+check_cxx_compiler_flag(-Wno-missing-field-initializers
+                        SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles("
+int main() {
+  return __builtin_expect(0, 1);
+}" HAVE_BUILTIN_EXPECT)
+
+check_cxx_source_compiles("
+int main() {
+  return __builtin_ctzll(0);
+}" HAVE_BUILTIN_CTZ)
+
+check_cxx_source_compiles("
+__attribute__((always_inline)) int zero() { return 0; }
+
+int main() {
+  return zero();
+}" HAVE_ATTRIBUTE_ALWAYS_INLINE)
+
+check_cxx_source_compiles("
+#include <tmmintrin.h>
+
+int main() {
+  const __m128i *src = 0;
+  __m128i dest;
+  const __m128i shuffle_mask = _mm_load_si128(src);
+  const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask);
+  _mm_storeu_si128(&dest, pattern);
+  return 0;
+}" SNAPPY_HAVE_SSSE3)
+
+check_cxx_source_compiles("
+#include <immintrin.h>
+int main() {
+  return _mm_crc32_u32(0, 1);
+}" SNAPPY_HAVE_X86_CRC32)
+
+check_cxx_source_compiles("
+#include <arm_neon.h>
+#include <arm_acle.h>
+int main() {
+  return __crc32cw(0, 1);
+}" SNAPPY_HAVE_NEON_CRC32)
+
+check_cxx_source_compiles("
+#include <immintrin.h>
+int main() {
+  return _bzhi_u32(0, 1);
+}" SNAPPY_HAVE_BMI2)
+
+check_cxx_source_compiles("
+#include <arm_neon.h>
+int main() {
+  uint8_t val = 3, dup[8];
+  uint8x16_t v = vld1q_dup_u8(&val);
+  vst1q_u8(dup, v);
+  return 0;
+}" SNAPPY_HAVE_NEON)
+
+include(CheckSymbolExists)
+check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
+check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)
+
+configure_file(
+  "cmake/config.h.in"
+  "${PROJECT_BINARY_DIR}/config.h"
+)
+
+# We don't want to define HAVE_ macros in public headers. Instead, we use
+# CMake's variable substitution with 0/1 variables, which will be seen by the
+# preprocessor as constants.
+set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H})
+if(NOT HAVE_SYS_UIO_H_01)
+  set(HAVE_SYS_UIO_H_01 0)
+endif(NOT HAVE_SYS_UIO_H_01)
+
+if (SNAPPY_FUZZING_BUILD)
+  if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    message(WARNING "Fuzzing builds are only supported with Clang")
+  endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+endif (SNAPPY_FUZZING_BUILD)
+
+configure_file(
+  "snappy-stubs-public.h.in"
+  "${PROJECT_BINARY_DIR}/snappy-stubs-public.h")
+
+add_library(snappy "")
+target_sources(snappy
+  PRIVATE
+    "snappy-internal.h"
+    "snappy-stubs-internal.h"
+    "snappy-c.cc"
+    "snappy-sinksource.cc"
+    "snappy-stubs-internal.cc"
+    "snappy.cc"
+    "${PROJECT_BINARY_DIR}/config.h"
+
+  # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
+  $<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-c.h>
+    $<INSTALL_INTERFACE:include/snappy-c.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-sinksource.h>
+    $<INSTALL_INTERFACE:include/snappy-sinksource.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy.h>
+    $<INSTALL_INTERFACE:include/snappy.h>
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/snappy-stubs-public.h>
+    $<INSTALL_INTERFACE:include/snappy-stubs-public.h>
+)
+target_include_directories(snappy
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:include>
+)
+set_target_properties(snappy
+  PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
+
+target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H)
+if(BUILD_SHARED_LIBS)
+  set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+endif(BUILD_SHARED_LIBS)
+
+if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+  add_library(snappy_test_support "")
+  target_sources(snappy_test_support
+    PRIVATE
+      "snappy-test.cc"
+      "snappy-test.h"
+      "snappy_test_data.cc"
+      "snappy_test_data.h"
+      "${PROJECT_BINARY_DIR}/config.h"
+  )
+
+  # Test files include snappy-test.h, HAVE_CONFIG_H must be defined.
+  target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H)
+
+  target_link_libraries(snappy_test_support snappy)
+
+  if(HAVE_LIBZ)
+    target_link_libraries(snappy_test_support z)
+  endif(HAVE_LIBZ)
+  if(HAVE_LIBLZO2)
+    target_link_libraries(snappy_test_support lzo2)
+  endif(HAVE_LIBLZO2)
+  if(HAVE_LIBLZ4)
+    target_link_libraries(snappy_test_support lz4)
+  endif(HAVE_LIBLZ4)
+
+  target_include_directories(snappy_test_support
+    BEFORE PUBLIC
+      "${PROJECT_SOURCE_DIR}"
+  )
+endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_BUILD_TESTS)
+  enable_testing()
+
+  # Prevent overriding the parent project's compiler/linker settings on Windows.
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+  set(install_gtest OFF)
+  set(install_gmock OFF)
+  set(build_gmock ON)
+
+  # This project is tested using GoogleTest.
+  add_subdirectory("third_party/googletest")
+
+  # GoogleTest triggers a missing field initializers warning.
+  if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+    set_property(TARGET gtest
+        APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+    set_property(TARGET gmock
+        APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+  endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+
+  add_executable(snappy_unittest "")
+  target_sources(snappy_unittest
+    PRIVATE
+      "snappy_unittest.cc"
+  )
+  target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest)
+
+  add_test(
+    NAME snappy_unittest
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    COMMAND snappy_unittest)  # target name: CTest substitutes the built executable's real location
+
+  add_executable(snappy_test_tool "")
+  target_sources(snappy_test_tool
+    PRIVATE
+      "snappy_test_tool.cc"
+  )
+  target_link_libraries(snappy_test_tool snappy_test_support)
+endif(SNAPPY_BUILD_TESTS)
+
+if(SNAPPY_BUILD_BENCHMARKS)
+  add_executable(snappy_benchmark "")
+  target_sources(snappy_benchmark
+    PRIVATE
+      "snappy_benchmark.cc"
+  )
+  target_link_libraries(snappy_benchmark snappy_test_support benchmark_main)
+
+  # This project uses Google benchmark for benchmarking.
+  set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
+  set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
+  add_subdirectory("third_party/benchmark")
+endif(SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_FUZZING_BUILD)
+  add_executable(snappy_compress_fuzzer "")
+  target_sources(snappy_compress_fuzzer
+    PRIVATE "snappy_compress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_compress_fuzzer snappy)
+  set_target_properties(snappy_compress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+
+  add_executable(snappy_uncompress_fuzzer "")
+  target_sources(snappy_uncompress_fuzzer
+    PRIVATE "snappy_uncompress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_uncompress_fuzzer snappy)
+  set_target_properties(snappy_uncompress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+endif(SNAPPY_FUZZING_BUILD)
+
+# Must be included before CMAKE_INSTALL_INCLUDEDIR is used.
+include(GNUInstallDirs)
+
+if(SNAPPY_INSTALL)
+  install(TARGETS snappy
+    EXPORT SnappyTargets
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  )
+  install(
+    FILES
+      "snappy-c.h"
+      "snappy-sinksource.h"
+      "snappy.h"
+      "${PROJECT_BINARY_DIR}/snappy-stubs-public.h"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+  )
+
+  include(CMakePackageConfigHelpers)
+  configure_package_config_file(
+    "cmake/${PROJECT_NAME}Config.cmake.in"
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+    INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  write_basic_package_version_file(
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    COMPATIBILITY SameMajorVersion
+  )
+  install(
+    EXPORT SnappyTargets
+    NAMESPACE Snappy::
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  install(
+    FILES
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+endif(SNAPPY_INSTALL)
diff --git a/third_party/snappy/CONTRIBUTING.md b/third_party/snappy/CONTRIBUTING.md
new file mode 100644
index 0000000000..66a60d5c86
--- /dev/null
+++ b/third_party/snappy/CONTRIBUTING.md
@@ -0,0 +1,31 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+See [the README](README.md#contributing-to-the-snappy-project) for areas
+where we are likely to accept external contributions.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/third_party/snappy/README.md b/third_party/snappy/README.md
new file mode 100644
index 0000000000..398be7d58a
--- /dev/null
+++ b/third_party/snappy/README.md
@@ -0,0 +1,165 @@
+Snappy, a fast compressor/decompressor.
+
+[![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml)
+
+Introduction
+============
+
+Snappy is a compression/decompression library. It does not aim for maximum
+compression, or compatibility with any other compression library; instead,
+it aims for very high speeds and reasonable compression. For instance,
+compared to the fastest mode of zlib, Snappy is an order of magnitude faster
+for most inputs, but the resulting compressed files are anywhere from 20% to
+100% bigger. (For more information, see "Performance", below.)
+
+Snappy has the following properties:
+
+ * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code.
+   See "Performance" below.
+ * Stable: Over the last few years, Snappy has compressed and decompressed
+   petabytes of data in Google's production environment. The Snappy bitstream
+   format is stable and will not change between versions.
+ * Robust: The Snappy decompressor is designed not to crash in the face of
+   corrupted or malicious input.
+ * Free and open source software: Snappy is licensed under a BSD-type license.
+   For more information, see the included COPYING file.
+
+Snappy has previously been called "Zippy" in some Google presentations
+and the like.
+
+
+Performance
+===========
+
+Snappy is intended to be fast. On a single core of a Core i7 processor
+in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
+about 500 MB/sec or more. (These numbers are for the slowest inputs in our
+benchmark suite; others are much faster.) In our tests, Snappy usually
+is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ,
+etc.) while achieving comparable compression ratios.
+
+Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
+for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and
+other already-compressed data. Similar numbers for zlib in its fastest mode
+are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are
+capable of achieving yet higher compression rates, although usually at the
+expense of speed. Of course, compression ratio will vary significantly with
+the input.
+
+Although Snappy should be fairly portable, it is primarily optimized
+for 64-bit x86-compatible processors, and may run slower in other environments.
+In particular:
+
+ - Snappy uses 64-bit operations in several places to process more data at
+   once than would otherwise be possible.
+ - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap.
+   On some platforms, these must be emulated with single-byte loads
+   and stores, which is much slower.
+ - Snappy assumes little-endian throughout, and needs to byte-swap data in
+   several places if running on a big-endian platform.
+
+Experience has shown that even heavily tuned code can be improved.
+Performance optimizations, whether for 64-bit x86 or other platforms,
+are of course most welcome; see "Contact", below.
+
+
+Building
+========
+
+You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt)
+or later to build:
+
+```bash
+git submodule update --init
+mkdir build
+cd build && cmake ../ && make
+```
+
+Usage
+=====
+
+Note that Snappy, both the implementation and the main interface,
+is written in C++. However, several third-party bindings to other languages
+are available; see the [home page](docs/README.md) for more information.
+Also, if you want to use Snappy from C code, you can use the included C
+bindings in snappy-c.h.
+
+To use Snappy from your own C++ program, include the file "snappy.h" from
+your calling file, and link against the compiled library.
+
+There are many ways to call Snappy, but the simplest possible is
+
+```c++
+snappy::Compress(input.data(), input.size(), &output);
+```
+
+and similarly
+
+```c++
+snappy::Uncompress(input.data(), input.size(), &output);
+```
+
+where "input" and "output" are both instances of std::string.
+
+There are other interfaces that are more flexible in various ways, including
+support for custom (non-array) input sources. See the header file for more
+information.
+
+
+Tests and benchmarks
+====================
+
+When you compile Snappy, the following binaries are compiled in addition to the
+library itself. You do not need them to use the compressor from your own
+library, but they are useful for Snappy development.
+
+* `snappy_benchmark` contains microbenchmarks used to tune compression and
+  decompression performance.
+* `snappy_unittest` contains unit tests, verifying correctness on your machine
+  in various scenarios.
+* `snappy_test_tool` can benchmark Snappy against a few other compression
+  libraries (zlib, LZO, and LZ4), if they were detected at configure
+  time. To benchmark using a given file, give the compression algorithm you want
+  to test Snappy against (e.g. --zlib) and then a list of one or more file names
+  on the command line.
+
+If you want to change or optimize Snappy, please run the tests and benchmarks to
+verify you have not broken anything.
+
+The testdata/ directory contains the files used by the microbenchmarks, which
+should provide a reasonably balanced starting point for benchmarking. (Note that
+baddata[1-3].snappy are not intended as benchmarks; they are used to verify
+correctness in the presence of corrupted data in the unit test.)
+
+Contributing to the Snappy Project
+==================================
+
+In addition to the aims listed at the top of the [README](README.md), Snappy
+explicitly supports the following:
+
+1. C++11
+2. Clang (gcc and MSVC are best-effort).
+3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
+   1. [x86](https://en.wikipedia.org/wiki/X86)
+   2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
+   3. ARMv7 (32-bit)
+   4. ARMv8 (AArch64)
+4. Supports only the Snappy compression scheme as described in
+  [format_description.txt](format_description.txt).
+5. CMake for building
+
+Changes adding features or dependencies outside of the core area of focus listed
+above might not be accepted. If in doubt post a message to the
+[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
+
+We are unlikely to accept contributions to the build configuration files, such
+as `CMakeLists.txt`. We are focused on maintaining a build configuration that
+allows us to test that the project works in a few supported configurations
+inside Google. We are not currently interested in supporting other requirements,
+such as different operating systems, compilers, or build systems.
+
+Contact
+=======
+
+Snappy is distributed through GitHub. For the latest version and other
+information, see https://github.com/google/snappy.
diff --git a/third_party/snappy/cmake/SnappyConfig.cmake.in b/third_party/snappy/cmake/SnappyConfig.cmake.in
new file mode 100644
index 0000000000..9e7d134628
--- /dev/null
+++ b/third_party/snappy/cmake/SnappyConfig.cmake.in
@@ -0,0 +1,33 @@
+# Copyright 2019 Google Inc. All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+@PACKAGE_INIT@
+
+include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
+
+check_required_components(Snappy)
diff --git a/third_party/snappy/cmake/config.h.in b/third_party/snappy/cmake/config.h.in
new file mode 100644
index 0000000000..d1de25cf45
--- /dev/null
+++ b/third_party/snappy/cmake/config.h.in
@@ -0,0 +1,66 @@
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
+
+/* Define to 1 if the compiler supports __attribute__((always_inline)). */
+#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE
+
+/* Define to 1 if the compiler supports __builtin_ctz and friends. */
+#cmakedefine01 HAVE_BUILTIN_CTZ
+
+/* Define to 1 if the compiler supports __builtin_expect. */
+#cmakedefine01 HAVE_BUILTIN_EXPECT
+
+/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
+#cmakedefine01 HAVE_FUNC_MMAP
+
+/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
+#cmakedefine01 HAVE_FUNC_SYSCONF
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+#cmakedefine01 HAVE_LIBLZO2
+
+/* Define to 1 if you have the `z' library (-lz). */
+#cmakedefine01 HAVE_LIBZ
+
+/* Define to 1 if you have the `lz4' library (-llz4). */
+#cmakedefine01 HAVE_LIBLZ4
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#cmakedefine01 HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#cmakedefine01 HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#cmakedefine01 HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#cmakedefine01 HAVE_SYS_UIO_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#cmakedefine01 HAVE_UNISTD_H
+
+/* Define to 1 if you have the <windows.h> header file. */
+#cmakedefine01 HAVE_WINDOWS_H
+
+/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_SSSE3
+
+/* Define to 1 if you target processors with SSE4.2 and have <crc32intrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_X86_CRC32
+
+/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_BMI2
+
+/* Define to 1 if you target processors with NEON and have <arm_neon.h>. */
+#cmakedefine01 SNAPPY_HAVE_NEON
+
+/* Define to 1 if you have <arm_neon.h> and <arm_acle.h> and want to optimize
+   compression speed by using __crc32cw from <arm_acle.h>. */
+#cmakedefine01 SNAPPY_HAVE_NEON_CRC32
+
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+#cmakedefine01 SNAPPY_IS_BIG_ENDIAN
+
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
diff --git a/third_party/snappy/snappy-c.cc b/third_party/snappy/snappy-c.cc
new file mode 100644
index 0000000000..473a0b0978
--- /dev/null
+++ b/third_party/snappy/snappy-c.cc
@@ -0,0 +1,90 @@
+// Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy.h"
+#include "snappy-c.h"
+
+extern "C" {
+
+snappy_status snappy_compress(const char* input,
+                              size_t input_length,
+                              char* compressed,
+                              size_t *compressed_length) {
+  if (*compressed_length < snappy_max_compressed_length(input_length)) {
+    return SNAPPY_BUFFER_TOO_SMALL;
+  }
+  snappy::RawCompress(input, input_length, compressed, compressed_length);
+  return SNAPPY_OK;
+}
+
+snappy_status snappy_uncompress(const char* compressed,
+                                size_t compressed_length,
+                                char* uncompressed,
+                                size_t* uncompressed_length) {
+  size_t real_uncompressed_length;
+  if (!snappy::GetUncompressedLength(compressed,
+                                     compressed_length,
+                                     &real_uncompressed_length)) {
+    return SNAPPY_INVALID_INPUT;
+  }
+  if (*uncompressed_length < real_uncompressed_length) {
+    return SNAPPY_BUFFER_TOO_SMALL;
+  }
+  if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) {
+    return SNAPPY_INVALID_INPUT;
+  }
+  *uncompressed_length = real_uncompressed_length;
+  return SNAPPY_OK;
+}
+
+size_t snappy_max_compressed_length(size_t source_length) {
+  return snappy::MaxCompressedLength(source_length);
+}
+
+snappy_status snappy_uncompressed_length(const char *compressed,
+                                         size_t compressed_length,
+                                         size_t *result) {
+  if (snappy::GetUncompressedLength(compressed,
+                                    compressed_length,
+                                    result)) {
+    return SNAPPY_OK;
+  } else {
+    return SNAPPY_INVALID_INPUT;
+  }
+}
+
+snappy_status snappy_validate_compressed_buffer(const char *compressed,
+                                                size_t compressed_length) {
+  if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
+    return SNAPPY_OK;
+  } else {
+    return SNAPPY_INVALID_INPUT;
+  }
+}
+
+}  // extern "C"
diff --git a/third_party/snappy/snappy-c.h b/third_party/snappy/snappy-c.h
new file mode 100644
index 0000000000..32aa0c6b8b
--- /dev/null
+++ b/third_party/snappy/snappy-c.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Plain C interface (a wrapper around the C++ implementation).
+ */
+
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/*
+ * Return values; see the documentation for each function to know
+ * what each can return.
+ */
+typedef enum {
+  SNAPPY_OK = 0,
+  SNAPPY_INVALID_INPUT = 1,
+  SNAPPY_BUFFER_TOO_SMALL = 2
+} snappy_status;
+
+/*
+ * Takes the data stored in "input[0..input_length-1]" and stores
+ * it in the array pointed to by "compressed".
+ *
+ * <compressed_length> signals the space available in "compressed".
+ * If it is not at least equal to "snappy_max_compressed_length(input_length)",
+ * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
+ * <compressed_length> contains the true length of the compressed output,
+ * and SNAPPY_OK is returned.
+ *
+ * Example:
+ *   size_t output_length = snappy_max_compressed_length(input_length);
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_compress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_compress(const char* input,
+                              size_t input_length,
+                              char* compressed,
+                              size_t* compressed_length);
+
+/*
+ * Given data in "compressed[0..compressed_length-1]" generated by
+ * calling the snappy_compress routine, this routine stores
+ * the uncompressed data to
+ *   uncompressed[0..uncompressed_length-1].
+ * Returns failure (a value not equal to SNAPPY_OK) if the message
+ * is corrupted and could not be decompressed.
+ *
+ * <uncompressed_length> signals the space available in "uncompressed".
+ * If it is not at least equal to the value returned by
+ * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
+ * is returned. After successful decompression, <uncompressed_length>
+ * contains the true length of the decompressed output.
+ *
+ * Example:
+ *   size_t output_length;
+ *   if (snappy_uncompressed_length(input, input_length, &output_length)
+ *       != SNAPPY_OK) {
+ *     ... fail ...
+ *   }
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_uncompress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_uncompress(const char* compressed,
+                                size_t compressed_length,
+                                char* uncompressed,
+                                size_t* uncompressed_length);
+
+/*
+ * Returns the maximal size of the compressed representation of
+ * input data that is "source_length" bytes in length.
+ */
+size_t snappy_max_compressed_length(size_t source_length);
+
+/*
+ * REQUIRES: "compressed[]" was produced by snappy_compress()
+ * Returns SNAPPY_OK and stores the length of the uncompressed data in
+ * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
+ * This operation takes O(1) time.
+ */
+snappy_status snappy_uncompressed_length(const char* compressed,
+                                         size_t compressed_length,
+                                         size_t* result);
+
+/*
+ * Check if the contents of "compressed[]" can be uncompressed successfully.
+ * Does not return the uncompressed data. Returns SNAPPY_OK if they can,
+ * or SNAPPY_INVALID_INPUT if they cannot.
+ * Takes time proportional to compressed_length, but is usually at least a
+ * factor of four faster than actual decompression.
+ */
+snappy_status snappy_validate_compressed_buffer(const char* compressed,
+                                                size_t compressed_length);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
diff --git a/third_party/snappy/snappy-internal.h b/third_party/snappy/snappy-internal.h
new file mode 100644
index 0000000000..0923f399a3
--- /dev/null
+++ b/third_party/snappy/snappy-internal.h
@@ -0,0 +1,395 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Internals shared between the Snappy implementation and its unittest.
+
+#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
+#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
+
+#include "snappy-stubs-internal.h"
+
+#if SNAPPY_HAVE_SSSE3
+// Please do not replace with <x86intrin.h> or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#endif
+
+#if SNAPPY_HAVE_NEON
+#include <arm_neon.h>
+#endif
+
+#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
+#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1
+#else
+#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0
+#endif
+
+namespace snappy {
+namespace internal {
+
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+#if SNAPPY_HAVE_SSSE3
+using V128 = __m128i;
+#elif SNAPPY_HAVE_NEON
+using V128 = uint8x16_t;
+#endif
+
+// Load 128 bits of integer data. `src` must be 16-byte aligned.
+inline V128 V128_Load(const V128* src);
+
+// Load 128 bits of integer data. `src` does not need to be aligned.
+inline V128 V128_LoadU(const V128* src);
+
+// Store 128 bits of integer data. `dst` does not need to be aligned.
+inline void V128_StoreU(V128* dst, V128 val);
+
+// Shuffle packed 8-bit integers using a shuffle mask.
+// Each packed integer in the shuffle mask must be in [0,16).
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);
+
+// Constructs V128 with 16 chars |c|.
+inline V128 V128_DupChar(char c);
+
+#if SNAPPY_HAVE_SSSE3
+inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
+
+inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
+
+inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); }
+
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
+  return _mm_shuffle_epi8(input, shuffle_mask);
+}
+
+inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); }
+
+#elif SNAPPY_HAVE_NEON
+inline V128 V128_Load(const V128* src) {
+  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
+}
+
+inline V128 V128_LoadU(const V128* src) {
+  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
+}
+
+inline void V128_StoreU(V128* dst, V128 val) {
+  vst1q_u8(reinterpret_cast<uint8_t*>(dst), val);
+}
+
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
+  assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15);
+  return vqtbl1q_u8(input, shuffle_mask);
+}
+
+inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); }
+#endif
+#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+
+// Working memory performs a single allocation to hold all scratch space
+// required for compression.
+class WorkingMemory {
+ public:
+  explicit WorkingMemory(size_t input_size);
+  ~WorkingMemory();
+
+  // Allocates and clears a hash table using memory in "*this",
+  // stores the number of buckets in "*table_size" and returns a pointer to
+  // the base of the hash table.
+  uint16_t* GetHashTable(size_t fragment_size, int* table_size) const;
+  char* GetScratchInput() const { return input_; }
+  char* GetScratchOutput() const { return output_; }
+
+ private:
+  char* mem_;        // the allocated memory, never nullptr
+  size_t size_;      // the size of the allocated memory, never 0
+  uint16_t* table_;  // the pointer to the hashtable
+  char* input_;      // the pointer to the input scratch buffer
+  char* output_;     // the pointer to the output scratch buffer
+
+  // No copying
+  WorkingMemory(const WorkingMemory&);
+  void operator=(const WorkingMemory&);
+};
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input_length <= kBlockSize"
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input_length)" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+char* CompressFragment(const char* input,
+                       size_t input_length,
+                       char* op,
+                       uint16_t* table,
+                       const int table_size);
+
+// Find the largest n such that
+//
+//   s1[0,n-1] == s2[0,n-1]
+//   and n <= (s2_limit - s2).
+//
+// Return make_pair(n, n < 8).
+// Does not read *s2_limit or beyond.
+// Does not read *(s1 + (s2_limit - s2)) or beyond.
+// Requires that s2_limit >= s2.
+//
+// In addition populate *data with the next 5 bytes from the end of the match.
+// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is
+// that on some arch's this can be done faster in this routine than subsequent
+// loading from s2 + n.
+//
+// Separate implementation for 64-bit, little-endian cpus.
+#if !SNAPPY_IS_BIG_ENDIAN && \
+    (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
+     defined(ARCH_ARM))
+static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
+                                                      const char* s2,
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
+  assert(s2_limit >= s2);
+  size_t matched = 0;
+
+  // This block isn't necessary for correctness; we could just start looping
+  // immediately.  As an optimization though, it is useful.  It creates some not
+  // uncommon code paths that determine, without extra effort, whether the match
+  // length is less than 8.  In short, we are hoping to avoid a conditional
+  // branch, and perhaps get better code layout from the C++ compiler.
+  if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (SNAPPY_PREDICT_TRUE(a1 != a2)) {
+      // This code is critical for performance. The reason is that it determines
+      // how much to advance `ip` (s2). This obviously depends on both the loads
+      // from the `candidate` (s1) and `ip`. Furthermore the next `candidate`
+      // depends on the advanced `ip` calculated here through a load, hash and
+      // new candidate hash lookup (a lot of cycles). This makes s1 (ie.
+      // `candidate`) the variable that limits throughput. This is the reason we
+      // go through hoops to have this function update `data` for the next iter.
+      // The straightforward code would use *data, given by
+      //
+      // *data = UNALIGNED_LOAD64(s2 + matched_bytes) (Latency of 5 cycles),
+      //
+      // as input for the hash table lookup to find next candidate. However
+      // this forces the load on the data dependency chain of s1, because
+      // matched_bytes directly depends on s1. However matched_bytes is 0..7, so
+      // we can also calculate *data by
+      //
+      // *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8),
+      //                    matched_bytes);
+      //
+      // The loads do not depend on s1 anymore and are thus off the bottleneck.
+      // The straightforward implementation on x86_64 would be to use
+      //
+      // shrd rax, rdx, cl  (cl being matched_bytes * 8)
+      //
+      // unfortunately shrd with a variable shift has a 4 cycle latency. So this
+      // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable
+      // shift instruction but can only shift 64 bits. If we focus on just
+      // obtaining the least significant 4 bytes, we can obtain this by
+      //
+      // *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2),
+      //     UNALIGNED_LOAD64(s2 + 4)) >> ((matched_bytes & 3) * 8);
+      //
+      // Written like above this is not a big win, the conditional move would be
+      // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle).
+      // However matched_bytes < 4 is equal to
+      // static_cast<uint32_t>(xorval) != 0. Written that way, the conditional
+      // move (2 cycles) can execute in parallel with FindLSBSetNonZero64
+      // (tzcnt), which takes 3 cycles.
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+#ifndef __x86_64__
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+#else
+      // Ideally this would just be
+      //
+      // a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+      //
+      // However clang correctly infers that the above statement participates on
+      // a critical data dependency chain and thus, unfortunately, refuses to
+      // use a conditional move (it's tuned to cut data dependencies). In this
+      // case there is a longer parallel chain anyway AND this will be fairly
+      // unpredictable.
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval)
+          : "cc");
+#endif
+      *data = a2 >> (shift & (3 * 8));
+      return std::pair<size_t, bool>(matched_bytes, true);
+    } else {
+      matched = 8;
+      s2 += 8;
+    }
+  }
+
+  // Find out how long the match is. We loop over the data 64 bits at a
+  // time until we find a 64-bit block that doesn't match; then we find
+  // the first non-matching bit and use that to calculate the total
+  // length of the match.
+  while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1 + matched);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (a1 == a2) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+#ifndef __x86_64__
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+#else
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval)
+          : "cc");
+#endif
+      *data = a2 >> (shift & (3 * 8));
+      matched += matched_bytes;
+      assert(matched >= 8);
+      return std::pair<size_t, bool>(matched, false);
+    }
+  }
+  while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
+    if (s1[matched] == *s2) {
+      ++s2;
+      ++matched;
+    } else {
+      if (s2 <= s2_limit - 8) {
+        *data = UNALIGNED_LOAD64(s2);
+      }
+      return std::pair<size_t, bool>(matched, matched < 8);
+    }
+  }
+  return std::pair<size_t, bool>(matched, matched < 8);
+}
+#else
+static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
+                                                      const char* s2,
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
+  // Implementation based on the x86-64 version, above.
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  while (s2 <= s2_limit - 4 &&
+         UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
+    s2 += 4;
+    matched += 4;
+  }
+  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
+    uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+    int matching_bits = Bits::FindLSBSetNonZero(x);
+    matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
+  } else {
+    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    }
+  }
+  if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2);
+  return std::pair<size_t, bool>(matched, matched < 8);
+}
+#endif
+
+// Lookup tables for decompression code.  Give --snappy_dump_decompression_table
+// to the unit test to recompute char_table.
+
+enum {
+  LITERAL = 0,
+  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
+  COPY_2_BYTE_OFFSET = 2,
+  COPY_4_BYTE_OFFSET = 3
+};
+static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
+
+// Data stored per entry in lookup table:
+//      Range   Bits-used       Description
+//      ------------------------------------
+//      1..64   0..7            Literal/copy length encoded in opcode byte
+//      0..7    8..10           Copy offset encoded in opcode byte / 256
+//      0..4    11..13          Extra bytes after opcode
+//
+// We use eight bits for the length even though 7 would have sufficed
+// because of efficiency reasons:
+//      (1) Extracting a byte is faster than a bit-field
+//      (2) It properly aligns copy offset so we do not need a <<8
+static constexpr uint16_t char_table[256] = {
+    // clang-format off
+  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
+  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
+  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
+  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
+  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
+  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
+  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
+  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
+  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
+  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
+  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
+  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
+  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
+  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
+  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
+  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
+  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
+  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
+  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
+  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
+  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
+  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
+  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
+  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
+  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
+  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
+  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
+  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
+  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
+  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
+  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
+  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040,
+    // clang-format on
+};
+
+}  // end namespace internal
+}  // end namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
diff --git a/third_party/snappy/snappy-sinksource.cc b/third_party/snappy/snappy-sinksource.cc
new file mode 100644
index 0000000000..8214964a7e
--- /dev/null
+++ b/third_party/snappy/snappy-sinksource.cc
@@ -0,0 +1,121 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stddef.h>
+#include <cstring>
+
+#include "snappy-sinksource.h"
+
+namespace snappy {
+
+Source::~Source() = default;
+
+Sink::~Sink() = default;
+
+char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)length;
+
+  return scratch;
+}
+
+char* Sink::GetAppendBufferVariable(
+      size_t min_size, size_t desired_size_hint, char* scratch,
+      size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)desired_size_hint;
+
+  *allocated_size = scratch_size;
+  return scratch;
+}
+
+void Sink::AppendAndTakeOwnership(
+    char* bytes, size_t n,
+    void (*deleter)(void*, const char*, size_t),
+    void *deleter_arg) {
+  Append(bytes, n);
+  (*deleter)(deleter_arg, bytes, n);
+}
+
+ByteArraySource::~ByteArraySource() = default;
+
+size_t ByteArraySource::Available() const { return left_; }
+
+const char* ByteArraySource::Peek(size_t* len) {
+  *len = left_;
+  return ptr_;
+}
+
+void ByteArraySource::Skip(size_t n) {
+  left_ -= n;
+  ptr_ += n;
+}
+
+UncheckedByteArraySink::~UncheckedByteArraySink() { }
+
+void UncheckedByteArraySink::Append(const char* data, size_t n) {
+  // Do no copying if the caller filled in the result of GetAppendBuffer()
+  if (data != dest_) {
+    std::memcpy(dest_, data, n);
+  }
+  dest_ += n;
+}
+
+char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)len;
+  (void)scratch;
+
+  return dest_;
+}
+
+void UncheckedByteArraySink::AppendAndTakeOwnership(
+    char* bytes, size_t n,
+    void (*deleter)(void*, const char*, size_t),
+    void *deleter_arg) {
+  if (bytes != dest_) {
+    std::memcpy(dest_, bytes, n);
+    (*deleter)(deleter_arg, bytes, n);
+  }
+  dest_ += n;
+}
+
+char* UncheckedByteArraySink::GetAppendBufferVariable(
+      size_t min_size, size_t desired_size_hint, char* scratch,
+      size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)scratch;
+  (void)scratch_size;
+
+  *allocated_size = desired_size_hint;
+  return dest_;
+}
+
+}  // namespace snappy
diff --git a/third_party/snappy/snappy-sinksource.h b/third_party/snappy/snappy-sinksource.h
new file mode 100644
index 0000000000..3c74e1bb6e
--- /dev/null
+++ b/third_party/snappy/snappy-sinksource.h
@@ -0,0 +1,182 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
+#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
+
+#include <stddef.h>
+
+namespace snappy {
+
+// A Sink is an interface that consumes a sequence of bytes.
+class Sink {
+ public:
+  Sink() { }
+  virtual ~Sink();
+
+  // Append "bytes[0,n-1]" to this.
+  virtual void Append(const char* bytes, size_t n) = 0;
+
+  // Returns a writable buffer of the specified length for appending.
+  // May return a pointer to the caller-owned scratch buffer which
+  // must have at least the indicated length.  The returned buffer is
+  // only valid until the next operation on this Sink.
+  //
+  // After writing at most "length" bytes, call Append() with the
+  // pointer returned from this function and the number of bytes
+  // written.  Many Append() implementations will avoid copying
+  // bytes if this function returned an internal buffer.
+  //
+  // If a non-scratch buffer is returned, the caller may only pass a
+  // prefix of it to Append().  That is, it is not correct to pass an
+  // interior pointer of the returned array to Append().
+  //
+  // The default implementation always returns the scratch buffer.
+  virtual char* GetAppendBuffer(size_t length, char* scratch);
+
+  // For higher performance, Sink implementations can provide custom
+  // AppendAndTakeOwnership() and GetAppendBufferVariable() methods.
+  // These methods can reduce the number of copies done during
+  // compression/decompression.
+
+  // Append "bytes[0,n-1]" to the sink. Takes ownership of "bytes"
+  // and calls the deleter function as (*deleter)(deleter_arg, bytes, n)
+  // to free the buffer. The deleter function must be non-NULL.
+  //
+  // The default implementation just calls Append and frees "bytes".
+  // Other implementations may avoid a copy while appending the buffer.
+  virtual void AppendAndTakeOwnership(
+      char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
+      void *deleter_arg);
+
+  // Returns a writable buffer for appending and writes the buffer's capacity to
+  // *allocated_size. Guarantees *allocated_size >= min_size.
+  // May return a pointer to the caller-owned scratch buffer which must have
+  // scratch_size >= min_size.
+  //
+  // The returned buffer is only valid until the next operation
+  // on this Sink.
+  //
+  // After writing at most *allocated_size bytes, call Append() with the
+  // pointer returned from this function and the number of bytes written.
+  // Many Append() implementations will avoid copying bytes if this function
+  // returned an internal buffer.
+  //
+  // If the sink implementation allocates or reallocates an internal buffer,
+  // it should use the desired_size_hint if appropriate. If a caller cannot
+  // provide a reasonable guess at the desired capacity, it should set
+  // desired_size_hint = 0.
+  //
+  // If a non-scratch buffer is returned, the caller may only pass
+  // a prefix of it to Append(). That is, it is not correct to pass an
+  // interior pointer to Append().
+  //
+  // The default implementation always returns the scratch buffer.
+  virtual char* GetAppendBufferVariable(
+      size_t min_size, size_t desired_size_hint, char* scratch,
+      size_t scratch_size, size_t* allocated_size);
+
+ private:
+  // No copying: the copy operations are private.
+  Sink(const Sink&);
+  void operator=(const Sink&);
+};
+
+// A Source is an interface that yields a sequence of bytes.
+class Source {
+ public:
+  Source() { }
+  virtual ~Source();
+
+  // Return the number of bytes left to read from the source.
+  virtual size_t Available() const = 0;
+
+  // Peek at the next flat region of the source.  Does not reposition
+  // the source.  The returned region is empty iff Available()==0.
+  //
+  // Returns a pointer to the beginning of the region and stores its
+  // length in *len.
+  //
+  // The returned region is valid until the next call to Skip() or
+  // until this object is destroyed, whichever occurs first.
+  //
+  // The returned region may be larger than Available() (for example
+  // if this Source is a view on a substring of a larger source).
+  // The caller is responsible for ensuring that it only reads the
+  // Available() bytes.
+  virtual const char* Peek(size_t* len) = 0;
+
+  // Skip the next n bytes.  Invalidates any buffer returned by
+  // a previous call to Peek().
+  // REQUIRES: Available() >= n
+  virtual void Skip(size_t n) = 0;
+
+ private:
+  // No copying: the copy operations are private.
+  Source(const Source&);
+  void operator=(const Source&);
+};
+
+// A Source that yields the bytes of a flat array; only the pointer is stored.
+class ByteArraySource : public Source {
+ public:
+  ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
+  ~ByteArraySource() override;
+  size_t Available() const override;
+  const char* Peek(size_t* len) override;
+  void Skip(size_t n) override;
+ private:
+  const char* ptr_;  // Next unread byte; advanced by Skip().
+  size_t left_;      // Number of unread bytes; reduced by Skip().
+};
+
+// A Sink implementation that writes to a flat array without any bound checks.
+class UncheckedByteArraySink : public Sink {
+ public:
+  explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
+  ~UncheckedByteArraySink() override;
+  void Append(const char* data, size_t n) override;
+  char* GetAppendBuffer(size_t len, char* scratch) override;
+  char* GetAppendBufferVariable(
+      size_t min_size, size_t desired_size_hint, char* scratch,
+      size_t scratch_size, size_t* allocated_size) override;
+  void AppendAndTakeOwnership(
+      char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
+      void *deleter_arg) override;
+
+  // Return the current output pointer so that a caller can see how
+  // many bytes were produced.
+  // Note: this is not a Sink method.
+  char* CurrentDestination() const { return dest_; }
+ private:
+  char* dest_;  // Next write position; advanced by every append.
+};
+
+}  // namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
diff --git a/third_party/snappy/snappy-stubs-internal.cc b/third_party/snappy/snappy-stubs-internal.cc
new file mode 100644
index 0000000000..0bc8c2d344
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-internal.cc
@@ -0,0 +1,42 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <string>
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+
+void Varint::Append32(std::string* s, uint32_t value) {
+  char encoded[Varint::kMax32];  // Holds the longest possible varint32.
+  const char* const end = Varint::Encode32(encoded, value);
+  s->append(encoded, static_cast<size_t>(end - encoded));
+}
+
+}  // namespace snappy
diff --git a/third_party/snappy/snappy-stubs-internal.h b/third_party/snappy/snappy-stubs-internal.h
new file mode 100644
index 0000000000..1548ed7ac7
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-internal.h
@@ -0,0 +1,525 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various stubs for the open-source version of Snappy.
+
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <string>
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif  // defined(_MSC_VER)
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
+    __msan_unpoison((address), (size))
+#else
+#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
+#endif  // __has_feature(memory_sanitizer)
+
+#include "snappy-stubs-public.h"
+
+// Used to enable 64-bit optimized versions of some routines.
+#if defined(__PPC64__) || defined(__powerpc64__)
+#define ARCH_PPC 1
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define ARCH_ARM 1
+#endif
+
+// Needed by OS X, among others.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// The size of an array, if known at compile-time.
+// Will give unexpected results if used on a pointer.
+// We undefine it first, since some compilers already have a definition.
+#ifdef ARRAYSIZE
+#undef ARRAYSIZE
+#endif
+#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
+
+// Static prediction hints. Every expansion is fully parenthesized.
+#if HAVE_BUILTIN_EXPECT
+#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define SNAPPY_PREDICT_FALSE(x) (x)
+#define SNAPPY_PREDICT_TRUE(x) (x)
+#endif  // HAVE_BUILTIN_EXPECT
+
+// Inlining hints.
+#if HAVE_ATTRIBUTE_ALWAYS_INLINE
+#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#else
+#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+#endif  // HAVE_ATTRIBUTE_ALWAYS_INLINE
+
+// Stubbed version of ABSL_FLAG.
+//
+// In the open source version, flags can only be changed at compile time.
+#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \
+  flag_type FLAGS_ ## flag_name = default_value
+
+namespace snappy {
+
+// Stubbed version of absl::GetFlag(): returns its argument unchanged.
+template <typename T>
+inline T GetFlag(T flag) { return flag; }
+
+static const uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
+static const int64_t kint64max = std::numeric_limits<int64_t>::max();
+
+// Potentially unaligned loads and stores.
+
+inline uint16_t UNALIGNED_LOAD16(const void *p) {
+  // memcpy tolerates any alignment; it lowers to a single movzx/ldrh.
+  uint16_t loaded;
+  std::memcpy(&loaded, p, sizeof(uint16_t));
+  return loaded;
+}
+
+inline uint32_t UNALIGNED_LOAD32(const void *p) {
+  // memcpy tolerates any alignment; it lowers to a single mov/ldr.
+  uint32_t loaded;
+  std::memcpy(&loaded, p, sizeof(uint32_t));
+  return loaded;
+}
+
+inline uint64_t UNALIGNED_LOAD64(const void *p) {
+  // memcpy tolerates any alignment; it lowers to a single mov/ldr.
+  uint64_t loaded;
+  std::memcpy(&loaded, p, sizeof(uint64_t));
+  return loaded;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16_t v) {
+  // memcpy tolerates any alignment; it lowers to a single mov/strh.
+  std::memcpy(p, &v, sizeof(uint16_t));
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32_t v) {
+  // memcpy tolerates any alignment; it lowers to a single mov/str.
+  std::memcpy(p, &v, sizeof(uint32_t));
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64_t v) {
+  // memcpy tolerates any alignment; it lowers to a single mov/str.
+  std::memcpy(p, &v, sizeof(uint64_t));
+}
+
+// Convert to little-endian storage, opposite of network format.
+// All helpers are static; the pointers involved may be unaligned, and the
+// values on the host side are always in host byte order.
+//
+//  Store values into unaligned memory converting to little endian order:
+//    LittleEndian::Store16(p, x);
+//
+//  Load unaligned values stored in little endian converting to host order:
+//    x = LittleEndian::Load16(p);
+class LittleEndian {
+ public:
+  // Functions to do unaligned loads and stores in little-endian order.
+  static inline uint16_t Load16(const void *ptr) {
+    // Compiles to a single mov/ldr on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+    return (static_cast<uint16_t>(buffer[0])) |
+            (static_cast<uint16_t>(buffer[1]) << 8);
+#else
+    // memcpy() turns into a single instruction early in the optimization
+    // pipeline (relatively to a series of byte accesses). So, using memcpy
+    // instead of byte accesses may lead to better decisions in more stages of
+    // the optimization pipeline.
+    uint16_t value;
+    std::memcpy(&value, ptr, 2);
+    return value;
+#endif
+  }
+
+  static inline uint32_t Load32(const void *ptr) {
+    // Compiles to a single mov/ldr on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+    return (static_cast<uint32_t>(buffer[0])) |
+            (static_cast<uint32_t>(buffer[1]) << 8) |
+            (static_cast<uint32_t>(buffer[2]) << 16) |
+            (static_cast<uint32_t>(buffer[3]) << 24);
+#else
+    // See Load16() for the rationale of using memcpy().
+    uint32_t value;
+    std::memcpy(&value, ptr, 4);
+    return value;
+#endif
+  }
+
+  static inline uint64_t Load64(const void *ptr) {
+    // Compiles to a single mov/ldr on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+    return (static_cast<uint64_t>(buffer[0])) |
+            (static_cast<uint64_t>(buffer[1]) << 8) |
+            (static_cast<uint64_t>(buffer[2]) << 16) |
+            (static_cast<uint64_t>(buffer[3]) << 24) |
+            (static_cast<uint64_t>(buffer[4]) << 32) |
+            (static_cast<uint64_t>(buffer[5]) << 40) |
+            (static_cast<uint64_t>(buffer[6]) << 48) |
+            (static_cast<uint64_t>(buffer[7]) << 56);
+#else
+    // See Load16() for the rationale of using memcpy().
+    uint64_t value;
+    std::memcpy(&value, ptr, 8);
+    return value;
+#endif
+  }
+
+  static inline void Store16(void *dst, uint16_t value) {
+    // Compiles to a single mov/str on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
+#else
+    // See Load16() for the rationale of using memcpy().
+    std::memcpy(dst, &value, 2);
+#endif
+  }
+
+  static void Store32(void *dst, uint32_t value) {
+    // Compiles to a single mov/str on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
+    buffer[2] = static_cast<uint8_t>(value >> 16);
+    buffer[3] = static_cast<uint8_t>(value >> 24);
+#else
+    // See Load16() for the rationale of using memcpy().
+    std::memcpy(dst, &value, 4);
+#endif
+  }
+
+  static void Store64(void* dst, uint64_t value) {
+    // Compiles to a single mov/str on recent clang and gcc.
+#if SNAPPY_IS_BIG_ENDIAN
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
+    buffer[2] = static_cast<uint8_t>(value >> 16);
+    buffer[3] = static_cast<uint8_t>(value >> 24);
+    buffer[4] = static_cast<uint8_t>(value >> 32);
+    buffer[5] = static_cast<uint8_t>(value >> 40);
+    buffer[6] = static_cast<uint8_t>(value >> 48);
+    buffer[7] = static_cast<uint8_t>(value >> 56);
+#else
+    // See Load16() for the rationale of using memcpy().
+    std::memcpy(dst, &value, 8);
+#endif
+  }
+
+  static inline constexpr bool IsLittleEndian() {
+#if SNAPPY_IS_BIG_ENDIAN
+    return false;
+#else
+    return true;
+#endif  // SNAPPY_IS_BIG_ENDIAN
+  }
+};
+
+// Some bit-manipulation functions.
+class Bits {
+ public:
+  // Return floor(log2(n)) for positive integer n.
+  static int Log2FloorNonZero(uint32_t n);
+
+  // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+  static int Log2Floor(uint32_t n);
+
+  // Return the index of the least significant set bit, 0-indexed.  Returns an
+  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
+  // that it's 0-indexed.
+  static int FindLSBSetNonZero(uint32_t n);
+  // Same as FindLSBSetNonZero(), for 64-bit inputs.
+  static int FindLSBSetNonZero64(uint64_t n);
+
+ private:
+  // No copying: the copy operations are private.
+  Bits(const Bits&);
+  void operator=(const Bits&);
+};
+
+#if HAVE_BUILTIN_CTZ
+
+inline int Bits::Log2FloorNonZero(uint32_t n) {
+  assert(n != 0);
+  // For x in [0, 31], (31 ^ x) equals (31 - x): subtracting from all-ones in
+  // base 2 never borrows. GCC and Clang lower __builtin_clz on x86 to
+  // 31 ^ _bit_scan_reverse(x), so spelling this with XOR instead of "31 -"
+  // lets the optimizer strip the body down to _bit_scan_reverse(x).
+  //
+  const int leading_zeros = __builtin_clz(n);
+  return 31 ^ leading_zeros;
+}
+
+inline int Bits::Log2Floor(uint32_t n) {
+  return (n != 0) ? Bits::Log2FloorNonZero(n) : -1;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
+  assert(n != 0);  // __builtin_ctz(0) is undefined.
+  return __builtin_ctz(n);  // Trailing zero count == index of lowest set bit.
+}
+
+#elif defined(_MSC_VER)
+
+inline int Bits::Log2FloorNonZero(uint32_t n) {
+  assert(n != 0);  // Otherwise "where" would be returned uninitialized.
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
+  unsigned long where;
+  _BitScanReverse(&where, n);  // Stores the index of the highest set bit.
+  return static_cast<int>(where);
+}
+
+inline int Bits::Log2Floor(uint32_t n) {
+  // _BitScanReverse() returns zero when n has no set bit; that case maps
+  // to -1, otherwise the reported bit index is the result.
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
+  unsigned long where;
+  return _BitScanReverse(&where, n) ? static_cast<int>(where) : -1;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
+  assert(n != 0);
+  // The fallback of 32 is only reachable when the precondition is violated
+  // (n == 0) in a build where assert() is compiled out.
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
+  unsigned long where;
+  return _BitScanForward(&where, n) ? static_cast<int>(where) : 32;
+}
+
+#else  // Portable versions.
+
+inline int Bits::Log2FloorNonZero(uint32_t n) {
+  assert(n != 0);
+
+  // Binary search over the bit position: try shifts of 16, 8, 4, 2 and 1,
+  // keeping each one that still leaves a non-zero remainder.
+  int result = 0;
+  uint32_t remaining = n;
+  for (int width = 16; width > 0; width >>= 1) {
+    const uint32_t upper = remaining >> width;
+    if (upper == 0) continue;
+    remaining = upper;
+    result += width;
+  }
+  assert(remaining == 1);
+  return result;
+}
+
+inline int Bits::Log2Floor(uint32_t n) {
+  return (n != 0) ? Bits::Log2FloorNonZero(n) : -1;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
+  assert(n != 0);
+
+  // Binary search: push the lowest set bit towards bit 31 in steps of
+  // 16, 8, 4, 2 and 1; every step that keeps n non-zero lowers the index.
+  int index = 31;
+  for (int step = 16; step > 0; step >>= 1) {
+    const uint32_t shifted = n << step;
+    if (shifted == 0) continue;
+    n = shifted;
+    index -= step;
+  }
+  return index;
+}
+
+#endif  // End portable versions.
+
+#if HAVE_BUILTIN_CTZ
+
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
+  assert(n != 0);  // __builtin_ctzll(0) is undefined.
+  return __builtin_ctzll(n);  // Trailing zero count == lowest set bit index.
+}
+
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
+// _BitScanForward64() is only available on x64 and ARM64.
+
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
+  assert(n != 0);
+  // The fallback of 64 is only reachable when the precondition is violated
+  // (n == 0) in a build where assert() is compiled out.
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
+  unsigned long where;
+  return _BitScanForward64(&where, n) ? static_cast<int>(where) : 64;
+}
+
+#else  // Portable version.
+
+// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
+  assert(n != 0);
+  // Split into 32-bit halves and search the low half first.
+  const uint32_t low_half = static_cast<uint32_t>(n);
+  const uint32_t high_half = static_cast<uint32_t>(n >> 32);
+  if (low_half != 0) {
+    return FindLSBSetNonZero(low_half);
+  }
+  // Low half is empty, so the answer lies in the high half, offset by 32.
+  return 32 + FindLSBSetNonZero(high_half);
+}
+
+#endif  // HAVE_BUILTIN_CTZ
+
+// Variable-length integer encoding.
+class Varint {
+ public:
+  // Maximum length, in bytes, of the varint encoding of a uint32_t.
+  static const int kMax32 = 5;
+
+  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
+  // Never reads a character at or beyond limit.  If a valid/terminated varint32
+  // was found in the range, stores it in *OUTPUT and returns a pointer just
+  // past the last byte of the varint32. Else returns NULL.  On success,
+  // "result <= limit".
+  static const char* Parse32WithLimit(const char* ptr, const char* limit,
+                                      uint32_t* OUTPUT);
+
+  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v"
+  //            (kMax32 bytes always suffice).
+  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the byte
+  //            just past the last encoded byte.
+  static char* Encode32(char* ptr, uint32_t v);
+
+  // EFFECTS    Appends the varint representation of "value" to "*s".
+  static void Append32(std::string* s, uint32_t value);
+};
+
+inline const char* Varint::Parse32WithLimit(const char* p,
+                                            const char* l,
+                                            uint32_t* OUTPUT) {
+  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
+  const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
+  uint32_t b, result;  // Current byte, and the value decoded so far.
+  if (ptr >= limit) return NULL;  // Input ended before a terminating byte.
+  b = *(ptr++); result = b & 127;          if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) <<  7; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
+  return NULL;       // Value is too long to be a varint32
+ done:
+  *OUTPUT = result;  // Written only on success.
+  return reinterpret_cast<const char*>(ptr);
+}
+
+inline char* Varint::Encode32(char* sptr, uint32_t v) {
+  // Operate on characters as unsigneds
+  uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr);
+  static const uint8_t B = 128;  // Continuation bit: another byte follows.
+  if (v < (1 << 7)) {  // Fits in 7 bits: one byte.
+    *(ptr++) = static_cast<uint8_t>(v);
+  } else if (v < (1 << 14)) {  // Two bytes.
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 7);
+  } else if (v < (1 << 21)) {  // Three bytes.
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 14);
+  } else if (v < (1 << 28)) {  // Four bytes.
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 14) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 21);
+  } else {  // Five bytes; the last one carries the top four bits.
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v>>7) | B);
+    *(ptr++) = static_cast<uint8_t>((v>>14) | B);
+    *(ptr++) = static_cast<uint8_t>((v>>21) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 28);
+  }
+  return reinterpret_cast<char*>(ptr);
+}
+
+// If you know the internal layout of the std::string in use, you can
+// replace this function with one that resizes the string without
+// filling the new space with zeros (if applicable) --
+// it will be non-portable but faster.
+inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
+  s->resize(new_size);  // Portable fallback: any new bytes are zero-filled.
+}
+
+// Return a mutable char* pointing to a string's internal buffer,
+// which may not be null-terminated. Writing through this pointer will
+// modify the string. Returns NULL for an empty string.
+// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
+// next call to a string method that invalidates iterators.
+//
+// As of 2006-04, there is no standard-blessed way of getting a
+// mutable reference to a string's internal buffer. However, issue 530
+// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
+// proposes this as the method. It will officially be part of the standard
+// for C++0x. This should already work on all current implementations.
+inline char* string_as_array(std::string* str) {
+  if (str->empty()) return NULL;
+  return &*str->begin();
+}
+
+}  // namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
diff --git a/third_party/snappy/snappy-stubs-public.h.in b/third_party/snappy/snappy-stubs-public.h.in
new file mode 100644
index 0000000000..02947fabd5
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-public.h.in
@@ -0,0 +1,63 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various type stubs for the open-source version of Snappy.
+//
+// This file cannot include config.h, as it is included from snappy.h,
+// which is a public header. Instead, snappy-stubs-public.h is generated
+// from snappy-stubs-public.h.in at configure time.
+
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+
+#include <cstddef>
+
+#if ${HAVE_SYS_UIO_H_01}  // HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#endif  // HAVE_SYS_UIO_H
+
+#define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR}
+#define SNAPPY_MINOR ${PROJECT_VERSION_MINOR}
+#define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH}
+#define SNAPPY_VERSION \
+    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+namespace snappy {
+
+#if !${HAVE_SYS_UIO_H_01}  // !HAVE_SYS_UIO_H
+// Windows does not have an iovec type, yet the concept is universally useful.
+// It is simple to define it ourselves, so we put it inside our own namespace.
+struct iovec {
+  void* iov_base;  // Start of the buffer.
+  size_t iov_len;  // Length of the buffer, in bytes.
+};
+#endif  // !HAVE_SYS_UIO_H
+
+}  // namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
diff --git a/third_party/snappy/snappy.cc b/third_party/snappy/snappy.cc
new file mode 100644
index 0000000000..d4147185d6
--- /dev/null
+++ b/third_party/snappy/snappy.cc
@@ -0,0 +1,2427 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy.h"
+#if !defined(SNAPPY_HAVE_BMI2)
+// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
+// specifically, but it does define __AVX2__ when AVX2 support is available.
+// Fortunately, AVX2 was introduced in Haswell, just like BMI2.
+//
+// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
+// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
+// case issuing BMI2 instructions results in a compiler error.
+#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+#define SNAPPY_HAVE_BMI2 1
+#else
+#define SNAPPY_HAVE_BMI2 0
+#endif
+#endif  // !defined(SNAPPY_HAVE_BMI2)
+
+#if !defined(SNAPPY_HAVE_X86_CRC32)
+#if defined(__SSE4_2__)
+#define SNAPPY_HAVE_X86_CRC32 1
+#else
+#define SNAPPY_HAVE_X86_CRC32 0
+#endif
+#endif  // !defined(SNAPPY_HAVE_X86_CRC32)
+
+#if !defined(SNAPPY_HAVE_NEON_CRC32)
+#if SNAPPY_HAVE_NEON && defined(__ARM_FEATURE_CRC32)
+#define SNAPPY_HAVE_NEON_CRC32 1
+#else
+#define SNAPPY_HAVE_NEON_CRC32 0
+#endif
+#endif  // !defined(SNAPPY_HAVE_NEON_CRC32)
+
+#if SNAPPY_HAVE_BMI2 || SNAPPY_HAVE_X86_CRC32
+// Please do not replace with <x86intrin.h> or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <immintrin.h>
+#elif SNAPPY_HAVE_NEON_CRC32
+#include <arm_acle.h>
+#endif
+
+#if defined(__GNUC__)
+#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
+#else
+#define SNAPPY_PREFETCH(ptr) (void)(ptr)
+#endif
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace snappy {
+
+namespace {
+
+// The amount of slop bytes writers are using for unconditional copies.
+constexpr int kSlopBytes = 64;
+
+using internal::char_table;
+using internal::COPY_1_BYTE_OFFSET;
+using internal::COPY_2_BYTE_OFFSET;
+using internal::COPY_4_BYTE_OFFSET;
+using internal::kMaximumTagLength;
+using internal::LITERAL;
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+using internal::V128;
+using internal::V128_Load;
+using internal::V128_LoadU;
+using internal::V128_Shuffle;
+using internal::V128_StoreU;
+using internal::V128_DupChar;
+#endif
+
+// We translate the information encoded in a tag through a lookup table to a
+// format that requires fewer instructions to decode. Effectively we store
+// the length minus the tag part of the offset. The least significant byte
+// thus stores the length, while total length - offset is given by
+// entry - ExtractOffset(type). The nice thing is that the subtraction
+// immediately sets the flags for the necessary check that offset >= length.
+// This folds the cmp with sub. We engineer the long literals and copy-4 to
+// always fail this check, so their presence doesn't affect the fast path.
+// To prevent literals from triggering the guard against offset < length (offset
+// does not apply to literals) the table is giving them a spurious offset of
+// 256.
+inline constexpr int16_t MakeEntry(int16_t len, int16_t offset) {
+  return len - (offset << 8);  // i.e. len - 256 * offset
+}
+
+inline constexpr int16_t LengthMinusOffset(int data, int type) {
+  return type == 3   ? 0xFF                    // copy-4: made to fail the check
+         : type == 2 ? MakeEntry(data + 1, 0)  // copy-2: length is data + 1
+         : type == 1 ? MakeEntry((data & 7) + 4, data >> 3)  // copy-1
+         : data < 60 ? MakeEntry(data + 1, 1)  // literal; spurious offset 1 << 8
+                     : 0xFF;                   // long literal: likewise fails
+}
+
+inline constexpr int16_t LengthMinusOffset(uint8_t tag) {
+  return LengthMinusOffset(tag >> 2, tag & 3);  // type: low 2 bits of the tag
+}
+
+template <size_t... Ints>
+struct index_sequence {};  // compile-time pack of size_t values
+
+template <std::size_t N, size_t... Is>  // recursive step: prepends N - 1 to Is
+struct make_index_sequence : make_index_sequence<N - 1, N - 1, Is...> {};
+
+template <size_t... Is>  // base case: Is is now the fully accumulated pack
+struct make_index_sequence<0, Is...> : index_sequence<Is...> {};
+
+template <size_t... seq>  // seq: the tag byte values to tabulate
+constexpr std::array<int16_t, 256> MakeTable(index_sequence<seq...>) {
+  return std::array<int16_t, 256>{LengthMinusOffset(seq)...};
+}
+
+alignas(64) const std::array<int16_t, 256> kLengthMinusOffset =
+    MakeTable(make_index_sequence<256>{});  // indexed by tag byte, 0..255
+
+// Given a table of uint16_t whose size is mask / 2 + 1, return a pointer to the
+// relevant entry for the given bytes.  Any hash function will do,
+// but a good hash function reduces the number of collisions and thus yields
+// better compression for compressible input.
+//
+// REQUIRES: mask is 2 * (table_size - 1), and table_size is a power of two.
+inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
+  // Our choice is quicker-and-dirtier than the typical hash function;
+  // empirically, that seems beneficial.  The upper bits of kMagic * bytes are a
+  // higher-quality hash than the lower bits, so when using kMagic * bytes we
+  // also shift right to get a higher-quality end result.  There's no similar
+  // issue with a CRC because all of the output bits of a CRC are equally good
+  // "hashes." So, a CPU instruction for CRC, if available, tends to be a good
+  // choice.
+#if SNAPPY_HAVE_NEON_CRC32
+  // We use mask as the second arg to the CRC function, as it's about to
+  // be used anyway; it'd be equally correct to use 0 or some constant.
+  // Mathematically, _mm_crc32_u32 (or similar) is a function of the
+  // xor of its arguments.
+  const uint32_t hash = __crc32cw(bytes, mask);
+#elif SNAPPY_HAVE_X86_CRC32
+  const uint32_t hash = _mm_crc32_u32(bytes, mask);
+#else
+  constexpr uint32_t kMagic = 0x1e35a7bd;
+  const uint32_t hash = (kMagic * bytes) >> (31 - kMaxHashTableBits);
+#endif
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));  // added as a byte offset
+}
+
+}  // namespace
+
+size_t MaxCompressedLength(size_t source_bytes) {
+  // Compressed data can be defined as:
+  //    compressed := item* literal*
+  //    item       := literal* copy
+  //
+  // The trailing literal sequence has a space blowup of at most 62/60
+  // since a literal of length 60 needs one tag byte + one extra byte
+  // for length information.
+  //
+  // Item blowup is trickier to measure.  Suppose the "copy" op copies
+  // 4 bytes of data.  Because of a special check in the encoding code,
+  // we produce a 4-byte copy only if the offset is < 65536.  Therefore
+  // the copy op takes 3 bytes to encode, and this type of item leads
+  // to at most the 62/60 blowup for representing literals.
+  //
+  // Suppose the "copy" op copies 5 bytes of data.  If the offset is big
+  // enough, it will take 5 bytes to encode the copy op.  Therefore the
+  // worst case here is a one-byte literal followed by a five-byte copy.
+  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
+  //
+  // This last factor dominates the blowup, so the final estimate is:
+  return 32 + source_bytes + source_bytes / 6;  // 7/6 blowup + 32 fixed bytes
+}
+
+namespace {
+
+void UnalignedCopy64(const void* src, void* dst) {
+  char tmp[8];  // staging buffer: all 8 bytes are read before any is stored
+  std::memcpy(tmp, src, 8);
+  std::memcpy(dst, tmp, 8);
+}
+
+void UnalignedCopy128(const void* src, void* dst) {
+  // std::memcpy() gets vectorized when the appropriate compiler options are
+  // used. For example, x86 compilers targeting SSE2+ will optimize to an SSE2
+  // load and store.
+  char tmp[16];  // staging buffer: all 16 bytes are read before any is stored
+  std::memcpy(tmp, src, 16);
+  std::memcpy(dst, tmp, 16);
+}
+
+template <bool use_16bytes_chunk>
+inline void ConditionalUnalignedCopy128(const char* src, char* dst) {
+  if (use_16bytes_chunk) {  // template argument, fixed at compile time
+    UnalignedCopy128(src, dst);
+  } else {
+    UnalignedCopy64(src, dst);
+    UnalignedCopy64(src + 8, dst + 8);  // reads after the first 8 are stored
+  }
+}
+
+// Copy [src, src+(op_limit-op)) to [op, op_limit) a byte at a time. Used
+// for handling COPY operations where the input and output regions may overlap.
+// For example, suppose:
+//    src       == "ab"
+//    op        == src + 2
+//    op_limit  == op + 20
+// After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+// copies of "ab"
+//    ababababababababababab
+// Note that this does not match the semantics of either std::memcpy() or
+// std::memmove().
+inline char* IncrementalCopySlow(const char* src, char* op,
+                                 char* const op_limit) {
+  // TODO: Remove pragma when LLVM is aware this
+  // function is only called in cold regions and when cold regions don't get
+  // vectorized or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
+  while (op < op_limit) {
+    *op++ = *src++;  // may read a byte this same loop wrote earlier
+  }
+  return op_limit;
+}
+
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+
+// Computes the bytes for shuffle control mask (please read comments on
+// 'pattern_generation_masks' as well) for the given index_offset and
+// pattern_size. For example, when the 'offset' is 6, it will generate a
+// repeating pattern of size 6. So, the first 16 byte indexes will correspond to
+// the pattern-bytes {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3} and the
+// next 16 byte indexes will correspond to the pattern-bytes {4, 5, 0, 1, 2, 3,
+// 4, 5, 0, 1, 2, 3, 4, 5, 0, 1}. These byte index sequences are generated by
+// calling MakePatternMaskBytes(0, 6, make_index_sequence<16>()) and
+// MakePatternMaskBytes(16, 6, make_index_sequence<16>()) respectively.
+template <size_t... indexes>
+inline constexpr std::array<char, sizeof...(indexes)> MakePatternMaskBytes(
+    int index_offset, int pattern_size, index_sequence<indexes...>) {
+  return {static_cast<char>((index_offset + indexes) % pattern_size)...};
+}
+
+// Builds the table of shuffle control masks: one sizeof(V128)-byte row from
+// MakePatternMaskBytes() per pattern size (pattern_sizes_minus_one + 1).
+template <size_t... pattern_sizes_minus_one>
+inline constexpr std::array<std::array<char, sizeof(V128)>,
+                            sizeof...(pattern_sizes_minus_one)>
+MakePatternMaskBytesTable(int index_offset,
+                          index_sequence<pattern_sizes_minus_one...>) {
+  return {
+      MakePatternMaskBytes(index_offset, pattern_sizes_minus_one + 1,
+                           make_index_sequence</*indexes=*/sizeof(V128)>())...};
+}
+
+// This is an array of shuffle control masks that can be used as the source
+// operand for PSHUFB to permute the contents of the destination XMM register
+// into a repeating byte pattern. Row i is the mask for pattern size i + 1.
+alignas(16) constexpr std::array<std::array<char, sizeof(V128)>,
+                                 16> pattern_generation_masks =
+    MakePatternMaskBytesTable(
+        /*index_offset=*/0,
+        /*pattern_sizes_minus_one=*/make_index_sequence<16>());
+
+// Similar to 'pattern_generation_masks', this table is used to "rotate" the
+// pattern so that we can copy the *next 16 bytes* consistent with the pattern.
+// Basically, pattern_reshuffle_masks is a continuation of
+// pattern_generation_masks. It follows that pattern_reshuffle_masks is the same
+// as pattern_generation_masks for offsets 1, 2, 4, 8 and 16.
+alignas(16) constexpr std::array<std::array<char, sizeof(V128)>,
+                                 16> pattern_reshuffle_masks =
+    MakePatternMaskBytesTable(
+        /*index_offset=*/16,
+        /*pattern_sizes_minus_one=*/make_index_sequence<16>());
+
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+static inline V128 LoadPattern(const char* src, const size_t pattern_size) {
+  V128 generation_mask = V128_Load(reinterpret_cast<const V128*>(
+      pattern_generation_masks[pattern_size - 1].data()));
+  // Bytes past the pattern may be uninitialized; the mask never selects them.
+  // TODO: remove annotation and macro defs once MSan is fixed.
+  SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(src + pattern_size, 16 - pattern_size);
+  return V128_Shuffle(V128_LoadU(reinterpret_cast<const V128*>(src)),
+                      generation_mask);
+}
+
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+static inline std::pair<V128 /* pattern */, V128 /* reshuffle_mask */>
+LoadPatternAndReshuffleMask(const char* src, const size_t pattern_size) {
+  V128 pattern = LoadPattern(src, pattern_size);
+
+  // This mask will generate the next 16 bytes in-place. Doing so enables us to
+  // write the data with at most 4 V128_StoreU calls.
+  //
+  // For example, suppose pattern is:        abcdefabcdefabcd
+  // Shuffling with this mask will generate: efabcdefabcdefab
+  // Shuffling again will generate:          cdefabcdefabcdef
+  V128 reshuffle_mask = V128_Load(reinterpret_cast<const V128*>(
+      pattern_reshuffle_masks[pattern_size - 1].data()));
+  return {pattern, reshuffle_mask};
+}
+
+#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+
+// Fallback for when we need to copy while extending the pattern, for example
+// copying 10 bytes from 3 positions back abc -> abcabcabcabca. Always writes
+// 64 bytes at dst; returns false (writing nothing) only when offset == 0.
+// REQUIRES: [dst - offset, dst + 64) is a valid address range.
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+static inline bool Copy64BytesWithPatternExtension(char* dst, size_t offset) {
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+  if (SNAPPY_PREDICT_TRUE(offset <= 16)) {
+    switch (offset) {
+      case 0:
+        return false;
+      case 1: {
+        // TODO: Ideally we should memset, move back once the
+        // codegen issues are fixed.
+        V128 pattern = V128_DupChar(dst[-1]);
+        for (int i = 0; i < 4; i++) {
+          V128_StoreU(reinterpret_cast<V128*>(dst + 16 * i), pattern);
+        }
+        return true;
+      }
+      case 2:
+      case 4:
+      case 8:
+      case 16: {
+        V128 pattern = LoadPattern(dst - offset, offset);
+        for (int i = 0; i < 4; i++) {
+          V128_StoreU(reinterpret_cast<V128*>(dst + 16 * i), pattern);
+        }
+        return true;
+      }
+      default: {
+        auto pattern_and_reshuffle_mask =
+            LoadPatternAndReshuffleMask(dst - offset, offset);
+        V128 pattern = pattern_and_reshuffle_mask.first;
+        V128 reshuffle_mask = pattern_and_reshuffle_mask.second;
+        for (int i = 0; i < 4; i++) {
+          V128_StoreU(reinterpret_cast<V128*>(dst + 16 * i), pattern);
+          pattern = V128_Shuffle(pattern, reshuffle_mask);
+        }
+        return true;
+      }
+    }
+  }
+#else
+  if (SNAPPY_PREDICT_TRUE(offset < 16)) {
+    if (SNAPPY_PREDICT_FALSE(offset == 0)) return false;
+    // Extend the pattern to the first 16 bytes.
+    // The simpler formulation of `dst[i - offset]` induces undefined behavior.
+    for (int i = 0; i < 16; i++) dst[i] = (dst - offset)[i];
+    // Use the smallest multiple of the pattern size that exceeds 16.
+    static std::array<uint8_t, 16> pattern_sizes = []() {
+      std::array<uint8_t, 16> res;
+      for (int i = 1; i < 16; i++) res[i] = (16 / i + 1) * i;
+      return res;
+    }();
+    offset = pattern_sizes[offset];
+    for (int i = 1; i < 4; i++) {
+      std::memcpy(dst + i * 16, dst + i * 16 - offset, 16);
+    }
+    return true;
+  }
+#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+
+  // Very rare: offset is at least 16, so each 16-byte memcpy is overlap-free.
+  for (int i = 0; i < 4; i++) {
+    std::memcpy(dst + i * 16, dst + i * 16 - offset, 16);
+  }
+  return true;
+}
+
+// Copy [src, src+(op_limit-op)) to [op, op_limit) but faster than
+// IncrementalCopySlow, and return op_limit. buf_limit is the address past the
+// end of the writable region of the buffer.
+inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+                             char* const buf_limit) {
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+  constexpr int big_pattern_size_lower_bound = 16;
+#else
+  constexpr int big_pattern_size_lower_bound = 8;
+#endif
+
+  // Terminology:
+  //
+  // slop = buf_limit - op
+  // pat  = op - src
+  // len  = op_limit - op
+  assert(src < op);
+  assert(op < op_limit);
+  assert(op_limit <= buf_limit);
+  // NOTE: The copy tags use 3 or 6 bits to store the copy length, so len <= 64.
+  assert(op_limit - op <= 64);
+  // NOTE: In practice the compressor always emits len >= 4, so it is ok to
+  // assume that to optimize this function, but this is not guaranteed by the
+  // compression format, so we have to also handle len < 4 in case the input
+  // does not satisfy these conditions.
+
+  size_t pattern_size = op - src;
+  // The cases are split into different branches to allow the branch predictor,
+  // FDO, and static prediction hints to work better. For each input we list the
+  // ratio of invocations that match each condition.
+  //
+  // input        slop < 16   pat < 8  len > 16
+  // ------------------------------------------
+  // html|html4|cp   0%         1.01%    27.73%
+  // urls            0%         0.88%    14.79%
+  // jpg             0%        64.29%     7.14%
+  // pdf             0%         2.56%    58.06%
+  // txt[1-4]        0%         0.23%     0.97%
+  // pb              0%         0.96%    13.88%
+  // bin             0.01%     22.27%    41.17%
+  //
+  // It is very rare that we don't have enough slop for doing block copies. It
+  // is also rare that we need to expand a pattern. Small patterns are common
+  // for incompressible formats and for those we are plenty fast already.
+  // Lengths are normally not greater than 16 but they vary depending on the
+  // input. In general if we always predict len <= 16 it would be an ok
+  // prediction.
+  //
+  // In order to be fast we want a pattern >= 16 bytes (or 8 bytes in non-SSE)
+  // and an unrolled loop copying 1x 16 bytes (or 2x 8 bytes in non-SSE) at a
+  // time.
+
+  // Handle the uncommon case where pattern is less than 16 (or 8 in non-SSE)
+  // bytes.
+  if (pattern_size < big_pattern_size_lower_bound) {
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+    // Load the bytes at src into a 128-bit XMM register, then use PSHUFB
+    // to permute the register's contents in-place into a repeating sequence of
+    // the first "pattern_size" bytes.
+    // For example, suppose:
+    //    src       == "abc"
+    //    op        == src + 3
+    // After V128_Shuffle(), "pattern" will have five copies of "abc"
+    // followed by one byte of slop: abcabcabcabcabca.
+    //
+    // The non-SSE fallback implementation suffers from store-forwarding stalls
+    // because its loads and stores partly overlap. By expanding the pattern
+    // in-place, we avoid the penalty.
+
+    // Typically, the op_limit is the gating factor so try to simplify the loop
+    // based on that.
+    if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) {
+      auto pattern_and_reshuffle_mask =
+          LoadPatternAndReshuffleMask(src, pattern_size);
+      V128 pattern = pattern_and_reshuffle_mask.first;
+      V128 reshuffle_mask = pattern_and_reshuffle_mask.second;
+
+      // There is at least one, and at most four 16-byte blocks. Writing four
+      // conditionals instead of a loop allows FDO to layout the code with
+      // respect to the actual probabilities of each length.
+      // TODO: Replace with loop with trip count hint.
+      V128_StoreU(reinterpret_cast<V128*>(op), pattern);
+
+      if (op + 16 < op_limit) {
+        pattern = V128_Shuffle(pattern, reshuffle_mask);
+        V128_StoreU(reinterpret_cast<V128*>(op + 16), pattern);
+      }
+      if (op + 32 < op_limit) {
+        pattern = V128_Shuffle(pattern, reshuffle_mask);
+        V128_StoreU(reinterpret_cast<V128*>(op + 32), pattern);
+      }
+      if (op + 48 < op_limit) {
+        pattern = V128_Shuffle(pattern, reshuffle_mask);
+        V128_StoreU(reinterpret_cast<V128*>(op + 48), pattern);
+      }
+      return op_limit;
+    }
+    char* const op_end = buf_limit - 15;
+    if (SNAPPY_PREDICT_TRUE(op < op_end)) {
+      auto pattern_and_reshuffle_mask =
+          LoadPatternAndReshuffleMask(src, pattern_size);
+      V128 pattern = pattern_and_reshuffle_mask.first;
+      V128 reshuffle_mask = pattern_and_reshuffle_mask.second;
+
+      // This code path is relatively cold however so we save code size
+      // by avoiding unrolling and vectorizing.
+      //
+      // TODO: Remove pragma when cold regions don't get
+      // vectorized or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
+      do {
+        V128_StoreU(reinterpret_cast<V128*>(op), pattern);
+        pattern = V128_Shuffle(pattern, reshuffle_mask);
+        op += 16;
+      } while (SNAPPY_PREDICT_TRUE(op < op_end));
+    }
+    return IncrementalCopySlow(op - pattern_size, op, op_limit);
+#else   // !SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+    // If plenty of buffer space remains, expand the pattern to at least 8
+    // bytes. The way the following loop is written, we need 8 bytes of buffer
+    // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
+    // bytes if pattern_size is 2.  Precisely encoding that is probably not
+    // worthwhile; instead, invoke the slow path if we cannot write 11 bytes
+    // (because 11 are required in the worst case).
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) {
+      while (pattern_size < 8) {
+        UnalignedCopy64(src, op);
+        op += pattern_size;
+        pattern_size *= 2;
+      }
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+    } else {
+      return IncrementalCopySlow(src, op, op_limit);
+    }
+#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+  }
+  assert(pattern_size >= big_pattern_size_lower_bound);
+  constexpr bool use_16bytes_chunk = big_pattern_size_lower_bound == 16;
+
+  // Copy 1x 16 bytes (or 2x 8 bytes in non-SSE) at a time. Because op - src can
+  // be < 16 in non-SSE, a single UnalignedCopy128 might overwrite data in op.
+  // UnalignedCopy64 is safe because expanding the pattern to at least 8 bytes
+  // guarantees that op - src >= 8.
+  //
+  // Typically, the op_limit is the gating factor so try to simplify the loop
+  // based on that.
+  if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) {
+    // There is at least one, and at most four 16-byte blocks. Writing four
+    // conditionals instead of a loop allows FDO to layout the code with respect
+    // to the actual probabilities of each length.
+    // TODO: Replace with loop with trip count hint.
+    ConditionalUnalignedCopy128<use_16bytes_chunk>(src, op);
+    if (op + 16 < op_limit) {
+      ConditionalUnalignedCopy128<use_16bytes_chunk>(src + 16, op + 16);
+    }
+    if (op + 32 < op_limit) {
+      ConditionalUnalignedCopy128<use_16bytes_chunk>(src + 32, op + 32);
+    }
+    if (op + 48 < op_limit) {
+      ConditionalUnalignedCopy128<use_16bytes_chunk>(src + 48, op + 48);
+    }
+    return op_limit;
+  }
+
+  // Fall back to doing as much as we can with the available slop in the
+  // buffer. This code path is relatively cold however so we save code size by
+  // avoiding unrolling and vectorizing.
+  //
+  // TODO: Remove pragma when cold regions don't get vectorized
+  // or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
+  for (char* op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
+    ConditionalUnalignedCopy128<use_16bytes_chunk>(src, op);
+  }
+  if (op >= op_limit) return op_limit;
+
+  // We only take this branch if we didn't have enough slop and we can do a
+  // single 8 byte copy.
+  if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
+    UnalignedCopy64(src, op);
+    src += 8;
+    op += 8;
+  }
+  return IncrementalCopySlow(src, op, op_limit);
+}
+
+}  // namespace
+
+template <bool allow_fast_path>
+static inline char* EmitLiteral(char* op, const char* literal, int len) {
+  // The vast majority of copies are below 16 bytes, for which a
+  // call to std::memcpy() is overkill. This fast path can sometimes
+  // copy up to 15 bytes too much, but that is okay in the
+  // main loop, since we have a bit to go on for both sides:
+  //
+  //   - The input will always have kInputMarginBytes = 15 extra
+  //     available bytes, as long as we're in the main loop, and
+  //     if not, allow_fast_path = false.
+  //   - The output will always have 32 spare bytes (see
+  //     MaxCompressedLength).
+  assert(len > 0);  // Zero-length literals are disallowed
+  int n = len - 1;
+  if (allow_fast_path && len <= 16) {
+    // Fits in tag byte
+    *op++ = LITERAL | (n << 2);
+
+    UnalignedCopy128(literal, op);
+    return op + len;
+  }
+
+  if (n < 60) {
+    // Fits in tag byte
+    *op++ = LITERAL | (n << 2);
+  } else {
+    int count = (Bits::Log2Floor(n) >> 3) + 1;
+    assert(count >= 1);
+    assert(count <= 4);
+    *op++ = LITERAL | ((59 + count) << 2);
+    // Encode in upcoming bytes.
+    // Write 4 bytes, though we may care about only 1 of them. The output buffer
+    // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds
+    // here and there is a std::memcpy() of size 'len' below.
+    LittleEndian::Store32(op, n);
+    op += count;
+  }
+  // When allow_fast_path is true, we may write up to 15 bytes past op + len.
+  if (allow_fast_path) {
+    char* destination = op;
+    const char* source = literal;
+    const char* end = destination + len;
+    do {
+      std::memcpy(destination, source, 16);
+      destination += 16;
+      source += 16;
+    } while (destination < end);
+  } else {
+    std::memcpy(op, literal, len);
+  }
+  return op + len;
+}
+
+template <bool len_less_than_12>
+static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
+  assert(len <= 64);
+  assert(len >= 4);
+  assert(offset < 65536);
+  assert(len_less_than_12 == (len < 12));
+
+  if (len_less_than_12) {
+    uint32_t u = (len << 2) + (offset << 8);
+    uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
+    uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
+    // It turns out that offset < 2048 is a difficult to predict branch.
+    // `perf record` shows this is the highest percentage of branch misses in
+    // benchmarks. This code produces branch free code, the data dependency
+    // chain that bottlenecks the throughput is so long that a few extra
+    // instructions are completely free (IPC << 6 because of data deps).
+    u += offset < 2048 ? copy1 : copy2;
+    LittleEndian::Store32(op, u);
+    op += offset < 2048 ? 2 : 3;  // keep 2 bytes for copy-1, 3 for copy-2
+  } else {
+    // Write 4 bytes, though we only care about 3 of them.  The output buffer
+    // is required to have some slack, so the extra byte won't overrun it.
+    uint32_t u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
+    LittleEndian::Store32(op, u);
+    op += 3;
+  }
+  return op;
+}
+
+template <bool len_less_than_12>
+static inline char* EmitCopy(char* op, size_t offset, size_t len) {
+  assert(len_less_than_12 == (len < 12));
+  if (len_less_than_12) {
+    return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+  } else {
+    // A special case for len <= 64 might help, but so far measurements suggest
+    // it's in the noise.
+
+    // Emit 64 byte copies but make sure to keep at least four bytes reserved.
+    while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
+      len -= 64;
+    }
+
+    // One or two copies will now finish the job.
+    if (len > 64) {  // 64 < len < 68 here, so 5..7 bytes remain afterwards
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
+      len -= 60;
+    }
+
+    // Emit remainder.
+    if (len < 12) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+    } else {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+    }
+    return op;
+  }
+}
+
+bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
+  uint32_t v = 0;
+  const char* limit = start + n;  // one past the last readable input byte
+  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {  // non-NULL: OK
+    *result = v;  // *result is written only on success
+    return true;
+  } else {
+    return false;
+  }
+}
+
+namespace {
+uint32_t CalculateTableSize(uint32_t input_size) {
+  static_assert(
+      kMaxHashTableSize >= kMinHashTableSize,
+      "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
+  if (input_size > kMaxHashTableSize) {
+    return kMaxHashTableSize;
+  }
+  if (input_size < kMinHashTableSize) {
+    return kMinHashTableSize;
+  }
+  // This equals 1 << Log2Ceiling(input_size), assuming input_size > 1, since
+  // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
+  return 2u << Bits::Log2Floor(input_size - 1);
+}
+}  // namespace
+
+namespace internal {
+WorkingMemory::WorkingMemory(size_t input_size) {
+  const size_t max_fragment_size = std::min(input_size, kBlockSize);
+  const size_t table_size = CalculateTableSize(max_fragment_size);
+  size_ = table_size * sizeof(*table_) + max_fragment_size +
+          MaxCompressedLength(max_fragment_size);
+  mem_ = std::allocator<char>().allocate(size_);  // one block for all three
+  table_ = reinterpret_cast<uint16_t*>(mem_);  // hash table at the start
+  input_ = mem_ + table_size * sizeof(*table_);  // input area after the table
+  output_ = input_ + max_fragment_size;  // output area after the input area
+}
+
+WorkingMemory::~WorkingMemory() {
+  std::allocator<char>().deallocate(mem_, size_);  // frees the single block
+}
+
+uint16_t* WorkingMemory::GetHashTable(size_t fragment_size,
+                                      int* table_size) const {
+  const size_t htsize = CalculateTableSize(fragment_size);
+  memset(table_, 0, htsize * sizeof(*table_));  // zero only the htsize entries
+  *table_size = htsize;
+  return table_;
+}
+}  // end namespace internal
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input" is at most "kBlockSize" bytes long.
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input.size())" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+namespace internal {
+char* CompressFragment(const char* input, size_t input_size, char* op,
+                       uint16_t* table, const int table_size) {
+  // "ip" is the input pointer, and "op" is the output pointer.
+  const char* ip = input;
+  assert(input_size <= kBlockSize);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
+  const uint32_t mask = 2 * (table_size - 1);
+  const char* ip_end = input + input_size;
+  const char* base_ip = ip;  // table entries store offsets relative to base_ip
+
+  const size_t kInputMarginBytes = 15;
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {  // shorter inputs skip matching and become one literal
+    const char* ip_limit = input + input_size - kInputMarginBytes;
+
+    for (uint32_t preload = LittleEndian::Load32(ip + 1);;) {
+      // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
+      // [next_emit, ip_end) after the main loop.
+      const char* next_emit = ip++;
+      uint64_t data = LittleEndian::Load64(ip);
+      // The body of this loop calls EmitLiteral once and then EmitCopy one or
+      // more times.  (The exception is that when we're close to exhausting
+      // the input we goto emit_remainder.)
+      //
+      // In the first iteration of this loop we're just starting, so
+      // there's nothing to copy, so calling EmitLiteral once is
+      // necessary.  And we only start a new iteration when the
+      // current iteration has determined that a call to EmitLiteral will
+      // precede the next call to EmitCopy (if any).
+      //
+      // Step 1: Scan forward in the input looking for a 4-byte-long match.
+      // If we get close to exhausting the input then goto emit_remainder.
+      //
+      // Heuristic match skipping: If 32 bytes are scanned with no matches
+      // found, start looking only at every other byte. If 32 more bytes are
+      // scanned (or skipped), look at every third byte, etc. When a match is
+      // found, immediately go back to looking at every byte. This is a small
+      // loss (~5% performance, ~0.1% density) for compressible data due to more
+      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+      // win since the compressor quickly "realizes" the data is incompressible
+      // and doesn't bother looking for matches everywhere.
+      //
+      // The "skip" variable keeps track of how many bytes there are since the
+      // last match; dividing it by 32 (ie. right-shifting by five) gives the
+      // number of bytes to move ahead for each iteration.
+      uint32_t skip = 32;
+
+      const char* candidate;
+      if (ip_limit - ip >= 16) {
+        auto delta = ip - base_ip;
+        for (int j = 0; j < 4; ++j) {
+          for (int k = 0; k < 4; ++k) {
+            int i = 4 * j + k;
+            // These for-loops are meant to be unrolled. So we can freely
+            // special case the first iteration to use the value already
+            // loaded in preload.
+            uint32_t dword = i == 0 ? preload : static_cast<uint32_t>(data);
+            assert(dword == LittleEndian::Load32(ip + i));
+            uint16_t* table_entry = TableEntry(table, dword, mask);
+            candidate = base_ip + *table_entry;
+            assert(candidate >= base_ip);
+            assert(candidate < ip + i);
+            *table_entry = delta + i;  // record position ip + i as an offset from base_ip
+            if (SNAPPY_PREDICT_FALSE(LittleEndian::Load32(candidate) == dword)) {
+              *op = LITERAL | (i << 2);  // literal tag; the literal [next_emit, ip + i) is i + 1 bytes, stored as length - 1
+              UnalignedCopy128(next_emit, op + 1);  // wide copy; op advances by just the i + 1 literal bytes
+              ip += i;
+              op = op + i + 2;  // 1 tag byte + (i + 1) literal bytes
+              goto emit_match;
+            }
+            data >>= 8;
+          }
+          data = LittleEndian::Load64(ip + 4 * j + 4);
+        }
+        ip += 16;  // 16 positions were probed without finding a match
+        skip += 16;
+      }
+      while (true) {
+        assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
+        uint16_t* table_entry = TableEntry(table, data, mask);
+        uint32_t bytes_between_hash_lookups = skip >> 5;
+        skip += bytes_between_hash_lookups;
+        const char* next_ip = ip + bytes_between_hash_lookups;
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
+          ip = next_emit;  // everything from next_emit on is emitted as the final literal
+          goto emit_remainder;
+        }
+        candidate = base_ip + *table_entry;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                LittleEndian::Load32(candidate))) {
+          break;
+        }
+        data = LittleEndian::Load32(next_ip);
+        ip = next_ip;
+      }
+
+      // Step 2: A 4-byte match has been found.  We'll later see if more
+      // than 4 bytes match.  But, prior to the match, input
+      // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
+      assert(next_emit + 16 <= ip_end);
+      op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
+
+      // Step 3: Call EmitCopy, and then see if another EmitCopy could
+      // be our next move.  Repeat until we find no match for the
+      // input immediately after what was consumed by the last EmitCopy call.
+      //
+      // If we exit this loop normally then we need to call EmitLiteral next,
+      // though we don't yet know how big the literal will be.  We handle that
+      // by proceeding to the next iteration of the main loop.  We also can exit
+      // this loop via goto if we get close to exhausting the input.
+    emit_match:
+      do {
+        // We have a 4-byte match at ip, and no need to emit any
+        // "literal bytes" prior to ip.
+        const char* base = ip;
+        std::pair<size_t, bool> p =
+            FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
+        size_t matched = 4 + p.first;
+        ip += matched;
+        size_t offset = base - candidate;  // distance back to the earlier occurrence
+        assert(0 == memcmp(base, candidate, matched));
+        if (p.second) {
+          op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+        } else {
+          op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+        }
+        if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        // Expect 5 bytes to match
+        assert((data & 0xFFFFFFFFFF) ==
+               (LittleEndian::Load64(ip) & 0xFFFFFFFFFF));
+        // We are now looking for a 4-byte match again.  We read
+        // table[Hash(ip, mask)] for that.  To improve compression,
+        // we also update table[Hash(ip - 1, mask)] and table[Hash(ip, mask)].
+        *TableEntry(table, LittleEndian::Load32(ip - 1), mask) =
+            ip - base_ip - 1;
+        uint16_t* table_entry = TableEntry(table, data, mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        // Measurements on the benchmarks have shown the following probabilities
+        // for the loop to exit (ie. avg. number of iterations is reciprocal).
+        // BM_Flat/6  txt1    p = 0.3-0.4
+        // BM_Flat/7  txt2    p = 0.35
+        // BM_Flat/8  txt3    p = 0.3-0.4
+        // BM_Flat/9  txt3    p = 0.34-0.4
+        // BM_Flat/10 pb      p = 0.4
+        // BM_Flat/11 gaviota p = 0.1
+        // BM_Flat/12 cp      p = 0.5
+        // BM_Flat/13 c       p = 0.3
+      } while (static_cast<uint32_t>(data) == LittleEndian::Load32(candidate));
+      // Because the least significant 5 bytes matched, we can utilize data
+      // for the next iteration.
+      preload = data >> 8;
+    }
+  }
+
+emit_remainder:
+  // Emit the remaining bytes as a literal
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;  // one past the last byte written to the output buffer
+}
+}  // end namespace internal
+
+// Called back at every compression call to trace parameters and sizes.
+static inline void Report(const char *algorithm, size_t compressed_size,
+                          size_t uncompressed_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)algorithm;  // the casts below only silence unused-parameter warnings
+  (void)compressed_size;
+  (void)uncompressed_size;  // nothing is recorded: this implementation has no other effect
+}
+
+// Signature of output types needed by decompression code.
+// The decompression code is templatized on a type that obeys this
+// signature so that we do not pay virtual function call overhead in
+// the middle of a tight decompression loop.
+//
+// class DecompressionWriter {
+//  public:
+//   // Called before decompression
+//   void SetExpectedLength(size_t length);
+//
+//   // For performance a writer may choose to donate the cursor variable to the
+//   // decompression function. The decompression will inject it in all its
+//   // function calls to the writer. Keeping the important output cursor as a
+//   // function local stack variable allows the compiler to keep it in
+//   // register, which greatly aids performance by avoiding loads and stores of
+//   // this variable in the fast path loop iterations.
+//   T GetOutputPtr() const;
+//
+//   // At end of decompression the loop donates the ownership of the cursor
+//   // variable back to the writer by calling this function.
+//   void SetOutputPtr(T op);
+//
+//   // Called after decompression
+//   bool CheckLength() const;
+//
+//   // Called repeatedly during decompression
+//   // Each function gets a pointer to the op (output pointer), that the writer
+//   // can use and update. Note it's important that these functions get fully
+//   // inlined so that no actual address of the local variable needs to be
+//   // taken.
+//   bool Append(const char* ip, size_t length, T* op);
+//   bool AppendFromSelf(uint32_t offset, size_t length, T* op);
+//
+//   // The rules for how TryFastAppend differs from Append are somewhat
+//   // convoluted:
+//   //
+//   //  - TryFastAppend is allowed to decline (return false) at any
+//   //    time, for any reason -- just "return false" would be
+//   //    a perfectly legal implementation of TryFastAppend.
+//   //    The intention is for TryFastAppend to allow a fast path
+//   //    in the common case of a small append.
+//   //  - TryFastAppend is allowed to read up to <available> bytes
+//   //    from the input buffer, whereas Append is allowed to read
+//   //    <length>. However, if it returns true, it must leave
+//   //    at least five (kMaximumTagLength) bytes in the input buffer
+//   //    afterwards, so that there is always enough space to read the
+//   //    next tag without checking for a refill.
+//   //  - TryFastAppend must always decline (return false)
+//   //    if <length> is 61 or more, as in this case the literal length is not
+//   //    decoded fully. In practice, this should not be a big problem,
+//   //    as it is unlikely that one would implement a fast path accepting
+//   //    this much data.
+//   //
+//   bool TryFastAppend(const char* ip, size_t available, size_t length, T* op);
+// };
+
+static inline uint32_t ExtractLowBytes(const uint32_t& v, int n) {  // returns v with only its low n bytes kept
+  assert(n >= 0);
+  assert(n <= 4);
+#if SNAPPY_HAVE_BMI2
+  return _bzhi_u32(v, 8 * n);  // BMI2: clears every bit at position 8 * n and above
+#else
+  // This needs to be wider than uint32_t otherwise `mask << 32` will be
+  // undefined.
+  uint64_t mask = 0xffffffff;
+  return v & ~(mask << (8 * n));  // n == 0 yields 0, n == 4 yields v unchanged
+#endif
+}
+
+static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) {  // true if value << shift would lose bits in 32 bits
+  assert(shift < 32);
+  static const uint8_t masks[] = {  // masks[s] selects the bits of value that a shift by s pushes past bit 31
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};  // shifts 24..31; 25 is the first that can overflow
+  return (value & masks[shift]) != 0;
+}
+
+inline bool Copy64BytesWithPatternExtension(ptrdiff_t dst, size_t offset) {  // ptrdiff_t-destination overload: copies nothing
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)dst;
+  return offset != 0;  // only reports whether the offset is nonzero
+}
+
+// Copies between size bytes and 64 bytes from src to dest.  size cannot exceed
+// 64.  More than size bytes, but never exceeding 64, might be copied if doing
+// so gives better performance.  [src, src + size) must not overlap with
+// [dst, dst + size), but [src, src + 64) may overlap with [dst, dst + 64).
+void MemCopy64(char* dst, const void* src, size_t size) {
+  // Always copy this many bytes.  If that's below size then copy the full 64.
+  constexpr int kShortMemCopy = 32;
+
+  assert(size <= 64);
+  assert(std::less_equal<const void*>()(static_cast<const char*>(src) + size,  // the two size-byte ranges must be disjoint
+                                        dst) ||
+         std::less_equal<const void*>()(dst + size, src));
+
+  // We know that src and dst are at least size bytes apart. However, because we
+  // might copy more than size bytes the copy still might overlap past size.
+  // E.g. if src and dst appear consecutively in memory (src + size >= dst).
+  // TODO: Investigate wider copies on other platforms.
+#if defined(__x86_64__) && defined(__AVX__)
+  assert(kShortMemCopy <= 32);
+  __m256i data = _mm256_lddqu_si256(static_cast<const __m256i *>(src));  // first 32 bytes, moved regardless of size
+  _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), data);
+  // Profiling shows that nearly all copies are short.
+  if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) {
+    data = _mm256_lddqu_si256(static_cast<const __m256i *>(src) + 1);  // second 32 bytes, only for long copies
+    _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst) + 1, data);
+  }
+#else
+  std::memmove(dst, src, kShortMemCopy);  // memmove because the over-copied tails may overlap
+  // Profiling shows that nearly all copies are short.
+  if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) {
+    std::memmove(dst + kShortMemCopy,
+                 static_cast<const uint8_t*>(src) + kShortMemCopy,
+                 64 - kShortMemCopy);  // the remaining bytes up to the full 64
+  }
+#endif
+}
+
+void MemCopy64(ptrdiff_t dst, const void* src, size_t size) {  // ptrdiff_t-destination overload: performs no copy
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)dst;
+  (void)src;
+  (void)size;
+}
+
+void ClearDeferred(const void** deferred_src, size_t* deferred_length,  // resets the pending-copy state to "nothing deferred"
+                   uint8_t* safe_source) {
+  *deferred_src = safe_source;  // keeps the source pointer at a caller-provided buffer rather than a stale address
+  *deferred_length = 0;
+}
+
+void DeferMemCopy(const void** deferred_src, size_t* deferred_length,  // records a copy to run later; copies nothing itself
+                  const void* src, size_t length) {
+  *deferred_src = src;  // overwrites whatever was previously recorded
+  *deferred_length = length;
+}
+
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+inline size_t AdvanceToNextTagARMOptimized(const uint8_t** ip_p, size_t* tag) {
+  const uint8_t*& ip = *ip_p;  // in/out: the caller's cursor is updated through this reference
+  // This section is crucial for the throughput of the decompression loop.
+  // The latency of an iteration is fundamentally constrained by the
+  // following data chain on ip.
+  // ip -> c = Load(ip) -> delta1 = (c & 3)        -> ip += delta1 or delta2
+  //                       delta2 = ((c >> 2) + 1)    ip++
+  // This is different from X86 optimizations because ARM has conditional add
+  // instruction (csinc) and it removes several register moves.
+  const size_t tag_type = *tag & 3;  // low two bits of the tag select its type; 0 is a literal
+  const bool is_literal = (tag_type == 0);
+  if (is_literal) {
+    size_t next_literal_tag = (*tag >> 2) + 1;  // bytes to skip to reach the following tag
+    *tag = ip[next_literal_tag];  // load the following tag
+    ip += next_literal_tag + 1;  // ip ends up just past the following tag
+  } else {
+    *tag = ip[tag_type];  // skip tag_type trailing bytes to reach the following tag
+    ip += tag_type + 1;
+  }
+  return tag_type;  // type of the tag that was current on entry, not of the one just loaded
+}
+
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+inline size_t AdvanceToNextTagX86Optimized(const uint8_t** ip_p, size_t* tag) {
+  const uint8_t*& ip = *ip_p;  // in/out: the caller's cursor is updated through this reference
+  // This section is crucial for the throughput of the decompression loop.
+  // The latency of an iteration is fundamentally constrained by the
+  // following data chain on ip.
+  // ip -> c = Load(ip) -> ip1 = ip + 1 + (c & 3) -> ip = ip1 or ip2
+  //                       ip2 = ip + 2 + (c >> 2)
+  // This amounts to 8 cycles.
+  // 5 (load) + 1 (c & 3) + 1 (lea ip1, [ip + (c & 3) + 1]) + 1 (cmov)
+  size_t literal_len = *tag >> 2;
+  size_t tag_type = *tag;  // reduced to its low two bits below
+  bool is_literal;
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
+  // TODO clang misses the fact that the (c & 3) already correctly
+  // sets the zero flag.
+  asm("and $3, %k[tag_type]\n\t"
+      : [tag_type] "+r"(tag_type), "=@ccz"(is_literal)
+      :: "cc");
+#else
+  tag_type &= 3;
+  is_literal = (tag_type == 0);
+#endif
+  // TODO
+  // This code is subtle. Loading the values first and then cmov has less
+  // latency than cmov ip and then load. However clang would move the loads
+  // in an optimization phase, volatile prevents this transformation.
+  // Note that we have enough slop bytes (64) that the loads are always valid.
+  size_t tag_literal =
+      static_cast<const volatile uint8_t*>(ip)[1 + literal_len];  // following tag if the current tag is a literal
+  size_t tag_copy = static_cast<const volatile uint8_t*>(ip)[tag_type];  // following tag if the current tag is a copy
+  *tag = is_literal ? tag_literal : tag_copy;
+  const uint8_t* ip_copy = ip + 1 + tag_type;
+  const uint8_t* ip_literal = ip + 2 + literal_len;
+  ip = is_literal ? ip_literal : ip_copy;  // either way ip ends up just past the following tag
+#if defined(__GNUC__) && defined(__x86_64__)
+  // TODO Clang is "optimizing" zero-extension (a totally free
+  // operation) this means that after the cmov of tag, it emits another movzb
+  // tag, byte(tag). It really matters as it's on the core chain. This dummy
+  // asm, persuades clang to do the zero-extension at the load (it's automatic)
+  // removing the expensive movzb.
+  asm("" ::"r"(tag_copy));
+#endif
+  return tag_type;  // type of the tag that was current on entry, not of the one just loaded
+}
+
+// Extracts the offset for copy-1 and copy-2; returns 0 for literals or copy-4.
+inline uint32_t ExtractOffset(uint32_t val, size_t tag_type) {
+  // For x86 non-static storage works better. For ARM static storage is better.
+  // TODO: Once the array is recognized as a register, improve the
+  // readability for x86.
+#if defined(__x86_64__)
+  constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull;  // four 16-bit masks packed low to high: 0, 0xFF, 0xFFFF, 0
+  uint16_t result;
+  memcpy(&result,
+         reinterpret_cast<const char*>(&kExtractMasksCombined) + 2 * tag_type,  // picks the 16-bit mask for this tag_type
+         sizeof(result));
+  return val & result;
+#elif defined(__aarch64__)
+  constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull;  // same packed masks, selected by shifting
+  return val & static_cast<uint32_t>(
+      (kExtractMasksCombined >> (tag_type * 16)) & 0xFFFF);
+#else
+  static constexpr uint32_t kExtractMasks[4] = {0, 0xFF, 0xFFFF, 0};  // indexed by tag_type
+  return val & kExtractMasks[tag_type];
+#endif
+};
+
+// Core decompression loop, when there is enough data available.
+// Decompresses the input buffer [ip, ip_limit) into the output buffer
+// [op, op_limit_min_slop). Returning when either we are too close to the end
+// of the input buffer, or we exceed op_limit_min_slop or when an exceptional
+// tag is encountered (literal of length > 60) or a copy-4.
+// Returns {ip, op} at the points it stopped decoding.
+// TODO This function probably does not need to be inlined, as it
+// should decode large chunks at a time. This allows runtime dispatch to
+// implementations based on CPU capability (BMI2 / perhaps 32 / 64 byte memcpy).
+template <typename T>
+std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
+    const uint8_t* ip, const uint8_t* ip_limit, ptrdiff_t op, T op_base,
+    ptrdiff_t op_limit_min_slop) {
+  // If deferred_src is invalid point it here.
+  uint8_t safe_source[64];
+  const void* deferred_src;
+  size_t deferred_length;
+  ClearDeferred(&deferred_src, &deferred_length, safe_source);
+
+  // We unroll the inner loop twice so we need twice the spare room.
+  op_limit_min_slop -= kSlopBytes;
+  if (2 * (kSlopBytes + 1) < ip_limit - ip && op < op_limit_min_slop) {
+    const uint8_t* const ip_limit_min_slop = ip_limit - 2 * kSlopBytes - 1;
+    ip++;
+    // ip points just past the tag and we are touching at maximum kSlopBytes
+    // in an iteration.
+    size_t tag = ip[-1];
+#if defined(__clang__) && defined(__aarch64__)
+    // Workaround for https://bugs.llvm.org/show_bug.cgi?id=51317
+    // when loading 1 byte, clang for aarch64 doesn't realize that it(ldrb)
+    // comes with free zero-extension, so clang generates another
+    // 'and xn, xm, 0xff' before it use that as the offset. This 'and' is
+    // redundant and can be removed by adding this dummy asm, which gives
+    // clang a hint that we're doing the zero-extension at the load.
+    asm("" ::"r"(tag));
+#endif
+    do {
+      // The throughput is limited by instructions, unrolling the inner loop
+      // twice reduces the amount of instructions checking limits and also
+      // leads to reduced mov's.
+
+      SNAPPY_PREFETCH(ip + 128);
+      for (int i = 0; i < 2; i++) {
+        const uint8_t* old_ip = ip;  // just past the current tag; used to rewind on bail-out
+        assert(tag == ip[-1]);
+        // For literals tag_type = 0, hence we will always obtain 0 from
+        // ExtractLowBytes. For literals offset will thus be kLiteralOffset.
+        ptrdiff_t len_min_offset = kLengthMinusOffset[tag];
+#if defined(__aarch64__)
+        size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag);
+#else
+        size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag);
+#endif
+        uint32_t next = LittleEndian::Load32(old_ip);
+        size_t len = len_min_offset & 0xFF;  // low byte of the table entry is the length
+        len_min_offset -= ExtractOffset(next, tag_type);
+        if (SNAPPY_PREDICT_FALSE(len_min_offset > 0)) {
+          if (SNAPPY_PREDICT_FALSE(len & 0x80)) {
+            // Exceptional case (long literal or copy 4).
+            // Actually doing the copy here is negatively impacting the main
+            // loop due to compiler incorrectly allocating a register for
+            // this fallback. Hence we just break.
+          break_loop:
+            ip = old_ip;  // rewind so the caller re-reads the tag that was not handled
+            goto exit;
+          }
+          // Only copy-1 or copy-2 tags can get here.
+          assert(tag_type == 1 || tag_type == 2);
+          std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len;
+          // Guard against copies before the buffer start.
+          // Execute any deferred MemCopy since we write to dst here.
+          MemCopy64(op_base + op, deferred_src, deferred_length);
+          op += deferred_length;
+          ClearDeferred(&deferred_src, &deferred_length, safe_source);
+          if (SNAPPY_PREDICT_FALSE(delta < 0 ||
+                                  !Copy64BytesWithPatternExtension(
+                                      op_base + op, len - len_min_offset))) {
+            goto break_loop;
+          }
+          // We aren't deferring this copy so add length right away.
+          op += len;
+          continue;
+        }
+        std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len;
+        if (SNAPPY_PREDICT_FALSE(delta < 0)) {
+          // Due to the spurious offset that literals have, this will trigger
+          // at the start of a block when op is still smaller than 256.
+          if (tag_type != 0) goto break_loop;
+          MemCopy64(op_base + op, deferred_src, deferred_length);
+          op += deferred_length;
+          DeferMemCopy(&deferred_src, &deferred_length, old_ip, len);
+          continue;
+        }
+
+        // For copies we need to copy from op_base + delta, for literals
+        // we need to copy from ip instead of from the stream.
+        const void* from =
+            tag_type ? reinterpret_cast<void*>(op_base + delta) : old_ip;
+        MemCopy64(op_base + op, deferred_src, deferred_length);  // flush the previous deferred copy first
+        op += deferred_length;
+        DeferMemCopy(&deferred_src, &deferred_length, from, len);  // the current copy is performed one step later
+      }
+    } while (ip < ip_limit_min_slop &&
+             (op + deferred_length) < op_limit_min_slop);
+  exit:
+    ip--;  // undo the ip++ above so ip points at a tag byte again
+    assert(ip <= ip_limit);
+  }
+  // If we deferred a copy then we can perform it now.  If we are up to date
+  // then we might not have enough slop bytes and could run past the end.
+  if (deferred_length) {
+    MemCopy64(op_base + op, deferred_src, deferred_length);
+    op += deferred_length;
+    ClearDeferred(&deferred_src, &deferred_length, safe_source);
+  }
+  return {ip, op};  // input position and output offset at which decoding stopped
+}
+
+// Helper class for decompression
+class SnappyDecompressor {
+ private:
+  Source* reader_;        // Underlying source of bytes to decompress
+  const char* ip_;        // Points to next buffered byte
+  const char* ip_limit_;  // Points just past buffered bytes
+  // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
+  // buffer.
+  const char* ip_limit_min_maxtaglen_;
+  uint32_t peeked_;                  // Bytes peeked from reader (need to skip)
+  bool eof_;                         // Hit end of input without an error?
+  char scratch_[kMaximumTagLength];  // See RefillTag().
+
+  // Ensure that all of the tag metadata for the next tag is available
+  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
+  // if (ip_limit_ - ip_ < 5).
+  //
+  // Returns true on success, false on error or end of input.
+  bool RefillTag();
+
+  void ResetLimit(const char* ip) {  // recomputes ip_limit_min_maxtaglen_ for the current buffer
+    ip_limit_min_maxtaglen_ =
+        ip_limit_ - std::min<ptrdiff_t>(ip_limit_ - ip, kMaximumTagLength - 1);  // clamped so the limit never falls below ip
+  }
+
+ public:
+  explicit SnappyDecompressor(Source* reader)
+      : reader_(reader), ip_(NULL), ip_limit_(NULL), peeked_(0), eof_(false) {}
+
+  ~SnappyDecompressor() {
+    // Advance past any bytes we peeked at from the reader
+    reader_->Skip(peeked_);
+  }
+
+  // Returns true iff we have hit the end of the input without an error.
+  bool eof() const { return eof_; }
+
+  // Read the uncompressed length stored at the start of the compressed data.
+  // On success, stores the length in *result and returns true.
+  // On failure, returns false.
+  bool ReadUncompressedLength(uint32_t* result) {
+    assert(ip_ == NULL);  // Must not have read anything yet
+    // Length is encoded in 1..5 bytes
+    *result = 0;
+    uint32_t shift = 0;
+    while (true) {
+      if (shift >= 32) return false;  // a sixth length byte would be needed: reject
+      size_t n;
+      const char* ip = reader_->Peek(&n);
+      if (n == 0) return false;  // input ended inside the length prefix
+      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+      reader_->Skip(1);
+      uint32_t val = c & 0x7f;  // low 7 bits carry payload
+      if (LeftShiftOverflows(static_cast<uint8_t>(val), shift)) return false;  // value would not fit in 32 bits
+      *result |= val << shift;
+      if (c < 128) {  // high bit clear marks the final byte
+        break;
+      }
+      shift += 7;
+    }
+    return true;
+  }
+
+  // Decompresses tags into the writer until the input is exhausted or an
+  // error occurs. Returns nothing; the output cursor goes to SetOutputPtr().
+  template <class Writer>
+#if defined(__GNUC__) && defined(__x86_64__)
+  __attribute__((aligned(32)))
+#endif
+  void
+  DecompressAllTags(Writer* writer) {
+    const char* ip = ip_;
+    ResetLimit(ip);
+    auto op = writer->GetOutputPtr();
+    // We could have put this refill fragment only at the beginning of the loop.
+    // However, duplicating it at the end of each branch gives the compiler more
+    // scope to optimize the <ip_limit_ - ip> expression based on the local
+    // context, which overall increases speed.
+#define MAYBE_REFILL()                                      \
+  if (SNAPPY_PREDICT_FALSE(ip >= ip_limit_min_maxtaglen_)) { \
+    ip_ = ip;                                               \
+    if (SNAPPY_PREDICT_FALSE(!RefillTag())) goto exit;       \
+    ip = ip_;                                               \
+    ResetLimit(ip);                                         \
+  }                                                         \
+  preload = static_cast<uint8_t>(*ip)
+
+    // At the start of the for loop below the least significant byte of preload
+    // contains the tag.
+    uint32_t preload;
+    MAYBE_REFILL();
+    for (;;) {
+      {
+        ptrdiff_t op_limit_min_slop;
+        auto op_base = writer->GetBase(&op_limit_min_slop);
+        if (op_base) {  // the bulk decoder is used only when the writer supplies a base
+          auto res =
+              DecompressBranchless(reinterpret_cast<const uint8_t*>(ip),
+                                   reinterpret_cast<const uint8_t*>(ip_limit_),
+                                   op - op_base, op_base, op_limit_min_slop);
+          ip = reinterpret_cast<const char*>(res.first);
+          op = op_base + res.second;
+          MAYBE_REFILL();
+        }
+      }
+      const uint8_t c = static_cast<uint8_t>(preload);
+      ip++;  // ip now points just past the tag byte
+
+      // Ratio of iterations that have LITERAL vs non-LITERAL for different
+      // inputs.
+      //
+      // input          LITERAL  NON_LITERAL
+      // -----------------------------------
+      // html|html4|cp   23%        77%
+      // urls            36%        64%
+      // jpg             47%        53%
+      // pdf             19%        81%
+      // txt[1-4]        25%        75%
+      // pb              24%        76%
+      // bin             24%        76%
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
+        size_t literal_length = (c >> 2) + 1u;
+        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length, &op)) {
+          assert(literal_length < 61);
+          ip += literal_length;
+          // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
+          // will not return true unless there's already at least five spare
+          // bytes in addition to the literal.
+          preload = static_cast<uint8_t>(*ip);
+          continue;
+        }
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
+          // Long literal.
+          const size_t literal_length_length = literal_length - 60;  // 1..4 extra bytes hold the real length
+          literal_length =
+              ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+              1;
+          ip += literal_length_length;
+        }
+
+        size_t avail = ip_limit_ - ip;
+        while (avail < literal_length) {  // the literal spans several reader fragments
+          if (!writer->Append(ip, avail, &op)) goto exit;
+          literal_length -= avail;
+          reader_->Skip(peeked_);
+          size_t n;
+          ip = reader_->Peek(&n);
+          avail = n;
+          peeked_ = avail;
+          if (avail == 0) goto exit;  // input ended inside a literal
+          ip_limit_ = ip + avail;
+          ResetLimit(ip);
+        }
+        if (!writer->Append(ip, literal_length, &op)) goto exit;
+        ip += literal_length;
+        MAYBE_REFILL();
+      } else {
+        if (SNAPPY_PREDICT_FALSE((c & 3) == COPY_4_BYTE_OFFSET)) {
+          const size_t copy_offset = LittleEndian::Load32(ip);
+          const size_t length = (c >> 2) + 1;
+          ip += 4;
+
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+        } else {
+          const ptrdiff_t entry = kLengthMinusOffset[c];
+          preload = LittleEndian::Load32(ip);
+          const uint32_t trailer = ExtractLowBytes(preload, c & 3);
+          const uint32_t length = entry & 0xff;
+          assert(length > 0);
+
+          // copy_offset/256 is encoded in bits 8..10.  By just fetching
+          // those bits, we get copy_offset (since the bit-field starts at
+          // bit 8).
+          const uint32_t copy_offset = trailer - entry + length;
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+
+          ip += (c & 3);
+          // By using the result of the previous load we reduce the critical
+          // dependency chain of ip to 4 cycles.
+          preload >>= (c & 3) * 8;
+          if (ip < ip_limit_min_maxtaglen_) continue;
+        }
+        MAYBE_REFILL();
+      }
+    }
+#undef MAYBE_REFILL
+  exit:
+    writer->SetOutputPtr(op);  // hand the output cursor back on every exit path
+  }
+};
+
+constexpr uint32_t CalculateNeeded(uint8_t tag) {  // bytes, tag included, that must be contiguous to decode this tag
+  return ((tag & 3) == 0 && tag >= (60 * 4))  // literal whose length field (tag >> 2) is 60..63
+             ? (tag >> 2) - 58  // yields 2..5
+             : (0x05030201 >> ((tag * 8) & 31)) & 0xFF;  // byte (tag & 3) of the constant: 1, 2, 3 or 5
+}
+
+#if __cplusplus >= 201402L
+constexpr bool VerifyCalculateNeeded() {  // compile-time cross-check of CalculateNeeded against char_table
+  for (int i = 0; i < 256; i++) {  // every possible tag byte; the previous bound of 1 only ever checked tag 0
+    if (CalculateNeeded(i) != (char_table[i] >> 11) + 1) return false;
+  }
+  return true;
+}
+
+// Make sure CalculateNeeded is correct by verifying it against the established
+// table encoding the number of added bytes needed.
+static_assert(VerifyCalculateNeeded(), "");
+#endif  // c++14
+
+bool SnappyDecompressor::RefillTag() {
+  const char* ip = ip_;
+  if (ip == ip_limit_) {
+    // Fetch a new fragment from the reader
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    size_t n;
+    ip = reader_->Peek(&n);
+    peeked_ = n;
+    eof_ = (n == 0);  // the only place eof_ is set: input ended exactly on a tag boundary
+    if (eof_) return false;
+    ip_limit_ = ip + n;
+  }
+
+  // Read the tag character
+  assert(ip < ip_limit_);
+  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+  // At this point make sure that the data for the next tag is consecutive.
+  // For copy 1 this means the next 2 bytes (tag and 1 byte offset)
+  // For copy 2 the next 3 bytes (tag and 2 byte offset)
+  // For copy 4 the next 5 bytes (tag and 4 byte offset)
+  // For all small literals we only need 1 byte but for literals 60...63 the
+  // length is encoded in 1...4 extra bytes.
+  const uint32_t needed = CalculateNeeded(c);
+  assert(needed <= sizeof(scratch_));
+
+  // Read more bytes from reader if needed
+  uint32_t nbuf = ip_limit_ - ip;
+  if (nbuf < needed) {
+    // Stitch together bytes from ip and reader to form the word
+    // contents.  We store the needed bytes in "scratch_".  They
+    // will be consumed immediately by the caller since we do not
+    // read more than we need.
+    std::memmove(scratch_, ip, nbuf);
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    peeked_ = 0;
+    while (nbuf < needed) {
+      size_t length;
+      const char* src = reader_->Peek(&length);
+      if (length == 0) return false;  // input ended in the middle of a tag; eof_ is left false
+      uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+      std::memcpy(scratch_ + nbuf, src, to_add);
+      nbuf += to_add;
+      reader_->Skip(to_add);  // consumed for real, so peeked_ stays 0
+    }
+    assert(nbuf == needed);
+    ip_ = scratch_;
+    ip_limit_ = scratch_ + needed;
+  } else if (nbuf < kMaximumTagLength) {
+    // Have enough bytes, but move into scratch_ so that we do not
+    // read past end of input
+    std::memmove(scratch_, ip, nbuf);
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    peeked_ = 0;
+    ip_ = scratch_;
+    ip_limit_ = scratch_ + nbuf;
+  } else {
+    // Pass pointer to buffer returned by reader_.
+    ip_ = ip;
+  }
+  return true;
+}
+
+template <typename Writer>
+static bool InternalUncompress(Source* r, Writer* writer) {
+  // A valid stream begins with the varint-encoded uncompressed length; bail
+  // out before touching the writer when that preamble cannot be decoded.
+  SnappyDecompressor state(r);
+  uint32_t ulen = 0;
+  if (!state.ReadUncompressedLength(&ulen)) return false;
+  const uint32_t avail = r->Available();
+  return InternalUncompressAllTags(&state, writer, avail, ulen);
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+                                      Writer* writer, uint32_t compressed_len,
+                                      uint32_t uncompressed_len) {
+  Report("snappy_uncompress", compressed_len, uncompressed_len);
+  writer->SetExpectedLength(uncompressed_len);
+  // Decode every tag and flush the writer. The result is good only if the
+  // input was consumed to its end and the writer passes its length check.
+  decompressor->DecompressAllTags(writer);
+  writer->Flush();
+  if (!decompressor->eof()) return false;
+  return writer->CheckLength();
+}
+
+bool GetUncompressedLength(Source* source, uint32_t* result) {
+  // Only the length preamble is parsed here; no tags are decoded.
+  return SnappyDecompressor(source).ReadUncompressedLength(result);
+}
+
+size_t Compress(Source* reader, Sink* writer) {
+  size_t written = 0;  // Total bytes appended to the sink so far
+  size_t N = reader->Available();  // Uncompressed bytes still to process
+  const size_t uncompressed_size = N;
+  char ulength[Varint::kMax32];
+  char* p = Varint::Encode32(ulength, N);  // Length preamble comes first
+  writer->Append(ulength, p - ulength);
+  written += (p - ulength);
+
+  internal::WorkingMemory wmem(N);
+
+  while (N > 0) {
+    // Get next block to compress (without copying if possible)
+    size_t fragment_size;
+    const char* fragment = reader->Peek(&fragment_size);
+    assert(fragment_size != 0);  // premature end of input
+    const size_t num_to_read = std::min(N, kBlockSize);
+    size_t bytes_read = fragment_size;
+
+    size_t pending_advance = 0;  // Skipped only after the block is compressed
+    if (bytes_read >= num_to_read) {
+      // Buffer returned by reader is large enough
+      pending_advance = num_to_read;
+      fragment_size = num_to_read;
+    } else {
+      char* scratch = wmem.GetScratchInput();  // Gather fragments into scratch
+      std::memcpy(scratch, fragment, bytes_read);
+      reader->Skip(bytes_read);
+
+      while (bytes_read < num_to_read) {  // NOTE(review): no empty-Peek guard
+        fragment = reader->Peek(&fragment_size);
+        size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
+        std::memcpy(scratch + bytes_read, fragment, n);
+        bytes_read += n;
+        reader->Skip(n);
+      }
+      assert(bytes_read == num_to_read);
+      fragment = scratch;
+      fragment_size = num_to_read;
+    }
+    assert(fragment_size == num_to_read);
+
+    // Get encoding table for compression
+    int table_size;
+    uint16_t* table = wmem.GetHashTable(num_to_read, &table_size);
+
+    // Compress input_fragment and append to dest
+    const int max_output = MaxCompressedLength(num_to_read);
+
+    // Need a scratch buffer for the output, in case the byte sink doesn't
+    // have room for us directly.
+
+    // Since we encode kBlockSize regions followed by a region
+    // which is <= kBlockSize in length, a previously allocated
+    // scratch_output[] region is big enough for this iteration.
+    char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
+    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                           table_size);
+    writer->Append(dest, end - dest);
+    written += (end - dest);
+
+    N -= num_to_read;
+    reader->Skip(pending_advance);  // 0 when the block was gathered in scratch
+  }
+
+  Report("snappy_compress", written, uncompressed_size);
+
+  return written;
+}
+
+// -----------------------------------------------------------------------
+// IOVec interfaces
+// -----------------------------------------------------------------------
+
+// A `Source` implementation that yields the contents of an `iovec` array. Note
+// that `total_size` is the total number of bytes to be read from the elements
+// of `iov` (_not_ the total number of elements in `iov`).
+class SnappyIOVecReader : public Source {
+ public:
+  SnappyIOVecReader(const struct iovec* iov, size_t total_size)
+      : curr_iov_(iov),  // `iov` is not dereferenced when total_size == 0.
+        curr_pos_(total_size > 0 ? reinterpret_cast<const char*>(iov->iov_base)
+                                 : nullptr),
+        curr_size_remaining_(total_size > 0 ? iov->iov_len : 0),
+        total_size_remaining_(total_size) {
+    // Skip empty leading `iovec`s.
+    if (total_size > 0 && curr_size_remaining_ == 0) Advance();
+  }
+
+  ~SnappyIOVecReader() = default;
+
+  size_t Available() const { return total_size_remaining_; }  // All iovecs.
+
+  const char* Peek(size_t* len) {
+    *len = curr_size_remaining_;  // Only the rest of the current `iovec`.
+    return curr_pos_;
+  }
+
+  void Skip(size_t n) {
+    while (n >= curr_size_remaining_ && n > 0) {  // Consume whole iovecs.
+      n -= curr_size_remaining_;
+      Advance();
+    }
+    curr_size_remaining_ -= n;  // Then the partial remainder (possibly 0).
+    total_size_remaining_ -= n;
+    curr_pos_ += n;
+  }
+
+ private:
+  // Advances to the next nonempty `iovec` and updates related variables.
+  void Advance() {
+    do {
+      assert(total_size_remaining_ >= curr_size_remaining_);
+      total_size_remaining_ -= curr_size_remaining_;
+      if (total_size_remaining_ == 0) {  // Input exhausted: Peek yields 0.
+        curr_pos_ = nullptr;
+        curr_size_remaining_ = 0;
+        return;
+      }
+      ++curr_iov_;
+      curr_pos_ = reinterpret_cast<const char*>(curr_iov_->iov_base);
+      curr_size_remaining_ = curr_iov_->iov_len;
+    } while (curr_size_remaining_ == 0);
+  }
+
+  // The `iovec` currently being read.
+  const struct iovec* curr_iov_;
+  // The location in `curr_iov_` currently being read.
+  const char* curr_pos_;
+  // The amount of unread data in `curr_iov_`.
+  size_t curr_size_remaining_;
+  // The amount of unread data in the entire input array.
+  size_t total_size_remaining_;
+};
+
+// A type that writes to an iovec.
+// Note that this is not a "ByteSink", but a type that matches the
+// Writer template argument to SnappyDecompressor::DecompressAllTags().
+class SnappyIOVecWriter {
+ private:
+  // output_iov_end_ is set to iov + count and used to determine when
+  // the end of the iovs is reached.
+  const struct iovec* output_iov_end_;
+
+#if !defined(NDEBUG)
+  const struct iovec* output_iov_;  // First iovec; used only by an assert.
+#endif  // !defined(NDEBUG)
+
+  // Current iov that is being written into.
+  const struct iovec* curr_iov_;
+
+  // Pointer to current iov's write location.
+  char* curr_iov_output_;
+
+  // Remaining bytes to write into curr_iov_output.
+  size_t curr_iov_remaining_;
+
+  // Total bytes decompressed into output_iov_ so far.
+  size_t total_written_;
+
+  // Maximum number of bytes that will be decompressed into output_iov_.
+  size_t output_limit_;
+
+  static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) {
+    return reinterpret_cast<char*>(iov->iov_base) + offset;
+  }
+
+ public:
+  // Does not take ownership of iov. iov must be valid during the
+  // entire lifetime of the SnappyIOVecWriter.
+  inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
+      : output_iov_end_(iov + iov_count),
+#if !defined(NDEBUG)
+        output_iov_(iov),
+#endif  // !defined(NDEBUG)
+        curr_iov_(iov),
+        curr_iov_output_(iov_count ? reinterpret_cast<char*>(iov->iov_base)
+                                   : nullptr),
+        curr_iov_remaining_(iov_count ? iov->iov_len : 0),
+        total_written_(0),
+        output_limit_(-1) {  // -1 converts to the largest size_t value.
+  }
+
+  inline void SetExpectedLength(size_t len) { output_limit_ = len; }
+
+  inline bool CheckLength() const { return total_written_ == output_limit_; }
+
+  inline bool Append(const char* ip, size_t len, char**) {
+    if (total_written_ + len > output_limit_) {  // Would exceed the limit.
+      return false;
+    }
+
+    return AppendNoCheck(ip, len);
+  }
+
+  char* GetOutputPtr() { return nullptr; }  // No flat output pointer here.
+  char* GetBase(ptrdiff_t*) { return nullptr; }
+  void SetOutputPtr(char* op) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)op;
+  }
+
+  inline bool AppendNoCheck(const char* ip, size_t len) {
+    while (len > 0) {
+      if (curr_iov_remaining_ == 0) {
+        // This iovec is full. Go to the next one.
+        if (curr_iov_ + 1 >= output_iov_end_) {
+          return false;  // Ran out of iovecs before all of len was written.
+        }
+        ++curr_iov_;
+        curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+        curr_iov_remaining_ = curr_iov_->iov_len;
+      }
+
+      const size_t to_write = std::min(len, curr_iov_remaining_);
+      std::memcpy(curr_iov_output_, ip, to_write);
+      curr_iov_output_ += to_write;
+      curr_iov_remaining_ -= to_write;
+      total_written_ += to_write;
+      ip += to_write;
+      len -= to_write;
+    }
+
+    return true;
+  }
+
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                            char**) {
+    const size_t space_left = output_limit_ - total_written_;
+    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
+        curr_iov_remaining_ >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy128(ip, curr_iov_output_);
+      curr_iov_output_ += len;  // Only len bytes are accounted for.
+      curr_iov_remaining_ -= len;
+      total_written_ += len;
+      return true;
+    }
+
+    return false;
+  }
+
+  inline bool AppendFromSelf(size_t offset, size_t len, char**) {
+    // offset - 1u wraps to the largest size_t when offset == 0, so this one
+    // comparison rejects both a zero offset and one beyond the bytes written.
+    if (offset - 1u >= total_written_) {
+      return false;
+    }
+    const size_t space_left = output_limit_ - total_written_;
+    if (len > space_left) {
+      return false;
+    }
+
+    // Locate the iovec from which we need to start the copy.
+    const iovec* from_iov = curr_iov_;
+    size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_;
+    while (offset > 0) {
+      if (from_iov_offset >= offset) {
+        from_iov_offset -= offset;
+        break;
+      }
+
+      offset -= from_iov_offset;
+      --from_iov;  // Walk backwards through already-written iovecs.
+#if !defined(NDEBUG)
+      assert(from_iov >= output_iov_);
+#endif  // !defined(NDEBUG)
+      from_iov_offset = from_iov->iov_len;
+    }
+
+    // Copy <len> bytes starting from the iovec pointed to by from_iov to
+    // the current iovec.
+    while (len > 0) {
+      assert(from_iov <= curr_iov_);
+      if (from_iov != curr_iov_) {
+        const size_t to_copy =
+            std::min(from_iov->iov_len - from_iov_offset, len);
+        AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy);
+        len -= to_copy;
+        if (len > 0) {
+          ++from_iov;
+          from_iov_offset = 0;
+        }
+      } else {
+        size_t to_copy = curr_iov_remaining_;
+        if (to_copy == 0) {
+          // This iovec is full. Go to the next one.
+          if (curr_iov_ + 1 >= output_iov_end_) {
+            return false;
+          }
+          ++curr_iov_;
+          curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+          curr_iov_remaining_ = curr_iov_->iov_len;
+          continue;
+        }
+        if (to_copy > len) {
+          to_copy = len;
+        }
+        assert(to_copy > 0);
+
+        IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset),
+                        curr_iov_output_, curr_iov_output_ + to_copy,
+                        curr_iov_output_ + curr_iov_remaining_);
+        curr_iov_output_ += to_copy;
+        curr_iov_remaining_ -= to_copy;
+        from_iov_offset += to_copy;
+        total_written_ += to_copy;
+        len -= to_copy;
+      }
+    }
+
+    return true;
+  }
+
+  inline void Flush() {}
+};
+
+bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
+                          const struct iovec* iov, size_t iov_cnt) {
+  ByteArraySource flat_input(compressed, compressed_length);  // as a Source
+  return RawUncompressToIOVec(&flat_input, iov, iov_cnt);
+}
+
+bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
+                          size_t iov_cnt) {
+  SnappyIOVecWriter scatter_writer(iov, iov_cnt);  // spans all iov_cnt iovecs
+  return InternalUncompress(compressed, &scatter_writer);
+}
+
+// -----------------------------------------------------------------------
+// Flat array interfaces
+// -----------------------------------------------------------------------
+
+// A type that writes to a flat array.
+// Note that this is not a "ByteSink", but a type that matches the
+// Writer template argument to SnappyDecompressor::DecompressAllTags().
+class SnappyArrayWriter {
+ private:
+  char* base_;      // Start of the output array.
+  char* op_;        // Committed write position (see SetOutputPtr).
+  char* op_limit_;  // One past the last byte that may be written.
+  // If op < op_limit_min_slop_ then it's safe to unconditionally write
+  // kSlopBytes starting at op.
+  char* op_limit_min_slop_;
+
+ public:
+  inline explicit SnappyArrayWriter(char* dst)
+      : base_(dst),
+        op_(dst),
+        op_limit_(dst),
+        op_limit_min_slop_(dst) {}  // Safe default see invariant.
+
+  inline void SetExpectedLength(size_t len) {
+    op_limit_ = op_ + len;
+    // Prevent pointer from being past the buffer.
+    op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, len);
+  }
+
+  inline bool CheckLength() const { return op_ == op_limit_; }
+
+  char* GetOutputPtr() { return op_; }
+  char* GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop = op_limit_min_slop_ - base_;  // As offset from base_.
+    return base_;
+  }
+  void SetOutputPtr(char* op) { op_ = op; }
+
+  inline bool Append(const char* ip, size_t len, char** op_p) {
+    char* op = *op_p;
+    const size_t space_left = op_limit_ - op;
+    if (space_left < len) return false;  // Would overrun the expected length.
+    std::memcpy(op, ip, len);
+    *op_p = op + len;
+    return true;
+  }
+
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                            char** op_p) {
+    char* op = *op_p;
+    const size_t space_left = op_limit_ - op;
+    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy128(ip, op);
+      *op_p = op + len;  // Only len bytes are accounted for.
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+  inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+    assert(len > 0);
+    char* const op = *op_p;
+    assert(op >= base_);
+    char* const op_end = op + len;
+
+    // Check if we try to append from before the start of the buffer.
+    if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - base_) < offset))
+      return false;
+
+    if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                            op >= op_limit_min_slop_ || offset < len)) {
+      if (op_end > op_limit_ || offset == 0) return false;
+      *op_p = IncrementalCopy(op - offset, op, op_end, op_limit_);
+      return true;
+    }
+    std::memmove(op, op - offset, kSlopBytes);  // over-copy, keep len
+    *op_p = op_end;
+    return true;
+  }
+  inline size_t Produced() const {
+    assert(op_ >= base_);
+    return op_ - base_;
+  }
+  inline void Flush() {}
+};
+
+bool RawUncompress(const char* compressed, size_t compressed_length,
+                   char* uncompressed) {
+  ByteArraySource flat_input(compressed, compressed_length);  // as a Source
+  return RawUncompress(&flat_input, uncompressed);
+}
+
+bool RawUncompress(Source* compressed, char* uncompressed) {
+  SnappyArrayWriter flat_writer(uncompressed);  // writes contiguously
+  return InternalUncompress(compressed, &flat_writer);
+}
+
+bool Uncompress(const char* compressed, size_t compressed_length,
+                std::string* uncompressed) {
+  // Learn the output size from the stream header before resizing anything.
+  size_t decoded_size;
+  const bool header_ok =
+      GetUncompressedLength(compressed, compressed_length, &decoded_size);
+  // The header may come from externally specified data, and on 32-bit builds
+  // max_size() < kuint32max, so refuse oversized requests instead of
+  // crashing in the resize below.
+  if (!header_ok || decoded_size > uncompressed->max_size()) return false;
+
+  STLStringResizeUninitialized(uncompressed, decoded_size);
+  char* const out = string_as_array(uncompressed);
+  return RawUncompress(compressed, compressed_length, out);
+}
+
+// A Writer that drops everything on the floor and just does validation
+class SnappyDecompressionValidator {
+ public:
+  inline SnappyDecompressionValidator() : expected_(0), produced_(0) {}
+
+  // The "output pointer" of this writer is simply a running byte count.
+  size_t GetOutputPtr() { return produced_; }
+  void SetOutputPtr(size_t op) { produced_ = op; }
+  size_t GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop = std::numeric_limits<ptrdiff_t>::max() - kSlopBytes + 1;
+    return 1;
+  }
+
+  inline void SetExpectedLength(size_t len) { expected_ = len; }
+  inline bool CheckLength() const { return expected_ == produced_; }
+  inline void Flush() {}
+
+  // Counts a literal of `len` bytes; the bytes themselves are never read.
+  inline bool Append(const char*, size_t len, size_t* produced) {
+    const size_t total = *produced + len;
+    *produced = total;
+    return total <= expected_;
+  }
+
+  // There is no fast path: every request is declined.
+  inline bool TryFastAppend(const char*, size_t, size_t, size_t*) {
+    return false;
+  }
+
+  // Counts a self-copy of `len` bytes after validating `offset`.
+  inline bool AppendFromSelf(size_t offset, size_t len, size_t* produced) {
+    // offset - 1u wraps around to the largest size_t when offset == 0, so one
+    // comparison rejects both a zero offset and an offset beyond the output.
+    if (offset - 1u >= *produced) return false;
+    *produced += len;
+    return !(*produced > expected_);
+  }
+
+ private:
+  size_t expected_;
+  size_t produced_;
+};
+
+bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length) {
+  ByteArraySource input(compressed, compressed_length);
+  SnappyDecompressionValidator checker;  // counts output, stores nothing
+  return InternalUncompress(&input, &checker);
+}
+
+bool IsValidCompressed(Source* compressed) {
+  SnappyDecompressionValidator checker;  // counts output, stores nothing
+  return InternalUncompress(compressed, &checker);
+}
+
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length) {
+  ByteArraySource flat_input(input, input_length);
+  UncheckedByteArraySink flat_output(compressed);
+  Compress(&flat_input, &flat_output);
+  // The sink's cursor now sits just past the last byte that was emitted.
+  const char* const end_of_output = flat_output.CurrentDestination();
+  *compressed_length = end_of_output - compressed;
+}
+
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length) {
+  SnappyIOVecReader gathered_input(iov, uncompressed_length);
+  UncheckedByteArraySink flat_output(compressed);
+  Compress(&gathered_input, &flat_output);
+  // The sink's cursor now sits just past the last byte that was emitted.
+  const char* const end_of_output = flat_output.CurrentDestination();
+  *compressed_length = end_of_output - compressed;
+}
+
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed) {
+  // Size the string for the worst case so RawCompress can write in place.
+  const size_t worst_case = MaxCompressedLength(input_length);
+  STLStringResizeUninitialized(compressed, worst_case);
+  size_t actual;
+  RawCompress(input, input_length, string_as_array(compressed), &actual);
+  // Trim the string down to the bytes actually produced.
+  compressed->erase(actual);
+  return actual;
+}
+
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed) {
+  // The input size is the sum of the lengths of all iov_cnt segments.
+  size_t total_input = 0;
+  for (const struct iovec* seg = iov; seg != iov + iov_cnt; ++seg) {
+    total_input += seg->iov_len;
+  }
+
+  // Size the string for the worst case so the compressor can write in place.
+  const size_t worst_case = MaxCompressedLength(total_input);
+  STLStringResizeUninitialized(compressed, worst_case);
+
+  size_t actual;
+  RawCompressFromIOVec(iov, total_input, string_as_array(compressed), &actual);
+  // Trim the string down to the bytes actually produced.
+  compressed->erase(actual);
+  return actual;
+}
+
+// -----------------------------------------------------------------------
+// Sink interface
+// -----------------------------------------------------------------------
+
+// A type that decompresses into a Sink. The template parameter Allocator
+// must export "char* Allocate(int size);", which allocates a buffer of "size"
+// for the destination, and "Flush(size_t size)", called once decoding ends.
+template <typename Allocator>
+class SnappyScatteredWriter {
+  Allocator allocator_;
+
+  // We need random access into the data generated so far.  Therefore
+  // we keep track of all of the generated data as an array of blocks.
+  // All of the blocks except the last have length kBlockSize.
+  std::vector<char*> blocks_;
+  size_t expected_;  // Set by SetExpectedLength(); not initialized by the ctor
+
+  // Total size of all fully generated blocks so far
+  size_t full_size_;
+
+  // Pointer into current output block
+  char* op_base_;   // Base of output block
+  char* op_ptr_;    // Pointer to next unfilled byte in block
+  char* op_limit_;  // Pointer just past block
+  // If op < op_limit_min_slop_ then it's safe to unconditionally write
+  // kSlopBytes starting at op.
+  char* op_limit_min_slop_;
+
+  inline size_t Size() const { return full_size_ + (op_ptr_ - op_base_); }
+
+  bool SlowAppend(const char* ip, size_t len);
+  bool SlowAppendFromSelf(size_t offset, size_t len);
+
+ public:
+  inline explicit SnappyScatteredWriter(const Allocator& allocator)
+      : allocator_(allocator),  // The allocator is copied into the writer
+        full_size_(0),
+        op_base_(NULL),
+        op_ptr_(NULL),
+        op_limit_(NULL),
+        op_limit_min_slop_(NULL) {}
+  char* GetOutputPtr() { return op_ptr_; }
+  char* GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop = op_limit_min_slop_ - op_base_;  // Offset from base
+    return op_base_;
+  }
+  void SetOutputPtr(char* op) { op_ptr_ = op; }
+
+  inline void SetExpectedLength(size_t len) {
+    assert(blocks_.empty());
+    expected_ = len;
+  }
+
+  inline bool CheckLength() const { return Size() == expected_; }
+
+  // Return the number of bytes actually uncompressed so far
+  inline size_t Produced() const { return Size(); }
+
+  inline bool Append(const char* ip, size_t len, char** op_p) {
+    char* op = *op_p;
+    size_t avail = op_limit_ - op;
+    if (len <= avail) {
+      // Fast path
+      std::memcpy(op, ip, len);
+      *op_p = op + len;
+      return true;
+    } else {
+      op_ptr_ = op;  // Commit the cursor before the slow path reads it
+      bool res = SlowAppend(ip, len);
+      *op_p = op_ptr_;
+      return res;
+    }
+  }
+
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length,
+                            char** op_p) {
+    char* op = *op_p;
+    const int space_left = op_limit_ - op;
+    if (length <= 16 && available >= 16 + kMaximumTagLength &&
+        space_left >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy128(ip, op);
+      *op_p = op + length;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+    char* op = *op_p;
+    assert(op >= op_base_);
+    // Check if we try to append from before the start of the buffer.
+    if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                            static_cast<size_t>(op - op_base_) < offset ||
+                            op >= op_limit_min_slop_ || offset < len)) {
+      if (offset == 0) return false;
+      if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - op_base_) < offset ||
+                              op + len > op_limit_)) {
+        op_ptr_ = op;  // Commit the cursor before the slow path reads it
+        bool res = SlowAppendFromSelf(offset, len);
+        *op_p = op_ptr_;
+        return res;
+      }
+      *op_p = IncrementalCopy(op - offset, op, op + len, op_limit_);
+      return true;
+    }
+    // Fast path
+    char* const op_end = op + len;
+    std::memmove(op, op - offset, kSlopBytes);
+    *op_p = op_end;
+    return true;
+  }
+
+  // Called at the end of the decompress. We ask the allocator to
+  // write all blocks to the sink.
+  inline void Flush() { allocator_.Flush(Produced()); }
+};
+
+template <typename Allocator>
+bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+  size_t avail = op_limit_ - op_ptr_;  // Room left in the current block
+  while (len > avail) {
+    // Completely fill this block
+    std::memcpy(op_ptr_, ip, avail);
+    op_ptr_ += avail;
+    assert(op_limit_ - op_ptr_ == 0);
+    full_size_ += (op_ptr_ - op_base_);
+    len -= avail;
+    ip += avail;
+
+    // Bounds check: what is left must still fit the expected total
+    if (full_size_ + len > expected_) return false;
+
+    // Make new block
+    size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
+    op_base_ = allocator_.Allocate(bsize);
+    op_ptr_ = op_base_;
+    op_limit_ = op_base_ + bsize;
+    op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, bsize);
+
+    blocks_.push_back(op_base_);
+    avail = bsize;
+  }
+
+  std::memcpy(op_ptr_, ip, len);  // Remainder fits in the current block
+  op_ptr_ += len;
+  return true;
+}
+
+template <typename Allocator>
+bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+                                                         size_t len) {
+  // Overflow check
+  // offset - 1u wraps to the largest size_t when offset == 0, so this single
+  // comparison rejects both offset == 0 and offsets past the output start.
+  const size_t cur = Size();
+  if (offset - 1u >= cur) return false;
+  if (expected_ - cur < len) return false;
+
+  // Currently we shouldn't ever hit this path because Compress() chops the
+  // input into blocks and does not create cross-block copies. However, it is
+  // nice if we do not rely on that, since we can get better compression if we
+  // allow cross-block copies and thus might want to change the compressor in
+  // the future.
+  // TODO Replace this with a properly optimized path. This is not
+  // triggered right now. But this is so super slow, that it would regress
+  // performance unacceptably if triggered.
+  size_t src = cur - offset;  // Absolute position of the first source byte
+  char* op = op_ptr_;
+  while (len-- > 0) {
+    char c = blocks_[src >> kBlockLog][src & (kBlockSize - 1)];
+    if (!Append(&c, 1, &op)) {
+      op_ptr_ = op;
+      return false;
+    }
+    src++;
+  }
+  op_ptr_ = op;
+  return true;
+}
+
+class SnappySinkAllocator {
+ public:
+  explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {}
+  ~SnappySinkAllocator() {}
+
+  char* Allocate(int size) {
+    Datablock block(new char[size], size);  // Recorded until Flush()
+    blocks_.push_back(block);
+    return block.data;
+  }
+
+  // We flush only at the end, because the writer wants
+  // random access to the blocks and once we hand the
+  // block over to the sink, we can't access it anymore.
+  // Also we don't write more than has been actually written
+  // to the blocks.
+  void Flush(size_t size) {
+    size_t size_written = 0;
+    for (Datablock& block : blocks_) {
+      size_t block_size = std::min<size_t>(block.size, size - size_written);
+      dest_->AppendAndTakeOwnership(block.data, block_size,
+                                    &SnappySinkAllocator::Deleter, NULL);
+      size_written += block_size;
+    }
+    blocks_.clear();  // Every block was passed to the sink above
+  }
+
+ private:
+  struct Datablock {
+    char* data;
+    size_t size;
+    Datablock(char* p, size_t s) : data(p), size(s) {}
+  };
+
+  static void Deleter(void* arg, const char* bytes, size_t size) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)arg;
+    (void)size;
+
+    delete[] bytes;  // Matches the new char[] in Allocate()
+  }
+
+  Sink* dest_;
+  std::vector<Datablock> blocks_;
+
+  // Note: copying this object is allowed
+};
+
+size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+  SnappySinkAllocator sink_blocks(uncompressed);
+  SnappyScatteredWriter<SnappySinkAllocator> block_writer(sink_blocks);
+  InternalUncompress(compressed, &block_writer);  // success flag is ignored
+  return block_writer.Produced();
+}
+
+bool Uncompress(Source* compressed, Sink* uncompressed) {
+  // The stream starts with the length of the data it expands to.
+  SnappyDecompressor state(compressed);
+  uint32_t ulen = 0;
+  if (!state.ReadUncompressedLength(&ulen)) return false;
+
+  // Request an append buffer from the sink, passing a one-byte scratch area;
+  // `granted` receives the size of the buffer that was handed out.
+  char one_byte_scratch;
+  size_t granted;
+  char* const flat = uncompressed->GetAppendBufferVariable(
+      1, ulen, &one_byte_scratch, 1, &granted);
+
+  const size_t clen = compressed->Available();
+
+  if (granted < ulen) {
+    // No flat buffer large enough: decode block by block through the sink.
+    SnappySinkAllocator sink_blocks(uncompressed);
+    SnappyScatteredWriter<SnappySinkAllocator> block_writer(sink_blocks);
+    return InternalUncompressAllTags(&state, &block_writer, clen, ulen);
+  }
+
+  // The whole output fits in the buffer the sink handed out; decode straight
+  // into it and append whatever was produced, even when decoding failed.
+  SnappyArrayWriter flat_writer(flat);
+  const bool ok = InternalUncompressAllTags(&state, &flat_writer, clen, ulen);
+  uncompressed->Append(flat, flat_writer.Produced());
+  return ok;
+}
+
+}  // namespace snappy
diff --git a/third_party/snappy/snappy.h b/third_party/snappy/snappy.h
new file mode 100644
index 0000000000..e12b658ebd
--- /dev/null
+++ b/third_party/snappy/snappy.h
@@ -0,0 +1,222 @@
+// Copyright 2005 and onwards Google Inc.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A light-weight compression algorithm.  It is designed for speed of
+// compression and decompression, rather than for the utmost in space
+// savings.
+//
+// For getting better compression ratios when you are compressing data
+// with long repeated sequences or compressing data that is similar to
+// other data, while still compressing fast, you might look at first
+// using BMDiff and then compressing the output of BMDiff with
+// Snappy.
+
+#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
+#define THIRD_PARTY_SNAPPY_SNAPPY_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "snappy-stubs-public.h"
+
+namespace snappy {
+  class Source;
+  class Sink;
+
+  // ------------------------------------------------------------------------
+  // Generic compression/decompression routines.
+  // ------------------------------------------------------------------------
+
+  // Compress the bytes read from "*source" and append to "*sink". Return the
+  // number of bytes written.
+  size_t Compress(Source* source, Sink* sink);
+
+  // Find the uncompressed length of the given stream, as given by the header.
+  // Note that the true length could deviate from this; the stream could e.g.
+  // be truncated.
+  //
+  // Also note that this leaves "*source" in a state that is unsuitable for
+  // further operations, such as RawUncompress(). You will need to rewind
+  // or recreate the source yourself before attempting any further calls.
+  bool GetUncompressedLength(Source* source, uint32_t* result);
+
+  // ------------------------------------------------------------------------
+  // Higher-level string based routines (should be sufficient for most users)
+  // ------------------------------------------------------------------------
+
+  // Sets "*compressed" to the compressed version of "input[0..input_length-1]".
+  // Original contents of *compressed are lost.
+  //
+  // REQUIRES: "input[]" is not an alias of "*compressed".
+  size_t Compress(const char* input, size_t input_length,
+                  std::string* compressed);
+
+  // Same as `Compress` above but taking an `iovec` array as input. Note that
+  // this function preprocesses the inputs to compute the sum of
+  // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
+  // `RawCompressFromIOVec` below.
+  size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                           std::string* compressed);
+
+  // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
+  // Original contents of "*uncompressed" are lost.
+  //
+  // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool Uncompress(const char* compressed, size_t compressed_length,
+                  std::string* uncompressed);
+
+  // Decompresses "compressed" to "*uncompressed".
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool Uncompress(Source* compressed, Sink* uncompressed);
+
+  // This routine uncompresses as much of the "compressed" as possible
+  // into sink.  It returns the number of valid bytes added to sink
+  // (extra invalid bytes may have been added due to errors; the caller
+  // should ignore those). The emitted data typically has length
+  // GetUncompressedLength(), but may be shorter if an error is
+  // encountered.
+  size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
+
+  // ------------------------------------------------------------------------
+  // Lower-level character array based routines.  May be useful for
+  // efficiency reasons in certain circumstances.
+  // ------------------------------------------------------------------------
+
+  // REQUIRES: "compressed" must point to an area of memory that is at
+  // least "MaxCompressedLength(input_length)" bytes in length.
+  //
+  // Takes the data stored in "input[0..input_length-1]" and stores
+  // it in the array pointed to by "compressed".
+  //
+  // "*compressed_length" is set to the length of the compressed output.
+  //
+  // Example:
+  //    char* output = new char[snappy::MaxCompressedLength(input_length)];
+  //    size_t output_length;
+  //    RawCompress(input, input_length, output, &output_length);
+  //    ... Process(output, output_length) ...
+  //    delete [] output;
+  void RawCompress(const char* input,
+                   size_t input_length,
+                   char* compressed,
+                   size_t* compressed_length);
+
+  // Same as `RawCompress` above but taking an `iovec` array as input. Note that
+  // `uncompressed_length` is the total number of bytes to be read from the
+  // elements of `iov` (_not_ the number of elements in `iov`).
+  void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                            char* compressed, size_t* compressed_length);
+
+  // Given data in "compressed[0..compressed_length-1]" generated by
+  // calling the snappy::Compress routine, this routine
+  // stores the uncompressed data to
+  //    uncompressed[0..GetUncompressedLength(compressed)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(const char* compressed, size_t compressed_length,
+                     char* uncompressed);
+
+  // Given data from the byte source 'compressed' generated by calling
+  // the snappy::Compress routine, this routine stores the uncompressed
+  // data to
+  //    uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(Source* compressed, char* uncompressed);
+
+  // Given data in "compressed[0..compressed_length-1]" generated by
+  // calling the snappy::Compress routine, this routine
+  // stores the uncompressed data to the iovec "iov". The number of physical
+  // buffers in "iov" is given by iov_cnt and their cumulative size
+  // must be at least GetUncompressedLength(compressed). The individual buffers
+  // in "iov" must not overlap with each other.
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
+                            const struct iovec* iov, size_t iov_cnt);
+
+  // Given data from the byte source 'compressed' generated by calling
+  // the snappy::Compress routine, this routine stores the uncompressed
+  // data to the iovec "iov". The number of physical
+  // buffers in "iov" is given by iov_cnt and their cumulative size
+  // must be at least GetUncompressedLength(compressed). The individual buffers
+  // in "iov" must not overlap with each other.
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
+                            size_t iov_cnt);
+
+  // Returns the maximal size of the compressed representation of
+  // input data that is "source_bytes" bytes in length;
+  size_t MaxCompressedLength(size_t source_bytes);
+
+  // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
+  // Returns true and stores the length of the uncompressed data in
+  // *result normally.  Returns false on parsing error.
+  // This operation takes O(1) time.
+  bool GetUncompressedLength(const char* compressed, size_t compressed_length,
+                             size_t* result);
+
+  // Returns true iff the contents of "compressed[]" can be uncompressed
+  // successfully.  Does not return the uncompressed data.  Takes
+  // time proportional to compressed_length, but is usually at least
+  // a factor of four faster than actual decompression.
+  bool IsValidCompressedBuffer(const char* compressed,
+                               size_t compressed_length);
+
+  // Returns true iff the contents of "compressed" can be uncompressed
+  // successfully.  Does not return the uncompressed data.  Takes
+  // time proportional to *compressed length, but is usually at least
+  // a factor of four faster than actual decompression.
+  // On success, consumes all of *compressed.  On failure, consumes an
+  // unspecified prefix of *compressed.
+  bool IsValidCompressed(Source* compressed);
+
+  // The size of a compression block. Note that many parts of the compression
+  // code assume that kBlockSize <= 65536; in particular, the hash table
+  // can only store 16-bit offsets, and EmitCopy() also assumes the offset
+  // is 65535 bytes or less. Note also that if you change this, it will
+  // affect the framing format (see framing_format.txt).
+  //
+  // Note that there might be older data around that is compressed with larger
+  // block sizes, so the decompression code should not rely on the
+  // non-existence of long backreferences.
+  static constexpr int kBlockLog = 16;
+  static constexpr size_t kBlockSize = 1 << kBlockLog;
+
+  static constexpr int kMinHashTableBits = 8;
+  static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
+
+  static constexpr int kMaxHashTableBits = 14;
+  static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
+}  // end namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_SNAPPY_H__
diff --git a/third_party/versions b/third_party/versions
index 1c7f3a9a71..00ed287298 100644
--- a/third_party/versions
+++ b/third_party/versions
@@ -14,3 +14,4 @@ base64 commit-id: d354224643b1b1343cf4944c5cd2ff94e33c3768
 oatpp commit-id: 17ef2a7f6c8a932498799b2a5ae5aab2869975c7
 PGM-index commit-id: f578e68414c60f9869c5611575143645f75e0ce1
 eigen v3.4.0
+snappy v1.1.10
diff --git a/tools/generate_aggregate.py b/tools/generate_aggregate.py
index 61fc98d1b6..76827dfe77 100644
--- a/tools/generate_aggregate.py
+++ b/tools/generate_aggregate.py
@@ -52,7 +52,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         slt_file.write("\n")
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path
             )
         )
@@ -139,7 +139,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         slt_file.write("\n")
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path
             )
         )
diff --git a/tools/generate_big.py b/tools/generate_big.py
index 7ae7280745..0750dfb24d 100644
--- a/tools/generate_big.py
+++ b/tools/generate_big.py
@@ -33,7 +33,7 @@ def generate_test_varchar(
 
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path)
         )
         slt_file.write("----\n")
@@ -88,7 +88,7 @@ def generate_test_embedding(
 
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path)
         )
         slt_file.write("----\n")
diff --git a/tools/generate_big_point_query_test_fastroughfilter.py b/tools/generate_big_point_query_test_fastroughfilter.py
index 476b9c0634..7c77808cf6 100644
--- a/tools/generate_big_point_query_test_fastroughfilter.py
+++ b/tools/generate_big_point_query_test_fastroughfilter.py
@@ -35,7 +35,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         test_slt_file1.write("\n")
         test_slt_file1.write("statement ok\n")
         test_slt_file1.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name1, copy_path))
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name1, copy_path))
 
         for i in range(repeat_n):
             x = random.randint(row_n + 1, row_n * 3)
@@ -60,7 +60,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         test_slt_file2.write("\n")
         test_slt_file2.write("statement ok\n")
         test_slt_file2.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name2, copy_path))
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name2, copy_path))
 
         for i in range(repeat_n):
             x = random.randint(0, row_n - 1)
diff --git a/tools/generate_big_sparse.py b/tools/generate_big_sparse.py
index 57e2fcc130..e8e3842702 100644
--- a/tools/generate_big_sparse.py
+++ b/tools/generate_big_sparse.py
@@ -59,7 +59,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
 
         import_slt_file.write("statement ok\n")
         import_slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',');\n".format(table_name, copy_path)
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV);\n".format(table_name, copy_path)
         )
         import_slt_file.write("\n")
 
@@ -159,7 +159,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
 
         bmp_knn_slt_file.write("statement ok\n")
         bmp_knn_slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',');\n".format(table_name, copy_path)
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV);\n".format(table_name, copy_path)
         )
         bmp_knn_slt_file.write("\n")
 
diff --git a/tools/generate_compact.py b/tools/generate_compact.py
index 2c59b5edbb..3d24cb7cd7 100644
--- a/tools/generate_compact.py
+++ b/tools/generate_compact.py
@@ -49,7 +49,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
 
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}/{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}/{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_dir, csv_name
             )
         )
diff --git a/tools/generate_emvb_test_data.py b/tools/generate_emvb_test_data.py
index 912dc5533a..5ce60920d1 100644
--- a/tools/generate_emvb_test_data.py
+++ b/tools/generate_emvb_test_data.py
@@ -40,7 +40,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         top_slt_file.write("CREATE TABLE {} (c1 integer, c2 TENSOR(FLOAT, {}));\n".format(table_name, fix_dim))
         top_slt_file.write("\n")
         top_slt_file.write("statement ok\n")
-        top_slt_file.write("COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name, copy_path))
+        top_slt_file.write("COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name, copy_path))
         top_slt_file.write("\nstatement ok\n")
         top_slt_file.write("CREATE INDEX idx1 ON {} (c2) USING EMVB WITH ".format(table_name))
         top_slt_file.write("(pq_subspace_num = {}, pq_subspace_bits = {});\n".format(pq_subspace_num, pq_subspace_bits))
diff --git a/tools/generate_emvb_test_data_2.py b/tools/generate_emvb_test_data_2.py
index a0a97ce2fa..c5809d54e6 100644
--- a/tools/generate_emvb_test_data_2.py
+++ b/tools/generate_emvb_test_data_2.py
@@ -45,7 +45,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         top_slt_file.write("CREATE TABLE {} (c1 integer, c2 TENSOR(FLOAT, {}));\n".format(table_name, fix_dim))
         top_slt_file.write("\n")
         top_slt_file.write("statement ok\n")
-        top_slt_file.write("COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name, copy_path))
+        top_slt_file.write("COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name, copy_path))
         top_slt_file.write("\nstatement ok\n")
         top_slt_file.write(
             "CREATE INDEX idx1 ON {} (c2) USING EMVB WITH (pq_subspace_num = {}, pq_subspace_bits = {});\n".format(
diff --git a/tools/generate_hnsw_with_delete.py b/tools/generate_hnsw_with_delete.py
index 76bde17247..decdd63bd8 100644
--- a/tools/generate_hnsw_with_delete.py
+++ b/tools/generate_hnsw_with_delete.py
@@ -104,7 +104,7 @@ def find_nearest(delete_set: set[int]) -> dict[int, int]:
 
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '/var/infinity/test_data/{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '/var/infinity/test_data/{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, csv_name
             )
         )
diff --git a/tools/generate_index_scan.py b/tools/generate_index_scan.py
index 0534ed77f5..a4a2d7e462 100644
--- a/tools/generate_index_scan.py
+++ b/tools/generate_index_scan.py
@@ -39,7 +39,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
 
         index_scan_slt_file.write("\nstatement ok\n")
         index_scan_slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name, copy_path))
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name, copy_path))
 
         index_scan_slt_file.write("\nstatement ok\n")
         index_scan_slt_file.write(
diff --git a/tools/generate_limit.py b/tools/generate_limit.py
index f123787367..38c1004881 100644
--- a/tools/generate_limit.py
+++ b/tools/generate_limit.py
@@ -36,7 +36,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         slt_file.write("\n")
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path
             )
         )
diff --git a/tools/generate_many_import.py b/tools/generate_many_import.py
index 14eeda0700..c39d4ab468 100644
--- a/tools/generate_many_import.py
+++ b/tools/generate_many_import.py
@@ -50,7 +50,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         for _ in range(import_n):
             slt_file.write("statement ok\n")
             slt_file.write(
-                "COPY {} FROM '{}{}' WITH ( DELIMITER ',' );\n".format(
+                "COPY {} FROM '{}{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                     table_name, copy_dir, csv_name
                 )
             )
diff --git a/tools/generate_many_import_drop.py b/tools/generate_many_import_drop.py
index 10fd3e1ccb..650036d10c 100644
--- a/tools/generate_many_import_drop.py
+++ b/tools/generate_many_import_drop.py
@@ -53,7 +53,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
             for _ in range(import_n):
                 slt_file.write("statement ok\n")
                 slt_file.write(
-                    "COPY {} FROM '{}{}' WITH ( DELIMITER ',' );\n".format(
+                    "COPY {} FROM '{}{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                         table_name, copy_dir, csv_name
                     )
                 )
diff --git a/tools/generate_sort.py b/tools/generate_sort.py
index 152832804e..b7b572183e 100644
--- a/tools/generate_sort.py
+++ b/tools/generate_sort.py
@@ -34,7 +34,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         slt_file.write("\n")
         slt_file.write("query I\n")
         slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(
                 table_name, copy_path
             )
         )
diff --git a/tools/generate_top.py b/tools/generate_top.py
index ab133e5aa6..c949868608 100644
--- a/tools/generate_top.py
+++ b/tools/generate_top.py
@@ -40,7 +40,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         top_slt_file.write("\n")
         top_slt_file.write("statement ok\n")
         top_slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name, copy_path))
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name, copy_path))
         top_slt_file.write("\nquery I\n")
         top_slt_file.write(
             "SELECT * FROM {} order by c1 - c1, c2 desc, c1 + c1 limit 10 offset 3330;\n".format(table_name))
diff --git a/tools/generate_top_varchar.py b/tools/generate_top_varchar.py
index e177b6a7cc..1b545f1de2 100644
--- a/tools/generate_top_varchar.py
+++ b/tools/generate_top_varchar.py
@@ -46,7 +46,7 @@ def generate(generate_if_exists: bool, copy_dir: str):
         top_slt_file.write("\n")
         top_slt_file.write("statement ok\n")
         top_slt_file.write(
-            "COPY {} FROM '{}' WITH ( DELIMITER ',' );\n".format(table_name, copy_path))
+            "COPY {} FROM '{}' WITH ( DELIMITER ',', FORMAT CSV );\n".format(table_name, copy_path))
 
         x.sort()
         for lim_off in limit_offset: