Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQLite like API mode of chDB #283

Merged
merged 36 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
84d099e
Fix result buf copy in query_stable_v2
auxten Nov 2, 2024
50f33a6
Impl chdb_conn connect_chdb close_conn query_conn
auxten Nov 2, 2024
d073583
Update .clang-tidy
auxten Nov 2, 2024
61b875c
Basically works
auxten Nov 2, 2024
6ca4dd6
Fix SCOPE_EXIT
auxten Nov 2, 2024
7f21a4b
Fix output format
auxten Nov 4, 2024
d82a1a7
Fix chdb.h decl
auxten Nov 4, 2024
744fa13
Minimal changes on ClientBase
auxten Nov 4, 2024
aa94638
Fix get_error_msg name to getErrorMsg
auxten Nov 4, 2024
6827908
No exception on save default_database for now
auxten Nov 7, 2024
7de1e76
Add parquet(arrow) dep for local
auxten Nov 7, 2024
5d578b8
Use ArrowStream in cursor mode
auxten Nov 7, 2024
6a1c0d8
Handle result vec in CH loop
auxten Nov 11, 2024
b259bec
Fix cursor_wrapper close
auxten Nov 11, 2024
a4b8d9c
Fix exception handling
auxten Nov 12, 2024
b8f44e3
If conn_str empty, use :memory:
auxten Nov 13, 2024
a1a8fdd
Add pyarrow and pandas as dep
auxten Nov 18, 2024
af9761e
Fix close_conn
auxten Nov 18, 2024
cef9c1b
Add sqlitelike API for python
auxten Nov 18, 2024
47f78c9
Add test_conn_cursor
auxten Nov 18, 2024
2365709
Add trace utils
auxten Nov 18, 2024
1399019
Add .cursorignore
auxten Nov 18, 2024
ca08ea0
Fix some pylint issue
auxten Nov 18, 2024
40e7143
Fix lint
auxten Nov 21, 2024
ab080f7
Add keep_buf switch for local_result_v2
auxten Nov 21, 2024
233c3b1
Use sqlitelike API in DBAPI
auxten Nov 21, 2024
401255b
Fix error in example
auxten Nov 21, 2024
238e019
Fix unittest for DBAPI and SQLite like API
auxten Nov 21, 2024
a82d028
Fix lint
auxten Nov 21, 2024
0f1ae8c
Fix Python 3.8
auxten Nov 22, 2024
ec37ebe
Handle parameters without values
auxten Nov 22, 2024
4a61bbe
Add getQueryOutputSpan
auxten Nov 22, 2024
7b293dd
Add doc string for chdb.connect
auxten Nov 22, 2024
da541d1
Fix double free
auxten Nov 22, 2024
b5945e1
Fix null check
auxten Nov 22, 2024
7c3cdde
Test connect properties
auxten Nov 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Checks: [
'-bugprone-not-null-terminated-result',
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
'-bugprone-unchecked-optional-access',
'-bugprone-crtp-constructor-accessibility',
'-bugprone-suspicious-stringview-data-usage',

'-cert-dcl16-c',
'-cert-dcl37-c',
Expand All @@ -36,6 +38,7 @@ Checks: [
'-cert-msc51-cpp',
'-cert-oop54-cpp',
'-cert-oop57-cpp',
'-cert-err33-c', # Misreports on clang-19: it warns about all functions containing 'remove' in the name, not only about the standard library.

'-clang-analyzer-optin.performance.Padding',

Expand Down Expand Up @@ -94,10 +97,12 @@ Checks: [
'-modernize-pass-by-value',
'-modernize-return-braced-init-list',
'-modernize-use-auto',
'-modernize-use-constraints', # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-trailing-return-type',
'-modernize-use-designated-initializers',

'-performance-enum-size',
'-performance-inefficient-string-concatenation',
Expand All @@ -121,7 +126,8 @@ Checks: [
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-redundant-inline-specifier',
'-readability-redundant-inline-specifier', # useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE, base/base/defines.h).
# ALWAYS_INLINE only has an effect if combined with `inline`: https://godbolt.org/z/Eefd74qdM
'-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated
# initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires to specify all members which are not in-struct
# initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly
Expand All @@ -132,12 +138,7 @@ Checks: [
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',

'-zircon-*',

# This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
'-modernize-use-constraints',
# https://github.com/abseil/abseil-cpp/issues/1667
'-clang-analyzer-optin.core.EnumCastOutOfRange'
'-zircon-*'
]

WarningsAsErrors: '*'
Expand Down Expand Up @@ -172,4 +173,4 @@ CheckOptions:
performance-move-const-arg.CheckTriviallyCopyableMove: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type
cppcoreguidelines-avoid-do-while.IgnoreMacros: true
cppcoreguidelines-avoid-do-while.IgnoreMacros: true
1 change: 1 addition & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
contrib/
3 changes: 3 additions & 0 deletions chdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,10 @@ def query(sql, output_format="CSV", path="", udf_path=""):
PyReader = _chdb.PyReader

from . import dbapi, session, udf, utils # noqa: E402
from .state import connect # noqa: E402

__all__ = [
"_chdb",
"PyReader",
"ChdbError",
"query",
Expand All @@ -99,4 +101,5 @@ def query(sql, output_format="CSV", path="", udf_path=""):
"session",
"udf",
"utils",
"connect",
]
189 changes: 36 additions & 153 deletions chdb/dbapi/connections.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from . import err
from .cursors import Cursor
from . import converters
from ..state import sqlitelike as chdb_stateful

DEBUG = False
VERBOSE = False
Expand All @@ -10,152 +10,78 @@
class Connection(object):
"""
Representation of a connection with chdb.

The proper way to get an instance of this class is to call
connect().

Accepts several arguments:

:param cursorclass: Custom cursor class to use.
:param path: Optional folder path to store database files on disk.

See `Connection <https://www.python.org/dev/peps/pep-0249/#connection-objects>`_ in the
specification.
"""

_closed = False
_session = None

def __init__(self, cursorclass=Cursor, path=None):

self._resp = None

# 1. pre-process params in init
self.encoding = 'utf8'

self.cursorclass = cursorclass

self._result = None
def __init__(self, path=None):
self._closed = False
self.encoding = "utf8"
self._affected_rows = 0
self._resp = None

self.connect(path)
# Initialize sqlitelike connection
connection_string = ":memory:" if path is None else f"file:{path}"
self._conn = chdb_stateful.Connection(connection_string)

def connect(self, path=None):
from chdb import session as chs
self._session = chs.Session(path)
self._closed = False
self._execute_command("select 1;")
self._read_query_result()
# Test connection with a simple query
cursor = self._conn.cursor()
cursor.execute("SELECT 1")
cursor.close()

def close(self):
"""
Send the quit message and close the socket.

See `Connection.close() <https://www.python.org/dev/peps/pep-0249/#Connection.close>`_
in the specification.

:raise Error: If the connection is already closed.
"""
"""Send the quit message and close the socket."""
if self._closed:
raise err.Error("Already closed")
self._closed = True
self._session = None
self._conn.close()

@property
def open(self):
"""Return True if the connection is open"""
return not self._closed

def commit(self):
"""
Commit changes to stable storage.

See `Connection.commit() <https://www.python.org/dev/peps/pep-0249/#commit>`_
in the specification.
"""
return
"""Commit changes to stable storage."""
# No-op for ClickHouse
pass

def rollback(self):
"""
Roll back the current transaction.

See `Connection.rollback() <https://www.python.org/dev/peps/pep-0249/#rollback>`_
in the specification.
"""
return
"""Roll back the current transaction."""
# No-op for ClickHouse
pass

def cursor(self, cursor=None):
"""
Create a new cursor to execute queries with.

:param cursor: The type of cursor to create; current only :py:class:`Cursor`
None means use Cursor.
"""
"""Create a new cursor to execute queries with."""
if self._closed:
raise err.Error("Connection closed")
if cursor:
return cursor(self)
return self.cursorclass(self)
return Cursor(self)
return Cursor(self)

# The following methods are INTERNAL USE ONLY (called from Cursor)
def query(self, sql):
if isinstance(sql, str):
sql = sql.encode(self.encoding, 'surrogateescape')
self._execute_command(sql)
self._affected_rows = self._read_query_result()
return self._affected_rows

def _execute_command(self, sql):
"""
:raise InterfaceError: If the connection is closed.
:raise ValueError: If no username was specified.
"""
def query(self, sql, fmt="ArrowStream"):
"""Execute a query and return the raw result."""
if self._closed:
raise err.InterfaceError("Connection closed")

if isinstance(sql, str):
sql = sql.encode(self.encoding)
sql = sql.encode(self.encoding, "surrogateescape")

if isinstance(sql, bytearray):
sql = bytes(sql)

# drop last command return
if self._resp is not None:
self._resp = None

if DEBUG:
print("DEBUG: query:", sql)
try:
res = self._session.query(sql, fmt="JSON")
if res.has_error():
raise err.DatabaseError(res.error_message())
self._resp = res.data()
result = self._conn.query(sql.decode(), fmt)
self._resp = result
return result
except Exception as error:
raise err.InterfaceError("query err: %s" % error)
raise err.InterfaceError(f"Query error: {error}")

def escape(self, obj, mapping=None):
"""Escape whatever value you pass to it.

Non-standard, for internal use; do not use this in your applications.
"""
if isinstance(obj, str):
return "'" + self.escape_string(obj) + "'"
if isinstance(obj, (bytes, bytearray)):
ret = self._quote_bytes(obj)
return ret
return converters.escape_item(obj, mapping=mapping)
"""Escape whatever value you pass to it."""
return converters.escape_item(obj, mapping)

def escape_string(self, s):
return converters.escape_string(s)

def _quote_bytes(self, s):
return converters.escape_bytes(s)

def _read_query_result(self):
self._result = None
result = CHDBResult(self)
result.read()
self._result = result
return result.affected_rows

def __enter__(self):
"""Context manager that returns a Cursor"""
return self.cursor()
Expand All @@ -166,52 +92,9 @@ def __exit__(self, exc, value, traceback):
self.rollback()
else:
self.commit()
self.close()

@property
def resp(self):
"""Return the last query response"""
return self._resp


class CHDBResult(object):
def __init__(self, connection):
"""
:type connection: Connection
"""
self.connection = connection
self.affected_rows = 0
self.insert_id = None
self.warning_count = 0
self.message = None
self.field_count = 0
self.description = None
self.rows = None
self.has_next = None

def read(self):
# Handle empty responses (for instance from CREATE TABLE)
if self.connection.resp is None:
return

try:
data = json.loads(self.connection.resp)
except Exception as error:
raise err.InterfaceError("Unsupported response format:" % error)

try:
self.field_count = len(data["meta"])
description = []
for meta in data["meta"]:
fields = [meta["name"], meta["type"]]
description.append(tuple(fields))
self.description = tuple(description)

rows = []
for line in data["data"]:
row = []
for i in range(self.field_count):
column_data = converters.convert_column_data(self.description[i][1], line[self.description[i][0]])
row.append(column_data)
rows.append(tuple(row))
self.rows = tuple(rows)
except Exception as error:
raise err.InterfaceError("Read return data err:" % error)
Loading
Loading