Skip to content

Commit

Permalink
Merge pull request #13 from auxten/hotfix-pyarrow-dep
Browse files Browse the repository at this point in the history
Try import pyarrow and pandas when necessary
  • Loading branch information
auxten authored Apr 18, 2023
2 parents a929964 + cd14bf7 commit 028329b
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 17 deletions.
16 changes: 11 additions & 5 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
name: Build

on: [push, pull_request]
on:
push:
tags:
- 'v*'
pull_request:
branches:
- pybind

jobs:
build_wheels_linux:
Expand Down Expand Up @@ -82,6 +88,7 @@ jobs:
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
bash ./chdb/build.sh
python3 -m pip install pandas pyarrow
bash -x ./chdb/test_smoke.sh
continue-on-error: false
- name: Check ccache statistics
Expand Down Expand Up @@ -148,7 +155,6 @@ jobs:
run: |
pwd
uname -a
export HOMEBREW_NO_AUTO_UPDATE=1
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
export PATH=$(brew --prefix llvm@15)/bin:$PATH
Expand Down Expand Up @@ -193,12 +199,13 @@ jobs:
export CXX=$(brew --prefix llvm@15)/bin/clang++
bash gen_manifest.sh
bash ./chdb/build.sh
python3 -m pip install pandas pyarrow
bash -x ./chdb/test_smoke.sh
continue-on-error: false
- name: Keep killall ccache and wait for ccache to finish
if: always()
run: |
sleep 300
sleep 60
while ps -ef | grep ccache | grep -v grep; do \
killall ccache; \
sleep 10; \
Expand Down Expand Up @@ -261,7 +268,6 @@ jobs:
run: |
pwd
uname -a
export HOMEBREW_NO_AUTO_UPDATE=1
export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
brew install git ccache ninja libtool gettext llvm@15 gcc binutils grep findutils zstd
export PATH=$(brew --prefix llvm@15)/bin:$PATH
Expand Down Expand Up @@ -349,7 +355,7 @@ jobs:
- name: Keep killall ccache and wait for ccache to finish
if: always()
run: |
sleep 300
sleep 60
while ps -ef | grep ccache | grep -v grep; do \
killall ccache; \
sleep 10; \
Expand Down
30 changes: 21 additions & 9 deletions chdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import sys
import os
import pyarrow as pa

chdb_version = (0, 1, 0)
chdb_version = (0, 5, 0)
if sys.version_info[:2] >= (3, 7):
# get the path of the current file
current_path = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -23,19 +22,32 @@
except: # pragma: no cover
__version__ = "unknown"


def _to_arrowTable(res):
# return pyarrow table
def to_arrowTable(res):
"""convert res to arrow table"""
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
try:
import pyarrow as pa
import pandas
except ImportError as e:
print(f'ImportError: {e}')
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
raise ImportError('Failed to import pyarrow or pandas') from None

return pa.RecordBatchFileReader(res.get_memview()).read_all()

# return pandas dataframe
def to_df(r):
    """Convert a query result into a pandas DataFrame.

    Args:
        r: Result object accepted by ``to_arrowTable``.

    Returns:
        The value returned by ``to_pandas(use_threads=True)`` on the
        converted arrow table.
    """
    t = to_arrowTable(r)
    return t.to_pandas(use_threads=True)

# wrap _chdb functions
def query(sql, output_format="CSV", **kwargs):
    """Run ``sql`` through ``_chdb.query`` and return the result.

    ``output_format`` is compared case-insensitively against two special
    values; anything else is forwarded to ``_chdb.query`` unchanged.

    Args:
        sql: Query text passed to ``_chdb.query``.
        output_format: ``"dataframe"`` runs the query with the ``"Arrow"``
            format and converts the result with ``to_df``; ``"arrowtable"``
            does the same but converts with ``to_arrowTable``. Defaults to
            ``"CSV"``.
        **kwargs: Forwarded to ``_chdb.query`` as-is.

    Returns:
        The converted object for the two special formats, otherwise
        whatever ``_chdb.query`` returns.
    """
    lower_output_format = output_format.lower()
    if lower_output_format == "dataframe":
        return to_df(_chdb.query(sql, "Arrow", **kwargs))
    if lower_output_format == "arrowtable":
        return to_arrowTable(_chdb.query(sql, "Arrow", **kwargs))
    return _chdb.query(sql, output_format, **kwargs)
23 changes: 20 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import re
import subprocess
import sysconfig
from setuptools import setup, Extension
Expand Down Expand Up @@ -57,6 +58,21 @@ def get_latest_git_tag(minor_ver_auto=False):
print(e)
raise

# Rewrite the `chdb_version = (major, minor, patch)` tuple in chdb/__init__.py
# (shipped as `chdb_version = (0, 1, 0)` by default) via a regex substitution,
# using the parts of the version string being built.
def fix_version_init(version):
    """Write ``version`` into the ``chdb_version`` tuple of chdb/__init__.py.

    The file is located relative to ``script_dir`` and edited in place:
    every ``chdb_version = (N, N, N)`` occurrence is replaced with the
    tuple built from ``version``.

    Args:
        version: Version string of the form ``"MAJOR.MINOR.PATCH"`` whose
            components are plain decimal numbers. Surrounding whitespace
            is ignored.

    Raises:
        ValueError: If ``version`` does not have exactly three numeric
            components. This is checked before the file is opened, so a
            bad version never modifies ``__init__.py``.
    """
    # Validate first: interpolating an unchecked component (e.g. "v0" or
    # "0rc1") would write invalid Python into __init__.py.
    match = re.fullmatch(r"([0-9]+)\.([0-9]+)\.([0-9]+)", version.strip())
    if match is None:
        raise ValueError(
            f"expected a version like 'MAJOR.MINOR.PATCH', got {version!r}"
        )
    # int() drops leading zeros, which are not valid in Python int literals.
    major, minor, patch = (int(part) for part in match.groups())
    replacement = f"chdb_version = ({major}, {minor}, {patch})"

    init_file = os.path.join(script_dir, "chdb", "__init__.py")
    with open(init_file, "r+") as f:
        init_content = f.read()
        # regex replace the version string `chdb_version = (0, 1, 0)`
        regPattern = r"chdb_version = \(\d+, \d+, \d+\)"
        init_content = re.sub(regPattern, replacement, init_content)
        # Rewrite from the start and truncate in case the new text is shorter.
        f.seek(0)
        f.write(init_content)
        f.truncate()


# As of Python 3.6, CCompiler has a `has_flag` method.
# cf http://bugs.python.org/issue26689
Expand Down Expand Up @@ -147,15 +163,16 @@ def build_extensions(self):
extra_objects=[chdb_so],
),
]

# fix the version in chdb/__init__.py
versionStr = get_latest_git_tag()
fix_version_init(versionStr)
setup(
packages=['chdb'],
version=get_latest_git_tag(),
version=versionStr,
package_data={'chdb': [chdb_so]},
exclude_package_data={'': ['*.pyc', 'src/**']},
ext_modules=ext_modules,
python_requires='>=3.7',
install_requires=['pyarrow', 'pandas'],
cmdclass={'build_ext': BuildExt},
test_suite="tests",
zip_safe=False,
Expand Down

0 comments on commit 028329b

Please sign in to comment.