diff --git a/.all-contributorsrc b/.all-contributorsrc deleted file mode 100644 index 1dc4e2223b6..00000000000 --- a/.all-contributorsrc +++ /dev/null @@ -1,62 +0,0 @@ -{ - "files": [ - "README.md" - ], - "imageSize": 100, - "commit": false, - "commitType": "docs", - "commitConvention": "angular", - "contributors": [ - { - "login": "auxten", - "name": "auxten", - "avatar_url": "https://avatars.githubusercontent.com/u/240147?v=4", - "profile": "http://auxten.com", - "contributions": [ - "code" - ] - }, - { - "login": "lmangani", - "name": "Lorenzo Mangani", - "avatar_url": "https://avatars.githubusercontent.com/u/1423657?v=4", - "profile": "https://metrico.in", - "contributions": [ - "code" - ] - }, - { - "login": "laodouya", - "name": "laodouya", - "avatar_url": "https://avatars.githubusercontent.com/u/4847103?v=4", - "profile": "https://github.com/laodouya", - "contributions": [ - "code" - ] - }, - { - "login": "nmreadelf", - "name": "nmreadelf", - "avatar_url": "https://avatars.githubusercontent.com/u/7260482?v=4", - "profile": "https://github.com/nmreadelf", - "contributions": [ - "code" - ] - }, - { - "login": "reema93jain", - "name": "reema93jain", - "avatar_url": "https://avatars.githubusercontent.com/u/113460610?v=4", - "profile": "https://github.com/reema93jain", - "contributions": [ - "test" - ] - } - ], - "contributorsPerLine": 7, - "skipCi": true, - "repoType": "github", - "repoHost": "https://github.com", - "projectName": "chdb", - "projectOwner": "chdb-io" -} diff --git a/.github/workflows/auto_release.yml b/.github/workflows/auto_release.yml deleted file mode 100644 index f1a6b307b40..00000000000 --- a/.github/workflows/auto_release.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: AutoRelease - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -concurrency: - group: auto-release -on: # yamllint disable-line rule:truthy - # schedule: - # - cron: '0 10-16 * * 1-5' - workflow_dispatch: - -jobs: - CherryPick: - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Set envs - # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/cherry_pick - ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=fuzzers - EOF - - name: Download changed images - # even if artifact does not exist, e.g. on `do not test` label or failed Docker job - continue-on-error: true - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - submodules: true - ref: ${{github.ref}} - - name: Build - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - libFuzzerTest: - needs: [BuilderFuzzers] - runs-on: [self-hosted, func-tester] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/libfuzzer - REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=libFuzzer tests - REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse - KILL_TIMEOUT=10800 - EOF - - name: Download changed images - # even if artifact does not exist, e.g. on `do not test` label or failed Docker job - continue-on-error: true - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.TEMP_PATH }} - - name: Download json reports - uses: actions/download-artifact@v3 - with: - path: ${{ env.REPORTS_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: libFuzzer test - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" - python3 libfuzzer_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/pr_ci.yaml b/.github/workflows/pr_ci.yaml new file mode 100644 index 00000000000..1e9c779259d --- /dev/null +++ b/.github/workflows/pr_ci.yaml @@ -0,0 +1,75 @@ +name: Pull-CI + +on: + pull_request: + types: [opened, synchronize] + +jobs: + build: + env: + PYTHON_VERSIONS: "3.11" + + runs-on: self-hosted + steps: + - name: Check for chdb directory + run: | + if [ ! -d "/home/ubuntu/pr_runner/chdb" ]; then + echo "chdb directory does not exist. Checkout the repository." + mkdir -p /home/ubuntu/pr_runner/ + git clone https://github.com/chdb-io/chdb.git /home/ubuntu/pr_runner/chdb + fi + + - name: Check for ccache status + run: | + ccache -sv + + - name: Copy submodules + run: cp -a /builder_cache/contrib /home/ubuntu/pr_runner/chdb/ + + - name: Cleanup and update chdb directory + run: | + cd /home/ubuntu/pr_runner/chdb + git fetch origin || true + git reset --hard origin/${{ github.head_ref }} || true + git clean -fdx || true + git checkout -f --progress ${{ github.head_ref }} || true + git status -v || true + continue-on-error: true + + - name: Code style check + run: | + export PYENV_ROOT="$HOME/.pyenv" + [[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH" + eval "$(pyenv init -)" + pyenv local 3.11 + python3 -m pip install flake8 + cd chdb && python3 -m flake8 + working-directory: /home/ubuntu/pr_runner/chdb + + - name: Cleanup dist directory + run: rm -rf /home/ubuntu/pr_runner/chdb/dist/* + + - name: Set PYTHON_VERSIONS environment variable + run: echo "PYTHON_VERSIONS=3.11" >> $GITHUB_ENV + + - name: Run build script + run: bash -x ./chdb/build_linux_arm64.sh + working-directory: /home/ubuntu/pr_runner/chdb + + - name: Check ccache statistics + run: | + ccache -s + ls -lh chdb + df -h + working-directory: /home/ubuntu/pr_runner/chdb + + - name: Audit wheels + run: | + export PYENV_ROOT="$HOME/.pyenv" + [[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH" + eval "$(pyenv init -)" + pyenv local 3.11 + ls -lh dist + python3 -m pip install auditwheel + python3 -m auditwheel -v repair -w dist/ --plat manylinux_2_17_aarch64 dist/*.whl + working-directory: /home/ubuntu/pr_runner/chdb diff --git a/.github/workflows/pull_request_approved.yml b/.github/workflows/pull_request_approved.yml deleted file mode 100644 index 3de4978ad68..00000000000 --- a/.github/workflows/pull_request_approved.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: PullRequestApprovedCI - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - pull_request_review: - types: - - submitted - -jobs: - MergeOnApproval: - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Merge approved PR - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --check-approved diff --git a/chdb/.flake8 b/chdb/.flake8 new file mode 100644 index 00000000000..fa9f8a89ada --- /dev/null +++ b/chdb/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 130 +extend-ignore = E722 diff --git a/chdb/__init__.py b/chdb/__init__.py index 645b4535ac6..b0ce8fc62cb 100644 --- a/chdb/__init__.py +++ b/chdb/__init__.py @@ -2,11 +2,15 @@ import os +class ChdbError(Exception): + """Base class for exceptions in this module.""" + + _arrow_format = set({"dataframe", "arrowtable"}) _process_result_format_funs = { - "dataframe" : lambda x : to_df(x), - "arrowtable": lambda x : to_arrowTable(x) - } + "dataframe": lambda x: to_df(x), + "arrowtable": lambda x: to_arrowTable(x) +} # If any UDF is defined, the path of the UDF will be set to this variable # and the path will be deleted when the process exits @@ -33,7 +37,7 @@ # Change here if project is renamed and does not equal the package name dist_name = __name__ __version__ = ".".join(map(str, chdb_version)) -except: # pragma: no cover +except: # noqa __version__ = "unknown" @@ -42,8 +46,8 @@ def to_arrowTable(res): """convert res to arrow table""" # try import pyarrow and pandas, if failed, raise ImportError with suggestion try: - import pyarrow as pa - import pandas + import pyarrow as pa # noqa + import pandas as pd # noqa except ImportError as e: print(f"ImportError: {e}") print('Please install pyarrow and pandas via "pip install pyarrow pandas"') @@ -66,10 +70,14 @@ def query(sql, output_format="CSV", path="", udf_path=""): if udf_path != "": g_udf_path = udf_path lower_output_format = output_format.lower() - result_func = _process_result_format_funs.get(lower_output_format, lambda x : x) + result_func = _process_result_format_funs.get(lower_output_format, lambda x: x) if lower_output_format in _arrow_format: output_format = "Arrow" res = _chdb.query(sql, output_format, path=path, udf_path=g_udf_path) if res.has_error(): - raise Exception(res.error_message()) + raise ChdbError(res.error_message()) return result_func(res) + + +__all__ = ["ChdbError", "query", "chdb_version", + "engine_version", "to_df", "to_arrowTable"] diff --git a/chdb/__main__.py b/chdb/__main__.py index caa6fda079b..eaf164c24eb 100644 --- a/chdb/__main__.py +++ b/chdb/__main__.py @@ -1,7 +1,7 @@ -import sys import argparse from .__init__ import query + def main(): prog = 'python -m chdb' description = ('''A simple command line interface for chdb @@ -27,5 +27,6 @@ def main(): temp = res.data() print(temp, end="") + if __name__ == '__main__': main() diff --git a/chdb/dataframe/__init__.py b/chdb/dataframe/__init__.py index e45dd670e0c..e74e32a30af 100644 --- a/chdb/dataframe/__init__.py +++ b/chdb/dataframe/__init__.py @@ -1,7 +1,7 @@ # try import pyarrow and pandas, if failed, raise ImportError with suggestion try: - import pyarrow as pa - import pandas as pd + import pyarrow as pa # noqa + import pandas as pd # noqa except ImportError as e: print(f'ImportError: {e}') print('Please install pyarrow and pandas via "pip install pyarrow pandas"') @@ -11,7 +11,7 @@ if pd.__version__[0] < '2': print('Please upgrade pandas to version 2.0.0 or higher to have better performance') -from .query import Table, pandas_read_parquet # noqa: C0413 +from .query import Table, pandas_read_parquet # noqa: C0413 query = Table.queryStatic diff --git a/chdb/dataframe/query.py b/chdb/dataframe/query.py index 6482da429e3..37bae7f679c 100644 --- a/chdb/dataframe/query.py +++ b/chdb/dataframe/query.py @@ -327,7 +327,7 @@ def memfd_create(name: str = None) -> int: try: fd = os.memfd_create(name, flags=os.MFD_CLOEXEC) return fd - except: + except: # noqa return -1 return -1 diff --git a/chdb/dbapi/__init__.py b/chdb/dbapi/__init__.py index d0ebfda937e..ffce07c1b7e 100644 --- a/chdb/dbapi/__init__.py +++ b/chdb/dbapi/__init__.py @@ -1,9 +1,4 @@ -from .converters import escape_dict, escape_sequence, escape_string from .constants import FIELD_TYPE -from .err import ( - Warning, Error, InterfaceError, DataError, - DatabaseError, OperationalError, IntegrityError, InternalError, - NotSupportedError, ProgrammingError) from . import connections as _orig_conn from .. import chdb_version diff --git a/chdb/dbapi/constants/FIELD_TYPE.py b/chdb/dbapi/constants/FIELD_TYPE.py index 2bc7713424a..b8b4486605f 100644 --- a/chdb/dbapi/constants/FIELD_TYPE.py +++ b/chdb/dbapi/constants/FIELD_TYPE.py @@ -29,4 +29,3 @@ CHAR = TINY INTERVAL = ENUM - diff --git a/chdb/dbapi/cursors.py b/chdb/dbapi/cursors.py index c8f67a77337..ee9e0fa5e8c 100644 --- a/chdb/dbapi/cursors.py +++ b/chdb/dbapi/cursors.py @@ -295,4 +295,3 @@ def _conv_row(self, row): if row is None: return None return self.dict_type(zip(self._fields, row)) - diff --git a/chdb/dbapi/times.py b/chdb/dbapi/times.py index 9afa599677a..4497dacf6a9 100644 --- a/chdb/dbapi/times.py +++ b/chdb/dbapi/times.py @@ -18,4 +18,3 @@ def TimeFromTicks(ticks): def TimestampFromTicks(ticks): return datetime(*localtime(ticks)[:6]) - diff --git a/chdb/session/__init__.py b/chdb/session/__init__.py index 97ee55ff812..73cbc079fbe 100644 --- a/chdb/session/__init__.py +++ b/chdb/session/__init__.py @@ -1 +1,3 @@ -from .state import * \ No newline at end of file +from .state import Session + +__all__ = ["Session"] diff --git a/chdb/session/state.py b/chdb/session/state.py index 767197a8848..fa478221ebd 100644 --- a/chdb/session/state.py +++ b/chdb/session/state.py @@ -1,7 +1,7 @@ import tempfile import shutil -from chdb import query, g_udf_path +from chdb import query class Session: @@ -37,7 +37,7 @@ def __exit__(self, exc_type, exc_value, traceback): def cleanup(self): try: shutil.rmtree(self._path) - except: + except: # noqa pass def query(self, sql, fmt="CSV"): diff --git a/chdb/udf/__init__.py b/chdb/udf/__init__.py index ed5e929ed1c..89bb588b7c8 100644 --- a/chdb/udf/__init__.py +++ b/chdb/udf/__init__.py @@ -1 +1,3 @@ -from .udf import * \ No newline at end of file +from .udf import chdb_udf, generate_udf + +__all__ = ["chdb_udf", "generate_udf"] diff --git a/chdb/udf/udf.py b/chdb/udf/udf.py index bc7c1477be5..a92b0de4433 100644 --- a/chdb/udf/udf.py +++ b/chdb/udf/udf.py @@ -82,7 +82,7 @@ def decorator(func): args = list(sig.parameters.keys()) src = inspect.getsource(func) src = textwrap.dedent(src) - udf_body = src.split("\n", 1)[1] # remove the first line "@chdb_udf()" + udf_body = src.split("\n", 1)[1] # remove the first line "@chdb_udf()" # create tmp dir and make sure the dir is deleted when the process exits if chdb.g_udf_path == "": chdb.g_udf_path = tempfile.mkdtemp() @@ -92,7 +92,7 @@ def decorator(func): def _cleanup(): try: shutil.rmtree(chdb.g_udf_path) - except: + except: # noqa pass generate_udf(func_name, args, return_type, udf_body) diff --git a/tox.ini b/tox.ini index c7c9ef0dbfd..7008c858047 100644 --- a/tox.ini +++ b/tox.ini @@ -7,9 +7,6 @@ minversion = 3.24 envlist = py38, py39, py310, py311 isolated_build = True -[flake8] -max-line-length = 120 - [testenv] description = Build and test the package setenv =