diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index ef554a1b0ff6..6b05f1fe9f43 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -138,19 +138,26 @@ jobs:
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
-  DockerServerImages:
+  DockerServerImage:
     needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Docker server and keeper images
+      test_name: Docker server image
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
-      checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       run_command: |
-        cd "$GITHUB_WORKSPACE/tests/ci"
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
+  DockerKeeperImage:
+    needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Docker keeper image
+      runner_type: style-checker
+      data: ${{ needs.RunConfig.outputs.data }}
+      run_command: |
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
   ############################################################################################
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 6d150f37a27c..5d57e6fc1d82 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -35,7 +35,7 @@ jobs:
       - name: PrepareRunConfig
         id: runconfig
         run: |
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json
+          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json
           echo "::group::CI configuration"
           python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
@@ -242,20 +242,26 @@ jobs:
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
-  DockerServerImages:
+  DockerServerImage:
     needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Docker server and keeper images
+      test_name: Docker server image
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
-      # FIXME: avoid using 0 checkout
-      checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       run_command: |
-        cd "$GITHUB_WORKSPACE/tests/ci"
         python3 docker_server.py --release-type head \
           --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
+  DockerKeeperImage:
+    needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Docker keeper image
+      runner_type: style-checker
+      data: ${{ needs.RunConfig.outputs.data }}
+      run_command: |
         python3 docker_server.py --release-type head \
           --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
   ############################################################################################
@@ -313,22 +319,15 @@ jobs:
       run_command: |
         python3 build_report_check.py "$CHECK_NAME"
   MarkReleaseReady:
+    needs: [RunConfig, BuilderBinDarwin, BuilderBinDarwinAarch64, BuilderDebRelease, BuilderDebAarch64]
     if: ${{ !failure() && !cancelled() }}
-    needs:
-      - BuilderBinDarwin
-      - BuilderBinDarwinAarch64
-      - BuilderDebRelease
-      - BuilderDebAarch64
-    runs-on: [self-hosted, style-checker]
-    steps:
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-      - name: Mark Commit Release Ready
-        run: |
-          cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 mark_release_ready.py
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Mark Commit Release Ready
+      runner_type: style-checker
+      data: ${{ needs.RunConfig.outputs.data }}
+      run_command: |
+        python3 mark_release_ready.py
   ############################################################################################
   #################################### INSTALL PACKAGES ######################################
   ############################################################################################
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index b3ac2135e503..9c08363f674c 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -104,7 +104,7 @@ jobs:
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Fast tests
+      test_name: Fast test
       runner_type: builder
       data: ${{ needs.RunConfig.outputs.data }}
       run_command: |
@@ -273,19 +273,26 @@ jobs:
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
-  DockerServerImages:
+  DockerServerImage:
     needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Docker server and keeper images
+      test_name: Docker server image
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
-      checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       run_command: |
-        cd "$GITHUB_WORKSPACE/tests/ci"
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
+  DockerKeeperImage:
+    needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Docker keeper image
+      runner_type: style-checker
+      data: ${{ needs.RunConfig.outputs.data }}
+      run_command: |
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
   ############################################################################################
@@ -484,11 +491,11 @@ jobs:
       run_command: |
         TEMP_PATH="${TEMP_PATH}/integration" \
           python3 integration_test_check.py "Integration $CHECK_NAME" \
-            --validate-bugfix --post-commit-status=file || echo 'ignore exit code'
+            --validate-bugfix --post-commit-status=file || echo "ignore exit code"
         TEMP_PATH="${TEMP_PATH}/stateless" \
           python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
-            --validate-bugfix --post-commit-status=file || echo 'ignore exit code'
+            --validate-bugfix --post-commit-status=file || echo "ignore exit code"
         python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/functional_commit_status.tsv" "${TEMP_PATH}/integration/integration_commit_status.tsv"
 ##############################################################################################
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 69229ef75df0..c5d8294b999f 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -41,7 +41,7 @@ jobs:
         id: runconfig
         run: |
           echo "::group::configure CI run"
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json
+          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json
           echo "::endgroup::"
           echo "::group::CI run configure results"
           python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
@@ -153,19 +153,26 @@ jobs:
   ############################################################################################
   ##################################### Docker images #######################################
   ############################################################################################
-  DockerServerImages:
+  DockerServerImage:
     needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Docker server and keeper images
+      test_name: Docker server image
       runner_type: style-checker
       data: ${{ needs.RunConfig.outputs.data }}
-      checkout_depth: 0
       run_command: |
-        cd "$GITHUB_WORKSPACE/tests/ci"
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
+  DockerKeeperImage:
+    needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Docker keeper image
+      runner_type: style-checker
+      data: ${{ needs.RunConfig.outputs.data }}
+      run_command: |
         python3 docker_server.py --release-type head --no-push \
           --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
   ############################################################################################
@@ -456,7 +463,8 @@ jobs:
   FinishCheck:
     if: ${{ !failure() && !cancelled() }}
     needs:
-      - DockerServerImages
+      - DockerServerImage
+      - DockerKeeperImage
       - BuilderReport
       - BuilderSpecialReport
       - MarkReleaseReady
diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml
index 0a3945829ca5..e4fc9f0b1d38 100644
--- a/.github/workflows/tags_stable.yml
+++ b/.github/workflows/tags_stable.yml
@@ -55,7 +55,7 @@ jobs:
           python3 ./utils/security-generator/generate_security.py > SECURITY.md
           git diff HEAD
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v3
+        uses: peter-evans/create-pull-request@v6
         with:
           author: "robot-clickhouse "
           token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
diff --git a/.gitmodules b/.gitmodules
index 68016bf8c5bf..a618104f3642 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -99,7 +99,7 @@
 	url = https://github.com/awslabs/aws-c-event-stream
 [submodule "aws-c-common"]
 	path = contrib/aws-c-common
-	url = https://github.com/ClickHouse/aws-c-common
+	url = https://github.com/awslabs/aws-c-common.git
 [submodule "aws-checksums"]
 	path = contrib/aws-checksums
 	url = https://github.com/awslabs/aws-checksums
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b36142cc9fd..50db3292ca88 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,2164 +1,178 @@
 ### Table of Contents
-**[ClickHouse release v23.12, 2023-12-28](#2312)**<br/>
-**[ClickHouse release v23.11, 2023-12-06](#2311)**
-**[ClickHouse release v23.10, 2023-11-02](#2310)**
-**[ClickHouse release v23.9, 2023-09-28](#239)**
-**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**
-**[ClickHouse release v23.7, 2023-07-27](#237)**
-**[ClickHouse release v23.6, 2023-06-30](#236)**
-**[ClickHouse release v23.5, 2023-06-08](#235)**
-**[ClickHouse release v23.4, 2023-04-26](#234)**
-**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**
-**[ClickHouse release v23.2, 2023-02-23](#232)**
-**[ClickHouse release v23.1, 2023-01-25](#231)**
-**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**
+**[ClickHouse release v24.1, 2024-01-30](#241)**
+**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**
-# 2023 Changelog +# 2024 Changelog -### ClickHouse release 23.12, 2023-12-28 +### ClickHouse release 24.1, 2024-01-30 -#### Backward Incompatible Change -* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for the `OPTIMIZE` is not allowed by default (it can be unlocked with the `allow_experimental_replacing_merge_with_cleanup` setting). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/33919). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)). -* Introduce `PASTE JOIN`, which allows users to join tables without `ON` clause simply by row numbers. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* The `ORDER BY` clause now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)). -* Added a new mutation command `ALTER TABLE APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)). -* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)). 
-* Add a new function `seriesPeriodDetectFFT` to detect series period using FFT. [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)). -* Add an HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). The mode of schema inference is controlled by a setting `schema_inference_mode` with two possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)). -* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows to infer numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)). -* When the number of databases or tables exceeds a configurable threshold, show a warning to the user. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)). -* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)). -* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `SHA512_256` function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)). -* Allow passing optional session token to the `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)). -* Introduce a new setting `http_make_head_request`. If it is turned off, the URL table engine will not do a HEAD request to determine the file size. This is needed to support inefficient, misconfigured, or not capable HTTP servers. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)). -* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)). -* Added a new setting `readonly` which can be used to specify an S3 disk is read only. It can be useful to create a table on a disk of `s3_plain` type, while having read only access to the underlying S3 bucket. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)). -* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). 
This feature can serve as a special kind of a secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)). - -#### Performance Improvement -* Extract non-intersecting parts ranges from MergeTree table during FINAL processing. That way we can avoid additional FINAL logic for this non-intersecting parts ranges. In case when amount of duplicate values with same primary key is low, performance will be almost the same as without FINAL. Improve reading performance for MergeTree FINAL when `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)). -* Made copy between s3 disks using a s3-server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Hash JOIN respects setting `max_joined_block_size_rows` and do not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)). -* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)). -* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)). -* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)). -* The `hasAny` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)). -* Function `if(cond, then, else)` (and its alias `cond ? then : else`) were optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)). -* MergeTree automatically derive `do_not_merge_across_partitions_select_final` setting if partition key expression contains only columns from primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)). -* Speedup `MIN` and `MAX` for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)). -* Implement `SLRU` cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)). -* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of `background_fetches_pool_size` setting. - MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete - MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` are moved to the Server-level. - Setting `keep_alive_timeout` is added to the list of Server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
-* Make querying `system.filesystem_cache` not memory intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Reduce memory usage on strings deserialization. [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)). -* More efficient constructor for Enum - it makes sense when Enum has a boatload of values. [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)). -* An improvement for reading from the filesystem cache: always use `pread` method. [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). -* Add optimization for AND notEquals chain in logical expression optimizer. This optimization is only available with the experimental Analyzer enabled. [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). - -#### Improvement -* Support for soft memory limit in Keeper. It will refuse requests if the memory usage is close to the maximum. [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)). [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)). -* Make inserts into distributed tables handle updated cluster configuration properly. When the list of cluster nodes is dynamically updated, the Directory Monitor of the distribution table will update it. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)). -* Do not allow creating a replicated table with inconsistent merge parameters. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)). -* Show uncompressed size in `system.tables`. [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)). -* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)). -* The function `substring` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with 3rd party tools such as Tableau via MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)). -* Function `format` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This is important to calculate `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)). -* Allows to use the `date_trunc` function with a case-insensitive first argument. Both cases are now supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)). 
-* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings in query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)). -* Slightly better inference of unnamed tupes in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for read-only flag when connecting to Keeper (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Fix possible distributed sends stuck due to "No such file or directory" (during recovering a batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` >5min). Introduce profile event to track async INSERT failures - `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)). -* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL` (experimental feature). Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow to apply some filesystem cache config settings changes without server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Properly handling PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot](https://github.com/myrrc)). -* Expose the total number of errors occurred since last server restart as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow nodes in the configuration file with `from_env`/`from_zk` reference and non empty element with replace=1. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)). -* A table function `fuzzJSON` which allows generating a lot of malformed JSON for fuzzing. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)). -* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add a setting for `async inserts deduplication cache` - how long we wait for cache update. Deprecate setting `async_block_ids_cache_min_update_interval_ms`. Now cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)). -* `sleep()` function now can be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)). -* Forbid `CREATE TABLE ... AS SELECT` queries for `Replicated` table engines in the experimental `Replicated` database because they are not supported. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix and improve transforming queries for external databases, to recursively obtain all compatible predicates. 
[#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)). -* Support dynamic reloading of the filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Correctly support `system.stack_trace` for threads with blocked SIGRTMIN (these threads can exist in low-quality external libraries such as Apache rdkafka). [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)). Aand also send signal to the threads only if it is not blocked to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense. [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)). -* Tolerate keeper failures in the quorum inserts' check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)). -* Add max/peak RSS (`MemoryResidentMax`) into system.asynchronous_metrics. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)). -* This PR allows users to use s3-style links (`https://` and `s3://`) without mentioning region if it's not default. Also find the correct region if the user mentioned the wrong one. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added explicit `finalize()` function in `ZipArchiveWriter`. Simplify too complicated code in `ZipArchiveWriter`. This fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)). -* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in 23.4. If such caches with the same path have different set of cache settings, an exception will be thrown, that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Parallel replicas (experimental feature): friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): give more respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)). -* Parallel replicas (experimental feature): disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): add profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)). -* Non POST requests such as HEAD will be readonly similar to GET. 
[#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Add `bytes_uncompressed` column to `system.part_log` [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)). -* Add base backup name to `system.backups` and `system.backup_log` tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)). - -#### Build/Testing/Packaging Improvement -* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)). -* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)). -* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). The fully-static Musl build is available to download from the CI. -* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the usage of a harmful C/POSIX `select` function from external libraries. [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)). -* Settings only available in ClickHouse Cloud will be also present in the open-source ClickHouse build for convenience. [#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fixed a possibility of sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix: split `lttb` bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)). -* Fix possible deadlock in the `Template` format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix early stop while parsing a file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)). -* Prevent dictionary's ACL bypass via the `dictionary` table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix another case of a "non-ready set" error found by Fuzzer. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)). 
-* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)). Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* MaterializedPostgreSQL (experimental issue): fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)). -* A fix for Hash JOIN. [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)). -* Fix possible error in PostgreSQL source [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix type correction in Hash JOIN for nested LowCardinality. [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)). -* Avoid hangs of `system.stack_trace` by correctly prohibiting parallel reading from it. [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)). -* Fix an error for aggregation of sparse columns with `any(...) RESPECT NULL` [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). -* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix dependency loading for the experimental table engine `MaterializedPostgreSQL`. [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)). -* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)). -* Disable `system.kafka_consumers` because it has a bug. [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). -* Fix LowCardinality keys support in Merge JOIN. [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)). -* A fix for `InterpreterCreateQuery` related to the sample block. [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)). -* `addresses_expr` were ignored for named collections from PostgreSQL. 
[#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)). -* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). Then it was rewritten from Rust to C++ for better [memory-safety](https://www.memorysafety.org/). [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)). -* Normalize function names in `CREATE INDEX` [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). -* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)). -* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix integer overflow in the `Poco` library, related to `UTF32Encoding` [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). -* Fix parallel replicas (experimental feature) in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix `accurateCastOrNull` for out-of-range `DateTime` [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix possible `PARAMETER_OUT_OF_BOUND` error during subcolumns reading from a wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a slow-down of CREATE VIEW with an enormous number of subqueries [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)). -* Fix parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). - - -### ClickHouse release 23.11, 2023-12-06 - -#### Backward Incompatible Change -* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collection by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Multiple improvements for `RESPECT NULLS`/`IGNORE NULLS` for window functions. If you use them as aggregate functions and store the states of aggregate functions with these modifiers, they might become incompatible. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)). -* Remove optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)). -* Formatters `%l`/`%k`/`%c` in function `parseDateTime` are now able to parse hours/months without leading zeros, e.g. `select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. 
Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by setting `formatdatetime_format_without_leading_zeros` but off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). -* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed error in aggregate function `sumMap` and made it slower around 1.5..2 times. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed in case more than one aggregate state is generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with `Ordinary`, `Atomic` and `Replicated` engines. Their tables load metadata asynchronously. Query to a table increases the priority of the load job and waits for it to be done. Added a new table `system.asynchronous_loader` for introspection. [#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)). -* Add system table `blob_storage_log`. It allows auditing all the data written to S3 and other object storages. [#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)). -* Use statistics to order prewhere conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)). -* Added support for compression in the Keeper's protocol. It can be enabled on the ClickHouse side by using this flag `use_compression` inside `zookeeper` section. Keep in mind that only ClickHouse Keeper supports compression, while Apache ZooKeeper does not. Resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Introduce the feature `storage_metadata_write_full_object_key`. If it is set as `true` then metadata files are written with the new format. With that format ClickHouse stores full remote object key in the metadata file which allows better flexibility and optimization. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)). -* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. 
[#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Add `hostname` column to all system log tables - it is useful if you make the system tables replicated, shared, or distributed. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)). -* Added function `fromDaysSinceYearZero` which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)). -* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)). -* Implement a new setting called `preferred_optimize_projection_name`. If it is set to a non-empty string, the specified projection would be used if possible instead of choosing from all the candidates. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)). [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Added a new SQL function, `arrayRandomSample(arr, k)` which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. `SELECT arrayReduce('groupArraySample(3)', range(10))`. [#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)). -* Added support for `Float16` type data to use in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Added a system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)). -* Add `system.symbols` table useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)). -* Introduce `fileCluster` table function - it is useful if you mount a shared filesystem (NFS and similar) into the `user_files` directory. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)). -* Add `_size` virtual column with file size in bytes to `s3/file/hdfs/url/azureBlobStorage` engines. [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)). -* Expose the number of errors for each error code occurred on a server since last restart from the Prometheus endpoint. 
[#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* ClickHouse keeper reports its running availability zone at `/keeper/availability-zone` path. This can be configured via `us-west-1a`. [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)). -* Make ALTER materialized_view MODIFY QUERY non experimental and deprecate `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)). -* Setting `join_algorithm` respects specified order [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)). -* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). - -#### Performance Improvement -* Adaptive timeouts for interacting with S3. The first attempt is made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)). -* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connecting clients, which are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support parallel evaluation of window functions. Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)). -* `Numbers` table engine (of the `system.numbers` table) now analyzes the condition to generate the needed subset of data, like table's index. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)). -* Improved the performance of filtering by `IN (...)` condition for `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)). -* An improvement which takes place when the filesystem cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add ability to disable checksums for S3 to avoid excessive pass over the file (this is controlled by the setting `s3_disable_checksum`). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)). -* Now we read synchronously from remote tables when data is in page cache (like we do for local tables). It is faster, it doesn't require synchronisation inside the thread pool, and doesn't hesitate to do `seek`-s on local FS, and reduces CPU wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)). -* Optimization for getting value from `map`, `arrayElement`. 
It will bring about 30% speedup. - reduce the reserved memory - reduce the `resize` call. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)). -* Optimization of multi-stage filtering with AVX-512. The performance experiments of the OnTime dataset on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring the improvements of 7.4%, 5.9%, 4.7%, 3.0%, and 4.6% to the QPS of the query Q2, Q3, Q4, Q5 and Q6 respectively while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Limit the number of threads busy inside the query profiler. If there are more - they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Decrease the amount of virtual function calls in window functions. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow recursive Tuple field pruning in ORC data format to speed up scaning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)). -* Trivial count optimization for `Npy` data format: queries like `select count() from 'data.npy'` will work much more fast because of caching the results. [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Queries with aggregation and a large number of streams will use less amount of memory during the plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve performance of executing queries for use cases with many users and highly concurrent queries (>2000 QPS) by optimizing the access to ProcessList. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)). -* Trivial improvement on array join, reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)). -* There are cases when stack unwinding was slow. Not anymore. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Now we use default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)). -* Keeper improvement: improve memory-usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)). -* Improved performance of glob matching for `File` and `HDFS` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)). -* Posting lists in experimental full text indexes are now compressed which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Parallelise `BackupEntriesCollector` in backups. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)). - -#### Improvement -* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default). 
When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1 (see the sketch below). [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). -* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)). -* Support creating and materializing an index in the same ALTER query; also support "modify TTL" and "materialize TTL" in the same query. Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)). -* Add a new table function named `fuzzJSON` with rows containing perturbed versions of the source JSON string with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)). -* Engine `Merge` filters the records according to the row policies of the underlying tables, so you don't have to create another row policy on a `Merge` table. [#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)). -* Add a setting `max_execution_time_leaf` to limit the execution time on shards for distributed queries, and `timeout_overflow_mode_leaf` to control the behaviour if a timeout happens. [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)). -* Add a ClickHouse setting to disable tunneling for HTTPS requests over an HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)). -* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions. [#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)). -* When reading data from a CSV file in which a line ends with `\r` not followed by `\n`, ClickHouse throws the exception `Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.` In ClickHouse, a CSV line must end with `\n`, `\r\n` or `\n\r`, so `\r` must be followed by `\n`; however, in some situations the CSV input data is abnormal, as above, with `\r` at the end of the line. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)). -* Update the Arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve performance of ON CLUSTER queries by removing heavy system calls to get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)). -* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). -* Add support for `LARGE_LIST` in Apache Arrow formats. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)).
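A minimal sketch of the implicit sign constraint described above (the table and column names here are illustrative, not from the release notes):

```sql
-- With the MergeTree setting enabled, an implicit CHECK constraint
-- restricts the Sign column to the values -1 and 1.
CREATE TABLE collapsing_example
(
    key   UInt64,
    value String,
    Sign  Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY key
SETTINGS add_implicit_sign_column_constraint_for_collapsing_engine = 1;

INSERT INTO collapsing_example VALUES (1, 'a', 1);  -- accepted
INSERT INTO collapsing_example VALUES (1, 'a', 2);  -- rejected by the implicit constraint
```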
-* Allow manual compaction of `EmbeddedRocksDB` via `OPTIMIZE` query. [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)). -* Add ability to specify BlockBasedTableOptions for `EmbeddedRocksDB` tables. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)). -* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)). -* Fixed possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fixed handling of non-const query strings in functions `formatQuery`/ `formatQuerySingleLine`. Also added `OrNull` variants of both functions that return a NULL when a query cannot be parsed instead of throwing an exception. [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow backup of materialized view with dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas` they do same requests multiple times. The change is to "deduplicate" requests from concurrent queries. [#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)). -* Fix translation to MySQL compatible query for querying external databases. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). -* Add support for backing up and restoring tables using `KeeperMap` engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)). -* 404 response for CompleteMultipartUpload has to be rechecked. Operation could be done on server even if client got timeout or other network errors. The next retry of CompleteMultipartUpload receives 404 response. If the object key exists that operation is considered as successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)). -* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log message and profile events for backups. 
[#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)). -* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result (a usage sketch follows below). [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)). -* Rewrite equality with `is null` check in the JOIN ON section. Experimental *Analyzer only*. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)). -* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes it behave more similarly to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42`. [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)). -* Allow getting the cache configuration from the 'named_collection' section in the config or from SQL-created named collections. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)). -* PostgreSQL database engine: Make the removal of outdated tables less aggressive with an unsuccessful postgres connection. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)). -* Previously, it took too much time to connect to PostgreSQL when the URL was not right, so the relevant query got stuck there and was eventually cancelled. [#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)). -* Keeper improvement: disable compressed logs by default in Keeper. [#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)). -* Add config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)). -* There was a potential vulnerability in previous ClickHouse versions: if a user had connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it could lead to its exploitation without authentication. This issue was found with the [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state; in the case of zero-copy replication this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). -* Fix a possible PostgreSQL logical replication conversion error when using experimental `MaterializedPostgreSQL`. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)).
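A sketch of the replacement query-cache setting described above; the value names (`throw`, `save`, `ignore`) are assumed from the behaviors listed in that entry:

```sql
-- Default behavior: using the query cache with a non-deterministic function throws.
SELECT now() SETTINGS use_query_cache = 1;

-- Assumed value names: cache the result anyway, or skip caching without an exception.
SELECT now() SETTINGS use_query_cache = 1, query_cache_nondeterministic_function_handling = 'save';
SELECT now() SETTINGS use_query_cache = 1, query_cache_nondeterministic_function_handling = 'ignore';
```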
-* Implement user-level setting `alter_move_to_space_execute_async` which allows executing `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` queries asynchronously. The size of the pool for background executions is controlled by `background_move_pool_size`. The default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)). -* Allow filtering by engine when scanning `system.tables`, to avoid unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)). -* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)). -* LZ4 compression: buffer the compressed block in the rare case when the output buffer capacity is not enough for writing the compressed block directly into it. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)). -* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a setting for the PostgreSQL table engine in the config file, a check for the setting, and documentation around the additional setting. [#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)). -* Function `concat` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)). -* Sign all `x-amz-*` headers as required by the AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)). -* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improves compatibility with 3rd party tools such as Tableau Online. [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)). -* Add `system.s3queue_log` to the default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)). -* Check the dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)). -* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations.
[#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)). -* The server's exit code will correspond to the exception code. For example, if the server cannot start due to memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Do not demangle and symbolize stack frames from `functional` C++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)). -* HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)). -* The `max_memory_usage_in_client` command line option supports a string value with a suffix (K, M, G, etc). Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). [#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1 . Also fixed a bug in case of BOF (Block On Fault) = 0, changed to handle page faults by falling back to SW path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)). -* Increase default `replicated_deduplication_window` of MergeTree settings from 100 to 1k. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)). -* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` that much. If possible prefer to continue scanning instead of stopping and starting the scanning for backup from the beginning. [#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)). - -#### Build/Testing/Packaging Improvement -* Add SQLLogic test. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)). -* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Lower the size of the single-binary distribution. This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Certain files of "Apache Arrow" library (which we use only for non-essential things like parsing the arrow format) were rebuilt all the time regardless of the build cache. This is fixed. 
[#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Avoid recompiling translation units depending on the autogenerated source file about version. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)). -* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) `SYSTEM RESET COVERAGE` query that resets the accumulated data. This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)). -* Add an option to disable `libssh`. [#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Enable temporary_data_in_cache in S3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)). -* Set the max memory usage for clickhouse-client (`1G`) in the CI. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix exerimental Analyzer - insertion from select with subquery referencing insertion table should process only insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix a bug in `str_to_map` function. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)). -* Keeper `reconfig`: add timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)). -* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)). -* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). -* Fix 'Cannot read from file:' while running client in a background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)). 
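A sketch of the coverage workflow enabled by the `SANITIZE_COVERAGE` build option described above; it assumes a binary built with that option:

```sql
-- Reset the accumulated coverage, run a query, then inspect what was hit.
SYSTEM RESET COVERAGE;
SELECT 1;
SELECT length(coverage()) AS unique_addresses_hit;  -- unique code addresses found since the reset
```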
-* Fix clickhouse-local exit on bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). -* Bug fix explain ast with parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). -* Fix rare race condition related to Memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). -* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). -* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). -* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). 
-* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). -* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix incorrect nullable columns reporting in MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). -* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). -* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix failure to start due to table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix flattening existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix allow cr end of line for csv [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix `tryBase64Decode` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix generating deep nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). -* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). 
-* Fix sqlite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). -* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). -* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). -* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). -* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). -* Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). -* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). -* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). -* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). -* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). -* Ignore finished ON CLUSTER tasks if hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). -* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). -* FS cache: add a limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). 
- - ### ClickHouse release 23.10, 2023-11-02 - #### Backward Incompatible Change -* There is no longer an option to automatically remove broken data parts. This closes [#55174](https://github.com/ClickHouse/ClickHouse/issues/55174). [#55184](https://github.com/ClickHouse/ClickHouse/pull/55184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#55557](https://github.com/ClickHouse/ClickHouse/pull/55557) ([Jihyuk Bok](https://github.com/tomahawk28)). -* The obsolete in-memory data parts can no longer be read from the write-ahead log. If you have configured in-memory parts before, they have to be removed before the upgrade. [#55186](https://github.com/ClickHouse/ClickHouse/pull/55186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the integration with Meilisearch. Reason: it was compatible only with the old version 0.18. The recent version of Meilisearch changed the protocol and does not work anymore. Note: we would appreciate it if you helped to bring it back. [#55189](https://github.com/ClickHouse/ClickHouse/pull/55189) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Rename the directory monitor concept to background INSERT. All the settings `*directory_monitor*` have been renamed to `distributed_background_insert*`. *Backward compatibility should be preserved* (since the old settings had been added as aliases). [#55978](https://github.com/ClickHouse/ClickHouse/pull/55978) ([Azat Khuzhin](https://github.com/azat)). -* Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vice versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)). -* Comparison of time intervals with different units will throw an exception. This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally relied on the previous behavior, when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Rewrote the experimental `S3Queue` table engine completely: changed the way we keep information in ZooKeeper, which allows making fewer ZooKeeper requests; added caching of the ZooKeeper state in cases when we know the state will not change; made the polling of the S3 process less aggressive; changed the way the TTL and the maximum set of tracked files are maintained, which is now a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Arbitrary paths on the HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)). - -#### New Feature -* Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)). -* Support for the `Npy` format: `SELECT * FROM file('example_array.npy', Npy)`. [#55982](https://github.com/ClickHouse/ClickHouse/pull/55982) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
-* If a table has a space-filling curve in its key, e.g., `ORDER BY mortonEncode(x, y)`, the conditions on its arguments, e.g., `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, can be used for indexing (see the sketch below). A setting `analyze_index_with_space_filling_curves` is added to enable or disable this analysis. This closes [#41195](https://github.com/ClickHouse/ClickHouse/issue/41195). Continuation of [#4538](https://github.com/ClickHouse/ClickHouse/pull/4538). Continuation of [#6286](https://github.com/ClickHouse/ClickHouse/pull/6286). Continuation of [#28130](https://github.com/ClickHouse/ClickHouse/pull/28130). Continuation of [#41753](https://github.com/ClickHouse/ClickHouse/pull/#41753). [#55642](https://github.com/ClickHouse/ClickHouse/pull/55642) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* A new setting called `force_optimize_projection_name` takes the name of a projection as an argument. If its value is set to a non-empty string, ClickHouse checks that this projection is used in the query at least once. Closes [#55331](https://github.com/ClickHouse/ClickHouse/issues/55331). [#56134](https://github.com/ClickHouse/ClickHouse/pull/56134) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Support asynchronous inserts with external data via the native protocol. Previously it worked only if the data was inlined into the query. [#54730](https://github.com/ClickHouse/ClickHouse/pull/54730) ([Anton Popov](https://github.com/CurtizJ)). -* Added aggregation function `lttb` which uses the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm for downsampling data for visualization. [#53145](https://github.com/ClickHouse/ClickHouse/pull/53145) ([Sinan](https://github.com/sinsinan)). -* Query `CHECK TABLE` has better performance and usability (sends progress updates, cancellable). Support checking a particular part with `CHECK TABLE ... PART 'part_name'`. [#53404](https://github.com/ClickHouse/ClickHouse/pull/53404) ([vdimir](https://github.com/vdimir)). -* Added function `jsonMergePatch`. When working with JSON data as strings, it provides a way to merge these strings (of JSON objects) together to form a single string containing a single JSON object. [#54364](https://github.com/ClickHouse/ClickHouse/pull/54364) ([Memo](https://github.com/Joeywzr)). -* The second part of Kusto Query Language dialect support. [Phase 1 implementation](https://github.com/ClickHouse/ClickHouse/pull/37961) has been merged. [#42510](https://github.com/ClickHouse/ClickHouse/pull/42510) ([larryluogit](https://github.com/larryluogit)). -* Added a new SQL function, `arrayRandomSample(arr, k)`, which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. "SELECT arrayReduce('groupArraySample(3)', range(10))". [#54391](https://github.com/ClickHouse/ClickHouse/pull/54391) ([itayisraelov](https://github.com/itayisraelov)). -* Introduce `-ArgMin`/`-ArgMax` aggregate combinators which allow aggregating by min/max values only. One use case can be found in [#54818](https://github.com/ClickHouse/ClickHouse/issues/54818). This PR also reorganizes combinators into a dedicated folder. [#54947](https://github.com/ClickHouse/ClickHouse/pull/54947) ([Amos Bird](https://github.com/amosbird)). -* Allow dropping the cache for the Protobuf format with `SYSTEM DROP SCHEMA FORMAT CACHE [FOR Protobuf]`. [#55064](https://github.com/ClickHouse/ClickHouse/pull/55064) ([Aleksandr Musorin](https://github.com/AVMusorin)).
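A sketch of the space-filling-curve index analysis from the first entry above (the table and data are illustrative):

```sql
-- The primary key uses a Morton (Z-order) curve over x and y.
CREATE TABLE points (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY mortonEncode(x, y);

-- Range conditions on the curve's arguments can now be used for index analysis.
SELECT count()
FROM points
WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30
SETTINGS analyze_index_with_space_filling_curves = 1;
```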
-* Add external HTTP Basic authenticator. [#55199](https://github.com/ClickHouse/ClickHouse/pull/55199) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Added function `byteSwap` which reverses the bytes of unsigned integers. This is particularly useful for reversing values of types which are represented as unsigned integers internally, such as IPv4. [#55211](https://github.com/ClickHouse/ClickHouse/pull/55211) ([Priyansh Agrawal](https://github.com/Priyansh121096)). -* Added function `formatQuery` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. Also added function `formatQuerySingleLine` which does the same, but the returned string will not contain line breaks (a usage sketch follows below). [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Added `DWARF` input format that reads debug symbols from an ELF executable/library/object file. [#55450](https://github.com/ClickHouse/ClickHouse/pull/55450) ([Michael Kolupaev](https://github.com/al13n321)). -* Allow saving unparsed records and errors in the RabbitMQ, NATS and FileLog engines. Add virtual columns `_error` and `_raw_message` (for NATS and RabbitMQ) and `_raw_record` (for FileLog) that are filled when ClickHouse fails to parse a new record. The behaviour is controlled by the storage settings `nats_handle_error_mode` for NATS, `rabbitmq_handle_error_mode` for RabbitMQ, and `handle_error_mode` for FileLog, similar to `kafka_handle_error_mode`. If it is set to `default`, an exception will be thrown when ClickHouse fails to parse a record; if it is set to `stream`, the error and the raw record will be saved into the virtual columns. Closes [#36035](https://github.com/ClickHouse/ClickHouse/issues/36035). [#55477](https://github.com/ClickHouse/ClickHouse/pull/55477) ([Kruglov Pavel](https://github.com/Avogar)). -* Keeper client improvement: add a `get_all_children_number` command that returns the number of all children nodes under a specific path. [#55485](https://github.com/ClickHouse/ClickHouse/pull/55485) ([guoxiaolong](https://github.com/guoxiaolongzte)). -* Keeper client improvement: add a `get_direct_children_number` command that returns the number of direct children nodes under a path. [#55898](https://github.com/ClickHouse/ClickHouse/pull/55898) ([xuzifu666](https://github.com/xuzifu666)). -* Add statement `SHOW SETTING setting_name` which is a simpler version of the existing statement `SHOW SETTINGS`. [#55979](https://github.com/ClickHouse/ClickHouse/pull/55979) ([Maksim Kita](https://github.com/kitaisreal)). -* Added fields `substreams` and `filenames` to the `system.parts_columns` table. [#55108](https://github.com/ClickHouse/ClickHouse/pull/55108) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for the `SHOW MERGES` query. [#55815](https://github.com/ClickHouse/ClickHouse/pull/55815) ([megao](https://github.com/jetgm)). -* Introduce a setting `create_table_empty_primary_key_by_default` for default `ORDER BY ()`. [#55899](https://github.com/ClickHouse/ClickHouse/pull/55899) ([Srikanth Chekuri](https://github.com/srikanthccv)). - -#### Performance Improvement -* Add option `query_plan_preserve_num_streams_after_window_functions` to preserve the number of streams after evaluating window functions to allow parallel stream processing. [#50771](https://github.com/ClickHouse/ClickHouse/pull/50771) ([frinkr](https://github.com/frinkr)). -* Release more streams if the data is small. [#53867](https://github.com/ClickHouse/ClickHouse/pull/53867) ([Jiebin Sun](https://github.com/jiebinn)).
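A sketch of the `formatQuery` functions mentioned above (the input query string is arbitrary):

```sql
SELECT formatQuery('select a,  b FRom tab WHERE a > 3');           -- pretty-printed, possibly multi-line
SELECT formatQuerySingleLine('select a,  b FRom tab WHERE a > 3'); -- same, but on a single line
```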
-* RoaringBitmaps being optimized before serialization. [#55044](https://github.com/ClickHouse/ClickHouse/pull/55044) ([UnamedRus](https://github.com/UnamedRus)). -* Posting lists in inverted indexes are now optimized to use the smallest possible representation for internal bitmaps. Depending on the repetitiveness of the data, this may significantly reduce the space consumption of inverted indexes. [#55069](https://github.com/ClickHouse/ClickHouse/pull/55069) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Fix contention on Context lock, this significantly improves performance for a lot of short-running concurrent queries. [#55121](https://github.com/ClickHouse/ClickHouse/pull/55121) ([Maksim Kita](https://github.com/kitaisreal)). -* Improved the performance of inverted index creation by 30%. This was achieved by replacing `std::unordered_map` with `absl::flat_hash_map`. [#55210](https://github.com/ClickHouse/ClickHouse/pull/55210) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Support ORC filter push down (rowgroup level). [#55330](https://github.com/ClickHouse/ClickHouse/pull/55330) ([李扬](https://github.com/taiyang-li)). -* Improve performance of external aggregation with a lot of temporary files. [#55489](https://github.com/ClickHouse/ClickHouse/pull/55489) ([Maksim Kita](https://github.com/kitaisreal)). -* Set a reasonable size for the marks cache for secondary indices by default to avoid loading the marks over and over again. [#55654](https://github.com/ClickHouse/ClickHouse/pull/55654) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Avoid unnecessary reconstruction of index granules when reading skip indexes. This addresses [#55653](https://github.com/ClickHouse/ClickHouse/issues/55653#issuecomment-1763766009). [#55683](https://github.com/ClickHouse/ClickHouse/pull/55683) ([Amos Bird](https://github.com/amosbird)). -* Cache CAST function in set during execution to improve the performance of function `IN` when set element type doesn't exactly match column type. [#55712](https://github.com/ClickHouse/ClickHouse/pull/55712) ([Duc Canh Le](https://github.com/canhld94)). -* Performance improvement for `ColumnVector::insertMany` and `ColumnVector::insertManyFrom`. [#55714](https://github.com/ClickHouse/ClickHouse/pull/55714) ([frinkr](https://github.com/frinkr)). -* Optimized Map subscript operations by predicting the next row's key position and reduce the comparisons. [#55929](https://github.com/ClickHouse/ClickHouse/pull/55929) ([lgbo](https://github.com/lgbo-ustc)). -* Support struct fields pruning in Parquet (in previous versions it didn't work in some cases). [#56117](https://github.com/ClickHouse/ClickHouse/pull/56117) ([lgbo](https://github.com/lgbo-ustc)). -* Add the ability to tune the number of parallel replicas used in a query execution based on the estimation of rows to read. [#51692](https://github.com/ClickHouse/ClickHouse/pull/51692) ([Raúl Marín](https://github.com/Algunenano)). -* Optimized external aggregation memory consumption in case many temporary files were generated. [#54798](https://github.com/ClickHouse/ClickHouse/pull/54798) ([Nikita Taranov](https://github.com/nickitat)). -* Distributed queries executed in `async_socket_for_remote` mode (default) now respect `max_threads` limit. Previously, some queries could create excessive threads (up to `max_distributed_connections`), causing server performance issues. [#53504](https://github.com/ClickHouse/ClickHouse/pull/53504) ([filimonov](https://github.com/filimonov)). 
-* Cache skippable entries while executing DDL from the ZooKeeper distributed DDL queue. [#54828](https://github.com/ClickHouse/ClickHouse/pull/54828) ([Duc Canh Le](https://github.com/canhld94)). -* Experimental inverted indexes do not store tokens with too many matches (i.e. row ids in the posting list). This saves space and avoids ineffective index lookups when sequential scans would be equally fast or faster. The previous heuristic (the `density` parameter passed to the index definition) that controlled when tokens would not be stored was too confusing for users. A much simpler heuristic based on the parameter `max_rows_per_postings_list` (default: 64k) is introduced, which directly controls the maximum allowed number of row ids in a postings list. [#55616](https://github.com/ClickHouse/ClickHouse/pull/55616) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Improve write performance to `EmbeddedRocksDB` tables. [#55732](https://github.com/ClickHouse/ClickHouse/pull/55732) ([Duc Canh Le](https://github.com/canhld94)). -* Improved overall resilience for ClickHouse in case of many parts within a partition (more than 1000). It might reduce the number of `TOO_MANY_PARTS` errors. [#55526](https://github.com/ClickHouse/ClickHouse/pull/55526) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Reduced memory consumption during loading of hierarchical dictionaries. [#55838](https://github.com/ClickHouse/ClickHouse/pull/55838) ([Nikita Taranov](https://github.com/nickitat)). -* All dictionaries support the setting `dictionary_use_async_executor`. [#55839](https://github.com/ClickHouse/ClickHouse/pull/55839) ([vdimir](https://github.com/vdimir)). -* Prevent excessive memory usage when deserializing AggregateFunctionTopKGenericData. [#55947](https://github.com/ClickHouse/ClickHouse/pull/55947) ([Raúl Marín](https://github.com/Algunenano)). -* On a Keeper with lots of watches, AsyncMetrics threads could consume 100% of CPU for a noticeable time in `DB::KeeperStorage::getSessionsWithWatchesCount`. The fix is to avoid traversing the heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). -* Add setting `optimize_trivial_approximate_count_query` to use `count` approximation for the EmbeddedRocksDB storage. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). - -#### Improvement -* Functions `toDayOfWeek` (MySQL alias: `DAYOFWEEK`), `toYearWeek` (`YEARWEEK`) and `toWeek` (`WEEK`) now support `String` arguments. This makes their behavior consistent with MySQL's behavior. [#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). -* Introduced setting `date_time_overflow_behavior` with possible values `ignore`, `throw`, `saturate` that controls the overflow behavior when converting from Date, Date32, DateTime64, Integer or Float to Date, Date32, DateTime or DateTime64. [#55696](https://github.com/ClickHouse/ClickHouse/pull/55696) ([Andrey Zvonov](https://github.com/zvonand)). -* Implement query parameters support for `ALTER TABLE ... ACTION PARTITION [ID] {parameter_name:ParameterType}`, as sketched below. Merges [#49516](https://github.com/ClickHouse/ClickHouse/issues/49516). Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#55604](https://github.com/ClickHouse/ClickHouse/pull/55604) ([alesapin](https://github.com/alesapin)).
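A sketch of the partition query parameters from the last entry above; the parameter name, its type, and the table are illustrative:

```sql
-- The partition is passed as a query parameter, e.g. from clickhouse-client
-- with --param_part_id='2023-10', or after SET param_part_id = '2023-10'.
ALTER TABLE t DROP PARTITION {part_id:String};
```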
-* Print processor ids in a prettier manner in EXPLAIN. [#48852](https://github.com/ClickHouse/ClickHouse/pull/48852) ([Vlad Seliverstov](https://github.com/behebot)). -* Creating a direct dictionary with a lifetime field will be rejected at create time (as the lifetime does not make sense for direct dictionaries). Fixes: [#27861](https://github.com/ClickHouse/ClickHouse/issues/27861). [#49043](https://github.com/ClickHouse/ClickHouse/pull/49043) ([Rory Crispin](https://github.com/RoryCrispin)). -* Allow parameters in queries with partitions like `ALTER TABLE t DROP PARTITION`. Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#49516](https://github.com/ClickHouse/ClickHouse/pull/49516) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add a new column `xid` for `system.zookeeper_connection`. [#50702](https://github.com/ClickHouse/ClickHouse/pull/50702) ([helifu](https://github.com/helifu)). -* Display the correct server settings in `system.server_settings` after configuration reload. [#53774](https://github.com/ClickHouse/ClickHouse/pull/53774) ([helifu](https://github.com/helifu)). -* Add support for mathematical minus `−` character in queries, similar to `-`. [#54100](https://github.com/ClickHouse/ClickHouse/pull/54100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add replica groups to the experimental `Replicated` database engine. Closes [#53620](https://github.com/ClickHouse/ClickHouse/issues/53620). [#54421](https://github.com/ClickHouse/ClickHouse/pull/54421) ([Nikolay Degterinsky](https://github.com/evillique)). -* It is better to retry retriable s3 errors than totally fail the query. Set bigger value to the s3_retry_attempts by default. [#54770](https://github.com/ClickHouse/ClickHouse/pull/54770) ([Sema Checherinda](https://github.com/CheSema)). -* Add load balancing mode `hostname_levenshtein_distance`. [#54826](https://github.com/ClickHouse/ClickHouse/pull/54826) ([JackyWoo](https://github.com/JackyWoo)). -* Improve hiding secrets in logs. [#55089](https://github.com/ClickHouse/ClickHouse/pull/55089) ([Vitaly Baranov](https://github.com/vitlibar)). -* For now the projection analysis will be performed only on top of query plan. The setting `query_plan_optimize_projection` became obsolete (it was enabled by default long time ago). [#55112](https://github.com/ClickHouse/ClickHouse/pull/55112) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* When function `untuple` is now called on a tuple with named elements and itself has an alias (e.g. `select untuple(tuple(1)::Tuple(element_alias Int)) AS untuple_alias`), then the result column name is now generated from the untuple alias and the tuple element alias (in the example: "untuple_alias.element_alias"). [#55123](https://github.com/ClickHouse/ClickHouse/pull/55123) ([garcher22](https://github.com/garcher22)). -* Added setting `describe_include_virtual_columns`, which allows to include virtual columns of table into result of `DESCRIBE` query. Added setting `describe_compact_output`. If it is set to `true`, `DESCRIBE` query returns only names and types of columns without extra information. [#55129](https://github.com/ClickHouse/ClickHouse/pull/55129) ([Anton Popov](https://github.com/CurtizJ)). -* Sometimes `OPTIMIZE` with `optimize_throw_if_noop=1` may fail with an error `unknown reason` while the real cause of it - different projections in different parts. This behavior is fixed. 
[#55130](https://github.com/ClickHouse/ClickHouse/pull/55130) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow to have several `MaterializedPostgreSQL` tables following the same Postgres table. By default this behaviour is not enabled (for compatibility, because it is a backward-incompatible change), but can be turned on with setting `materialized_postgresql_use_unique_replication_consumer_identifier`. Closes [#54918](https://github.com/ClickHouse/ClickHouse/issues/54918). [#55145](https://github.com/ClickHouse/ClickHouse/pull/55145) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow to parse negative `DateTime64` and `DateTime` with fractional part from short strings. [#55146](https://github.com/ClickHouse/ClickHouse/pull/55146) ([Andrey Zvonov](https://github.com/zvonand)). -* To improve compatibility with MySQL, 1. `information_schema.tables` now includes the new field `table_rows`, and 2. `information_schema.columns` now includes the new field `extra`. [#55215](https://github.com/ClickHouse/ClickHouse/pull/55215) ([Robert Schulze](https://github.com/rschu1ze)). -* Clickhouse-client won't show "0 rows in set" if it is zero and if exception was thrown. [#55240](https://github.com/ClickHouse/ClickHouse/pull/55240) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Support rename table without keyword `TABLE` like `RENAME db.t1 to db.t2`. [#55373](https://github.com/ClickHouse/ClickHouse/pull/55373) ([凌涛](https://github.com/lingtaolf)). -* Add `internal_replication` to `system.clusters`. [#55377](https://github.com/ClickHouse/ClickHouse/pull/55377) ([Konstantin Morozov](https://github.com/k-morozov)). -* Select remote proxy resolver based on request protocol, add proxy feature docs and remove `DB::ProxyConfiguration::Protocol::ANY`. [#55430](https://github.com/ClickHouse/ClickHouse/pull/55430) ([Arthur Passos](https://github.com/arthurpassos)). -* Avoid retrying keeper operations on INSERT after table shutdown. [#55519](https://github.com/ClickHouse/ClickHouse/pull/55519) ([Azat Khuzhin](https://github.com/azat)). -* `SHOW COLUMNS` now correctly reports type `FixedString` as `BLOB` if setting `use_mysql_types_in_show_columns` is on. Also added two new settings, `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` to switch the output for types `String` and `FixedString` as `TEXT` or `BLOB`. [#55617](https://github.com/ClickHouse/ClickHouse/pull/55617) ([Serge Klochkov](https://github.com/slvrtrn)). -* During ReplicatedMergeTree tables startup clickhouse server checks set of parts for unexpected parts (exists locally, but not in zookeeper). All unexpected parts move to detached directory and instead of them server tries to restore some ancestor (covered) parts. Now server tries to restore closest ancestors instead of random covered parts. [#55645](https://github.com/ClickHouse/ClickHouse/pull/55645) ([alesapin](https://github.com/alesapin)). -* The advanced dashboard now supports draggable charts on touch devices. This closes [#54206](https://github.com/ClickHouse/ClickHouse/issues/54206). [#55649](https://github.com/ClickHouse/ClickHouse/pull/55649) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Use the default query format if declared when outputting exception with `http_write_exception_in_output_format`. [#55739](https://github.com/ClickHouse/ClickHouse/pull/55739) ([Raúl Marín](https://github.com/Algunenano)). -* Provide a better message for common MATERIALIZED VIEW pitfalls. 
[#55826](https://github.com/ClickHouse/ClickHouse/pull/55826) ([Raúl Marín](https://github.com/Algunenano)). -* If you drop the current database, you will still be able to run some queries in `clickhouse-local` and switch to another database. This makes the behavior consistent with `clickhouse-client`. This closes [#55834](https://github.com/ClickHouse/ClickHouse/issues/55834). [#55853](https://github.com/ClickHouse/ClickHouse/pull/55853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Functions `(add|subtract)(Year|Quarter|Month|Week|Day|Hour|Minute|Second|Millisecond|Microsecond|Nanosecond)` now support string-encoded date arguments, e.g. `SELECT addDays('2023-10-22', 1)`. This increases compatibility with MySQL and is needed by Tableau Online. [#55869](https://github.com/ClickHouse/ClickHouse/pull/55869) ([Robert Schulze](https://github.com/rschu1ze)). -* When disabled, the setting `apply_deleted_mask` allows reading rows that were marked as deleted by lightweight DELETE queries. This is useful for debugging. [#55952](https://github.com/ClickHouse/ClickHouse/pull/55952) ([Alexander Gololobov](https://github.com/davenger)). -* Allow skipping `null` values when serializing Tuple to JSON objects, which makes it possible to keep compatibility with Spark's `to_json` function; this is also useful for Gluten. [#55956](https://github.com/ClickHouse/ClickHouse/pull/55956) ([李扬](https://github.com/taiyang-li)). -* Functions `(add|sub)Date` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g. `SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow unquoted strings with CR (`\r`) in CSV format. Closes [#39930](https://github.com/ClickHouse/ClickHouse/issues/39930). [#56046](https://github.com/ClickHouse/ClickHouse/pull/56046) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow running `clickhouse-keeper` using an embedded config. [#56086](https://github.com/ClickHouse/ClickHouse/pull/56086) ([Maksim Kita](https://github.com/kitaisreal)). -* Set a limit on the maximum configuration value for `queued.min.messages` to avoid a problem with starting to fetch data from Kafka. [#56121](https://github.com/ClickHouse/ClickHouse/pull/56121) ([Stas Morozov](https://github.com/r3b-fish)). -* Fixed a typo in SQL function `minSampleSizeContinous` (renamed `minSampleSizeContinuous`). The old name is preserved for backward compatibility. This closes: [#56139](https://github.com/ClickHouse/ClickHouse/issues/56139). [#56143](https://github.com/ClickHouse/ClickHouse/pull/56143) ([Dorota Szeremeta](https://github.com/orotaday)). -* Print the path of broken parts on disk before shutting down the server. Before this change, if a part was corrupted on disk and the server could not start, it was almost impossible to understand which part was broken. This is fixed. [#56181](https://github.com/ClickHouse/ClickHouse/pull/56181) ([Duc Canh Le](https://github.com/canhld94)). - -#### Build/Testing/Packaging Improvement -* If the database in Docker is already initialized, it doesn't need to be initialized again upon subsequent launches. This can potentially fix the issue of infinite container restarts when the database fails to load within 1000 attempts (relevant for very large databases and multi-node setups).
[#50724](https://github.com/ClickHouse/ClickHouse/pull/50724) ([Alexander Nikolaev](https://github.com/AlexNik)). -* A resource with the source code, including submodules, is built in the Darwin special build task. It may be used to build ClickHouse without checking out the submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)). -* An error was occurring when building ClickHouse with the AVX series of instructions enabled globally (which isn't recommended). The reason is that snappy does not enable `SNAPPY_HAVE_X86_CRC32`. [#55049](https://github.com/ClickHouse/ClickHouse/pull/55049) ([monchickey](https://github.com/monchickey)). -* Solve an issue with launching a standalone `clickhouse-keeper` from the `clickhouse-server` package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* In the tests, the RabbitMQ version is updated to 3.12.6. Improved logs collection for RabbitMQ tests. [#55424](https://github.com/ClickHouse/ClickHouse/pull/55424) ([Ilya Yatsishin](https://github.com/qoega)). -* Modified the error message difference between OpenSSL and BoringSSL to fix the functional test. [#55975](https://github.com/ClickHouse/ClickHouse/pull/55975) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). -* Use the upstream repository for Apache DataSketches. [#55787](https://github.com/ClickHouse/ClickHouse/pull/55787) ([Nikita Taranov](https://github.com/nickitat)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Skip hardlinking inverted index files in mutation [#47663](https://github.com/ClickHouse/ClickHouse/pull/47663) ([cangyin](https://github.com/cangyin)). -* Fixed a bug where the `match` function (regex) with a pattern containing alternation produced an incorrect key condition. Closes [#53222](https://github.com/ClickHouse/ClickHouse/issues/53222). [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix 'Cannot find column' in read-in-order optimization with ARRAY JOIN [#51746](https://github.com/ClickHouse/ClickHouse/pull/51746) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Support missing experimental `Object(Nullable(json))` subcolumns in queries. [#54052](https://github.com/ClickHouse/ClickHouse/pull/54052) ([zps](https://github.com/VanDarkholme7)). -* Re-add fix for `accurateCastOrNull` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix detecting `DEFAULT` for columns of a Distributed table created without AS [#55060](https://github.com/ClickHouse/ClickHouse/pull/55060) ([Vitaly Baranov](https://github.com/vitlibar)). -* Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). -* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)). -* Suppress error statistics update for internal exceptions [#55128](https://github.com/ClickHouse/ClickHouse/pull/55128) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix deadlock in backups [#55132](https://github.com/ClickHouse/ClickHouse/pull/55132) ([alesapin](https://github.com/alesapin)). -* Fix storage Iceberg files retrieval [#55144](https://github.com/ClickHouse/ClickHouse/pull/55144) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partition pruning of extra columns in set.
[#55172](https://github.com/ClickHouse/ClickHouse/pull/55172) ([Amos Bird](https://github.com/amosbird)). -* Fix recalculation of skip indexes in ALTER UPDATE queries when table has adaptive granularity [#55202](https://github.com/ClickHouse/ClickHouse/pull/55202) ([Duc Canh Le](https://github.com/canhld94)). -* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid possible memory leaks in compressors in case of missing buffer finalization [#55262](https://github.com/ClickHouse/ClickHouse/pull/55262) ([Azat Khuzhin](https://github.com/azat)). -* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect merging of Nested for SELECT FINAL FROM SummingMergeTree [#55276](https://github.com/ClickHouse/ClickHouse/pull/55276) ([Azat Khuzhin](https://github.com/azat)). -* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)). -* Fix a crash in MergeSortingPartialResultTransform (due to zero chunks after `remerge`) [#55335](https://github.com/ClickHouse/ClickHouse/pull/55335) ([Azat Khuzhin](https://github.com/azat)). -* Fix data-race in CreatingSetsTransform (on errors) due to throwing shared exception [#55338](https://github.com/ClickHouse/ClickHouse/pull/55338) ([Azat Khuzhin](https://github.com/azat)). -* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix leak in StorageHDFS [#55370](https://github.com/ClickHouse/ClickHouse/pull/55370) ([Azat Khuzhin](https://github.com/azat)). -* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)). -* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)). -* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix MySQL interface boolean representation [#55427](https://github.com/ClickHouse/ClickHouse/pull/55427) ([Serge Klochkov](https://github.com/slvrtrn)). -* Fix MySQL text protocol DateTime formatting and LowCardinality(Nullable(T)) types reporting [#55479](https://github.com/ClickHouse/ClickHouse/pull/55479) ([Serge Klochkov](https://github.com/slvrtrn)). -* Make `use_mysql_types_in_show_columns` affect only `SHOW COLUMNS` [#55481](https://github.com/ClickHouse/ClickHouse/pull/55481) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix stack symbolizer parsing `DW_FORM_ref_addr` incorrectly and sometimes crashing [#55483](https://github.com/ClickHouse/ClickHouse/pull/55483) ([Michael Kolupaev](https://github.com/al13n321)). -* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Query Parameters not working with custom HTTP handlers [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)). 
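A minimal sketch of the narrowed `use_mysql_types_in_show_columns` scope mentioned a few entries above; `system.one` is just a convenient built-in table, and the exact MySQL type names rendered may differ:

```sql
SET use_mysql_types_in_show_columns = 1;
SHOW COLUMNS FROM system.one;  -- types rendered as MySQL types (e.g. a TINYINT-style name)
DESCRIBE TABLE system.one;     -- unaffected by the setting: still shows the native UInt8
```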
-* Fix checking of non handled data for Values format [#55527](https://github.com/ClickHouse/ClickHouse/pull/55527) ([Azat Khuzhin](https://github.com/azat)). -* Fix 'Invalid cursor state' in odbc interacting with MS SQL Server [#55558](https://github.com/ClickHouse/ClickHouse/pull/55558) ([vdimir](https://github.com/vdimir)). -* Fix max execution time and 'break' overflow mode [#55577](https://github.com/ClickHouse/ClickHouse/pull/55577) ([Alexander Gololobov](https://github.com/davenger)). -* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)). -* Disable wrong optimization and add a test [#55609](https://github.com/ClickHouse/ClickHouse/pull/55609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Merging [#52352](https://github.com/ClickHouse/ClickHouse/issues/52352) [#55621](https://github.com/ClickHouse/ClickHouse/pull/55621) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a test to avoid incorrect decimal sorting [#55662](https://github.com/ClickHouse/ClickHouse/pull/55662) ([Amos Bird](https://github.com/amosbird)). -* Fix progress bar for s3 and azure Cluster functions with url without globs [#55666](https://github.com/ClickHouse/ClickHouse/pull/55666) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)). -* Fixes and improvements for Iceberg storage [#55695](https://github.com/ClickHouse/ClickHouse/pull/55695) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix data race in CreatingSetsTransform (v2) [#55786](https://github.com/ClickHouse/ClickHouse/pull/55786) ([Azat Khuzhin](https://github.com/azat)). -* Throw exception when parsing illegal string as float if precise_float_parsing is true [#55861](https://github.com/ClickHouse/ClickHouse/pull/55861) ([李扬](https://github.com/taiyang-li)). -* Disable predicate pushdown if the CTE contains stateful functions [#55871](https://github.com/ClickHouse/ClickHouse/pull/55871) ([Raúl Marín](https://github.com/Algunenano)). -* Fix normalize ASTSelectWithUnionQuery, as it was stripping `FORMAT` from the query [#55887](https://github.com/ClickHouse/ClickHouse/pull/55887) ([flynn](https://github.com/ucasfl)). -* Try to fix possible segfault in Native ORC input format [#55891](https://github.com/ClickHouse/ClickHouse/pull/55891) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix window functions in case of sparse columns. [#55895](https://github.com/ClickHouse/ClickHouse/pull/55895) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* fix: StorageNull supports subcolumns [#55912](https://github.com/ClickHouse/ClickHouse/pull/55912) ([FFish](https://github.com/wxybear)). -* Do not write retriable errors for Replicated mutate/merge into error log [#55944](https://github.com/ClickHouse/ClickHouse/pull/55944) ([Azat Khuzhin](https://github.com/azat)). -* Fix `SHOW DATABASES LIMIT ` [#55962](https://github.com/ClickHouse/ClickHouse/pull/55962) ([Raúl Marín](https://github.com/Algunenano)). -* Fix autogenerated Protobuf schema with fields with underscore [#55974](https://github.com/ClickHouse/ClickHouse/pull/55974) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix dateTime64ToSnowflake64() with non-default scale [#55983](https://github.com/ClickHouse/ClickHouse/pull/55983) ([Robert Schulze](https://github.com/rschu1ze)). 
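For illustration, the string-encoded date arguments and interval arithmetic from the 23.10 Improvement entries above can be exercised like this (hypothetical queries; results depend on the server's date handling):

```sql
SELECT
    addDays('2023-10-22', 1)                          AS add_days_on_string,
    addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE) AS add_date_on_string,
    '2023-10-23' + INTERVAL 1 DAY                     AS plus_operator_on_string;
```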
-* Fix output/input of Arrow dictionary column [#55989](https://github.com/ClickHouse/ClickHouse/pull/55989) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix fetching schema from schema registry in AvroConfluent [#55991](https://github.com/ClickHouse/ClickHouse/pull/55991) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix 'Block structure mismatch' on concurrent ALTER and INSERTs in Buffer table [#55995](https://github.com/ClickHouse/ClickHouse/pull/55995) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix incorrect free space accounting for least_used JBOD policy [#56030](https://github.com/ClickHouse/ClickHouse/pull/56030) ([Azat Khuzhin](https://github.com/azat)). -* Fix missing scalar issue when evaluating subqueries inside table functions [#56057](https://github.com/ClickHouse/ClickHouse/pull/56057) ([Amos Bird](https://github.com/amosbird)). -* Fix wrong query result when http_write_exception_in_output_format=1 [#56135](https://github.com/ClickHouse/ClickHouse/pull/56135) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix schema cache for fallback JSON->JSONEachRow with changed settings [#56172](https://github.com/ClickHouse/ClickHouse/pull/56172) ([Kruglov Pavel](https://github.com/Avogar)). -* Add error handler to odbc-bridge [#56185](https://github.com/ClickHouse/ClickHouse/pull/56185) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). - - ### ClickHouse release 23.9, 2023-09-28 - -#### Backward Incompatible Change -* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default, to avoid possible parsing errors when sample data contains strings that look like numbers. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)). - -#### New Feature -* Improve schema inference from JSON formats: 1) Now it's possible to infer named Tuples from JSON objects without the experimental JSON type under the setting `input_format_json_try_infer_named_tuples_from_objects` in JSON formats. Previously, without the experimental JSON type, we could only infer JSON objects as Strings or Maps; now we can infer named Tuples. The resulting Tuple type will contain all keys of the objects that were read in the data sample during schema inference. It can be useful for reading structured JSON data without sparse objects. The setting is enabled by default. 2) Allow parsing a JSON array into a column with type String under the setting `input_format_json_read_arrays_as_strings`. It can help with reading arrays whose values have different types. 3) Allow using type String for JSON keys with unknown types (`null`/`[]`/`{}`) in sample data under the setting `input_format_json_infer_incomplete_types_as_strings`. Now in JSON formats we can read any value into a String column, and we can avoid the error `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference by using type String for unknown types, so the data will be read successfully.
[#54427](https://github.com/ClickHouse/ClickHouse/pull/54427) ([Kruglov Pavel](https://github.com/Avogar)). -* Added IO scheduling support for remote disks. Storage configuration for disk types `s3`, `s3_plain`, `hdfs` and `azure_blob_storage` can now contain `read_resource` and `write_resource` elements holding resource names. Scheduling policies for these resources can be configured in a separate server configuration section `resources`. Queries can be marked using setting `workload` and classified using server configuration section `workload_classifiers` to achieve diverse resource scheduling goals. More details in [the docs](https://clickhouse.com/docs/en/operations/workload-scheduling). [#47009](https://github.com/ClickHouse/ClickHouse/pull/47009) ([Sergei Trifonov](https://github.com/serxa)). Added "bandwidth_limit" IO scheduling node type. It allows you to specify `max_speed` and `max_burst` constraints on traffic passing through this node. [#54618](https://github.com/ClickHouse/ClickHouse/pull/54618) ([Sergei Trifonov](https://github.com/serxa)). -* Added a new type of authentication based on SSH keys. It works only for the native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)). -* Added a new column `_block_number` for MergeTree tables. [#44532](https://github.com/ClickHouse/ClickHouse/issues/44532). [#47532](https://github.com/ClickHouse/ClickHouse/pull/47532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `IF EMPTY` clause for `DROP TABLE` queries. [#48915](https://github.com/ClickHouse/ClickHouse/pull/48915) ([Pavel Novitskiy](https://github.com/pnovitskiy)). -* SQL functions `toString(datetime, timezone)` and `formatDateTime(datetime, format, timezone)` now support non-constant timezone arguments. [#53680](https://github.com/ClickHouse/ClickHouse/pull/53680) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add support for `ALTER TABLE MODIFY COMMENT`. Note: something similar was added by an external contributor a long time ago, but the feature did not work at all and only confused users. This closes [#36377](https://github.com/ClickHouse/ClickHouse/issues/36377). [#51304](https://github.com/ClickHouse/ClickHouse/pull/51304) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: this command does not propagate between replicas, so the replicas of a table could have different comments. -* Added `GCD` a.k.a. "greatest common divisor" as a new data compression codec. The codec computes the GCD of all column values, and then divides each value by the GCD. The GCD codec is a data preparation codec (similar to Delta and DoubleDelta) and cannot be used stand-alone. It works with integer, decimal and date/time data types. A viable use case for the GCD codec is column values that change (increase/decrease) in multiples of the GCD, e.g. 24 - 28 - 16 - 24 - 8 - 24 (assuming GCD = 4). [#53149](https://github.com/ClickHouse/ClickHouse/pull/53149) ([Alexander Nam](https://github.com/seshWCS)). -* Two new type aliases `DECIMAL(P)` (as a shortcut for `DECIMAL(P, 0)`) and `DECIMAL` (as a shortcut for `DECIMAL(10, 0)`) were added. This makes ClickHouse more compatible with MySQL's SQL dialect. [#53328](https://github.com/ClickHouse/ClickHouse/pull/53328) ([Val Doroshchuk](https://github.com/valbok)). -* Added a new system log table `backup_log` to track all `BACKUP` and `RESTORE` operations.
[#53638](https://github.com/ClickHouse/ClickHouse/pull/53638) ([Victor Krasnov](https://github.com/sirvickr)). -* Added a format setting `output_format_markdown_escape_special_characters` (default: false). The setting controls whether special characters like `!`, `#`, `$` etc. are escaped (i.e. prefixed by a backslash) in the `Markdown` output format. [#53860](https://github.com/ClickHouse/ClickHouse/pull/53860) ([irenjj](https://github.com/irenjj)). -* Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). -* Added `peak_threads_usage` to query_log table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add `SHOW FUNCTIONS` support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). -* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in Proleptic Gregorian Calendar). [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). -* Added functions `YYYYMMDDtoDate`, `YYYYMMDDtoDate32`, `YYYYMMDDhhmmssToDateTime` and `YYYYMMDDhhmmssToDateTime64`. They convert a date or date with time encoded as integer (e.g. 20230911) into a native date or date with time. As such, they provide the opposite functionality of existing functions `YYYYMMDDToDate`, `YYYYMMDDToDateTime`, `YYYYMMDDhhmmddToDateTime`, `YYYYMMDDhhmmddToDateTime64`. [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Quanfa Fu](https://github.com/dentiscalprum)) ([Robert Schulze](https://github.com/rschu1ze)). -* Add several string distance functions, including `byteHammingDistance`, `editDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). -* Allow specifying the expiration date and, optionally, the time for user credentials with `VALID UNTIL datetime` clause. [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow S3-style URLs for table functions `s3`, `gcs`, `oss`. URL is automatically converted to HTTP. Example: `'s3://clickhouse-public-datasets/hits.csv'` is converted to `'https://clickhouse-public-datasets.s3.amazonaws.com/hits.csv'`. [#54931](https://github.com/ClickHouse/ClickHouse/pull/54931) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add new setting `print_pretty_type_names` to print pretty deep nested types like Tuple/Maps/Arrays. [#55095](https://github.com/ClickHouse/ClickHouse/pull/55095) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Performance Improvement -* Speed up reading from S3 by enabling prefetches by default. [#53709](https://github.com/ClickHouse/ClickHouse/pull/53709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Do not implicitly read PK and version columns in lonely parts if unnecessary for queries with FINAL. [#53919](https://github.com/ClickHouse/ClickHouse/pull/53919) ([Duc Canh Le](https://github.com/canhld94)). -* Optimize group by constant keys. 
Will optimize queries with group by `_file/_path` after https://github.com/ClickHouse/ClickHouse/pull/53529. [#53549](https://github.com/ClickHouse/ClickHouse/pull/53549) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve performance of sorting for `Decimal` columns. Improve performance of insertion into `MergeTree` if ORDER BY contains a `Decimal` column. Improve performance of sorting when data is already sorted or almost sorted. [#35961](https://github.com/ClickHouse/ClickHouse/pull/35961) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance for huge query analysis. Fixes [#51224](https://github.com/ClickHouse/ClickHouse/issues/51224). [#51469](https://github.com/ClickHouse/ClickHouse/pull/51469) ([frinkr](https://github.com/frinkr)). -* An optimization to rewrite `COUNT(DISTINCT ...)` and various `uniq` variants to `count` if it is selected from a subquery with GROUP BY. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). -* Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc` - and it slightly improves performance. [#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)). -* Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)). -* Since we use separate instructions for executing `toString` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and make some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). -* Enable ORDER BY optimization for reading data in corresponding order from a MergeTree table in case the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve JSON SQL functions by reusing `GeneratorJSONPath` and removing several shared pointers. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). -* Keeper tries to batch flush requests for better performance. [#53049](https://github.com/ClickHouse/ClickHouse/pull/53049) ([Antonio Andelic](https://github.com/antonio2368)). -* Now `clickhouse-client` processes files in parallel in case of `INFILE 'glob_expression'`. Closes [#54218](https://github.com/ClickHouse/ClickHouse/issues/54218). [#54533](https://github.com/ClickHouse/ClickHouse/pull/54533) ([Max K.](https://github.com/mkaynov)). -* Allow using the primary key for the IN function when the primary key column types are different from the `IN` function's right-side column types. Example: `SELECT id FROM test_table WHERE id IN (SELECT '5')`. Closes [#48936](https://github.com/ClickHouse/ClickHouse/issues/48936). [#54544](https://github.com/ClickHouse/ClickHouse/pull/54544) ([Maksim Kita](https://github.com/kitaisreal)). -* Hash JOIN tries to shrink internal buffers consuming half of maximal available memory (set by `max_bytes_in_join`).
[#54584](https://github.com/ClickHouse/ClickHouse/pull/54584) ([vdimir](https://github.com/vdimir)). -* Respect `max_block_size` for array join to avoid possible OOM. Close [#54290](https://github.com/ClickHouse/ClickHouse/issues/54290). [#54664](https://github.com/ClickHouse/ClickHouse/pull/54664) ([李扬](https://github.com/taiyang-li)). -* Reuse HTTP connections in the `s3` table function. [#54812](https://github.com/ClickHouse/ClickHouse/pull/54812) ([Michael Kolupaev](https://github.com/al13n321)). -* Replace the linear search in `MergeTreeRangeReader::Stream::ceilRowsToCompleteGranules` with a binary search. [#54869](https://github.com/ClickHouse/ClickHouse/pull/54869) ([usurai](https://github.com/usurai)). - -#### Experimental Feature -* The creation of `Annoy` indexes can now be parallelized using setting `max_threads_for_annoy_index_creation`. [#54047](https://github.com/ClickHouse/ClickHouse/pull/54047) ([Robert Schulze](https://github.com/rschu1ze)). -* Parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). - -#### Improvement -* Allow replacing long names of column files in `MergeTree` data parts with hashes of the names. It helps to avoid `File name too long` error in some cases. [#50612](https://github.com/ClickHouse/ClickHouse/pull/50612) ([Anton Popov](https://github.com/CurtizJ)). -* Parse data in `JSON` format as `JSONEachRow` if failed to parse metadata. This allows reading files with the `.json` extension even if the real format is JSONEachRow. Closes [#45740](https://github.com/ClickHouse/ClickHouse/issues/45740). [#54405](https://github.com/ClickHouse/ClickHouse/pull/54405) ([Kruglov Pavel](https://github.com/Avogar)). -* Output valid JSON/XML on exception during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)). -* View `information_schema.tables` now has a new field `data_length` which shows the approximate size of the data on disk. Required to run queries generated by Amazon QuickSight. [#55037](https://github.com/ClickHouse/ClickHouse/pull/55037) ([Robert Schulze](https://github.com/rschu1ze)). -* The MySQL interface gained a minimal implementation of prepared statements, just enough to allow a connection from Tableau Online to ClickHouse via the MySQL connector. [#54115](https://github.com/ClickHouse/ClickHouse/pull/54115) ([Serge Klochkov](https://github.com/slvrtrn)). Please note: the prepared statements implementation is pretty minimal; we do not support argument binding yet, as it is not required in this particular Tableau Online use case. It will be implemented as a follow-up if necessary after extensive testing of Tableau Online in case we discover issues. -* Support case-insensitive and dot-all matching modes in `regexp_tree` dictionaries. [#50906](https://github.com/ClickHouse/ClickHouse/pull/50906) ([Johann Gan](https://github.com/johanngan)). -* Keeper improvement: Add a `createIfNotExists` Keeper command. [#48855](https://github.com/ClickHouse/ClickHouse/pull/48855) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* More precise integer type inference, fixes [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003) ([Chen768959](https://github.com/Chen768959)).
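A small, illustrative sketch of the date-conversion additions from the 23.9 New Feature section above (`toDaysSinceYearZero`, `YYYYMMDDtoDate`, `YYYYMMDDhhmmssToDateTime`); the literal values are arbitrary examples:

```sql
SELECT
    toDaysSinceYearZero(toDate('2023-09-28'))  AS days_since_year_zero,  -- MySQL-compatible alias: TO_DAYS
    YYYYMMDDtoDate(20230911)                   AS date_from_integer,
    YYYYMMDDhhmmssToDateTime(20230911131415)   AS datetime_from_integer;
```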
-* Introduced resolving of charsets in the string literals for MaterializedMySQL. [#53220](https://github.com/ClickHouse/ClickHouse/pull/53220) ([Val Doroshchuk](https://github.com/valbok)). -* Fix a subtle issue with a rarely used `EmbeddedRocksDB` table engine in an extremely rare scenario: sometimes the `EmbeddedRocksDB` table engine does not close files correctly in NFS after running `DROP TABLE`. [#53502](https://github.com/ClickHouse/ClickHouse/pull/53502) ([Mingliang Pan](https://github.com/liangliangpan)). -* `RESTORE TABLE ON CLUSTER` must create replicated tables with a matching UUID on hosts. Otherwise the macro `{uuid}` in ZooKeeper path can't work correctly after RESTORE. This PR implements that. [#53765](https://github.com/ClickHouse/ClickHouse/pull/53765) ([Vitaly Baranov](https://github.com/vitlibar)). -* Added restore setting `restore_broken_parts_as_detached`: if it's true the RESTORE process won't stop on broken parts while restoring, instead all the broken parts will be copied to the `detached` folder with the prefix `broken-from-backup`. If it's false the RESTORE process will stop on the first broken part (if any). The default value is false. [#53877](https://github.com/ClickHouse/ClickHouse/pull/53877) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add `elapsed_ns` field to HTTP headers X-ClickHouse-Progress and X-ClickHouse-Summary. [#54179](https://github.com/ClickHouse/ClickHouse/pull/54179) ([joelynch](https://github.com/joelynch)). -* Implementation of `reconfig` (https://github.com/ClickHouse/ClickHouse/pull/49450), `sync`, and `exists` commands for keeper-client. [#54201](https://github.com/ClickHouse/ClickHouse/pull/54201) ([pufit](https://github.com/pufit)). -* `clickhouse-local` and `clickhouse-client` now allow specifying the `--query` parameter multiple times, e.g. `./clickhouse-client --query "SELECT 1" --query "SELECT 2"`. This syntax is slightly more intuitive than `./clickhouse-client --multiquery "SELECT 1; SELECT 2"`, a bit easier to script (e.g. `queries.push_back('--query "$q"')`) and more consistent with the behavior of the existing parameter `--queries-file` (e.g. `./clickhouse client --queries-file queries1.sql --queries-file queries2.sql`). [#54249](https://github.com/ClickHouse/ClickHouse/pull/54249) ([Robert Schulze](https://github.com/rschu1ze)). -* Add sub-second precision to `formatReadableTimeDelta`. [#54250](https://github.com/ClickHouse/ClickHouse/pull/54250) ([Andrey Zvonov](https://github.com/zvonand)). -* Enable `allow_remove_stale_moving_parts` by default. [#54260](https://github.com/ClickHouse/ClickHouse/pull/54260) ([vdimir](https://github.com/vdimir)). -* Fix using count from cache and improve progress bar for reading from archives. [#54271](https://github.com/ClickHouse/ClickHouse/pull/54271) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for S3 credentials using SSO. To define a profile to be used with SSO, set the `AWS_PROFILE` environment variable. [#54347](https://github.com/ClickHouse/ClickHouse/pull/54347) ([Antonio Andelic](https://github.com/antonio2368)). -* Support NULL as default for nested types Array/Tuple/Map for input formats. Closes [#51100](https://github.com/ClickHouse/ClickHouse/issues/51100). [#54351](https://github.com/ClickHouse/ClickHouse/pull/54351) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow reading some unusual configurations of chunks from Arrow/Parquet formats. [#54370](https://github.com/ClickHouse/ClickHouse/pull/54370) ([Arthur Passos](https://github.com/arthurpassos)).
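To illustrate the sub-second precision added to `formatReadableTimeDelta` above, a hedged example using the single-argument form (the exact wording of the output may differ between versions):

```sql
SELECT
    formatReadableTimeDelta(90)    AS whole_seconds,   -- e.g. '1 minute and 30 seconds'
    formatReadableTimeDelta(90.5)  AS with_subseconds; -- the fractional part is no longer dropped
```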
-* Add `STD` alias to `stddevPop` function for MySQL compatibility. Closes [#54274](https://github.com/ClickHouse/ClickHouse/issues/54274). [#54382](https://github.com/ClickHouse/ClickHouse/pull/54382) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `addDate` function for compatibility with MySQL and `subDate` for consistency. Reference [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54400](https://github.com/ClickHouse/ClickHouse/pull/54400) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `modification_time` into `system.detached_parts`. [#54506](https://github.com/ClickHouse/ClickHouse/pull/54506) ([Azat Khuzhin](https://github.com/azat)). -* Added a setting `splitby_max_substrings_includes_remaining_string` which controls if functions "splitBy*()" with argument "max_substring" > 0 include the remaining string (if any) in the result array (Python/Spark semantics) or not. The default behavior does not change. [#54518](https://github.com/ClickHouse/ClickHouse/pull/54518) ([Robert Schulze](https://github.com/rschu1ze)). -* Better integer types inference for `Int64`/`UInt64` fields. Continuation of [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003). Now it works also for nested types like Arrays of Arrays and for functions like `map/tuple`. Issue: [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#54553](https://github.com/ClickHouse/ClickHouse/pull/54553) ([Kruglov Pavel](https://github.com/Avogar)). -* Added array operations for multiplying, dividing and modulo on scalar. Works in each way, for example `5 * [5, 5]` and `[5, 5] * 5` - both cases are possible. [#54608](https://github.com/ClickHouse/ClickHouse/pull/54608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add optional `version` argument to `rm` command in `keeper-client` to support safer deletes. [#54708](https://github.com/ClickHouse/ClickHouse/pull/54708) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Disable killing the server by systemd (that may lead to data loss when using Buffer tables). [#54744](https://github.com/ClickHouse/ClickHouse/pull/54744) ([Azat Khuzhin](https://github.com/azat)). -* Added field `is_deterministic` to system table `system.functions` which indicates whether the result of a function is stable between two invocations (given exactly the same inputs) or not. [#54766](https://github.com/ClickHouse/ClickHouse/pull/54766) [#55035](https://github.com/ClickHouse/ClickHouse/pull/55035) ([Robert Schulze](https://github.com/rschu1ze)). -* Made the views in schema `information_schema` more compatible with the equivalent views in MySQL (i.e. modified and extended them) up to a point where Tableau Online is able to connect to ClickHouse. More specifically: 1. The type of field `information_schema.tables.table_type` changed from Enum8 to String. 2. Added fields `table_comment` and `table_collation` to view `information_schema.table`. 3. Added views `information_schema.key_column_usage` and `referential_constraints`. 4. Replaced uppercase aliases in `information_schema` views with concrete uppercase columns. [#54773](https://github.com/ClickHouse/ClickHouse/pull/54773) ([Serge Klochkov](https://github.com/slvrtrn)). -* The query cache now returns an error if the user tries to cache the result of a query with a non-deterministic function such as `now`, `randomString` and `dictGet`. Compared to the previous behavior (silently don't cache the result), this reduces confusion and surprise for users. 
[#54801](https://github.com/ClickHouse/ClickHouse/pull/54801) ([Robert Schulze](https://github.com/rschu1ze)). -* Forbid special columns like materialized/ephemeral/alias for `file`/`s3`/`url`/... storages, fix inserting into ephemeral columns from files. Closes [#53477](https://github.com/ClickHouse/ClickHouse/issues/53477). [#54803](https://github.com/ClickHouse/ClickHouse/pull/54803) ([Kruglov Pavel](https://github.com/Avogar)). -* Make collecting metadata for backups more configurable. [#54804](https://github.com/ClickHouse/ClickHouse/pull/54804) ([Vitaly Baranov](https://github.com/vitlibar)). -* `clickhouse-local`'s log file (if enabled with the --server_logs_file flag) will now prefix each line with a timestamp, thread id, etc, just like `clickhouse-server`. [#54807](https://github.com/ClickHouse/ClickHouse/pull/54807) ([Michael Kolupaev](https://github.com/al13n321)). -* The field `is_obsolete` in the `system.merge_tree_settings` table is now 1 for obsolete merge tree settings. Previously, only the description indicated that the setting is obsolete. [#54837](https://github.com/ClickHouse/ClickHouse/pull/54837) ([Robert Schulze](https://github.com/rschu1ze)). -* Make it possible to use plural forms in interval literals: `INTERVAL 2 HOURS` is equivalent to `INTERVAL 2 HOUR`. [#54860](https://github.com/ClickHouse/ClickHouse/pull/54860) ([Jordi Villar](https://github.com/jrdi)). -* Always allow the creation of a projection with `Nullable` PK. This fixes [#54814](https://github.com/ClickHouse/ClickHouse/issues/54814). [#54895](https://github.com/ClickHouse/ClickHouse/pull/54895) ([Amos Bird](https://github.com/amosbird)). -* Retry backup's S3 operations after connection reset failure. [#54900](https://github.com/ClickHouse/ClickHouse/pull/54900) ([Vitaly Baranov](https://github.com/vitlibar)). -* Make the exception message exact in case the maximum value of a setting is less than the minimum value. [#54925](https://github.com/ClickHouse/ClickHouse/pull/54925) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* `LIKE`, `match`, and other regular expression matching functions now allow matching with patterns containing non-UTF-8 substrings by falling back to binary matching. Example: you can use `string LIKE '\xFE\xFF%'` to detect BOM. This closes [#54486](https://github.com/ClickHouse/ClickHouse/issues/54486). [#54942](https://github.com/ClickHouse/ClickHouse/pull/54942) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added `ContextLockWaitMicroseconds` profile event. [#55029](https://github.com/ClickHouse/ClickHouse/pull/55029) ([Maksim Kita](https://github.com/kitaisreal)). -* The Keeper dynamically adjusts log levels. [#50372](https://github.com/ClickHouse/ClickHouse/pull/50372) ([helifu](https://github.com/helifu)). -* Added function `timestamp` for compatibility with MySQL. Closes [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54639](https://github.com/ClickHouse/ClickHouse/pull/54639) ([Nikolay Degterinsky](https://github.com/evillique)). - -#### Build/Testing/Packaging Improvement -* Bumped the compiler of official and continuous integration builds of ClickHouse from Clang 16 to 17. [#53831](https://github.com/ClickHouse/ClickHouse/pull/53831) ([Robert Schulze](https://github.com/rschu1ze)). -* Regenerated tld data for lookups (`tldLookup.generated.cpp`). [#54269](https://github.com/ClickHouse/ClickHouse/pull/54269) ([Bharat Nallan](https://github.com/bharatnc)). -* Remove the redundant `clickhouse-keeper-client` symlink.
[#54587](https://github.com/ClickHouse/ClickHouse/pull/54587) ([Tomas Barton](https://github.com/deric)). -* Use `/usr/bin/env` to resolve bash - now it supports NixOS. [#54603](https://github.com/ClickHouse/ClickHouse/pull/54603) ([Fionera](https://github.com/fionera)). -* Added the `PROFILE_CPU` CMake option, needed to perform `perf record` without using a DWARF call graph. [#54917](https://github.com/ClickHouse/ClickHouse/pull/54917) ([Maksim Kita](https://github.com/kitaisreal)). -* If the linker is different from LLD, stop with a fatal error. [#55036](https://github.com/ClickHouse/ClickHouse/pull/55036) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Replaced the library used to handle (encode/decode) base64 values from Turbo-Base64 to aklomp-base64. Both are SIMD-accelerated on x86 and ARM but 1. the license of the latter (BSD-2) is more favorable for ClickHouse, as Turbo-Base64 has in the meantime switched to GPL-3, 2. with more GitHub stars, aklomp-base64 seems more future-proof, 3. aklomp-base64 has a slightly nicer API (which is arguably subjective), and 4. aklomp-base64 does not require us to hack around bugs (like non-threadsafe initialization). Note: aklomp-base64 rejects unpadded base64 values whereas Turbo-Base64 decodes them on a best-effort basis. RFC-4648 leaves it open whether padding is mandatory or not, but depending on the context this may be a behavioral change to be aware of. [#54119](https://github.com/ClickHouse/ClickHouse/pull/54119) ([Mikhail Koviazin](https://github.com/mkmkme)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix REPLACE/MOVE PARTITION with zero-copy replication (note: "zero-copy replication" is an experimental feature) [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix zero copy locks with hardlinks (note: "zero-copy replication" is an experimental feature) [#54859](https://github.com/ClickHouse/ClickHouse/pull/54859) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix zero copy garbage (note: "zero-copy replication" is an experimental feature) [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Pass HTTP retry timeout as milliseconds (it was incorrect before). [#54438](https://github.com/ClickHouse/ClickHouse/pull/54438) ([Duc Canh Le](https://github.com/canhld94)). -* Fix misleading error message in OUTFILE with `CapnProto`/`Protobuf` [#52870](https://github.com/ClickHouse/ClickHouse/pull/52870) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix summary reporting with parallel replicas with LIMIT [#53050](https://github.com/ClickHouse/ClickHouse/pull/53050) ([Raúl Marín](https://github.com/Algunenano)). -* Fix throttling of BACKUPs from/to S3 (in case native copy was not used) and in some other places as well [#53336](https://github.com/ClickHouse/ClickHouse/pull/53336) ([Azat Khuzhin](https://github.com/azat)). -* Fix IO throttling during copying whole directories [#53338](https://github.com/ClickHouse/ClickHouse/pull/53338) ([Azat Khuzhin](https://github.com/azat)). -* Fix: moved to prewhere condition actions can lose column [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fixed internal error when replacing with byte-equal parts [#53735](https://github.com/ClickHouse/ClickHouse/pull/53735) ([Pedro Riera](https://github.com/priera)).
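Regarding the base64 library swap noted above, a sketch of the padding caveat (behavior to be verified against your build; `tryBase64Decode` returns an empty string on invalid input):

```sql
SELECT
    base64Decode('Zm9vYmE=')   AS padded_value,    -- decodes to 'fooba'
    tryBase64Decode('Zm9vYmE') AS unpadded_value;  -- may now yield '' because aklomp-base64 rejects unpadded input
```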
-* Fix: require columns participating in interpolate expression [#53754](https://github.com/ClickHouse/ClickHouse/pull/53754) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix cluster discovery initialization + setting up fail points in config [#54113](https://github.com/ClickHouse/ClickHouse/pull/54113) ([vdimir](https://github.com/vdimir)). -* Fix issues in `accurateCastOrNull` [#54136](https://github.com/ClickHouse/ClickHouse/pull/54136) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix nullable primary key with the FINAL modifier [#54164](https://github.com/ClickHouse/ClickHouse/pull/54164) ([Amos Bird](https://github.com/amosbird)). -* Fixed error that prevented insertion in replicated materialized view of new data in presence of duplicated data. [#54184](https://github.com/ClickHouse/ClickHouse/pull/54184) ([Pedro Riera](https://github.com/priera)). -* Fix: allow `IPv6` for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* fix possible type mismatch with `IPv4` [#54212](https://github.com/ClickHouse/ClickHouse/pull/54212) ([Bharat Nallan](https://github.com/bharatnc)). -* Fix `system.data_skipping_indices` for recreated indices [#54225](https://github.com/ClickHouse/ClickHouse/pull/54225) ([Artur Malchanau](https://github.com/Hexta)). -* fix name clash for multiple join rewriter v2 [#54240](https://github.com/ClickHouse/ClickHouse/pull/54240) ([Tao Wang](https://github.com/wangtZJU)). -* Fix unexpected errors in `system.errors` after join [#54306](https://github.com/ClickHouse/ClickHouse/pull/54306) ([vdimir](https://github.com/vdimir)). -* Fix `isZeroOrNull(NULL)` [#54316](https://github.com/ClickHouse/ClickHouse/pull/54316) ([flynn](https://github.com/ucasfl)). -* Fix: parallel replicas over distributed with `prefer_localhost_replica` = 1 [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix logical error in vertical merge + replacing merge tree + optimize cleanup [#54368](https://github.com/ClickHouse/ClickHouse/pull/54368) ([alesapin](https://github.com/alesapin)). -* Fix possible error `URI contains invalid characters` in the `s3` table function [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix segfault in AST optimization of `arrayExists` function [#54379](https://github.com/ClickHouse/ClickHouse/pull/54379) ([Nikolay Degterinsky](https://github.com/evillique)). -* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). -* Reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). -* Fix possible incorrect result with SimpleAggregateFunction in PREWHERE and FINAL [#54436](https://github.com/ClickHouse/ClickHouse/pull/54436) ([Azat Khuzhin](https://github.com/azat)). -* Fix filtering parts with indexHint for non analyzer [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)). -* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). 
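As a reminder of the `accurateCastOrNull` semantics touched by the fixes above (illustrative; the values are chosen arbitrarily):

```sql
SELECT
    accurateCastOrNull(257, 'UInt8') AS out_of_range,  -- NULL: 257 does not fit into UInt8
    accurateCastOrNull(42, 'UInt8')  AS in_range;      -- 42
```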
-* `clickhouse-local`: something for multiquery parameter [#54498](https://github.com/ClickHouse/ClickHouse/pull/54498) ([CuiShuoGuo](https://github.com/bakam412)). -* `clickhouse-local` supports `--database` command line argument [#54503](https://github.com/ClickHouse/ClickHouse/pull/54503) ([vdimir](https://github.com/vdimir)). -* Fix possible parsing error in `-WithNames` formats with disabled `input_format_with_names_use_header` [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)). -* Fix sorting of UNION ALL of already sorted results [#54564](https://github.com/ClickHouse/ClickHouse/pull/54564) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix snapshot install in Keeper [#54572](https://github.com/ClickHouse/ClickHouse/pull/54572) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix race in `ColumnUnique` [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). -* Annoy/Usearch index: Fix LOGICAL_ERROR during build-up with default values [#54600](https://github.com/ClickHouse/ClickHouse/pull/54600) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix serialization of `ColumnDecimal` [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). -* Fix schema inference for *Cluster functions for column names with spaces [#54635](https://github.com/ClickHouse/ClickHouse/pull/54635) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix using structure from insertion tables in case of defaults and explicit insert columns [#54655](https://github.com/ClickHouse/ClickHouse/pull/54655) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix ReplacingMergeTree with vertical merge and cleanup [#54706](https://github.com/ClickHouse/ClickHouse/pull/54706) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix virtual columns having incorrect values after ORDER BY [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix filtering parts with indexHint for non analyzer [#54825](https://github.com/ClickHouse/ClickHouse/pull/54825) [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)). -* Fix Keeper segfault during shutdown [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix `Invalid number of rows in Chunk` in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Move obsolete format settings to separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)). -* Rebuild `minmax_count_projection` when partition key gets modified [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). -* Fix bad cast to `ColumnVector` in function `if` [#55019](https://github.com/ClickHouse/ClickHouse/pull/55019) ([Kruglov Pavel](https://github.com/Avogar)). 
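A tiny sketch of the `isZeroOrNull` behavior the fix above is concerned with (expected results shown as comments, assuming the fixed behavior):

```sql
SELECT
    isZeroOrNull(0)    AS zero_value,  -- 1
    isZeroOrNull(NULL) AS null_value,  -- 1 after the fix
    isZeroOrNull(1)    AS non_zero;    -- 0
```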
-* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Store NULL in scalar result map for empty subquery result [#52240](https://github.com/ClickHouse/ClickHouse/pull/52240) ([vdimir](https://github.com/vdimir)). -* Fix `FINAL` produces invalid read ranges in a rare case [#54934](https://github.com/ClickHouse/ClickHouse/pull/54934) ([Nikita Taranov](https://github.com/nickitat)). -* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix simple state with nullable [#55030](https://github.com/ClickHouse/ClickHouse/pull/55030) ([Pedro Riera](https://github.com/priera)). - - ### ClickHouse release 23.8 LTS, 2023-08-31 - -#### Backward Incompatible Change -* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In previous versions it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` greater than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). - -#### New Feature -* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)). -* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build a flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
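A hedged sketch of the archive import syntax above; the `backups/*.zip` path and the `CSVWithNames` format are placeholders:

```sql
-- Read every CSV file found inside the matched ZIP archives.
SELECT count()
FROM file('backups/*.zip :: *.csv', CSVWithNames);
```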
-* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without a table name, as requested in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)). -* Added a system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)). -* Added the `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* New functions `toUTCTimestamp/fromUTCTimestamp` to act the same as Spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)). -* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow input/output of data in CapnProto/Protobuf format without an external format schema, using an autogenerated schema from the table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow exporting the autogenerated schema during input/output using the setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)). -* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)). -* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)). -* Allow a variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, and make schema inference work with a variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)). -* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Allow executing constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)). -* Add input format `One` that doesn't read any data and always returns a single row with a column `dummy` of type `UInt8` and value `0`, like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). -* Support `TRUNCATE DATABASE` operation.
[#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `max_threads_for_indexes` setting to limit number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). -* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755) , [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Add column `name` to `system.clusters` as an alias to cluster. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)). -* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). - -#### Experimental Feature -* Add new table engine `S3Queue` for streaming data import from s3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it. -* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). -* Add experimental support for HNSW as approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it. - -#### Performance Improvement -* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). -* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). 
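Illustrating the array additions above (element-wise `+` and the new rotate helpers); expected results are shown as comments:

```sql
SELECT
    [5, 2] + [1, 7]                  AS added,    -- [6, 9]
    arrayRotateLeft([1, 2, 3, 4], 1) AS rotated;  -- [2, 3, 4, 1]
```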
-* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). -* Use filter by file/path before reading in `url`/`file`/`hdfs` table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). -* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). -* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)). -* Speed up merging of states of `uniq` and `uniqExact` aggregate functions by parallelizing conversion before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). -* Optimize aggregation performance of nullable string key when using a large number of variable length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)). -* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). -* `Join` table engine: do not clone hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). -* Implement native `ORC` input format without the "apache arrow" library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). -* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). -* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. 
In that case each entry consumed memory and the same callback was called multiple times, which didn't make much sense. Previously, to avoid this, the caller had to implement its own logic to not add the same watch multiple times. With this change, the deduplication is done internally if the watch callback is passed via `shared_ptr`. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). -* Cache the number of rows in files for `count` in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by the setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). -* More careful thread management improves the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). - -#### Improvement -* Add `stderr_reaction` configuration/setting to control the reaction (none, log, or throw) when an external command writes to stderr. This makes debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). -* Add `partition` column to the `system.part_log` and merge tables; see the query sketch below. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). -* The sizes of the (index) uncompressed/mark, mmap, and query caches can now be configured dynamically at runtime (without a server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). -* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). -* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added suggestions for mistyped database and table names. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Improved performance when reading small files from HDFS through Gluten, which previously took noticeably longer than querying the same data directly with Spark. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). -* Reduced the amount of useless error logging after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). -* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in `system.zookeeper_connection` for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Add the ability to log when `max_partitions_per_insert_block` is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). -* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)).
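The `partition` column entry above could be used along the following lines (a hedged sketch: `partition` is the new column from that entry, the other `system.part_log` columns and the `NewPart` event type are the pre-existing ones, and the output depends entirely on your workload):

```sql
-- Count parts written during the last day, per table and partition.
SELECT database, table, partition, count() AS parts
FROM system.part_log
WHERE event_type = 'NewPart' AND event_date >= today() - 1
GROUP BY database, table, partition
ORDER BY parts DESC
LIMIT 10;
```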
-* Updated check for connection string in `azureBlobStorage` table function as connection string with "sas" does not always begin with the default endpoint and updated connection URL to include "sas" token after adding Azure's container to URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix description for filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). -* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). -* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare in-memory state of filesystem cache with what it has on disk and fix in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Attempt to create a generic proxy resolver for CH while keeping backwards compatibility with existing S3 storage conf proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). -* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). -* Function `arrayIntersect` now returns the values in the order, corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add new queries, which allow to create/drop of access entities in specified access storage or move access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). -* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). -* Added possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix the case when `s3` table function refused to work with pre-signed URLs. close [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). -* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). -* Added support of syntax `CREATE UNIQUE INDEX` in parser as a no-op for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique = 1` to ignore UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). -* Add support of predefined macro (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. 
[#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). -* Disable updating the filesystem cache during backup/restore. Updating the cache in this case only slows the process down without any benefit, because the BACKUP command can read a lot of data and there is no point in putting all of it into the filesystem cache just to evict it immediately. [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). -* The S3 endpoint configuration now allows using it from the root, and appends '/' automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). -* For clickhouse-local, allow positional options and populate global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* `system.asynchronous_metrics` now includes metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)). -* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). -* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). -* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Database and table names are case-sensitive, but BI tools query `information_schema` sometimes in lowercase and sometimes in uppercase. For this reason we have the `information_schema` database, containing lowercase tables, such as `information_schema.tables`, and the `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools query `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The solution is to duplicate both lowercase and uppercase tables in the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* The query `CHECK TABLE` has better performance and usability (it sends progress updates and is cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). -* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by applying them element-wise across the tuple's elements; see the sketch below. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* When merging into a configuration not rooted at 'clickhouse', configs with a different root node name are now bypassed without an exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
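A hedged sketch of the element-wise tuple arithmetic from the entry above (the expected results follow from applying the functions to each pair of elements; the exact accepted syntax may differ slightly):

```sql
SELECT modulo((10, 7), (3, 4))        AS mod_result,   -- expected (1, 3)
       intDiv((10, 7), (3, 2))        AS div_result,   -- expected (3, 3)
       intDivOrZero((10, 7), (0, 2))  AS safe_result;  -- expected (0, 3)
```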
-* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). -* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). -* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). -* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). -* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). -* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). -* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). -* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). -* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). -* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. 
[#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). -* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Limit number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). -* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). -* If server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, it will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). -* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). -* Add ability to turn off flush of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). -* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). -* Use longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). -* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit array size for `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* `SCHEMA` was added as alias for `DATABASE` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). -* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* As expert-level settings, it is now possible to (1) configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). 
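For the `SCHEMA` alias entry above, a small hedged sketch of the MySQL-compatible spelling (assuming the alias applies to the usual `DATABASE` DDL statements; `staging` is just an example name):

```sql
CREATE SCHEMA IF NOT EXISTS staging;   -- same as CREATE DATABASE IF NOT EXISTS staging
DROP SCHEMA IF EXISTS staging;         -- same as DROP DATABASE IF EXISTS staging
```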
-* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Retry loading parts in case of network errors while interacting with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Stack traces are kept for exceptions, and Materialized View exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). -* If no hostname or port was specified, keeper client will try to find a connection string in ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). -* Add profile event `PartsLockMicroseconds` which shows the number of microseconds the data parts lock is held in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). -* Make the reconnect limit in the RAFT limits configurable for Keeper. This can help Keeper rebuild connections with peers more quickly if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)). -* Ignore foreign keys in table definitions to improve compatibility with MySQL, so users don't need to rewrite the foreign-key parts of their SQL, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). - -#### Build/Testing/Packaging Improvement -* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add https://github.com/elliotchance/sqltest to CI to report SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). -* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The function `arrayAUC` was using heavy C++ templates; they were ditched.
[#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Some translation units were always rebuilt regardless of ccache. The culprit was found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Follow-up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper passing of ENV parameters to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). -* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). -* Slightly improve the CMake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Do not reset (experimental) Annoy index during build-up with more than one mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). -* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). -* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Support IPv4 and IPv6 data types as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* A fix for the checksum of compressed marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix a comma being mistakenly parsed as part of a datetime in CSV best-effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)).
-* Don't throw exception when executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). -* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). -* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). -* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). -* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). -* Fix named collections on cluster [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). -* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). -* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). -* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). -* clickhouse-keeper: fix implementation of server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). -* Make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). -* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). -* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). -* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). -* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). 
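The multi-column nulls-first sort fix above concerns queries of roughly this shape (a hedged sketch using the `values` table function; after the fix the secondary key is respected within the group of NULLs):

```sql
SELECT a, b
FROM values('a Nullable(Int32), b Int32', (NULL, 2), (1, 1), (NULL, 1), (2, 3))
ORDER BY a ASC NULLS FIRST, b ASC;
-- expected order: (NULL, 1), (NULL, 2), (1, 1), (2, 3)
```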
-* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). -* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). -* Experimental session_log table: Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). -* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). -* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). -* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix `timeout_overflow_mode` when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). -* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
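As an illustration of the sub-second interval fix above (a hedged sketch; the exact result type of the addition depends on the argument):

```sql
SELECT now()    + INTERVAL 500 MILLISECOND AS dt_plus_ms,    -- DateTime plus a sub-second interval
       now64(3) - INTERVAL 250 MILLISECOND AS dt64_minus_ms; -- DateTime64 variant
```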
-* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). -* Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). -* Fix crash in join on sparse columns [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). -* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). -* Fix: the INTERPOLATE expression took the source column instead of the same-named alias from the SELECT expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix the number of dropped granules in `EXPLAIN PLAN index=1` [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). -* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix the prepared set cache getting stuck in the mutation pipeline [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). -* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). -* Try to fix bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). -* `transform`: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). -* MaterializedPostgreSQL: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). - -### ClickHouse release 23.7, 2023-07-27 - -#### Backward Incompatible Change -* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277).
To grant access, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`; to be able to give these grants, `named_collection_admin` is required in the config (previously it was named `named_collection_control`, which will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`: it is now named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). -* Bump the default value of `distributed_ddl_entry_format_version` to 5 (enables OpenTelemetry and `initial_query_id` pass-through). Existing distributed DDL entries cannot be processed after a *downgrade* (but note that there usually should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). -* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting if there was a table with an invalid projection. An example is a projection that created positional columns in the PK (e.g. `projection p (select * order by 1, 4)`), which is not allowed in a table PK and can cause a crash during insert/merge. Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* The experimental feature `hashid` is removed due to a bug. The quality of the implementation was questionable at the start, and it never got out of the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Added the `Overlay` database engine to combine multiple databases into one. Added the `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows read-only interaction with S3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows interacting with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). -* The Kafka connector can fetch the Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). -* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819).
[#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). -* Implement support for encrypted elements in the configuration file: encrypted text can be used in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `encryption_codecs` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). -* The Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). -* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add the `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). -* A system table `jemalloc_bins` to show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). -* Add `RowBinaryWithDefaults` format with an extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). -* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). -* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). -* `CREATE TABLE` now supports `PRIMARY KEY` syntax in the column definition. Columns are added to the primary index in the same order they are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). -* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). -* Added the Peak Memory Usage statistic to HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Add `array_agg` as an alias of `groupArray` for PostgreSQL compatibility.
Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). -* Add `any_value` as a compatibility alias for the `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). -* Add aggregate function `array_concat_agg` for compatibility with BigQuery; it is an alias of `groupArrayArray` (see the sketch below). Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). -* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Added the `firstLine` function to extract the first line from a multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). -* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush the queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). -* Added the alias `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). -* Add aliases for the functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). -* Add new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI in the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Performance Improvement -* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. A downgrade may require setting `ratio_of_defaults_for_sparse_serialization=0.9375` [#55153](https://github.com/ClickHouse/ClickHouse/issues/55153). You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
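A hedged sketch of several of the compatibility aliases listed above (the literal inputs and expected results are illustrative only):

```sql
-- MySQL/PostgreSQL-style spellings mapping to existing ClickHouse functions.
SELECT curdate()                         AS today_alias,   -- alias of today()
       OCTET_LENGTH('ClickHouse')        AS len,           -- alias of length(), expected 10
       firstLine('first\nsecond\nthird') AS first_line;    -- expected 'first'

-- array_concat_agg is an alias of groupArrayArray.
SELECT array_concat_agg(arr) AS flattened                  -- expected [1, 2, 3, 4, 5]
FROM values('arr Array(Int32)', [1, 2], [3], [4, 5]);
```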
-* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). -* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). -* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). -* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). -* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). -* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). -* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). -* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). -* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). -* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). 
-* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). -* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). - -#### Improvement -* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). -* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). -* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). -* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). -* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). -* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). -* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). -* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). 
[#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). -* Add column `primary_key_size` to the `system.parts` table to show the compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Allow running `clickhouse-local` without procfs, without a home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). -* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). -* Add new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting the default value when parsing of a single field fails. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). -* Added a crash log flush to the disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix behavior on the dashboard page where errors unrelated to authentication were not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). -* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Added support for Nullable arguments in the function `range`. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Convert conditions like `toYear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). -* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix `use_structure_from_insertion_table_in_table_functions` not working with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). -* The cache dictionary now requests only unique keys from the source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). -* Fixed the case when settings were not applied for an EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). -* Allow SETTINGS before FORMAT in a DESCRIBE TABLE query for compatibility with the SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). -* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range.
3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). -* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). -* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). -* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). -* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). -* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). -* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). -* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). -* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). -* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). -* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). -* Now interserver port will be closed only after tables are shut down. 
[#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). - -#### Experimental Feature -* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal. -* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. -* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). -* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). -* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete. - -#### Build/Testing/Packaging Improvement -* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). -* Reproducible builds for Rust. 
[#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). -* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). -* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). -* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). -* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). -* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). -* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). -* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). -* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). -* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). -* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). 
-* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). -* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). -* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). -* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). -* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). -* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). -* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). -* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). -* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). -* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). -* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). -* Fix `countSubstrings` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). -* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). -* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). -* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). -* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). -* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). - - -### ClickHouse release 23.6, 2023-06-29 +### ClickHouse release master (b4a5b6060ea) FIXME as compared to v23.12.1.1368-stable (a2faa65b080) #### Backward Incompatible Change -* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Decrease the default values for `http_max_field_value_size` and `http_max_field_name_size` to 128 KiB. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior, or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated; it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you should check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the operation of `sumMapFiltered` with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)). +* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Kusto` dialect is disabled until these two bugs are fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Any attempt to use `Kusto` will result in an exception. +* More efficient implementation of the `FINAL` modifier no longer guarantees preserving the order even if `max_threads = 1`. If you counted on the previous behavior, set `enable_vertical_final` to 0 or `compatibility` to `23.12`. #### New Feature -* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added option `--rename_files_after_processing `.
This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for `TRUNCATE` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). -* Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). -* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. [#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). -* Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add setting `session_timezone`; it is used as the default timezone for a session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). -* Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). +* Implement the Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). The Variant type is available under the setting `allow_experimental_variant_type`; a usage sketch is shown below. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). +* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)). +* Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). +* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
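The `Variant` entry above describes the type only in the abstract, so here is a minimal, hedged sketch of how it could be used. The table and column names (`variant_demo`, `v`) are made up for illustration; only the `Variant(...)` syntax and the `allow_experimental_variant_type` setting come from the entry itself, and the exact behavior should be checked against the documentation for this release.

```sql
-- Enable the experimental type (per the entry above).
SET allow_experimental_variant_type = 1;

-- Hypothetical table: each row of `v` holds a UInt64, a String,
-- an Array(UInt64), or NULL when none of them is set.
CREATE TABLE variant_demo
(
    id UInt64,
    v  Variant(UInt64, String, Array(UInt64))
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO variant_demo VALUES (1, 42), (2, 'hello'), (3, [1, 2, 3]), (4, NULL);

SELECT id, v FROM variant_demo ORDER BY id;
```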
+* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)). +* Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). +* Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). +* Added string similarity functions `damerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). +* Add two settings: `output_format_compression_level` to change the output compression level, and `output_format_compression_zstd_window_log` to explicitly set the compression window size and enable long-range mode for zstd compression if the output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)). +* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)). +* Added function `sqidDecode` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)). +* Added function `seriesDecomposeSTL` which decomposes a time series into a seasonal, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Introduced MySQL Binlog Client for MaterializedMySQL: one binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)). +* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptography. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Intel's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in if QAT could not be initialized); decompression always runs in software.
[#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)). +* Implement a new way of generating object storage keys for s3 disks. The format can now be defined in terms of `re2` regex syntax with the `key_template` option in the disk description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)). +* The table `system.dropped_tables_parts` contains parts of the tables from `system.dropped_tables` (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add setting `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)). +* `clickhouse-format` improvements: support INSERT queries with `VALUES`; support comments (use `--comments` to output them); support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)). +* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)). Allow registering database engines independently in the codebase. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)). Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)). +* Added the `FROM` modifier for the `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. With the `FROM` modifier, the query waits for fetches and drop-ranges only from the specified source replicas, as well as any replica not in ZooKeeper or with an empty `source_replica`. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)). +* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows updating the insert deduplication token with the table identifier during inserts into dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)). +* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)). #### Performance Improvement -* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or cleanup.
Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Make filter push down through cross join. [#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). -* Improve performance with enabled QueryProfiler using thread-local timer_id instead of global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). -* Rewrite CapnProto input/output format to improve its performance. Map column names and CapnProto fields case insensitive, fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). -* Optimize parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). -* Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only. [#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). -* Merge PR [#46558](https://github.com/ClickHouse/ClickHouse/pull/46558). Avoid block permutation during sort if the block is already sorted. [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Alexey Milovidov](https://github.com/alexey-milovidov), [Maksim Kita](https://github.com/kitaisreal)). -* Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). -* Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). -* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -#### Experimental Feature -* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). -* Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). -* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Coordination for parallel replicas is rewritten for better parallelism and cache locality. It has been tested for linear scalability on hundreds of replicas. 
It also got support for reading in order. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)). +* Replace HTTP outgoing buffering with the native ClickHouse buffers. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Large aggregation states of `uniqExact` will be merged in parallel in distributed queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)). +* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)). +* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)). +* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)). +* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)). +* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads = 1`) produced sorted output without an explicitly provided `ORDER BY` clause. This is no longer guaranteed when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)). +* Bypass extra copying in `ReadBufferFromIStream` which is used, e.g., for reading from S3. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)). +* Optimize the array element function when the input is `Array(Map)`, `Array(Array(Num))`, `Array(Array(String))`, `Array(BigInt)` or `Array(Decimal)`. The previous implementations did more allocations than needed. The optimization speedup is up to ~6x, especially when the input type is `Array(Map)`. [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)). +* Read a column only once while reading more than one subcolumn from it in compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite the AST of the `sum(column + constant)` function. This is available as an optimization pass for the Analyzer. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)). +* The evaluation of function `match` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)). +* The evaluation of function `match` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)). +* MergeTree `FINAL` does not compare rows from the same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)). +* Speed up iota calls (filling an array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)). +* Speedup MIN/MAX for non-numeric types.
[#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize the combination of filters (like in multi-stage PREWHERE) with BMI2/SSE intrinsics [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Lower memory consumption in backups to S3. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)). #### Improvement -* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow to cast IPv6 to IPv4 address for CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Update MongoDB protocol to support MongoDB 5.1 version and newer. Support for the versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). -* Respect setting `input_format_null_as_default` in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to skip trailing empty lines in CSV/TSV/CustomSeparated formats via settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). -* Functions "toDateOrDefault|OrNull" and "accuateCast[OrDefault|OrNull]" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Support CSV with whitespace or `\t` field delimiters, and these delimiters are supported in Spark. [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). 
-* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). -* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). [#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added the possibility to use date and time arguments in the syslog timestamp format in functions `parseDateTimeBestEffort*` and `parseDateTime64BestEffort*`. [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). -* Command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). -* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). -* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). -* Add total_bytes_to_read to the Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). -* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). -* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added comments (brief descriptions) to all columns of system tables. There are several reasons for this: we use system tables a lot, and it can be very difficult for a developer to understand the purpose and meaning of a particular column; we change system tables a lot (adding new ones or modifying existing ones) and the documentation for them is always outdated (for example, the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts) misses a lot of columns); and we would like to eventually generate the documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Enable `MySQL`/`MariaDB` integration on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191).
[#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Alexey Milovidov](https://github.com/alexey-milovidov)) ([Robert Schulze](https://github.com/rschu1ze)). +* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)). +* Add `` config parameter that allows avoiding resolving hostnames in ON CLUSTER DDL queries and Replicated database engines. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)). +* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)). +* Cluster discovery supports setting username and password, closes [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)). +* Support query parameters in `ALTER TABLE ... PART`. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)). +* Create consumers for Kafka tables on the fly (but keep them for some period, `kafka_consumers_pool_ttl_ms`, since last use). This should fix the problem with statistics for `system.kafka_consumers` (which were not updated when nobody read from a Kafka table, leading to a live memory leak and slow table detach), and this PR also re-enables stats for `system.kafka_consumers` by default. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)). +* Added `sparkBar` as an alias for `sparkbar`. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)). +* Avoid sending `ComposeObject` requests after upload to `GCS`. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)). +* Correctly handle keys with a dot in the name in configuration XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)). +* Make function `format` return a constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). [#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Provide a hint when an invalid database engine name is used.
[#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)). +* Add settings for better control of the index type in the Arrow dictionary. Use a signed integer type for indexes by default, as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575): support the `CLICKHOUSE_PASSWORD_FILE` environment variable when running the Docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)). +* When executing some queries that require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the number of streams is reduced to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Better message for the INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Support the Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)). +* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)). +* Improve S3 compatibility, add ECloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)). +* Allow `KILL QUERY` to cancel backups / restores; a usage sketch is shown below. This PR also makes running backups and restores visible in `system.processes`. Also, there is a new server configuration setting, `shutdown_wait_backups_and_restores` (default: true), which makes the server either wait on shutdown for all running backups and restores to finish or cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)). +* The Avro format now supports the ZSTD codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)). +* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set the MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)). +* A better exception message when there is a conflict between creating a dictionary and a table with the same name. [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
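As a rough, hedged illustration of the `KILL QUERY` / backups entry above: the table name and backup destination below are hypothetical, and only the visibility of backups in `system.processes` and the ability to kill them come from the entry itself.

```sql
-- Hypothetical table and backup destination; start the backup asynchronously.
BACKUP TABLE events TO File('/backups/events_backup.zip') ASYNC;

-- Per the entry above, the running backup is visible in system.processes.
SELECT query_id, query
FROM system.processes
WHERE query ILIKE 'BACKUP%';

-- Cancel it by query_id (substitute the value returned by the previous query).
KILL QUERY WHERE query_id = '<query_id from the previous SELECT>';
```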
+* Make sure that for custom (created from SQL) disks either `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in the server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. The filesystem cache setting `path` must lie inside that directory, otherwise an exception will be thrown, preventing the disk from being created. This does not affect disks created on an older version if the server was later upgraded; in that case the exception is not thrown, to allow the server to start successfully. `custom_cached_disks_base_directory` is added to the default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)). +* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Display a word-descriptive log level when structured log formatting in JSON is enabled. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)). +* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)). +* Change the working directory to the data path in the Docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)). +* Added setting `azure_max_unexpected_write_error_retries` for Azure Blob Storage; it can also be set from the config under the azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow the server to start with a broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to ignore schema evolution in the `Iceberg` table engine and read all data using the schema specified by the user on table creation or the latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect results, as in the case of an evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). +* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (just as for `MergeTree`).
Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug in the experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)). +* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support the `FORMAT` clause in `BACKUP` and `RESTORE` queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function `concatWithSeparator` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)). #### Build/Testing/Packaging Improvement -* Add embedded keeper-client to standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). -* Actual LZ4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). -* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable as bugs that were triggered by it are fixed. Now failures of SQLancer check will be reported as failed check status. [#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)). -* Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Improve aliases for the clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depending on the arguments) and add bash completion for the new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)). +* Add a settings-changes check to CI to verify that all settings changes are reflected in the settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)). +* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)). +* Enable Rust on macOS with Aarch64 (this adds fuzzy search in the client with skim and the PRQL language; there are probably few people who host ClickHouse on Darwin, so it is mostly for fuzzy search in the client). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)). +* Fix an aggregation issue in mixed x86_64 and ARM clusters. [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). #### Bug Fix (user-visible misbehavior in an official stable release) -* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)). -* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)). -* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)). -* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)). -* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)). -* Query Cache: Try to fix bad cast from `ColumnConst` to `ColumnVector` [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). -* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)). -* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)). -* Add compatibility setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix hashing of LDAP params in the cache entries [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)).
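As a small sketch of the `SIGNED`/`UNSIGNED` aliases described in [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) above (the values are hypothetical):

```sql
-- SIGNED is an alias for Int64 and UNSIGNED for UInt64, mirroring MySQL's CAST syntax.
SELECT
    CAST('-5' AS SIGNED)   AS s,  -- Int64
    CAST('5'  AS UNSIGNED) AS u;  -- UInt64
```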
-* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)). -* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix race in the Azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)). -* Fix Iceberg v2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)). -* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)). -* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)). -* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix a crash with compiled expressions [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)). -* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)). -* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix multiple writing of same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - - -### ClickHouse release 23.5, 2023-06-08 - -#### Upgrade Notes -* Compress marks and primary key by default. It significantly reduces the cold query time. Upgrade notes: the support for compressed marks and primary key has been added in version 22.9. 
If you turned on compressed marks or primary key or installed version 23.5 or newer, which has compressed marks or primary key on by default, you will not be able to downgrade to version 22.8 or earlier. You can also explicitly disable compressed marks or primary keys by specifying the `compress_marks` and `compress_primary_key` settings in the `` section of the server configuration file. **Upgrade notes:** If you upgrade from versions prior to 22.9, you should either upgrade all replicas at once or disable the compression before upgrade, or upgrade through an intermediate version, where the compressed marks are supported but not enabled by default, such as 23.3. [#42587](https://github.com/ClickHouse/ClickHouse/pull/42587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Make local object storage work consistently with s3 object storage, fix problem with append (closes [#48465](https://github.com/ClickHouse/ClickHouse/issues/48465)), make it configurable as independent storage. The change is backward incompatible because the cache on top of local object storage is not compatible to previous versions. [#48791](https://github.com/ClickHouse/ClickHouse/pull/48791) ([Kseniia Sumarokova](https://github.com/kssenii)). -* The experimental feature "in-memory data parts" is removed. The data format is still supported, but the settings are no-op, and compact or wide parts will be used instead. This closes [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). [#49429](https://github.com/ClickHouse/ClickHouse/pull/49429) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Changed default values of settings `parallelize_output_from_storages` and `input_format_parquet_preserve_order`. This allows ClickHouse to reorder rows when reading from files (e.g. CSV or Parquet), greatly improving performance in many cases. To restore the old behavior of preserving order, use `parallelize_output_from_storages = 0`, `input_format_parquet_preserve_order = 1`. [#49479](https://github.com/ClickHouse/ClickHouse/pull/49479) ([Michael Kolupaev](https://github.com/al13n321)). -* Make projections production-ready. Add the `optimize_use_projections` setting to control whether the projections will be selected for SELECT queries. The setting `allow_experimental_projection_optimization` is obsolete and does nothing. [#49719](https://github.com/ClickHouse/ClickHouse/pull/49719) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Mark `joinGet` as non-deterministic (so as `dictGet`). It allows using them in mutations without an extra setting. [#49843](https://github.com/ClickHouse/ClickHouse/pull/49843) ([Azat Khuzhin](https://github.com/azat)). -* Revert the "`groupArray` returns cannot be nullable" change (due to binary compatibility breakage for `groupArray`/`groupArrayLast`/`groupArraySample` over `Nullable` types, which likely will lead to `TOO_LARGE_ARRAY_SIZE` or `CANNOT_READ_ALL_DATA`). [#49971](https://github.com/ClickHouse/ClickHouse/pull/49971) ([Azat Khuzhin](https://github.com/azat)). -* Setting `enable_memory_bound_merging_of_aggregation_results` is enabled by default. If you update from version prior to 22.12, we recommend to set this flag to `false` until update is finished. [#50319](https://github.com/ClickHouse/ClickHouse/pull/50319) ([Nikita Taranov](https://github.com/nickitat)). - -#### New Feature -* Added storage engine AzureBlobStorage and azureBlobStorage table function. 
The supported set of features is very similar to storage/table function S3 [#50604] (https://github.com/ClickHouse/ClickHouse/pull/50604) ([alesapin](https://github.com/alesapin)) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni). -* Added native ClickHouse Keeper CLI Client, it is available as `clickhouse keeper-client` [#47414](https://github.com/ClickHouse/ClickHouse/pull/47414) ([pufit](https://github.com/pufit)). -* Add `urlCluster` table function. Refactor all *Cluster table functions to reduce code duplication. Make schema inference work for all possible *Cluster function signatures and for named collections. Closes [#38499](https://github.com/ClickHouse/ClickHouse/issues/38499). [#45427](https://github.com/ClickHouse/ClickHouse/pull/45427) ([attack204](https://github.com/attack204)), Pavel Kruglov. -* The query cache can now be used for production workloads. [#47977](https://github.com/ClickHouse/ClickHouse/pull/47977) ([Robert Schulze](https://github.com/rschu1ze)). The query cache can now support queries with totals and extremes modifier. [#48853](https://github.com/ClickHouse/ClickHouse/pull/48853) ([Robert Schulze](https://github.com/rschu1ze)). Make `allow_experimental_query_cache` setting as obsolete for backward-compatibility. It was removed in https://github.com/ClickHouse/ClickHouse/pull/47977. [#49934](https://github.com/ClickHouse/ClickHouse/pull/49934) ([Timur Solodovnikov](https://github.com/tsolodov)). -* Geographical data types (`Point`, `Ring`, `Polygon`, and `MultiPolygon`) are production-ready. [#50022](https://github.com/ClickHouse/ClickHouse/pull/50022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add schema inference to PostgreSQL, MySQL, MeiliSearch, and SQLite table engines. Closes [#49972](https://github.com/ClickHouse/ClickHouse/issues/49972). [#50000](https://github.com/ClickHouse/ClickHouse/pull/50000) ([Nikolay Degterinsky](https://github.com/evillique)). -* Password type in queries like `CREATE USER u IDENTIFIED BY 'p'` will be automatically set according to the setting `default_password_type` in the `config.xml` on the server. Closes [#42915](https://github.com/ClickHouse/ClickHouse/issues/42915). [#44674](https://github.com/ClickHouse/ClickHouse/pull/44674) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add bcrypt password authentication type. Closes [#34599](https://github.com/ClickHouse/ClickHouse/issues/34599). [#44905](https://github.com/ClickHouse/ClickHouse/pull/44905) ([Nikolay Degterinsky](https://github.com/evillique)). -* Introduces new keyword `INTO OUTFILE 'file.txt' APPEND`. [#48880](https://github.com/ClickHouse/ClickHouse/pull/48880) ([alekar](https://github.com/alekar)). -* Added `system.zookeeper_connection` table that shows information about Keeper connections. [#45245](https://github.com/ClickHouse/ClickHouse/pull/45245) ([mateng915](https://github.com/mateng0915)). -* Add new function `generateRandomStructure` that generates random table structure. It can be used in combination with table function `generateRandom`. [#47409](https://github.com/ClickHouse/ClickHouse/pull/47409) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow the use of `CASE` without an `ELSE` branch and extended `transform` to deal with more types. Also fix some issues that made transform() return incorrect results when decimal types were mixed with other numeric types. [#48300](https://github.com/ClickHouse/ClickHouse/pull/48300) ([Salvatore Mesoraca](https://github.com/aiven-sal)). This closes #2655. This closes #9596. 
This closes #38666. -* Added [server-side encryption using KMS keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) with S3 tables, and the `header` setting with S3 disks. Closes [#48723](https://github.com/ClickHouse/ClickHouse/issues/48723). [#48724](https://github.com/ClickHouse/ClickHouse/pull/48724) ([Johann Gan](https://github.com/johanngan)). -* Add MemoryTracker for the background tasks (merges and mutation). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached ClickHouse won't schedule new merge or mutation tasks. Also `MergesMutationsMemoryTracking` metric is introduced to allow observing current memory usage of background tasks. Resubmit [#46089](https://github.com/ClickHouse/ClickHouse/issues/46089). Closes [#48774](https://github.com/ClickHouse/ClickHouse/issues/48774). [#48787](https://github.com/ClickHouse/ClickHouse/pull/48787) ([Dmitry Novik](https://github.com/novikd)). -* Function `dotProduct` work for array. [#49050](https://github.com/ClickHouse/ClickHouse/pull/49050) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Support statement `SHOW INDEX` to improve compatibility with MySQL. [#49158](https://github.com/ClickHouse/ClickHouse/pull/49158) ([Robert Schulze](https://github.com/rschu1ze)). -* Add virtual column `_file` and `_path` support to table function `url`. - Improve error message for table function `url`. - resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)). -* Adding the `grants` field in the users.xml file, which allows specifying grants for users. [#49381](https://github.com/ClickHouse/ClickHouse/pull/49381) ([pufit](https://github.com/pufit)). -* Support full/right join by using grace hash join algorithm. [#49483](https://github.com/ClickHouse/ClickHouse/pull/49483) ([lgbo](https://github.com/lgbo-ustc)). -* `WITH FILL` modifier groups filling by sorting prefix. Controlled by `use_with_fill_by_sorting_prefix` setting (enabled by default). Related to [#33203](https://github.com/ClickHouse/ClickHouse/issues/33203)#issuecomment-1418736794. [#49503](https://github.com/ClickHouse/ClickHouse/pull/49503) ([Igor Nikonov](https://github.com/devcrafter)). -* Clickhouse-client now accepts queries after "--multiquery" when "--query" (or "-q") is absent. example: clickhouse-client --multiquery "select 1; select 2;". [#49870](https://github.com/ClickHouse/ClickHouse/pull/49870) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Add separate `handshake_timeout` for receiving Hello packet from replica. Closes [#48854](https://github.com/ClickHouse/ClickHouse/issues/48854). [#49948](https://github.com/ClickHouse/ClickHouse/pull/49948) ([Kruglov Pavel](https://github.com/Avogar)). -* Added a function "space" which repeats a space as many times as specified. [#50103](https://github.com/ClickHouse/ClickHouse/pull/50103) ([Robert Schulze](https://github.com/rschu1ze)). -* Added --input_format_csv_trim_whitespaces option. [#50215](https://github.com/ClickHouse/ClickHouse/pull/50215) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Allow the `dictGetAll` function for regexp tree dictionaries to return values from multiple matches as arrays. 
Closes [#50254](https://github.com/ClickHouse/ClickHouse/issues/50254). [#50255](https://github.com/ClickHouse/ClickHouse/pull/50255) ([Johann Gan](https://github.com/johanngan)). -* Added `toLastDayOfWeek` function to round a date or a date with time up to the nearest Saturday or Sunday. [#50315](https://github.com/ClickHouse/ClickHouse/pull/50315) ([Victor Krasnov](https://github.com/sirvickr)). -* Ability to ignore a skip index by specifying `ignore_data_skipping_indices`. [#50329](https://github.com/ClickHouse/ClickHouse/pull/50329) ([Boris Kuschel](https://github.com/bkuschel)). -* Add `system.user_processes` table and `SHOW USER PROCESSES` query to show memory info and ProfileEvents on user level. [#50492](https://github.com/ClickHouse/ClickHouse/pull/50492) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Add server and format settings `display_secrets_in_show_and_select` for displaying secrets of tables, databases, table functions, and dictionaries. Add privilege `displaySecretsInShowAndSelect` controlling which users can view secrets. [#46528](https://github.com/ClickHouse/ClickHouse/pull/46528) ([Mike Kot](https://github.com/myrrc)). -* Allow to set up a ROW POLICY for all tables that belong to a DATABASE. [#47640](https://github.com/ClickHouse/ClickHouse/pull/47640) ([Ilya Golshtein](https://github.com/ilejn)). - -#### Performance Improvement -* Compress marks and primary key by default. It significantly reduces the cold query time. Upgrade notes: the support for compressed marks and primary key has been added in version 22.9. If you turned on compressed marks or primary key or installed version 23.5 or newer, which has compressed marks or primary key on by default, you will not be able to downgrade to version 22.8 or earlier. You can also explicitly disable compressed marks or primary keys by specifying the `compress_marks` and `compress_primary_key` settings in the `` section of the server configuration file. [#42587](https://github.com/ClickHouse/ClickHouse/pull/42587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* New setting s3_max_inflight_parts_for_one_file sets the limit of concurrently loaded parts with multipart upload request in scope of one file. [#49961](https://github.com/ClickHouse/ClickHouse/pull/49961) ([Sema Checherinda](https://github.com/CheSema)). -* When reading from multiple files reduce parallel parsing threads for each file. Resolves [#42192](https://github.com/ClickHouse/ClickHouse/issues/42192). [#46661](https://github.com/ClickHouse/ClickHouse/pull/46661) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Use aggregate projection only if it reads fewer granules than normal reading. It should help in case if query hits the PK of the table, but not the projection. Fixes [#49150](https://github.com/ClickHouse/ClickHouse/issues/49150). [#49417](https://github.com/ClickHouse/ClickHouse/pull/49417) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)). -* Fixes aggregate combinator `-If` when JIT compiled, and enable JIT compilation for aggregate functions. Closes [#48120](https://github.com/ClickHouse/ClickHouse/issues/48120). [#49083](https://github.com/ClickHouse/ClickHouse/pull/49083) ([Igor Nikonov](https://github.com/devcrafter)). 
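Two of the new functions listed above are easiest to grasp from a one-line example each; the dates and counts here are hypothetical:

```sql
-- toLastDayOfWeek rounds a date up to the nearest Saturday or Sunday (see #50315).
SELECT toLastDayOfWeek(toDate('2023-05-10')) AS last_day;

-- space(n) repeats a space n times (see #50103).
SELECT concat('[', space(3), ']') AS padded;  -- '[   ]'
```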
-* For reading from remote tables we use smaller tasks (instead of reading the whole part) to make tasks stealing work * task size is determined by size of columns to read * always use 1mb buffers for reading from s3 * boundaries of cache segments aligned to 1mb so they have decent size even with small tasks. it also should prevent fragmentation. [#49287](https://github.com/ClickHouse/ClickHouse/pull/49287) ([Nikita Taranov](https://github.com/nickitat)). -* Introduced settings: - `merge_max_block_size_bytes` to limit the amount of memory used for background operations. - `vertical_merge_algorithm_min_bytes_to_activate` to add another condition to activate vertical merges. [#49313](https://github.com/ClickHouse/ClickHouse/pull/49313) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Default size of a read buffer for reading from local filesystem changed to a slightly better value. Also two new settings are introduced: `max_read_buffer_size_local_fs` and `max_read_buffer_size_remote_fs`. [#49321](https://github.com/ClickHouse/ClickHouse/pull/49321) ([Nikita Taranov](https://github.com/nickitat)). -* Improve memory usage and speed of `SPARSE_HASHED`/`HASHED` dictionaries (e.g. `SPARSE_HASHED` now eats 2.6x less memory, and is ~2x faster). [#49380](https://github.com/ClickHouse/ClickHouse/pull/49380) ([Azat Khuzhin](https://github.com/azat)). -* Optimize the `system.query_log` and `system.query_thread_log` tables by applying `LowCardinality` when appropriate. The queries over these tables will be faster. [#49530](https://github.com/ClickHouse/ClickHouse/pull/49530) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Better performance when reading local `Parquet` files (through parallel reading). [#49539](https://github.com/ClickHouse/ClickHouse/pull/49539) ([Michael Kolupaev](https://github.com/al13n321)). -* Improve the performance of `RIGHT/FULL JOIN` by up to 2 times in certain scenarios, especially when joining a small left table with a large right table. [#49585](https://github.com/ClickHouse/ClickHouse/pull/49585) ([lgbo](https://github.com/lgbo-ustc)). -* Improve performance of BLAKE3 by 11% by enabling LTO for Rust. [#49600](https://github.com/ClickHouse/ClickHouse/pull/49600) ([Azat Khuzhin](https://github.com/azat)). Now it is on par with C++. -* Optimize the structure of the `system.opentelemetry_span_log`. Use `LowCardinality` where appropriate. Although this table is generally stupid (it is using the Map data type even for common attributes), it will be slightly better. [#49647](https://github.com/ClickHouse/ClickHouse/pull/49647) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Try to reserve hash table's size in `grace_hash` join. [#49816](https://github.com/ClickHouse/ClickHouse/pull/49816) ([lgbo](https://github.com/lgbo-ustc)). -* Parallel merge of `uniqExactIf` states. Closes [#49885](https://github.com/ClickHouse/ClickHouse/issues/49885). [#50285](https://github.com/ClickHouse/ClickHouse/pull/50285) ([flynn](https://github.com/ucasfl)). -* Keeper improvement: add `CheckNotExists` request to Keeper, which allows to improve the performance of Replicated tables. [#48897](https://github.com/ClickHouse/ClickHouse/pull/48897) ([Antonio Andelic](https://github.com/antonio2368)). -* Keeper performance improvements: avoid serializing same request twice while processing. Cache deserialization results of large requests. Controlled by new coordination setting `min_request_size_for_cache`. 
[#49004](https://github.com/ClickHouse/ClickHouse/pull/49004) ([Antonio Andelic](https://github.com/antonio2368)). -* Reduced number of `List` ZooKeeper requests when selecting parts to merge and a lot of partitions do not have anything to merge. [#49637](https://github.com/ClickHouse/ClickHouse/pull/49637) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Rework locking in the FS cache [#44985](https://github.com/ClickHouse/ClickHouse/pull/44985) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable pure parallel replicas if trivial count optimization is possible. [#50594](https://github.com/ClickHouse/ClickHouse/pull/50594) ([Raúl Marín](https://github.com/Algunenano)). -* Don't send head request for all keys in Iceberg schema inference, only for keys that are used for reaing data. [#50203](https://github.com/ClickHouse/ClickHouse/pull/50203) ([Kruglov Pavel](https://github.com/Avogar)). -* Setting `enable_memory_bound_merging_of_aggregation_results` is enabled by default. [#50319](https://github.com/ClickHouse/ClickHouse/pull/50319) ([Nikita Taranov](https://github.com/nickitat)). - -#### Experimental Feature -* `DEFLATE_QPL` codec lower the minimum simd version to SSE 4.2. [doc change in qpl](https://github.com/intel/qpl/commit/3f8f5cea27739f5261e8fd577dc233ffe88bf679) - Intel® QPL relies on a run-time kernels dispatcher and cpuid check to choose the best available implementation(sse/avx2/avx512) - restructured cmakefile for qpl build in clickhouse to align with latest upstream qpl. [#49811](https://github.com/ClickHouse/ClickHouse/pull/49811) ([jasperzhu](https://github.com/jinjunzh)). -* Add initial support to do JOINs with pure parallel replicas. [#49544](https://github.com/ClickHouse/ClickHouse/pull/49544) ([Raúl Marín](https://github.com/Algunenano)). -* More parallelism on `Outdated` parts removal with "zero-copy replication". [#49630](https://github.com/ClickHouse/ClickHouse/pull/49630) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` have 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an exception in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* When parallel replicas are enabled they will always skip unavailable servers (the behavior is controlled by the setting `skip_unavailable_shards`, enabled by default and can be only disabled). This closes: [#48565](https://github.com/ClickHouse/ClickHouse/issues/48565). [#50293](https://github.com/ClickHouse/ClickHouse/pull/50293) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Improvement -* The `BACKUP` command will not decrypt data from encrypted disks while making a backup. Instead the data will be stored in a backup in encrypted form. Such backups can be restored only to an encrypted disk with the same (or extended) list of encryption keys. 
[#48896](https://github.com/ClickHouse/ClickHouse/pull/48896) ([Vitaly Baranov](https://github.com/vitlibar)). -* Added possibility to use temporary tables in FROM part of ATTACH PARTITION FROM and REPLACE PARTITION FROM. [#49436](https://github.com/ClickHouse/ClickHouse/pull/49436) ([Roman Vasin](https://github.com/rvasin)). -* Added setting `async_insert` for `MergeTree` tables. It has the same meaning as query-level setting `async_insert` and enables asynchronous inserts for specific table. Note: it doesn't take effect for insert queries from `clickhouse-client`, use query-level setting in that case. [#49122](https://github.com/ClickHouse/ClickHouse/pull/49122) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for size suffixes in quota creation statement parameters. [#49087](https://github.com/ClickHouse/ClickHouse/pull/49087) ([Eridanus](https://github.com/Eridanus117)). -* Extend `first_value` and `last_value` to accept NULL. [#46467](https://github.com/ClickHouse/ClickHouse/pull/46467) ([lgbo](https://github.com/lgbo-ustc)). -* Add alias `str_to_map` and `mapFromString` for `extractKeyValuePairs`. closes https://github.com/clickhouse/clickhouse/issues/47185. [#49466](https://github.com/ClickHouse/ClickHouse/pull/49466) ([flynn](https://github.com/ucasfl)). -* Add support for CGroup version 2 for asynchronous metrics about the memory usage and availability. This closes [#37983](https://github.com/ClickHouse/ClickHouse/issues/37983). [#45999](https://github.com/ClickHouse/ClickHouse/pull/45999) ([sichenzhao](https://github.com/sichenzhao)). -* Cluster table functions should always skip unavailable shards. close [#46314](https://github.com/ClickHouse/ClickHouse/issues/46314). [#46765](https://github.com/ClickHouse/ClickHouse/pull/46765) ([zk_kiger](https://github.com/zk-kiger)). -* Allow CSV file to contain empty columns in its header. [#47496](https://github.com/ClickHouse/ClickHouse/pull/47496) ([你不要过来啊](https://github.com/iiiuwioajdks)). -* Add Google Cloud Storage S3 compatible table function `gcs`. Like the `oss` and `cosn` functions, it is just an alias over the `s3` table function, and it does not bring any new features. [#47815](https://github.com/ClickHouse/ClickHouse/pull/47815) ([Kuba Kaflik](https://github.com/jkaflik)). -* Add ability to use strict parts size for S3 (compatibility with CloudFlare R2 S3 Storage). [#48492](https://github.com/ClickHouse/ClickHouse/pull/48492) ([Azat Khuzhin](https://github.com/azat)). -* Added new columns with info about `Replicated` database replicas to `system.clusters`: `database_shard_name`, `database_replica_name`, `is_active`. Added an optional `FROM SHARD` clause to `SYSTEM DROP DATABASE REPLICA` query. [#48548](https://github.com/ClickHouse/ClickHouse/pull/48548) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add a new column `zookeeper_name` in system.replicas, to indicate on which (auxiliary) zookeeper cluster the replicated table's metadata is stored. [#48549](https://github.com/ClickHouse/ClickHouse/pull/48549) ([cangyin](https://github.com/cangyin)). -* `IN` operator support the comparison of `Date` and `Date32`. Closes [#48736](https://github.com/ClickHouse/ClickHouse/issues/48736). [#48806](https://github.com/ClickHouse/ClickHouse/pull/48806) ([flynn](https://github.com/ucasfl)). -* Support for erasure codes in `HDFS`, author: @M1eyu2018, @tomscut. [#48833](https://github.com/ClickHouse/ClickHouse/pull/48833) ([M1eyu](https://github.com/M1eyu2018)). 
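A minimal sketch of the table-level `async_insert` setting mentioned above in [#49122](https://github.com/ClickHouse/ClickHouse/pull/49122); the table name and columns are made up and, as the entry notes, the setting does not apply to inserts sent from `clickhouse-client`:

```sql
CREATE TABLE t_async_demo
(
    id UInt64,
    payload String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS async_insert = 1;  -- inserts into this table are collected asynchronously
```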
-* Implement SYSTEM DROP REPLICA from auxiliary ZooKeeper clusters, may be close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)). -* Add Array data type to MongoDB. Closes [#48598](https://github.com/ClickHouse/ClickHouse/issues/48598). [#48983](https://github.com/ClickHouse/ClickHouse/pull/48983) ([Nikolay Degterinsky](https://github.com/evillique)). -* Support storing `Interval` data types in tables. [#49085](https://github.com/ClickHouse/ClickHouse/pull/49085) ([larryluogit](https://github.com/larryluogit)). -* Allow using `ntile` window function without explicit window frame definition: `ntile(3) OVER (ORDER BY a)`, close [#46763](https://github.com/ClickHouse/ClickHouse/issues/46763). [#49093](https://github.com/ClickHouse/ClickHouse/pull/49093) ([vdimir](https://github.com/vdimir)). -* Added settings (`number_of_mutations_to_delay`, `number_of_mutations_to_throw`) to delay or throw `ALTER` queries that create mutations (`ALTER UPDATE`, `ALTER DELETE`, `ALTER MODIFY COLUMN`, ...) in case when table already has a lot of unfinished mutations. [#49117](https://github.com/ClickHouse/ClickHouse/pull/49117) ([Anton Popov](https://github.com/CurtizJ)). -* Catch exception from `create_directories` in filesystem cache. [#49203](https://github.com/ClickHouse/ClickHouse/pull/49203) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Copies embedded examples to a new field `example` in `system.functions` to supplement the field `description`. [#49222](https://github.com/ClickHouse/ClickHouse/pull/49222) ([Dan Roscigno](https://github.com/DanRoscigno)). -* Enable connection options for the MongoDB dictionary. Example: ``` xml localhost 27017 test dictionary_source ssl=true ``` ### Documentation entry for user-facing changes. [#49225](https://github.com/ClickHouse/ClickHouse/pull/49225) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Added an alias `asymptotic` for `asymp` computational method for `kolmogorovSmirnovTest`. Improved documentation. [#49286](https://github.com/ClickHouse/ClickHouse/pull/49286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Aggregation function groupBitAnd/Or/Xor now work on signed integer data. This makes them consistent with the behavior of scalar functions bitAnd/Or/Xor. [#49292](https://github.com/ClickHouse/ClickHouse/pull/49292) ([exmy](https://github.com/exmy)). -* Split function-documentation into more fine-granular fields. [#49300](https://github.com/ClickHouse/ClickHouse/pull/49300) ([Robert Schulze](https://github.com/rschu1ze)). -* Use multiple threads shared between all tables within a server to load outdated data parts. The the size of the pool and its queue is controlled by `max_outdated_parts_loading_thread_pool_size` and `outdated_part_loading_thread_pool_queue_size` settings. [#49317](https://github.com/ClickHouse/ClickHouse/pull/49317) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Don't overestimate the size of processed data for `LowCardinality` columns when they share dictionaries between blocks. This closes [#49322](https://github.com/ClickHouse/ClickHouse/issues/49322). See also [#48745](https://github.com/ClickHouse/ClickHouse/issues/48745). [#49323](https://github.com/ClickHouse/ClickHouse/pull/49323) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Parquet writer now uses reasonable row group size when invoked through `OUTFILE`. 
[#49325](https://github.com/ClickHouse/ClickHouse/pull/49325) ([Michael Kolupaev](https://github.com/al13n321)). -* Allow restricted keywords like `ARRAY` as an alias if the alias is quoted. Closes [#49324](https://github.com/ClickHouse/ClickHouse/issues/49324). [#49360](https://github.com/ClickHouse/ClickHouse/pull/49360) ([Nikolay Degterinsky](https://github.com/evillique)). -* Data parts loading and deletion jobs were moved to shared server-wide pools instead of per-table pools. Pools sizes are controlled via settings `max_active_parts_loading_thread_pool_size`, `max_outdated_parts_loading_thread_pool_size` and `max_parts_cleaning_thread_pool_size` in top-level config. Table-level settings `max_part_loading_threads` and `max_part_removal_threads` became obsolete. [#49474](https://github.com/ClickHouse/ClickHouse/pull/49474) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow `?password=pass` in URL of the Play UI. Password is replaced in browser history. [#49505](https://github.com/ClickHouse/ClickHouse/pull/49505) ([Mike Kot](https://github.com/myrrc)). -* Allow reading zero-size objects from remote filesystems. (because empty files are not backup'd, so we might end up with zero blobs in metadata file). Closes [#49480](https://github.com/ClickHouse/ClickHouse/issues/49480). [#49519](https://github.com/ClickHouse/ClickHouse/pull/49519) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Attach thread MemoryTracker to `total_memory_tracker` after `ThreadGroup` detached. [#49527](https://github.com/ClickHouse/ClickHouse/pull/49527) ([Dmitry Novik](https://github.com/novikd)). -* Fix parameterized views when a query parameter is used multiple times in the query. [#49556](https://github.com/ClickHouse/ClickHouse/pull/49556) ([Azat Khuzhin](https://github.com/azat)). -* Release memory allocated for the last sent ProfileEvents snapshot in the context of a query. Followup [#47564](https://github.com/ClickHouse/ClickHouse/issues/47564). [#49561](https://github.com/ClickHouse/ClickHouse/pull/49561) ([Dmitry Novik](https://github.com/novikd)). -* Function "makeDate" now provides a MySQL-compatible overload (year & day of the year argument). [#49603](https://github.com/ClickHouse/ClickHouse/pull/49603) ([Robert Schulze](https://github.com/rschu1ze)). -* Support `dictionary` table function for `RegExpTreeDictionary`. [#49666](https://github.com/ClickHouse/ClickHouse/pull/49666) ([Han Fei](https://github.com/hanfei1991)). -* Added weighted fair IO scheduling policy. Added dynamic resource manager, which allows IO scheduling hierarchy to be updated in runtime w/o server restarts. [#49671](https://github.com/ClickHouse/ClickHouse/pull/49671) ([Sergei Trifonov](https://github.com/serxa)). -* Add compose request after multipart upload to GCS. This enables the usage of copy operation on objects uploaded with the multipart upload. It's recommended to set `s3_strict_upload_part_size` to some value because compose request can fail on objects created with parts of different sizes. [#49693](https://github.com/ClickHouse/ClickHouse/pull/49693) ([Antonio Andelic](https://github.com/antonio2368)). -* For the `extractKeyValuePairs` function: improve the "best-effort" parsing logic to accept `key_value_delimiter` as a valid part of the value. This also simplifies branching and might even speed up things a bit. [#49760](https://github.com/ClickHouse/ClickHouse/pull/49760) ([Arthur Passos](https://github.com/arthurpassos)). 
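For the `ntile` improvement in [#49093](https://github.com/ClickHouse/ClickHouse/pull/49093) above, the window function can now be written without an explicit frame definition; the table `t` and column `a` are hypothetical:

```sql
-- Previously an explicit frame (ROWS BETWEEN ...) was required; now a default frame is used.
SELECT a, ntile(3) OVER (ORDER BY a) AS bucket
FROM t
ORDER BY a;
```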
-* Add `initial_query_id` field for system.processors_profile_log [#49777](https://github.com/ClickHouse/ClickHouse/pull/49777) ([helifu](https://github.com/helifu)). -* System log tables can now have custom sorting keys. [#49778](https://github.com/ClickHouse/ClickHouse/pull/49778) ([helifu](https://github.com/helifu)). -* A new field `partitions` to `system.query_log` is used to indicate which partitions are participating in the calculation. [#49779](https://github.com/ClickHouse/ClickHouse/pull/49779) ([helifu](https://github.com/helifu)). -* Added `enable_the_endpoint_id_with_zookeeper_name_prefix` setting for `ReplicatedMergeTree` (disabled by default). When enabled, it adds ZooKeeper cluster name to table's interserver communication endpoint. It avoids `Duplicate interserver IO endpoint` errors when having replicated tables with the same path, but different auxiliary ZooKeepers. [#49780](https://github.com/ClickHouse/ClickHouse/pull/49780) ([helifu](https://github.com/helifu)). -* Add query parameters to `clickhouse-local`. Closes [#46561](https://github.com/ClickHouse/ClickHouse/issues/46561). [#49785](https://github.com/ClickHouse/ClickHouse/pull/49785) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow loading dictionaries and functions from YAML by default. In previous versions, it required editing the `dictionaries_config` or `user_defined_executable_functions_config` in the configuration file, as they expected `*.xml` files. [#49812](https://github.com/ClickHouse/ClickHouse/pull/49812) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The Kafka table engine now allows to use alias columns. [#49824](https://github.com/ClickHouse/ClickHouse/pull/49824) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Add setting to limit the max number of pairs produced by `extractKeyValuePairs`, a safeguard to avoid using way too much memory. [#49836](https://github.com/ClickHouse/ClickHouse/pull/49836) ([Arthur Passos](https://github.com/arthurpassos)). -* Add support for (an unusual) case where the arguments in the `IN` operator are single-element tuples. [#49844](https://github.com/ClickHouse/ClickHouse/pull/49844) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* `bitHammingDistance` function support `String` and `FixedString` data type. Closes [#48827](https://github.com/ClickHouse/ClickHouse/issues/48827). [#49858](https://github.com/ClickHouse/ClickHouse/pull/49858) ([flynn](https://github.com/ucasfl)). -* Fix timeout resetting errors in the client on OS X. [#49863](https://github.com/ClickHouse/ClickHouse/pull/49863) ([alekar](https://github.com/alekar)). -* Add support for big integers, such as UInt128, Int128, UInt256, and Int256 in the function `bitCount`. This enables Hamming distance over large bit masks for AI applications. [#49867](https://github.com/ClickHouse/ClickHouse/pull/49867) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fingerprints to be used instead of key IDs in encrypted disks. This simplifies the configuration of encrypted disks. [#49882](https://github.com/ClickHouse/ClickHouse/pull/49882) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add UUID data type to PostgreSQL. Closes [#49739](https://github.com/ClickHouse/ClickHouse/issues/49739). [#49894](https://github.com/ClickHouse/ClickHouse/pull/49894) ([Nikolay Degterinsky](https://github.com/evillique)). -* Function `toUnixTimestamp` now accepts `Date` and `Date32` arguments. 
[#49989](https://github.com/ClickHouse/ClickHouse/pull/49989) ([Victor Krasnov](https://github.com/sirvickr)). -* Charge only server memory for dictionaries. [#49995](https://github.com/ClickHouse/ClickHouse/pull/49995) ([Azat Khuzhin](https://github.com/azat)). -* The server will allow using the `SQL_*` settings such as `SQL_AUTO_IS_NULL` as no-ops for MySQL compatibility. This closes [#49927](https://github.com/ClickHouse/ClickHouse/issues/49927). [#50013](https://github.com/ClickHouse/ClickHouse/pull/50013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Preserve initial_query_id for ON CLUSTER queries, which is useful for introspection (under `distributed_ddl_entry_format_version=5`). [#50015](https://github.com/ClickHouse/ClickHouse/pull/50015) ([Azat Khuzhin](https://github.com/azat)). -* Preserve backward incompatibility for renamed settings by using aliases (`allow_experimental_projection_optimization` for `optimize_use_projections`, `allow_experimental_lightweight_delete` for `enable_lightweight_delete`). [#50044](https://github.com/ClickHouse/ClickHouse/pull/50044) ([Azat Khuzhin](https://github.com/azat)). -* Support passing FQDN through setting my_hostname to register cluster node in keeper. Add setting of invisible to support multi compute groups. A compute group as a cluster, is invisible to other compute groups. [#50186](https://github.com/ClickHouse/ClickHouse/pull/50186) ([Yangkuan Liu](https://github.com/LiuYangkuan)). -* Fix PostgreSQL reading all the data even though `LIMIT n` could be specified. [#50187](https://github.com/ClickHouse/ClickHouse/pull/50187) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add new profile events for queries with subqueries (`QueriesWithSubqueries`/`SelectQueriesWithSubqueries`/`InsertQueriesWithSubqueries`). [#50204](https://github.com/ClickHouse/ClickHouse/pull/50204) ([Azat Khuzhin](https://github.com/azat)). -* Adding the roles field in the users.xml file, which allows specifying roles with grants via a config file. [#50278](https://github.com/ClickHouse/ClickHouse/pull/50278) ([pufit](https://github.com/pufit)). -* Report `CGroupCpuCfsPeriod` and `CGroupCpuCfsQuota` in AsynchronousMetrics. - Respect cgroup v2 memory limits during server startup. [#50379](https://github.com/ClickHouse/ClickHouse/pull/50379) ([alekar](https://github.com/alekar)). -* Add a signal handler for SIGQUIT to work the same way as SIGINT. Closes [#50298](https://github.com/ClickHouse/ClickHouse/issues/50298). [#50435](https://github.com/ClickHouse/ClickHouse/pull/50435) ([Nikolay Degterinsky](https://github.com/evillique)). -* In case JSON parse fails due to the large size of the object output the last position to allow debugging. [#50474](https://github.com/ClickHouse/ClickHouse/pull/50474) ([Valentin Alexeev](https://github.com/valentinalexeev)). -* Support decimals with not fixed size. Closes [#49130](https://github.com/ClickHouse/ClickHouse/issues/49130). [#50586](https://github.com/ClickHouse/ClickHouse/pull/50586) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Build/Testing/Packaging Improvement -* New and improved `keeper-bench`. 
Everything can be customized from YAML/XML file: - request generator - each type of request generator can have a specific set of fields - multi requests can be generated just by doing the same under `multi` key - for each request or subrequest in multi a `weight` field can be defined to control distribution - define trees that need to be setup for a test run - hosts can be defined with all timeouts customizable and it's possible to control how many sessions to generate for each host - integers defined with `min_value` and `max_value` fields are random number generators. [#48547](https://github.com/ClickHouse/ClickHouse/pull/48547) ([Antonio Andelic](https://github.com/antonio2368)). -* Io_uring is not supported on macos, don't choose it when running tests on local to avoid occasional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)). -* Support named fault injection for testing. [#49361](https://github.com/ClickHouse/ClickHouse/pull/49361) ([Han Fei](https://github.com/hanfei1991)). -* Allow running ClickHouse in the OS where the `prctl` (process control) syscall is not available, such as AWS Lambda. [#49538](https://github.com/ClickHouse/ClickHouse/pull/49538) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixed the issue of build conflict between contrib/isa-l and isa-l in qpl [49296](https://github.com/ClickHouse/ClickHouse/issues/49296). [#49584](https://github.com/ClickHouse/ClickHouse/pull/49584) ([jasperzhu](https://github.com/jinjunzh)). -* Utilities are now only build if explicitly requested ("-DENABLE_UTILS=1") instead of by default, this reduces link times in typical development builds. [#49620](https://github.com/ClickHouse/ClickHouse/pull/49620) ([Robert Schulze](https://github.com/rschu1ze)). -* Pull build description of idxd-config into a separate CMake file to avoid accidental removal in future. [#49651](https://github.com/ClickHouse/ClickHouse/pull/49651) ([jasperzhu](https://github.com/jinjunzh)). -* Add CI check with an enabled analyzer in the master. Follow-up [#49562](https://github.com/ClickHouse/ClickHouse/issues/49562). [#49668](https://github.com/ClickHouse/ClickHouse/pull/49668) ([Dmitry Novik](https://github.com/novikd)). -* Switch to LLVM/clang 16. [#49678](https://github.com/ClickHouse/ClickHouse/pull/49678) ([Azat Khuzhin](https://github.com/azat)). -* Allow building ClickHouse with clang-17. [#49851](https://github.com/ClickHouse/ClickHouse/pull/49851) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#50410](https://github.com/ClickHouse/ClickHouse/pull/50410) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* ClickHouse is now easier to be integrated into other cmake projects. [#49991](https://github.com/ClickHouse/ClickHouse/pull/49991) ([Amos Bird](https://github.com/amosbird)). (Which is strongly discouraged - Alexey Milovidov). -* Fix strange additional QEMU logging after [#47151](https://github.com/ClickHouse/ClickHouse/issues/47151), see https://s3.amazonaws.com/clickhouse-test-reports/50078/a4743996ee4f3583884d07bcd6501df0cfdaa346/stateless_tests__release__databasereplicated__[3_4].html. [#50442](https://github.com/ClickHouse/ClickHouse/pull/50442) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* ClickHouse can work on Linux RISC-V 6.1.22. This closes [#50456](https://github.com/ClickHouse/ClickHouse/issues/50456). [#50457](https://github.com/ClickHouse/ClickHouse/pull/50457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
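Returning to the `toUnixTimestamp` change in [#49989](https://github.com/ClickHouse/ClickHouse/pull/49989) a few entries above, a short sketch (the date is arbitrary):

```sql
-- Date and Date32 arguments are now accepted in addition to DateTime and String.
SELECT
    toUnixTimestamp(toDate('2023-06-08'))   AS ts_from_date,
    toUnixTimestamp(toDate32('2023-06-08')) AS ts_from_date32;
```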
-* Bump internal protobuf to v3.18 (fixes bogus CVE-2022-1941). [#50400](https://github.com/ClickHouse/ClickHouse/pull/50400) ([Robert Schulze](https://github.com/rschu1ze)). -* Bump internal libxml2 to v2.10.4 (fixes bogus CVE-2023-28484 and bogus CVE-2023-29469). [#50402](https://github.com/ClickHouse/ClickHouse/pull/50402) ([Robert Schulze](https://github.com/rschu1ze)). -* Bump c-ares to v1.19.1 (bogus CVE-2023-32067, bogus CVE-2023-31130, bogus CVE-2023-31147). [#50403](https://github.com/ClickHouse/ClickHouse/pull/50403) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix bogus CVE-2022-2469 in libgsasl. [#50404](https://github.com/ClickHouse/ClickHouse/pull/50404) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Bug Fix (user-visible misbehavior in an official stable release) - -* ActionsDAG: fix wrong optimization [#47584](https://github.com/ClickHouse/ClickHouse/pull/47584) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Correctly handle concurrent snapshots in Keeper [#48466](https://github.com/ClickHouse/ClickHouse/pull/48466) ([Antonio Andelic](https://github.com/antonio2368)). -* MergeTreeMarksLoader holds DataPart instead of DataPartStorage [#48515](https://github.com/ClickHouse/ClickHouse/pull/48515) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Sequence state fix [#48603](https://github.com/ClickHouse/ClickHouse/pull/48603) ([Ilya Golshtein](https://github.com/ilejn)). -* Back/Restore concurrency check on previous fails [#48726](https://github.com/ClickHouse/ClickHouse/pull/48726) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix Attaching a table with non-existent ZK path does not increase the ReadonlyReplica metric [#48954](https://github.com/ClickHouse/ClickHouse/pull/48954) ([wangxiaobo](https://github.com/wzb5212)). -* Fix possible terminate called for uncaught exception in some places [#49112](https://github.com/ClickHouse/ClickHouse/pull/49112) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix key not found error for queries with multiple StorageJoin [#49137](https://github.com/ClickHouse/ClickHouse/pull/49137) ([vdimir](https://github.com/vdimir)). -* Fix wrong query result when using nullable primary key [#49172](https://github.com/ClickHouse/ClickHouse/pull/49172) ([Duc Canh Le](https://github.com/canhld94)). -* Fix reinterpretAs*() on big endian machines [#49198](https://github.com/ClickHouse/ClickHouse/pull/49198) ([Suzy Wang](https://github.com/SuzyWangIBMer)). -* (Experimental zero-copy replication) Lock zero copy parts more atomically [#49211](https://github.com/ClickHouse/ClickHouse/pull/49211) ([alesapin](https://github.com/alesapin)). -* Fix race on Outdated parts loading [#49223](https://github.com/ClickHouse/ClickHouse/pull/49223) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix all key value is null and group use rollup return wrong answer [#49282](https://github.com/ClickHouse/ClickHouse/pull/49282) ([Shuai li](https://github.com/loneylee)). -* Fix calculating load_factor for HASHED dictionaries with SHARDS [#49319](https://github.com/ClickHouse/ClickHouse/pull/49319) ([Azat Khuzhin](https://github.com/azat)). -* Disallow configuring compression CODECs for alias columns [#49363](https://github.com/ClickHouse/ClickHouse/pull/49363) ([Timur Solodovnikov](https://github.com/tsolodov)). -* Fix bug in removal of existing part directory [#49365](https://github.com/ClickHouse/ClickHouse/pull/49365) ([alesapin](https://github.com/alesapin)). 
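For context on the nullable primary key fix ([#49172](https://github.com/ClickHouse/ClickHouse/pull/49172)) above, this is the kind of table the fix concerns; the schema is a made-up sketch that relies on the existing `allow_nullable_key` setting:

```sql
CREATE TABLE t_nullable_pk
(
    k Nullable(Int64),
    v String
)
ENGINE = MergeTree
ORDER BY k
SETTINGS allow_nullable_key = 1;

-- Queries filtering on the nullable key could previously return wrong results.
SELECT * FROM t_nullable_pk WHERE k = 1;
```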
-* Properly fix GCS when HMAC is used [#49390](https://github.com/ClickHouse/ClickHouse/pull/49390) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix fuzz bug when subquery set is not built when reading from remote() [#49425](https://github.com/ClickHouse/ClickHouse/pull/49425) ([Alexander Gololobov](https://github.com/davenger)). -* Invert `shutdown_wait_unfinished_queries` [#49427](https://github.com/ClickHouse/ClickHouse/pull/49427) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* (Experimental zero-copy replication) Fix another zero copy bug [#49473](https://github.com/ClickHouse/ClickHouse/pull/49473) ([alesapin](https://github.com/alesapin)). -* Fix postgres database setting [#49481](https://github.com/ClickHouse/ClickHouse/pull/49481) ([Mal Curtis](https://github.com/snikch)). -* Correctly handle `s3Cluster` arguments [#49490](https://github.com/ClickHouse/ClickHouse/pull/49490) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix bug in TraceCollector destructor. [#49508](https://github.com/ClickHouse/ClickHouse/pull/49508) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix AsynchronousReadIndirectBufferFromRemoteFS breaking on short seeks [#49525](https://github.com/ClickHouse/ClickHouse/pull/49525) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix dictionaries loading order [#49560](https://github.com/ClickHouse/ClickHouse/pull/49560) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Forbid the change of data type of Object('json') column [#49563](https://github.com/ClickHouse/ClickHouse/pull/49563) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix stress test (Logical error: Expected 7134 >= 11030) [#49623](https://github.com/ClickHouse/ClickHouse/pull/49623) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug in DISTINCT [#49628](https://github.com/ClickHouse/ClickHouse/pull/49628) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix: DISTINCT in order with zero values in non-sorted columns [#49636](https://github.com/ClickHouse/ClickHouse/pull/49636) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix one-off error in big integers found by UBSan with fuzzer [#49645](https://github.com/ClickHouse/ClickHouse/pull/49645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)). -* Fix assert in SpanHolder::finish() with fibers [#49673](https://github.com/ClickHouse/ClickHouse/pull/49673) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix short circuit functions and mutations with sparse arguments [#49716](https://github.com/ClickHouse/ClickHouse/pull/49716) ([Anton Popov](https://github.com/CurtizJ)). -* Fix writing appended files to incremental backups [#49725](https://github.com/ClickHouse/ClickHouse/pull/49725) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix "There is no physical column _row_exists in table" error occurring during lightweight delete mutation on a table with Object column. [#49737](https://github.com/ClickHouse/ClickHouse/pull/49737) ([Alexander Gololobov](https://github.com/davenger)). -* Fix msan issue in randomStringUTF8(uneven number) [#49750](https://github.com/ClickHouse/ClickHouse/pull/49750) ([Robert Schulze](https://github.com/rschu1ze)). 
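A sketch of the situation addressed by [#49563](https://github.com/ClickHouse/ClickHouse/pull/49563) above; the table is hypothetical and `Object('json')` requires the experimental setting shown:

```sql
SET allow_experimental_object_type = 1;

CREATE TABLE t_json (data Object('json')) ENGINE = MergeTree ORDER BY tuple();

-- Changing the data type of an Object('json') column is now forbidden.
ALTER TABLE t_json MODIFY COLUMN data String;  -- expected to fail after the fix
```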
-* Fix aggregate function kolmogorovSmirnovTest [#49768](https://github.com/ClickHouse/ClickHouse/pull/49768) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Fix settings aliases in native protocol [#49776](https://github.com/ClickHouse/ClickHouse/pull/49776) ([Azat Khuzhin](https://github.com/azat)). -* Fix `arrayMap` with array of tuples with single argument [#49789](https://github.com/ClickHouse/ClickHouse/pull/49789) ([Anton Popov](https://github.com/CurtizJ)). -* Fix per-query IO/BACKUPs throttling settings [#49797](https://github.com/ClickHouse/ClickHouse/pull/49797) ([Azat Khuzhin](https://github.com/azat)). -* Fix setting NULL in profile definition [#49831](https://github.com/ClickHouse/ClickHouse/pull/49831) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix a bug with projections and the aggregate_functions_null_for_empty setting (for query_plan_optimize_projection) [#49873](https://github.com/ClickHouse/ClickHouse/pull/49873) ([Amos Bird](https://github.com/amosbird)). -* Fix processing pending batch for Distributed async INSERT after restart [#49884](https://github.com/ClickHouse/ClickHouse/pull/49884) ([Azat Khuzhin](https://github.com/azat)). -* Fix assertion in CacheMetadata::doCleanup [#49914](https://github.com/ClickHouse/ClickHouse/pull/49914) ([Kseniia Sumarokova](https://github.com/kssenii)). -* fix `is_prefix` in OptimizeRegularExpression [#49919](https://github.com/ClickHouse/ClickHouse/pull/49919) ([Han Fei](https://github.com/hanfei1991)). -* Fix metrics `WriteBufferFromS3Bytes`, `WriteBufferFromS3Microseconds` and `WriteBufferFromS3RequestsErrors` [#49930](https://github.com/ClickHouse/ClickHouse/pull/49930) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Fix IPv6 encoding in protobuf [#49933](https://github.com/ClickHouse/ClickHouse/pull/49933) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix possible Logical error on bad Nullable parsing for text formats [#49960](https://github.com/ClickHouse/ClickHouse/pull/49960) ([Kruglov Pavel](https://github.com/Avogar)). -* Add setting output_format_parquet_compliant_nested_types to produce more compatible Parquet files [#50001](https://github.com/ClickHouse/ClickHouse/pull/50001) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix logical error in stress test "Not enough space to add ..." [#50021](https://github.com/ClickHouse/ClickHouse/pull/50021) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid deadlock when starting table in attach thread of `ReplicatedMergeTree` [#50026](https://github.com/ClickHouse/ClickHouse/pull/50026) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix assert in SpanHolder::finish() with fibers attempt 2 [#50034](https://github.com/ClickHouse/ClickHouse/pull/50034) ([Kruglov Pavel](https://github.com/Avogar)). -* Add proper escaping for DDL OpenTelemetry context serialization [#50045](https://github.com/ClickHouse/ClickHouse/pull/50045) ([Azat Khuzhin](https://github.com/azat)). -* Fix reporting broken projection parts [#50052](https://github.com/ClickHouse/ClickHouse/pull/50052) ([Amos Bird](https://github.com/amosbird)). -* JIT compilation not equals NaN fix [#50056](https://github.com/ClickHouse/ClickHouse/pull/50056) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix crashing in case of Replicated database without arguments [#50058](https://github.com/ClickHouse/ClickHouse/pull/50058) ([Azat Khuzhin](https://github.com/azat)). 
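The `arrayMap` fix ([#49789](https://github.com/ClickHouse/ClickHouse/pull/49789)) above concerns arrays of single-element tuples; a minimal, hypothetical example of the shape of query it affects:

```sql
-- Mapping over an array of one-element tuples previously misbehaved.
SELECT arrayMap(t -> t.1, [tuple(1), tuple(2), tuple(3)]) AS unpacked;  -- [1, 2, 3]
```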
-* Fix crash with `multiIf` and constant condition and nullable arguments [#50123](https://github.com/ClickHouse/ClickHouse/pull/50123) ([Anton Popov](https://github.com/CurtizJ)). -* Fix invalid index analysis for date related keys [#50153](https://github.com/ClickHouse/ClickHouse/pull/50153) ([Amos Bird](https://github.com/amosbird)). -* do not allow modify order by when there are no order by cols [#50154](https://github.com/ClickHouse/ClickHouse/pull/50154) ([Han Fei](https://github.com/hanfei1991)). -* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). -* clickhouse-client: disallow usage of `--query` and `--queries-file` at the same time [#50210](https://github.com/ClickHouse/ClickHouse/pull/50210) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Fix UB for INTO OUTFILE extensions (APPEND / AND STDOUT) and WATCH EVENTS [#50216](https://github.com/ClickHouse/ClickHouse/pull/50216) ([Azat Khuzhin](https://github.com/azat)). -* Fix skipping spaces at end of row in CustomSeparatedIgnoreSpaces format [#50224](https://github.com/ClickHouse/ClickHouse/pull/50224) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix iceberg metadata parsing [#50232](https://github.com/ClickHouse/ClickHouse/pull/50232) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix nested distributed SELECT in WITH clause [#50234](https://github.com/ClickHouse/ClickHouse/pull/50234) ([Azat Khuzhin](https://github.com/azat)). -* Fix msan issue in keyed siphash [#50245](https://github.com/ClickHouse/ClickHouse/pull/50245) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix bugs in Poco sockets in non-blocking mode, use true non-blocking sockets [#50252](https://github.com/ClickHouse/ClickHouse/pull/50252) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix checksum calculation for backup entries [#50264](https://github.com/ClickHouse/ClickHouse/pull/50264) ([Vitaly Baranov](https://github.com/vitlibar)). -* Comparison functions NaN fix [#50287](https://github.com/ClickHouse/ClickHouse/pull/50287) ([Maksim Kita](https://github.com/kitaisreal)). -* JIT aggregation nullable key fix [#50291](https://github.com/ClickHouse/ClickHouse/pull/50291) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix clickhouse-local crashing when writing empty Arrow or Parquet output [#50328](https://github.com/ClickHouse/ClickHouse/pull/50328) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)). -* Improved fetch part by holding directory lock longer [#50339](https://github.com/ClickHouse/ClickHouse/pull/50339) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix bitShift* functions with both constant arguments [#50343](https://github.com/ClickHouse/ClickHouse/pull/50343) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Keeper deadlock on exception when preprocessing requests. [#50387](https://github.com/ClickHouse/ClickHouse/pull/50387) ([frinkr](https://github.com/frinkr)). -* Fix hashing of const integer values [#50421](https://github.com/ClickHouse/ClickHouse/pull/50421) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix merge_tree_min_rows_for_seek/merge_tree_min_bytes_for_seek for data skipping indexes [#50432](https://github.com/ClickHouse/ClickHouse/pull/50432) ([Azat Khuzhin](https://github.com/azat)). 
-* Limit the number of in-flight tasks for loading outdated parts [#50450](https://github.com/ClickHouse/ClickHouse/pull/50450) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Keeper fix: apply uncommitted state after snapshot install [#50483](https://github.com/ClickHouse/ClickHouse/pull/50483) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix logical error in stress test (Not enough space to add ...) [#50583](https://github.com/ClickHouse/ClickHouse/pull/50583) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix converting Null to LowCardinality(Nullable) in values table function [#50637](https://github.com/ClickHouse/ClickHouse/pull/50637) ([Kruglov Pavel](https://github.com/Avogar)). -* Revert invalid RegExpTreeDictionary optimization [#50642](https://github.com/ClickHouse/ClickHouse/pull/50642) ([Johann Gan](https://github.com/johanngan)). - -### ClickHouse release 23.4, 2023-04-26 - -#### Backward Incompatible Change -* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). -* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). - -#### New Feature -* Support new aggregate function `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in spark. Greenwald-Khanna algorithm refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). -* Add a statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). -* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. 
[#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). -* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. Value is stored in zookeeper and can be used instead of not persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). -* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). -* Support `Map` type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). -* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4 space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `extractKeyValuePairs` function to extract key value pairs from strings. Input strings might contain noise (i.e. log files / do not need to be 100% formatted in key-value-pair format), the algorithm will look for key value pairs matching the arguments passed to the function. As of now, function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). -* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). -* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). - -#### Performance Improvement -* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). -* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts than for each part a set for subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts then this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently it can look up the set in the cache, wait for it to be built and reuse it. 
[#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). -* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). -* Now an internal query to local replica is sent explicitly and data from it received through loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating of the reading process and merging results, continuously answering for requests while all the secondary queries read the data. Note: Using loopback interface is not so performant, otherwise some replicas could starve for tasks which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even more lazy. All incoming requests contain the information about the reading algorithm we initialize the coordinator with it when first request comes. If any replica decides to read with a different algorithm–an exception will be thrown and a query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Do not build set for the right side of `IN` clause with subquery when it is used only for analysis of skip indexes, and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). -* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). Query processing is parallelized right after reading from any data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). This is controlled by the setting `parallelize_output_from_storages` which is not enabled by default. -* Lowered contention of ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). -* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). - -#### Experimental Feature -* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). -* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). 
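The `extractKeyValuePairs` entry above already spells out the argument order (data, key-value delimiter, pair delimiters, quoting character) and the defaults. A minimal sketch of how a call might look, with made-up input strings:

```sql
-- Relying on the defaults described in the entry: ':' between key and value,
-- space/','/';' between pairs, '"' for quoting.
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg') AS kv;

-- Passing the arguments explicitly, in the order listed in the entry.
SELECT extractKeyValuePairs('name:"neymar"; age:"31"', ':', ';', '"') AS kv_quoted;
```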
-* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Implement zero-copy-replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)).
-
-#### Improvement
-* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229). Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)).
-* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data. - Support `Iceberg` format version v2 (previously only v1 was supported). - Support reading partitioned data for `DeltaLake`/`Hudi`. - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files. - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore could correctly read only small tables. - Made these engines pick up updates of changed data (previously the state was set on table creation). - Added proper testing for `Iceberg`/`DeltaLake`/`Hudi` using Spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)).
-* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)).
-* It is possible to set the _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)).
-* The `bitCount` function supports `FixedString` and `String` data types. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)).
-* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)).
-* The JSON_VALUE function is similar to Spark's get_json_object function, which extracts a value from a JSON string by a path like '$.key'. There are two differences: 1. Spark's get_json_object returns null when the path does not exist, while JSON_VALUE returns an empty string; 2. Spark's get_json_object returns complex values such as JSON objects/arrays, while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)).
-* More flexible propagation of the insert table structure to table functions for `use_structure_from_insertion_table_in_table_functions`. Fixed an issue with name mapping and using virtual columns. No more need for the 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
-* Do not continue retrying to connect to Keeper if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
-* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
-* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32, and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)).
-* Added support for the `BACKUP ALL` command which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)).
-* The output of some SHOW PROCESSLIST is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)).
-* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`, settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)).
-* Support more types in `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)).
-* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)).
-* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)).
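To make the JSON_VALUE entry above concrete, here is a small sketch; the JSON document is made up, and only the two behavioral points stated in the entry (empty string for a missing path and for a non-scalar value) are asserted in the comments:

```sql
SELECT
    JSON_VALUE('{"user": {"name": "alice", "tags": ["a", "b"]}}', '$.user.name') AS scalar_value,
    JSON_VALUE('{"user": {"name": "alice", "tags": ["a", "b"]}}', '$.user.age')  AS missing_path,  -- empty string, not NULL
    JSON_VALUE('{"user": {"name": "alice", "tags": ["a", "b"]}}', '$.user.tags') AS complex_value; -- empty string for an object/array
```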
-* Check that the primary key type for a simple dictionary is a native unsigned integer type. Add setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)).
-* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)).
-* Allow writing/reading an unnamed tuple as a nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)).
-* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)).
-* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)).
-* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (single zero) can be restored using setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)).
-* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)).
-* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)).
-* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Add support for the `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed at runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)).
-* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)).
-* Make schema inference work for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)).
-* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)).
-* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* If a user sets `max_single_part_upload_size` to a very large value, it could lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Add aliases `name` and `part_name` for `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)).
-* Functions "arrayDifference()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)).
-* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)).
-* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We now additionally work around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)).
-* Improve memory accounting for prefetches. Randomise prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)).
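The entry above that adds `Date`/`DateTime` support to `arrayMin`, `arrayMax` and `arrayDifference` can be pictured with a short query; the literal dates are arbitrary:

```sql
SELECT
    arrayMin([toDate('2023-04-01'), toDate('2023-04-26')]) AS earliest,
    arrayMax([toDate('2023-04-01'), toDate('2023-04-26')]) AS latest,
    -- differences between consecutive elements, here two DateTime values five minutes apart
    arrayDifference([toDateTime('2023-04-26 00:00:00'), toDateTime('2023-04-26 00:05:00')]) AS deltas;
```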
-* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). -* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). -* Improve the embedded dashboard. Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). -* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or macOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The exception message about the unparsed query parameter will also tell about the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Build/Testing/Packaging Improvement -* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). -* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). -* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). -* Support for CRC32 checksum in HDFS. Fix performance issues. 
[#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). -* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). - -#### Bug Fix (user-visible misbehavior in an official stable release) - -* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). -* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). -* Fix minor hiliting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). -* Fix a bug in LLVM's libc++ leading to a crash for uploading parts to S3 which size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). -* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). -* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). -* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). -* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). -* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). -* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). -* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). -* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). -* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). 
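The "profile events for log messages" entry above implies new per-severity counters that should be visible in `system.events`; the exact event names are not spelled out there, so the filter below is only a guess:

```sql
SELECT event, value, description
FROM system.events
WHERE event ILIKE 'log%'   -- assumed naming for the per-severity log message counters
ORDER BY event;
```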
-* `toTimeZone` function throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). -* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). -* "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). -* The `groupArray` aggregate function correctly works for empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). -* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Async inserts with empty data will no longer throw exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). -* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). -* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). -* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). -* Fix unexpected part name error when trying to drop a ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). -* Fix reading `Date32` Parquet/Arrow column into not a `Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `UNKNOWN_IDENTIFIER` error while selecting from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). 
- -### ClickHouse release 23.3 LTS, 2023-03-30 - -#### Upgrade Notes -* Lightweight DELETEs are production ready and enabled by default. The `DELETE` query for MergeTree tables is now available by default. -* The behavior of `*domain*RFC` and `netloc` functions is slightly changed: relaxed the set of symbols that are allowed in the URL authority for better conformance. [#46841](https://github.com/ClickHouse/ClickHouse/pull/46841) ([Azat Khuzhin](https://github.com/azat)). -* Prohibited creating tables based on KafkaEngine with DEFAULT/EPHEMERAL/ALIAS/MATERIALIZED statements for columns. [#47138](https://github.com/ClickHouse/ClickHouse/pull/47138) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* An "asynchronous connection drain" feature is removed. Related settings and metrics are removed as well. It was an internal feature, so the removal should not affect users who had never heard about that feature. [#47486](https://github.com/ClickHouse/ClickHouse/pull/47486) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Support 256-bit Decimal data type (more than 38 digits) in `arraySum`/`Min`/`Max`/`Avg`/`Product`, `arrayCumSum`/`CumSumNonNegative`, `arrayDifference`, array construction, IN operator, query parameters, `groupArrayMovingSum`, statistical functions, `min`/`max`/`any`/`argMin`/`argMax`, PostgreSQL wire protocol, MySQL table engine and function, `sumMap`, `mapAdd`, `mapSubtract`, `arrayIntersect`. Add support for big integers in `arrayIntersect`. Statistical aggregate functions involving moments (such as `corr` or various `TTest`s) will use `Float64` as their internal representation (they were using `Decimal128` before this change, but it was pointless), and these functions can return `nan` instead of `inf` in case of infinite variance. Some functions were allowed on `Decimal256` data types but returned `Decimal128` in previous versions - now it is fixed. This closes [#47569](https://github.com/ClickHouse/ClickHouse/issues/47569). This closes [#44864](https://github.com/ClickHouse/ClickHouse/issues/44864). This closes [#28335](https://github.com/ClickHouse/ClickHouse/issues/28335). [#47594](https://github.com/ClickHouse/ClickHouse/pull/47594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Make backup_threads/restore_threads server settings (instead of user settings). [#47881](https://github.com/ClickHouse/ClickHouse/pull/47881) ([Azat Khuzhin](https://github.com/azat)). -* Do not allow const and non-deterministic secondary indices [#46839](https://github.com/ClickHouse/ClickHouse/pull/46839) ([Anton Popov](https://github.com/CurtizJ)). - -#### New Feature -* Add a new mode for splitting the work on replicas using settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). -* An option to display partial result on cancel: Added query setting `partial_result_on_first_cancel` allowing the canceled query (e.g. due to Ctrl-C) to return a partial result. 
[#45689](https://github.com/ClickHouse/ClickHouse/pull/45689) ([Alexey Perevyshin](https://github.com/alexX512)). -* Added support of arbitrary tables engines for temporary tables (except for Replicated and KeeperMap engines). Close [#31497](https://github.com/ClickHouse/ClickHouse/issues/31497). [#46071](https://github.com/ClickHouse/ClickHouse/pull/46071) ([Roman Vasin](https://github.com/rvasin)). -* Add support for replication of user-defined SQL functions using centralized storage in Keeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Implement `system.server_settings` (similar to `system.settings`), which will contain server configurations. [#46550](https://github.com/ClickHouse/ClickHouse/pull/46550) ([pufit](https://github.com/pufit)). -* Support for `UNDROP TABLE` query. Closes [#46811](https://github.com/ClickHouse/ClickHouse/issues/46811). [#47241](https://github.com/ClickHouse/ClickHouse/pull/47241) ([chen](https://github.com/xiedeyantu)). -* Allow separate grants for named collections (e.g. to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Add new access type `NAMED_COLLECTION_CONTROL` which is not given to user default unless explicitly added to the user config (is required to be able to do `GRANT ALL`), also `show_named_collections` is no longer obligatory to be manually specified for user default to be able to have full access rights as was in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow nested custom disks. Previously custom disks supported only flat disk structure. [#47106](https://github.com/ClickHouse/ClickHouse/pull/47106) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Introduce a function `widthBucket` (with a `WIDTH_BUCKET` alias for compatibility). [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). -* Add new function `parseDateTime`/`parseDateTimeInJodaSyntax` according to the specified format string. parseDateTime parses String to DateTime in MySQL syntax, parseDateTimeInJodaSyntax parses in Joda syntax. [#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). -* Use `dummy UInt8` for the default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). -* Support for date format with a comma, like `Dec 15, 2021` in the `parseDateTimeBestEffort` function. Closes [#46816](https://github.com/ClickHouse/ClickHouse/issues/46816). [#47071](https://github.com/ClickHouse/ClickHouse/pull/47071) ([chen](https://github.com/xiedeyantu)). -* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that corresponds to URL params `wait_end_of_query` and `buffer_size` for the HTTP interface. This allows changing these settings in the profiles. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). -* Add `system.dropped_tables` table that shows tables that were dropped from `Atomic` databases but were not completely removed yet. 
[#47364](https://github.com/ClickHouse/ClickHouse/pull/47364) ([chen](https://github.com/xiedeyantu)). -* Add `INSTR` as alias of `positionCaseInsensitive` for MySQL compatibility. Closes [#47529](https://github.com/ClickHouse/ClickHouse/issues/47529). [#47535](https://github.com/ClickHouse/ClickHouse/pull/47535) ([flynn](https://github.com/ucasfl)). -* Added `toDecimalString` function allowing to convert numbers to string with fixed precision. [#47838](https://github.com/ClickHouse/ClickHouse/pull/47838) ([Andrey Zvonov](https://github.com/zvonand)). -* Add a merge tree setting `max_number_of_mutations_for_replica`. It limits the number of part mutations per replica to the specified amount. Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings). [#48047](https://github.com/ClickHouse/ClickHouse/pull/48047) ([Vladimir C](https://github.com/vdimir)). -* Add the Map-related function `mapFromArrays`, which allows the creation of a map from a pair of arrays. [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)). -* Allow control of compression in Parquet/ORC/Arrow output formats, adds support for more compression input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). -* Add SSL User Certificate authentication to the native protocol. Closes [#47077](https://github.com/ClickHouse/ClickHouse/issues/47077). [#47596](https://github.com/ClickHouse/ClickHouse/pull/47596) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add *OrNull() and *OrZero() variants for `parseDateTime`, add alias `str_to_date` for MySQL parity. [#48000](https://github.com/ClickHouse/ClickHouse/pull/48000) ([Robert Schulze](https://github.com/rschu1ze)). -* Added operator `REGEXP` (similar to operators "LIKE", "IN", "MOD" etc.) for better compatibility with MySQL [#47869](https://github.com/ClickHouse/ClickHouse/pull/47869) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Performance Improvement -* Marks in memory are now compressed, using 3-6x less memory. [#47290](https://github.com/ClickHouse/ClickHouse/pull/47290) ([Michael Kolupaev](https://github.com/al13n321)). -* Backups for large numbers of files were unbelievably slow in previous versions. Not anymore. Now they are unbelievably fast. [#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Introduced a separate thread pool for backup's IO operations. This will allow scaling it independently of other pools and increase performance. [#47174](https://github.com/ClickHouse/ClickHouse/pull/47174) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Use MultiRead request and retries for collecting metadata at the final stage of backup processing. [#47243](https://github.com/ClickHouse/ClickHouse/pull/47243) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). If a backup and restoring data are both in S3 then server-side copy should be used from now on. [#47546](https://github.com/ClickHouse/ClickHouse/pull/47546) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fixed excessive reading in queries with `FINAL`. [#47801](https://github.com/ClickHouse/ClickHouse/pull/47801) ([Nikita Taranov](https://github.com/nickitat)). 
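Two of the MySQL-compatibility entries above, the `INSTR` alias and the `REGEXP` operator, are easiest to show side by side; the string literals are arbitrary:

```sql
SELECT
    INSTR('ClickHouse', 'house')           AS pos,         -- alias of positionCaseInsensitive, per the entry above
    'ClickHouse 23.3' REGEXP '\\d+\\.\\d+' AS has_version; -- REGEXP operator, usable like LIKE/IN/MOD
```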
-* Setting `max_final_threads` would be set to the number of cores at server startup (by the same algorithm as used for `max_threads`). This improves the concurrency of `final` execution on servers with high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). -* Allow executing reading pipeline for DIRECT dictionary with CLICKHOUSE source in multiple threads. To enable set `dictionary_use_async_executor=1` in `SETTINGS` section for source in `CREATE DICTIONARY` statement. [#47986](https://github.com/ClickHouse/ClickHouse/pull/47986) ([Vladimir C](https://github.com/vdimir)). -* Optimize one nullable key aggregate performance. [#45772](https://github.com/ClickHouse/ClickHouse/pull/45772) ([LiuNeng](https://github.com/liuneng1994)). -* Implemented lowercase `tokenbf_v1` index utilization for `hasTokenOrNull`, `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull`. [#46252](https://github.com/ClickHouse/ClickHouse/pull/46252) ([ltrk2](https://github.com/ltrk2)). -* Optimize functions `position` and `LIKE` by searching the first two chars using SIMD. [#46289](https://github.com/ClickHouse/ClickHouse/pull/46289) ([Jiebin Sun](https://github.com/jiebinn)). -* Optimize queries from the `system.detached_parts`, which could be significantly large. Added several sources with respect to the block size limitation; in each block, an IO thread pool is used to calculate the part size, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). -* Increase the default value of `max_replicated_merges_in_queue` for ReplicatedMergeTree tables from 16 to 1000. It allows faster background merge operation on clusters with a very large number of replicas, such as clusters with shared storage in ClickHouse Cloud. [#47050](https://github.com/ClickHouse/ClickHouse/pull/47050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Updated `clickhouse-copier` to use `GROUP BY` instead of `DISTINCT` to get the list of partitions. For large tables, this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). -* Fix performance degradation in `ASOF JOIN`. [#47544](https://github.com/ClickHouse/ClickHouse/pull/47544) ([Ongkong](https://github.com/ongkong)). -* Even more batching in Keeper. Improve performance by avoiding breaking batches on read requests. [#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow PREWHERE for Merge with different DEFAULT expressions for columns. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). - -#### Experimental Feature -* Parallel replicas: Improved the overall performance by better utilizing the local replica, and forbid the reading with parallel replicas from non-replicated MergeTree by default. [#47858](https://github.com/ClickHouse/ClickHouse/pull/47858) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Support filter push down to left table for JOIN with `Join`, `Dictionary` and `EmbeddedRocksDB` tables if the experimental Analyzer is enabled. [#47280](https://github.com/ClickHouse/ClickHouse/pull/47280) ([Maksim Kita](https://github.com/kitaisreal)). -* Now ReplicatedMergeTree with zero copy replication has less load to Keeper. 
[#47676](https://github.com/ClickHouse/ClickHouse/pull/47676) ([alesapin](https://github.com/alesapin)). -* Fix create materialized view with MaterializedPostgreSQL [#40807](https://github.com/ClickHouse/ClickHouse/pull/40807) ([Maksim Buren](https://github.com/maks-buren630501)). - -#### Improvement -* Enable `input_format_json_ignore_unknown_keys_in_named_tuple` by default. [#46742](https://github.com/ClickHouse/ClickHouse/pull/46742) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow errors to be ignored while pushing to MATERIALIZED VIEW (add new setting `materialized_views_ignore_errors`, by default to `false`, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). -* Track the file queue of distributed sends in memory. [#45491](https://github.com/ClickHouse/ClickHouse/pull/45491) ([Azat Khuzhin](https://github.com/azat)). -* Now `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to responses in all queries via HTTP protocol. Previously it was done only for `SELECT` queries. [#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). -* External tables from `MongoDB`: support for connection to a replica set via a URI with a host:port enum and support for the readPreference option in MongoDB dictionaries. Example URI: mongodb://db0.example.com:27017,db1.example.com:27017,db2.example.com:27017/?replicaSet=myRepl&readPreference=primary. [#46524](https://github.com/ClickHouse/ClickHouse/pull/46524) ([artem-yadr](https://github.com/artem-yadr)). -* This improvement should be invisible for users. Re-implement projection analysis on top of query plan. Added setting `query_plan_optimize_projection=1` to switch between old and new version. Fixes [#44963](https://github.com/ClickHouse/ClickHouse/issues/44963). [#46537](https://github.com/ClickHouse/ClickHouse/pull/46537) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Use Parquet format v2 instead of v1 in output format by default. Add setting `output_format_parquet_version` to control parquet version, possible values `1.0`, `2.4`, `2.6`, `2.latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). -* It is now possible to use the new configuration syntax to configure Kafka topics with periods (`.`) in their name. [#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix heuristics that check hyperscan patterns for problematic repeats. [#46819](https://github.com/ClickHouse/ClickHouse/pull/46819) ([Robert Schulze](https://github.com/rschu1ze)). -* Don't report ZK node exists to system.errors when a block was created concurrently by a different replica. [#46820](https://github.com/ClickHouse/ClickHouse/pull/46820) ([Raúl Marín](https://github.com/Algunenano)). -* Increase the limit for opened files in `clickhouse-local`. It will be able to read from `web` tables on servers with a huge number of CPU cores. Do not back off reading from the URL table engine in case of too many opened files. This closes [#46852](https://github.com/ClickHouse/ClickHouse/issues/46852). [#46853](https://github.com/ClickHouse/ClickHouse/pull/46853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Exceptions thrown when numbers cannot be parsed now have an easier-to-read exception message. 
[#46917](https://github.com/ClickHouse/ClickHouse/pull/46917) ([Robert Schulze](https://github.com/rschu1ze)).
-* Added an update of `system.backups` after every processed task to track the progress of backups. [#46989](https://github.com/ClickHouse/ClickHouse/pull/46989) ([Aleksandr Musorin](https://github.com/AVMusorin)).
-* Allow types conversion in Native input format. Add setting `input_format_native_allow_types_conversion` that controls it (enabled by default). [#46990](https://github.com/ClickHouse/ClickHouse/pull/46990) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow IPv4 in the `range` function to generate IP ranges. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
-* Improve exception message when it's impossible to move a part from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)).
-* Support `Bool` type in `JSONType` function. Previously `Null` type was mistakenly returned for bool values. [#47046](https://github.com/ClickHouse/ClickHouse/pull/47046) ([Anton Popov](https://github.com/CurtizJ)).
-* Use `_request_body` parameter to configure predefined HTTP queries. [#47086](https://github.com/ClickHouse/ClickHouse/pull/47086) ([Constantine Peresypkin](https://github.com/pkit)).
-* Automatic indentation in the built-in UI SQL editor when Enter is pressed. [#47113](https://github.com/ClickHouse/ClickHouse/pull/47113) ([Alexey Korepanov](https://github.com/alexkorep)).
-* Self-extraction with 'sudo' will attempt to set the uid and gid of extracted files to the running user. [#47116](https://github.com/ClickHouse/ClickHouse/pull/47116) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
-* Previously, the `repeat` function's second argument only accepted an unsigned integer type, which meant it could not accept values such as -1. This behavior differed from that of the Spark function. In this update, the repeat function has been modified to match the behavior of the Spark function. It now accepts the same types of inputs, including negative integers. Extensive testing has been performed to verify the correctness of the updated implementation. [#47134](https://github.com/ClickHouse/ClickHouse/pull/47134) ([KevinyhZou](https://github.com/KevinyhZou)). Note: the changelog entry was rewritten by ChatGPT.
-* Remove `::__1` part from stacktraces. Display `std::basic_string<...>` as `String`.
-
-### ClickHouse release 23.2, 2023-02-23
-
-#### Backward Incompatible Change
-* Extend function "toDayOfWeek()" (alias: "DAYOFWEEK") with a mode argument that encodes whether the week starts on Monday or Sunday and whether counting starts at 0 or 1. For consistency with other date time functions, the mode argument was inserted between the time and the time zone arguments. This breaks existing usage of the (previously undocumented) 2-argument syntax "toDayOfWeek(time, time_zone)". A fix is to rewrite the function into "toDayOfWeek(time, 0, time_zone)". [#45233](https://github.com/ClickHouse/ClickHouse/pull/45233) ([Robert Schulze](https://github.com/rschu1ze)).
-* Rename setting `max_query_cache_size` to `filesystem_cache_max_download_size`. [#45614](https://github.com/ClickHouse/ClickHouse/pull/45614) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* The `default` user will not have permissions for access type `SHOW NAMED COLLECTION` by default (e.g.
`default` user will no longer be able to grant ALL to other users as it was before, therefore this PR is backward incompatible). [#46010](https://github.com/ClickHouse/ClickHouse/pull/46010) ([Kseniia Sumarokova](https://github.com/kssenii)). -* If the SETTINGS clause is specified before the FORMAT clause, the settings will be applied to formatting as well. [#46003](https://github.com/ClickHouse/ClickHouse/pull/46003) ([Azat Khuzhin](https://github.com/azat)). -* Remove support for setting `materialized_postgresql_allow_automatic_update` (which was by default turned off). [#46106](https://github.com/ClickHouse/ClickHouse/pull/46106) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Slightly improve performance of `countDigits` on realistic datasets. This closes [#44518](https://github.com/ClickHouse/ClickHouse/issues/44518). In previous versions, `countDigits(0)` returned `0`; now it returns `1`, which is more correct, and follows the existing documentation. [#46187](https://github.com/ClickHouse/ClickHouse/pull/46187) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disallow creation of new columns compressed by a combination of codecs "Delta" or "DoubleDelta" followed by codecs "Gorilla" or "FPC". This can be bypassed using setting "allow_suspicious_codecs = true". [#45652](https://github.com/ClickHouse/ClickHouse/pull/45652) ([Robert Schulze](https://github.com/rschu1ze)). - -#### New Feature -* Add `StorageIceberg` and table function `iceberg` to access Iceberg tables stored on S3. [#45384](https://github.com/ClickHouse/ClickHouse/pull/45384) ([flynn](https://github.com/ucasfl)). -* Allow configuring storage as `SETTINGS disk = ''` (instead of `storage_policy`) and with explicit disk creation `SETTINGS disk = disk(type=s3, ...)`. [#41976](https://github.com/ClickHouse/ClickHouse/pull/41976) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Expose `ProfileEvents` counters in `system.part_log`. [#38614](https://github.com/ClickHouse/ClickHouse/pull/38614) ([Bharat Nallan](https://github.com/bharatnc)). -* Enrich the existing `ReplacingMergeTree` engine to allow duplicate insertions. It leverages the power of both `ReplacingMergeTree` and `CollapsingMergeTree` in one MergeTree engine. Deleted data is not returned when queried, but it is not removed from disk either. [#41005](https://github.com/ClickHouse/ClickHouse/pull/41005) ([youennL-cs](https://github.com/youennL-cs)). -* Add `generateULID` function. Closes [#36536](https://github.com/ClickHouse/ClickHouse/issues/36536). [#44662](https://github.com/ClickHouse/ClickHouse/pull/44662) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `corrMatrix` aggregate function, which calculates the correlation between every pair of columns. In addition, since the aggregate functions `covarSamp` and `covarPop` are similar to `corr`, add `covarSampMatrix` and `covarPopMatrix` as well. Closes [#44587](https://github.com/ClickHouse/ClickHouse/issues/44587). [#44680](https://github.com/ClickHouse/ClickHouse/pull/44680) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Introduce the `arrayShuffle` function for random array permutations. [#45271](https://github.com/ClickHouse/ClickHouse/pull/45271) ([Joanna Hulboj](https://github.com/jh0x)). -* Support the `FIXED_SIZE_BINARY` type in Arrow and `FIXED_LENGTH_BYTE_ARRAY` in `Parquet` and match them to `FixedString`. Add settings `output_format_parquet_fixed_string_as_fixed_byte_array/output_format_arrow_fixed_string_as_fixed_byte_array` to control default output type for FixedString.
Closes [#45326](https://github.com/ClickHouse/ClickHouse/issues/45326). [#45340](https://github.com/ClickHouse/ClickHouse/pull/45340) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a new column `last_exception_time` to system.replication_queue. [#45457](https://github.com/ClickHouse/ClickHouse/pull/45457) ([Frank Chen](https://github.com/FrankChen021)). -* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)). -* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)). -* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)). -* Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)). -* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)). -* Add new function `regexpExtract`, like spark function `REGEXP_EXTRACT` for compatibility. It is similar to the existing function `extract`. [#46469](https://github.com/ClickHouse/ClickHouse/pull/46469) ([李扬](https://github.com/taiyang-li)). -* Add new function `JSONArrayLength`, which returns the number of elements in the outermost JSON array. The function returns NULL if the input JSON string is invalid. [#46631](https://github.com/ClickHouse/ClickHouse/pull/46631) ([李扬](https://github.com/taiyang-li)). - -#### Performance Improvement -* The introduced logic works if PREWHERE condition is a conjunction of multiple conditions (cond1 AND cond2 AND ... ). It groups those conditions that require reading the same columns into steps. After each step the corresponding part of the full condition is computed and the result rows might be filtered. This allows to read fewer rows in the next steps thus saving IO bandwidth and doing less computation. This logic is disabled by default for now. It will be enabled by default in one of the future releases once it is known to not have any regressions, so it is highly encouraged to be used for testing. It can be controlled by 2 settings: "enable_multiple_prewhere_read_steps" and "move_all_conditions_to_prewhere". [#46140](https://github.com/ClickHouse/ClickHouse/pull/46140) ([Alexander Gololobov](https://github.com/davenger)). -* An option added to aggregate partitions independently if table partition key and group by key are compatible. Controlled by the setting `allow_aggregate_partitions_independently`. Disabled by default because of limited applicability (please refer to the docs). 
[#45364](https://github.com/ClickHouse/ClickHouse/pull/45364) ([Nikita Taranov](https://github.com/nickitat)). -* Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#45681](https://github.com/ClickHouse/ClickHouse/pull/45681) [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). -* Optimize `Parquet` reader by using batch reader. [#45878](https://github.com/ClickHouse/ClickHouse/pull/45878) ([LiuNeng](https://github.com/liuneng1994)). -* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#38456](https://github.com/ClickHouse/ClickHouse/pull/38456) ([Saulius Valatka](https://github.com/sauliusvl)). -* Rewrite aggregate functions with an `if` expression as argument when logically equivalent. For example, `avg(if(cond, col, null))` can be rewritten to `avgIf(col, cond)`. This helps performance. [#44730](https://github.com/ClickHouse/ClickHouse/pull/44730) ([李扬](https://github.com/taiyang-li)). -* Improve lower/upper function performance with avx512 instructions. [#37894](https://github.com/ClickHouse/ClickHouse/pull/37894) ([yaqi-zhao](https://github.com/yaqi-zhao)). -* Remove the limitation that on systems with >=32 cores and SMT disabled ClickHouse uses only half of the cores (the case when you disable Hyper Threading in BIOS). [#44973](https://github.com/ClickHouse/ClickHouse/pull/44973) ([Robert Schulze](https://github.com/rschu1ze)). -* Improve performance of function `multiIf` by columnar execution, speed up by 2.3x. [#45296](https://github.com/ClickHouse/ClickHouse/pull/45296) ([李扬](https://github.com/taiyang-li)). -* Add fast path for function `position` when the needle is empty. [#45382](https://github.com/ClickHouse/ClickHouse/pull/45382) ([李扬](https://github.com/taiyang-li)). -* Enable `query_plan_remove_redundant_sorting` optimization by default. Optimization implemented in [#45420](https://github.com/ClickHouse/ClickHouse/issues/45420). [#45567](https://github.com/ClickHouse/ClickHouse/pull/45567) ([Igor Nikonov](https://github.com/devcrafter)). -* Increased HTTP Transfer Encoding chunk size to improve performance of large queries using the HTTP interface. [#45593](https://github.com/ClickHouse/ClickHouse/pull/45593) ([Geoff Genz](https://github.com/genzgd)). -* Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). -* Improve performance of filtering for big integers and decimal types. [#45949](https://github.com/ClickHouse/ClickHouse/pull/45949) ([李扬](https://github.com/taiyang-li)). -* Reduce the overhead of obtaining the filter from `ColumnNullable(UInt8)`, improving overall query performance. To evaluate the impact of this change, we adopted the TPC-H benchmark but revised the column types from non-nullable to nullable, and we measured the QPS of its queries as the performance indicator. [#45962](https://github.com/ClickHouse/ClickHouse/pull/45962) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
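As a minimal sketch of the aggregate rewrite described in the entry above (`avg(if(cond, col, null))` → `avgIf(col, cond)`); the table and column names are hypothetical:

```sql
-- Hypothetical table used only for illustration.
CREATE TABLE hits (user_id UInt64, is_mobile UInt8, duration Float64)
ENGINE = MergeTree ORDER BY user_id;

-- The conditional-aggregate form ...
SELECT avg(if(is_mobile = 1, duration, NULL)) FROM hits;

-- ... is logically equivalent to the combinator form the optimizer can now produce:
SELECT avgIf(duration, is_mobile = 1) FROM hits;
```

Both queries ignore rows where `is_mobile = 0`; the second form avoids building the intermediate `Nullable` column.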
-* Make the `_part` and `_partition_id` virtual column be `LowCardinality(String)` type. Closes [#45964](https://github.com/ClickHouse/ClickHouse/issues/45964). [#45975](https://github.com/ClickHouse/ClickHouse/pull/45975) ([flynn](https://github.com/ucasfl)). -* Improve the performance of Decimal conversion when the scale does not change. [#46095](https://github.com/ClickHouse/ClickHouse/pull/46095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow to increase prefetching for read data. [#46168](https://github.com/ClickHouse/ClickHouse/pull/46168) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Rewrite `arrayExists(x -> x = 1, arr)` -> `has(arr, 1)`, which improve performance by 1.34x. [#46188](https://github.com/ClickHouse/ClickHouse/pull/46188) ([李扬](https://github.com/taiyang-li)). -* Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Update zstd to v1.5.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge/mutation is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#46280](https://github.com/ClickHouse/ClickHouse/pull/46280) ([Raúl Marín](https://github.com/Algunenano)). -* Fix performance degradation caused by [#39737](https://github.com/ClickHouse/ClickHouse/issues/39737). [#46309](https://github.com/ClickHouse/ClickHouse/pull/46309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `replicas_status` handle will answer quickly even in case of a large replication queue. [#46310](https://github.com/ClickHouse/ClickHouse/pull/46310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add avx512 support for aggregate function `sum`, function unary arithmetic, function comparison. [#37870](https://github.com/ClickHouse/ClickHouse/pull/37870) ([zhao zhou](https://github.com/zzachimed)). -* Rewrote the code around marks distribution and the overall coordination of the reading in order to achieve the maximum performance improvement. This closes [#34527](https://github.com/ClickHouse/ClickHouse/issues/34527). [#43772](https://github.com/ClickHouse/ClickHouse/pull/43772) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Remove redundant DISTINCT clauses in query (subqueries). Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding DISTINCT clauses. Can be enabled via `query_plan_remove_redundant_distinct` setting. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#44176](https://github.com/ClickHouse/ClickHouse/pull/44176) ([Igor Nikonov](https://github.com/devcrafter)). -* A few query rewrite optimizations: `sumIf(123, cond) -> 123 * countIf(1, cond)`, `sum(if(cond, 123, 0)) -> 123 * countIf(cond)`, `sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))` [#44728](https://github.com/ClickHouse/ClickHouse/pull/44728) ([李扬](https://github.com/taiyang-li)). -* Improved how memory bound merging and aggregation in order on top query plan interact. Previously we fell back to explicit sorting for AIO in some cases when it wasn't actually needed. 
[#45892](https://github.com/ClickHouse/ClickHouse/pull/45892) ([Nikita Taranov](https://github.com/nickitat)). -* Concurrent merges are scheduled using round-robin by default to ensure fair and starvation-free operation. Previously in heavily overloaded shards, big merges could possibly be starved by smaller merges due to the use of strict priority scheduling. Added `background_merges_mutations_scheduling_policy` server config option to select scheduling algorithm (`round_robin` or `shortest_task_first`). [#46247](https://github.com/ClickHouse/ClickHouse/pull/46247) ([Sergei Trifonov](https://github.com/serxa)). - -#### Improvement -* Enable retries for INSERT by default in case of ZooKeeper session loss. We already use it in production. [#46308](https://github.com/ClickHouse/ClickHouse/pull/46308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add ability to ignore unknown keys in JSON object for named tuples (`input_format_json_ignore_unknown_keys_in_named_tuple`). [#45678](https://github.com/ClickHouse/ClickHouse/pull/45678) ([Azat Khuzhin](https://github.com/azat)). -* Support optimizing the `where` clause with sorting key expression move to `prewhere` for query with `final`. [#38893](https://github.com/ClickHouse/ClickHouse/issues/38893). [#38950](https://github.com/ClickHouse/ClickHouse/pull/38950) ([hexiaoting](https://github.com/hexiaoting)). -* Add new metrics for backups: num_processed_files and processed_files_size described actual number of processed files. [#42244](https://github.com/ClickHouse/ClickHouse/pull/42244) ([Aleksandr](https://github.com/AVMusorin)). -* Added retries on interserver DNS errors. [#43179](https://github.com/ClickHouse/ClickHouse/pull/43179) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Keeper improvement: try preallocating space on the disk to avoid undefined out-of-space issues. Introduce setting `max_log_file_size` for the maximum size of Keeper's Raft log files. [#44370](https://github.com/ClickHouse/ClickHouse/pull/44370) ([Antonio Andelic](https://github.com/antonio2368)). -* Optimize behavior for a replica delay api logic in case the replica is read-only. [#45148](https://github.com/ClickHouse/ClickHouse/pull/45148) ([mateng915](https://github.com/mateng0915)). -* Ask for the password in clickhouse-client interactively in a case when the empty password is wrong. Closes [#46702](https://github.com/ClickHouse/ClickHouse/issues/46702). [#46730](https://github.com/ClickHouse/ClickHouse/pull/46730) ([Nikolay Degterinsky](https://github.com/evillique)). -* Mark `Gorilla` compression on columns of non-Float* type as suspicious. [#45376](https://github.com/ClickHouse/ClickHouse/pull/45376) ([Robert Schulze](https://github.com/rschu1ze)). -* Show replica name that is executing a merge in the `postpone_reason` column. [#45458](https://github.com/ClickHouse/ClickHouse/pull/45458) ([Frank Chen](https://github.com/FrankChen021)). -* Save exception stack trace in part_log. [#45459](https://github.com/ClickHouse/ClickHouse/pull/45459) ([Frank Chen](https://github.com/FrankChen021)). -* The `regexp_tree` dictionary is polished and now it is compatible with https://github.com/ua-parser/uap-core. [#45631](https://github.com/ClickHouse/ClickHouse/pull/45631) ([Han Fei](https://github.com/hanfei1991)). -* Updated checking of `SYSTEM SYNC REPLICA`, resolves [#45508](https://github.com/ClickHouse/ClickHouse/issues/45508) [#45648](https://github.com/ClickHouse/ClickHouse/pull/45648) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
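To illustrate the `input_format_json_ignore_unknown_keys_in_named_tuple` entry above, a small sketch; the structure and JSON payload are made up, and it assumes the three-argument `format` table function mentioned earlier in this changelog:

```sql
SET input_format_json_ignore_unknown_keys_in_named_tuple = 1;

-- The extra "debug" key is skipped instead of causing a parse error.
SELECT *
FROM format(JSONEachRow, 'obj Tuple(a UInt32, b String)',
            '{"obj": {"a": 1, "b": "x", "debug": true}}');
```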
-* Rename setting `replication_alter_partitions_sync` to `alter_sync`. [#45659](https://github.com/ClickHouse/ClickHouse/pull/45659) ([Antonio Andelic](https://github.com/antonio2368)). -* The `generateRandom` table function and the engine now support `LowCardinality` data types. This is useful for testing, for example you can write `INSERT INTO table SELECT * FROM generateRandom() LIMIT 1000`. This is needed to debug [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590). [#45661](https://github.com/ClickHouse/ClickHouse/pull/45661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The experimental query result cache now provides more modular configuration settings. [#45679](https://github.com/ClickHouse/ClickHouse/pull/45679) ([Robert Schulze](https://github.com/rschu1ze)). -* Renamed "query result cache" to "query cache". [#45682](https://github.com/ClickHouse/ClickHouse/pull/45682) ([Robert Schulze](https://github.com/rschu1ze)). -* add `SYSTEM SYNC FILE CACHE` command. It will do the `sync` syscall. [#8921](https://github.com/ClickHouse/ClickHouse/issues/8921). [#45685](https://github.com/ClickHouse/ClickHouse/pull/45685) ([DR](https://github.com/freedomDR)). -* Add a new S3 setting `allow_head_object_request`. This PR makes usage of `GetObjectAttributes` request instead of `HeadObject` introduced in https://github.com/ClickHouse/ClickHouse/pull/45288 optional (and disabled by default). [#45701](https://github.com/ClickHouse/ClickHouse/pull/45701) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add ability to override connection settings based on connection names (that said that now you can forget about storing password for each connection, you can simply put everything into `~/.clickhouse-client/config.xml` and even use different history files for them, which can be also useful). [#45715](https://github.com/ClickHouse/ClickHouse/pull/45715) ([Azat Khuzhin](https://github.com/azat)). -* Arrow format: support the duration type. Closes [#45669](https://github.com/ClickHouse/ClickHouse/issues/45669). [#45750](https://github.com/ClickHouse/ClickHouse/pull/45750) ([flynn](https://github.com/ucasfl)). -* Extend the logging in the Query Cache to improve investigations of the caching behavior. [#45751](https://github.com/ClickHouse/ClickHouse/pull/45751) ([Robert Schulze](https://github.com/rschu1ze)). -* The query cache's server-level settings are now reconfigurable at runtime. [#45758](https://github.com/ClickHouse/ClickHouse/pull/45758) ([Robert Schulze](https://github.com/rschu1ze)). -* Hide password in logs when a table function's arguments are specified with a named collection. [#45774](https://github.com/ClickHouse/ClickHouse/pull/45774) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve internal S3 client to correctly deduce regions and redirections for different types of URLs. [#45783](https://github.com/ClickHouse/ClickHouse/pull/45783) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for Map, IPv4 and IPv6 types in generateRandom. Mostly useful for testing. [#45785](https://github.com/ClickHouse/ClickHouse/pull/45785) ([Raúl Marín](https://github.com/Algunenano)). -* Support empty/notEmpty for IP types. [#45799](https://github.com/ClickHouse/ClickHouse/pull/45799) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* The column `num_processed_files` was split into two columns: `num_files` (for BACKUP) and `files_read` (for RESTORE). 
The column `processed_files_size` was split into two columns: `total_size` (for BACKUP) and `bytes_read` (for RESTORE). [#45800](https://github.com/ClickHouse/ClickHouse/pull/45800) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add support for `SHOW ENGINES` query for MySQL compatibility. [#45859](https://github.com/ClickHouse/ClickHouse/pull/45859) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Improved how the obfuscator deals with queries. [#45867](https://github.com/ClickHouse/ClickHouse/pull/45867) ([Raúl Marín](https://github.com/Algunenano)). -* Improve behaviour of conversion into Date for boundary value 65535 (2149-06-06). [#46042](https://github.com/ClickHouse/ClickHouse/pull/46042) [#45914](https://github.com/ClickHouse/ClickHouse/pull/45914) ([Joanna Hulboj](https://github.com/jh0x)). -* Add setting `check_referential_table_dependencies` to check referential dependencies on `DROP TABLE`. This PR solves [#38326](https://github.com/ClickHouse/ClickHouse/issues/38326). [#45936](https://github.com/ClickHouse/ClickHouse/pull/45936) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix `tupleElement` to return `Null` when having `Null` argument. Closes [#45894](https://github.com/ClickHouse/ClickHouse/issues/45894). [#45952](https://github.com/ClickHouse/ClickHouse/pull/45952) ([flynn](https://github.com/ucasfl)). -* Throw an error on no files satisfying the S3 wildcard. Closes [#45587](https://github.com/ClickHouse/ClickHouse/issues/45587). [#45957](https://github.com/ClickHouse/ClickHouse/pull/45957) ([chen](https://github.com/xiedeyantu)). -* Use cluster state data to check concurrent backup/restore. [#45982](https://github.com/ClickHouse/ClickHouse/pull/45982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* ClickHouse Client: Use "exact" matching for fuzzy search, which has correct case ignorance and more appropriate algorithm for matching SQL queries. [#46000](https://github.com/ClickHouse/ClickHouse/pull/46000) ([Azat Khuzhin](https://github.com/azat)). -* Forbid wrong create View syntax `CREATE View X TO Y AS SELECT`. Closes [#4331](https://github.com/ClickHouse/ClickHouse/issues/4331). [#46043](https://github.com/ClickHouse/ClickHouse/pull/46043) ([flynn](https://github.com/ucasfl)). -* Storage `Log` family support setting the `storage_policy`. Closes [#43421](https://github.com/ClickHouse/ClickHouse/issues/43421). [#46044](https://github.com/ClickHouse/ClickHouse/pull/46044) ([flynn](https://github.com/ucasfl)). -* Improve `JSONColumns` format when the result is empty. Closes [#46024](https://github.com/ClickHouse/ClickHouse/issues/46024). [#46053](https://github.com/ClickHouse/ClickHouse/pull/46053) ([flynn](https://github.com/ucasfl)). -* Add reference implementation for SipHash128. [#46065](https://github.com/ClickHouse/ClickHouse/pull/46065) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Add a new metric to record allocations times and bytes using mmap. [#46068](https://github.com/ClickHouse/ClickHouse/pull/46068) ([李扬](https://github.com/taiyang-li)). -* Currently for functions like `leftPad`, `rightPad`, `leftPadUTF8`, `rightPadUTF8`, the second argument `length` must be UInt8|16|32|64|128|256. Which is too strict for clickhouse users, besides, it is not consistent with other similar functions like `arrayResize`, `substring` and so on. [#46103](https://github.com/ClickHouse/ClickHouse/pull/46103) ([李扬](https://github.com/taiyang-li)). -* Fix assertion in the `welchTTest` function in debug build when the resulting statistics is NaN. 
Unified the behavior with other similar functions. Change the behavior of `studentTTest` to return NaN instead of throwing an exception because the previous behavior was inconvenient. This closes [#41176](https://github.com/ClickHouse/ClickHouse/issues/41176). This closes [#42162](https://github.com/ClickHouse/ClickHouse/issues/42162). [#46141](https://github.com/ClickHouse/ClickHouse/pull/46141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* More convenient usage of big integers and ORDER BY WITH FILL. Allow using plain integers for start and end points in WITH FILL when ORDER BY big (128-bit and 256-bit) integers. Fix the wrong result for big integers with negative start or end points. This closes [#16733](https://github.com/ClickHouse/ClickHouse/issues/16733). [#46152](https://github.com/ClickHouse/ClickHouse/pull/46152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `parts`, `active_parts` and `total_marks` columns to `system.tables`. See [#44336](https://github.com/ClickHouse/ClickHouse/issues/44336). [#46161](https://github.com/ClickHouse/ClickHouse/pull/46161) ([attack204](https://github.com/attack204)). -* Functions "multi[Fuzzy]Match{Any|AnyIndex|AllIndices}" now reject regexes which will likely evaluate very slowly in vectorscan. [#46167](https://github.com/ClickHouse/ClickHouse/pull/46167) ([Robert Schulze](https://github.com/rschu1ze)). -* When `insert_null_as_default` is enabled and the column doesn't have a defined default value, the default of the column type will be used. Also, this PR fixes using default values for nulls in the case of LowCardinality columns. [#46171](https://github.com/ClickHouse/ClickHouse/pull/46171) ([Kruglov Pavel](https://github.com/Avogar)). -* Prefer explicitly defined access keys for S3 clients. If `use_environment_credentials` is set to `true`, and the user has provided the access key through query or config, they will be used instead of the ones from the environment variable. [#46191](https://github.com/ClickHouse/ClickHouse/pull/46191) ([Antonio Andelic](https://github.com/antonio2368)). -* Add an alias "DATE_FORMAT()" for function "formatDateTime()" to improve compatibility with MySQL's SQL dialect; extend function `formatDateTime` with substitutions "a", "b", "c", "h", "i", "k", "l", "r", "s", "W". `DATE_FORMAT` is an alias of `formatDateTime`: it formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. See [formatDateTime](https://clickhouse.com/docs/en/sql-reference/functions/date-time-functions/#formatdatetime). [#46302](https://github.com/ClickHouse/ClickHouse/pull/46302) ([Jake Bamrah](https://github.com/JakeBamrah)). -* Add `ProfileEvents` and `CurrentMetrics` about the callback tasks for parallel replicas (`s3Cluster` and `MergeTree` tables). [#46313](https://github.com/ClickHouse/ClickHouse/pull/46313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add support for `DELETE` and `UPDATE` for tables using `KeeperMap` storage engine. [#46330](https://github.com/ClickHouse/ClickHouse/pull/46330) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow writing RENAME queries with query parameters. Resolves [#45778](https://github.com/ClickHouse/ClickHouse/issues/45778). [#46407](https://github.com/ClickHouse/ClickHouse/pull/46407) ([Nikolay Degterinsky](https://github.com/evillique)).
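The last entry above (RENAME with query parameters) could look roughly like this; the table names and the parameter name are illustrative:

```sql
-- Set a query parameter in the session (in clickhouse-client: SET param_<name> = ...).
SET param_new_name = 'events_v2';

-- Use it as an Identifier inside RENAME.
RENAME TABLE events TO {new_name:Identifier};
```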
-* Fix parameterized SELECT queries with REPLACE transformer. Resolves [#33002](https://github.com/ClickHouse/ClickHouse/issues/33002). [#46420](https://github.com/ClickHouse/ClickHouse/pull/46420) ([Nikolay Degterinsky](https://github.com/evillique)). -* Exclude the internal database used for temporary/external tables from the calculation of asynchronous metric "NumberOfDatabases". This makes the behavior consistent with system table "system.databases". [#46435](https://github.com/ClickHouse/ClickHouse/pull/46435) ([Robert Schulze](https://github.com/rschu1ze)). -* Added the `last_exception_time` column to the `system.distribution_queue` table. [#46564](https://github.com/ClickHouse/ClickHouse/pull/46564) ([Aleksandr](https://github.com/AVMusorin)). -* Support for IN clause with parameter in parameterized views. [#46583](https://github.com/ClickHouse/ClickHouse/pull/46583) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Do not load named collections on server startup (load them on first access instead). [#46607](https://github.com/ClickHouse/ClickHouse/pull/46607) ([Kseniia Sumarokova](https://github.com/kssenii)). - - -#### Build/Testing/Packaging Improvement -* Introduce GWP-ASan implemented by the LLVM runtime. This closes [#27039](https://github.com/ClickHouse/ClickHouse/issues/27039). [#45226](https://github.com/ClickHouse/ClickHouse/pull/45226) ([Han Fei](https://github.com/hanfei1991)). -* We want to make our tests less stable and more flaky: add randomization for merge tree settings in tests. [#38983](https://github.com/ClickHouse/ClickHouse/pull/38983) ([Anton Popov](https://github.com/CurtizJ)). -* Enable HDFS support on PowerPC, which helps to fix the functional tests 02113_hdfs_assert.sh, 02244_hdfs_cluster.sql and 02368_cancel_write_into_hdfs.sh. [#44949](https://github.com/ClickHouse/ClickHouse/pull/44949) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). -* Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* ClickHouse's fork of poco was moved from "contrib/" to "base/poco/". [#46075](https://github.com/ClickHouse/ClickHouse/pull/46075) ([Robert Schulze](https://github.com/rschu1ze)). -* Add an option for `clickhouse-watchdog` to restart the child process. It is not of much practical use. [#46312](https://github.com/ClickHouse/ClickHouse/pull/46312) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* If the environment variable `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` is set to 1, the Docker container will run `clickhouse-server` as a child instead of the first process, and restart it when it exits. [#46391](https://github.com/ClickHouse/ClickHouse/pull/46391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix Systemd service file. [#46461](https://github.com/ClickHouse/ClickHouse/pull/46461) ([SuperDJY](https://github.com/cmsxbc)). -* Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)). -* Upgrade Intel QPL from v0.3.0 to v1.0.0. Build libaccel-config and link it statically to the QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)).
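Returning to the parameterized-view improvement listed above (`IN` clause with a parameter), a minimal sketch with made-up table, column and parameter names; the exact parameter types supported may differ:

```sql
CREATE VIEW events_of_types AS
SELECT *
FROM events
WHERE event_type IN ({type_a:String}, {type_b:String});

-- Parameters are supplied when querying the view.
SELECT count() FROM events_of_types(type_a = 'click', type_b = 'view');
```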
- - -#### Bug Fix (user-visible misbehavior in official stable release) - -* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Use PODArray to render in sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)). -* Fix functions (quantilesExactExclusive, quantilesExactInclusive) return unsorted array element. [#45379](https://github.com/ClickHouse/ClickHouse/pull/45379) ([wujunfu](https://github.com/wujunfu)). -* Fix uncaught exception in HTTPHandler when open telemetry is enabled. [#45456](https://github.com/ClickHouse/ClickHouse/pull/45456) ([Frank Chen](https://github.com/FrankChen021)). -* Don't infer Dates from 8 digit numbers. It could lead to wrong data to be read. [#45581](https://github.com/ClickHouse/ClickHouse/pull/45581) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes to correctly use `odbc_bridge_use_connection_pooling` setting. [#45591](https://github.com/ClickHouse/ClickHouse/pull/45591) ([Bharat Nallan](https://github.com/bharatnc)). -* When the callback in the cache is called, it is possible that this cache is destructed. To keep it safe, we capture members by value. It's also safe for task schedule because it will be deactivated before storage is destroyed. Resolve [#45548](https://github.com/ClickHouse/ClickHouse/issues/45548). [#45601](https://github.com/ClickHouse/ClickHouse/pull/45601) ([Han Fei](https://github.com/hanfei1991)). -* Fix data corruption when codecs Delta or DoubleDelta are combined with codec Gorilla. [#45615](https://github.com/ClickHouse/ClickHouse/pull/45615) ([Robert Schulze](https://github.com/rschu1ze)). -* Correctly check types when using N-gram bloom filter index to avoid invalid reads. [#45617](https://github.com/ClickHouse/ClickHouse/pull/45617) ([Antonio Andelic](https://github.com/antonio2368)). -* A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). -* Set compression method and level for backup Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Should use `select_query_typed.limitByOffset` instead of `select_query_typed.limitOffset`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). -* When use experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` get wrong results (empty result for this sql). That is caused by an unnecessary offset step added by planner. 
[#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). -* Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add the `query_kind` column to the `system.processes` table and the `SHOW PROCESSLIST` query. Remove duplicate code. It fixes a bug: the global configuration parameter `max_concurrent_select_queries` was not respected to queries with `INTERSECT` or `EXCEPT` chains. [#45872](https://github.com/ClickHouse/ClickHouse/pull/45872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix crash in a function `stochasticLinearRegression`. Found by WingFuzz. [#45985](https://github.com/ClickHouse/ClickHouse/pull/45985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix crash in `SELECT` queries with `INTERSECT` and `EXCEPT` modifiers that read data from tables with enabled sparse columns (controlled by setting `ratio_of_defaults_for_sparse_serialization`). [#45987](https://github.com/ClickHouse/ClickHouse/pull/45987) ([Anton Popov](https://github.com/CurtizJ)). -* Fix read in order optimization for DESC sorting with FINAL, close [#45815](https://github.com/ClickHouse/ClickHouse/issues/45815). [#46009](https://github.com/ClickHouse/ClickHouse/pull/46009) ([Vladimir C](https://github.com/vdimir)). -* Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). -* Fix elapsed column in system.processes (10x error). [#46047](https://github.com/ClickHouse/ClickHouse/pull/46047) ([Azat Khuzhin](https://github.com/azat)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix environment variable substitution in the configuration when a parameter already has a value. This closes [#46131](https://github.com/ClickHouse/ClickHouse/issues/46131). This closes [#9547](https://github.com/ClickHouse/ClickHouse/issues/9547). [#46144](https://github.com/ClickHouse/ClickHouse/pull/46144) ([pufit](https://github.com/pufit)). -* Fix incorrect predicate push down with grouping sets. Closes [#45947](https://github.com/ClickHouse/ClickHouse/issues/45947). [#46151](https://github.com/ClickHouse/ClickHouse/pull/46151) ([flynn](https://github.com/ucasfl)). -* Fix possible pipeline stuck error on `fulls_sorting_join` with constant keys. [#46175](https://github.com/ClickHouse/ClickHouse/pull/46175) ([Vladimir C](https://github.com/vdimir)). -* Never rewrite tuple functions as literals during formatting to avoid incorrect results. [#46232](https://github.com/ClickHouse/ClickHouse/pull/46232) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix possible out of bounds error while reading LowCardinality(Nullable) in Arrow format. [#46270](https://github.com/ClickHouse/ClickHouse/pull/46270) ([Kruglov Pavel](https://github.com/Avogar)). 
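The `query_kind` column added to `system.processes` (see the corresponding entry above) can be inspected directly; a minimal example:

```sql
-- Show currently running queries together with their kind (Select, Insert, ...).
SELECT query_id, query_kind, elapsed, query
FROM system.processes
ORDER BY elapsed DESC;
```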
-* Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Fix possible crash which can be caused by an integer overflow while deserializing aggregating state of a function that stores HashTable. [#46349](https://github.com/ClickHouse/ClickHouse/pull/46349) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed a LOGICAL_ERROR on an attempt to execute `ALTER ... MOVE PART ... TO TABLE`. This type of query was never actually supported. [#46359](https://github.com/ClickHouse/ClickHouse/pull/46359) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix s3Cluster schema inference in parallel distributed insert select when `parallel_distributed_insert_select` is enabled. [#46381](https://github.com/ClickHouse/ClickHouse/pull/46381) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix queries like `ALTER TABLE ... UPDATE nested.arr1 = nested.arr2 ...`, where `arr1` and `arr2` are fields of the same `Nested` column. [#46387](https://github.com/ClickHouse/ClickHouse/pull/46387) ([Anton Popov](https://github.com/CurtizJ)). -* The scheduler may fail to schedule a task. If that happens, the whole MultipartUpload should be aborted and `UploadHelper` must wait for the already scheduled tasks. [#46451](https://github.com/ClickHouse/ClickHouse/pull/46451) ([Dmitry Novik](https://github.com/novikd)). -* Fix PREWHERE for Merge with different default types (fixes some `NOT_FOUND_COLUMN_IN_BLOCK` errors when the default type for the column differs; also allow `PREWHERE` when the type of the column is the same across tables, and prohibit it only if it differs). [#46454](https://github.com/ClickHouse/ClickHouse/pull/46454) ([Azat Khuzhin](https://github.com/azat)). -* Fix a crash that could happen when constant values are used in `ORDER BY`. Fixes [#46466](https://github.com/ClickHouse/ClickHouse/issues/46466). [#46493](https://github.com/ClickHouse/ClickHouse/pull/46493) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Do not throw exception if `disk` setting was specified on query level, but `storage_policy` was specified in config merge tree settings section. `disk` will override setting from config. [#46533](https://github.com/ClickHouse/ClickHouse/pull/46533) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixes [#46557](https://github.com/ClickHouse/ClickHouse/issues/46557). [#46611](https://github.com/ClickHouse/ClickHouse/pull/46611) ([Alexander Gololobov](https://github.com/davenger)). -* Fix endless restarts of clickhouse-server systemd unit if server cannot start within 1m30sec (Disable timeout logic for starting clickhouse-server from systemd service). [#46613](https://github.com/ClickHouse/ClickHouse/pull/46613) ([Azat Khuzhin](https://github.com/azat)).
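A sketch of the kind of mutation fixed by the `Nested` entry above; the table and column names are hypothetical:

```sql
CREATE TABLE t
(
    key UInt64,
    nested Nested(arr1 UInt32, arr2 UInt32)
)
ENGINE = MergeTree ORDER BY key;

-- Previously this could fail when arr1 and arr2 are fields of the same Nested column.
ALTER TABLE t UPDATE nested.arr1 = nested.arr2 WHERE 1;
```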
-* Memory buffers allocated during asynchronous inserts were deallocated in the global context, and MemoryTracker counters for the corresponding user and query were not updated correctly. That led to false positive OOM exceptions. [#46622](https://github.com/ClickHouse/ClickHouse/pull/46622) ([Dmitry Novik](https://github.com/novikd)). -* Do not clear `on_expression` from `table_join`, as it is used by future analyze runs. Resolves [#45185](https://github.com/ClickHouse/ClickHouse/issues/45185). [#46487](https://github.com/ClickHouse/ClickHouse/pull/46487) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). - - -### ClickHouse release 23.1, 2023-01-26 - -#### Upgrade Notes -* The `SYSTEM RESTART DISK` query becomes a no-op. [#44647](https://github.com/ClickHouse/ClickHouse/pull/44647) ([alesapin](https://github.com/alesapin)). -* The `PREALLOCATE` option for `HASHED`/`SPARSE_HASHED` dictionaries becomes a no-op. [#45388](https://github.com/ClickHouse/ClickHouse/pull/45388) ([Azat Khuzhin](https://github.com/azat)). It does not give significant advantages anymore. -* Disallow `Gorilla` codec on columns of non-Float32 or non-Float64 type. [#45252](https://github.com/ClickHouse/ClickHouse/pull/45252) ([Robert Schulze](https://github.com/rschu1ze)). It was pointless and led to inconsistencies. -* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with the deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. [#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Use the `GetObjectAttributes` request instead of the `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit regions after updating the AWS SDK, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). AWS S3 and Minio are tested, but keep in mind that various S3-compatible services (GCS, R2, B2) may have subtle incompatibilities. This change also may require you to adjust the ACL to allow the `GetObjectAttributes` request. -* Forbid paths in timezone names. For example, a timezone name like `/usr/share/zoneinfo/Asia/Aden` is not allowed; the IANA timezone database name like `Asia/Aden` should be used. [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)). -* Queries combining equijoin and constant expressions (e.g., `JOIN ON t1.x = t2.x AND 1 = 1`) are forbidden due to incorrect results. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)). - - -#### New Feature -* Dictionary source for extracting keys by traversing a tree of regular expressions. It can be used for User-Agent parsing. [#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)). -* Added parameterized view functionality: it is now possible to specify query parameters for the View table engine. Resolves [#40907](https://github.com/ClickHouse/ClickHouse/issues/40907). [#41687](https://github.com/ClickHouse/ClickHouse/pull/41687) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions.
[#38252](https://github.com/ClickHouse/ClickHouse/pull/38252) ([Bharat Nallan](https://github.com/bharatnc)). -* Array join support for the `Map` type, like the function "explode" in Spark. [#43239](https://github.com/ClickHouse/ClickHouse/pull/43239) ([李扬](https://github.com/taiyang-li)). -* Support SQL standard binary and hex string literals. [#43785](https://github.com/ClickHouse/ClickHouse/pull/43785) ([Mo Xuan](https://github.com/mo-avatar)). -* Allow formatting `DateTime` in Joda-Time style. Refer to [the Joda-Time docs](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html). [#43818](https://github.com/ClickHouse/ClickHouse/pull/43818) ([李扬](https://github.com/taiyang-li)). -* Implemented a fractional second formatter (`%f`) for `formatDateTime`. [#44060](https://github.com/ClickHouse/ClickHouse/pull/44060) ([ltrk2](https://github.com/ltrk2)). [#44497](https://github.com/ClickHouse/ClickHouse/pull/44497) ([Alexander Gololobov](https://github.com/davenger)). -* Added `age` function to calculate the difference between two dates or dates with time values expressed as the number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#44421](https://github.com/ClickHouse/ClickHouse/pull/44421) ([Robert Schulze](https://github.com/rschu1ze)). -* Add `Null` source for dictionaries. Closes [#44240](https://github.com/ClickHouse/ClickHouse/issues/44240). [#44502](https://github.com/ClickHouse/ClickHouse/pull/44502) ([mayamika](https://github.com/mayamika)). -* Allow configuring the S3 storage class with the `s3_storage_class` configuration option. Such as `STANDARD/INTELLIGENT_TIERING` Closes [#44443](https://github.com/ClickHouse/ClickHouse/issues/44443). [#44707](https://github.com/ClickHouse/ClickHouse/pull/44707) ([chen](https://github.com/xiedeyantu)). -* Insert default values in case of missing elements in JSON object while parsing named tuple. Add setting `input_format_json_defaults_for_missing_elements_in_named_tuple` that controls this behaviour. Closes [#45142](https://github.com/ClickHouse/ClickHouse/issues/45142)#issuecomment-1380153217. [#45231](https://github.com/ClickHouse/ClickHouse/pull/45231) ([Kruglov Pavel](https://github.com/Avogar)). -* Record server startup time in ProfileEvents (`ServerStartupMilliseconds`). Resolves [#43188](https://github.com/ClickHouse/ClickHouse/issues/43188). [#45250](https://github.com/ClickHouse/ClickHouse/pull/45250) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Refactor and Improve streaming engines Kafka/RabbitMQ/NATS and add support for all formats, also refactor formats a bit: - Fix producing messages in row-based formats with suffixes/prefixes. Now every message is formatted completely with all delimiters and can be parsed back using input format. - Support block-based formats like Native, Parquet, ORC, etc. Every block is formatted as a separate message. The number of rows in one message depends on the block size, so you can control it via the setting `max_block_size`. - Add new engine settings `kafka_max_rows_per_message/rabbitmq_max_rows_per_message/nats_max_rows_per_message`. They control the number of rows formatted in one message in row-based formats. Default value: 1. - Fix high memory consumption in the NATS table engine. - Support arbitrary binary data in NATS producer (previously it worked only with strings contained \0 at the end) - Add missing Kafka/RabbitMQ/NATS engine settings in the documentation. 
- Refactor producing and consuming in Kafka/RabbitMQ/NATS, separate it from WriteBuffers/ReadBuffers semantic. - Refactor output formats: remove callbacks on each row used in Kafka/RabbitMQ/NATS (now we don't use callbacks there), allow to use IRowOutputFormat directly, clarify row end and row between delimiters, make it possible to reset output format to start formatting again - Add proper implementation in formatRow function (bonus after formats refactoring). [#42777](https://github.com/ClickHouse/ClickHouse/pull/42777) ([Kruglov Pavel](https://github.com/Avogar)). -* Support reading/writing `Nested` tables as `List` of `Struct` in `CapnProto` format. Read/write `Decimal32/64` as `Int32/64`. Closes [#43319](https://github.com/ClickHouse/ClickHouse/issues/43319). [#43379](https://github.com/ClickHouse/ClickHouse/pull/43379) ([Kruglov Pavel](https://github.com/Avogar)). -* Added a `message_format_string` column to `system.text_log`. The column contains a pattern that was used to format the message. [#44543](https://github.com/ClickHouse/ClickHouse/pull/44543) ([Alexander Tokmakov](https://github.com/tavplubix)). This allows various analytics over the ClickHouse logs. -* Try to autodetect headers with column names (and maybe types) for CSV/TSV/CustomSeparated input formats. -Add settings input_format_tsv/csv/custom_detect_header that enable this behaviour (enabled by default). Closes [#44640](https://github.com/ClickHouse/ClickHouse/issues/44640). [#44953](https://github.com/ClickHouse/ClickHouse/pull/44953) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Experimental Feature -* Add an experimental inverted index as a new secondary index type for efficient text search. [#38667](https://github.com/ClickHouse/ClickHouse/pull/38667) ([larryluogit](https://github.com/larryluogit)). -* Add experimental query result cache. [#43797](https://github.com/ClickHouse/ClickHouse/pull/43797) ([Robert Schulze](https://github.com/rschu1ze)). -* Added extendable and configurable scheduling subsystem for IO requests (not yet integrated with IO code itself). [#41840](https://github.com/ClickHouse/ClickHouse/pull/41840) ([Sergei Trifonov](https://github.com/serxa)). This feature does nothing at all, enjoy. -* Added `SYSTEM DROP DATABASE REPLICA` that removes metadata of a dead replica of a `Replicated` database. Resolves [#41794](https://github.com/ClickHouse/ClickHouse/issues/41794). [#42807](https://github.com/ClickHouse/ClickHouse/pull/42807) ([Alexander Tokmakov](https://github.com/tavplubix)). - -#### Performance Improvement -* Do not load inactive parts at startup of `MergeTree` tables. [#42181](https://github.com/ClickHouse/ClickHouse/pull/42181) ([Anton Popov](https://github.com/CurtizJ)). -* Improved latency of reading from storage `S3` and table function `s3` with large numbers of small files. Now settings `remote_filesystem_read_method` and `remote_filesystem_read_prefetch` take effect while reading from storage `S3`. [#43726](https://github.com/ClickHouse/ClickHouse/pull/43726) ([Anton Popov](https://github.com/CurtizJ)). -* Optimization for reading struct fields in Parquet/ORC files. Only the required fields are loaded. [#44484](https://github.com/ClickHouse/ClickHouse/pull/44484) ([lgbo](https://github.com/lgbo-ustc)). -* Two-level aggregation algorithm was mistakenly disabled for queries over the HTTP interface. It was enabled back, and it leads to a major performance improvement. 
[#45450](https://github.com/ClickHouse/ClickHouse/pull/45450) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Added mmap support for StorageFile, which should improve the performance of clickhouse-local. [#43927](https://github.com/ClickHouse/ClickHouse/pull/43927) ([pufit](https://github.com/pufit)). -* Added sharding support in HashedDictionary to allow parallel load (almost linear scaling based on number of shards). [#40003](https://github.com/ClickHouse/ClickHouse/pull/40003) ([Azat Khuzhin](https://github.com/azat)). -* Speed up query parsing. [#42284](https://github.com/ClickHouse/ClickHouse/pull/42284) ([Raúl Marín](https://github.com/Algunenano)). -* Always replace OR chain `expr = x1 OR ... OR expr = xN` to `expr IN (x1, ..., xN)` in the case where `expr` is a `LowCardinality` column. Setting `optimize_min_equality_disjunction_chain_length` is ignored in this case. [#42889](https://github.com/ClickHouse/ClickHouse/pull/42889) ([Guo Wangyang](https://github.com/guowangy)). -* Slightly improve performance by optimizing the code around ThreadStatus. [#43586](https://github.com/ClickHouse/ClickHouse/pull/43586) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Optimize the column-wise ternary logic evaluation by achieving auto-vectorization. In the performance test of this [microbenchmark](https://github.com/ZhiguoZh/ClickHouse/blob/20221123-ternary-logic-opt-example/src/Functions/examples/associative_applier_perf.cpp), we've observed a peak **performance gain** of **21x** on the ICX device (Intel Xeon Platinum 8380 CPU). [#43669](https://github.com/ClickHouse/ClickHouse/pull/43669) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Avoid acquiring read locks in the `system.tables` table if possible. [#43840](https://github.com/ClickHouse/ClickHouse/pull/43840) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize ThreadPool. The performance experiments of SSB (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could effectively decrease the lock contention for ThreadPoolImpl::mutex by **75%**, increasing the CPU utilization and improving the overall performance by **2.4%**. [#44308](https://github.com/ClickHouse/ClickHouse/pull/44308) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Now the optimisation for predicting the hash table size is applied only if the cached hash table size is sufficiently large (thresholds were determined empirically and hardcoded). [#44455](https://github.com/ClickHouse/ClickHouse/pull/44455) ([Nikita Taranov](https://github.com/nickitat)). -* Small performance improvement for asynchronous reading from remote filesystems. [#44868](https://github.com/ClickHouse/ClickHouse/pull/44868) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add fast path for: - `col like '%%'`; - `col like '%'`; - `col not like '%'`; - `col not like '%'`; - `match(col, '.*')`. [#45244](https://github.com/ClickHouse/ClickHouse/pull/45244) ([李扬](https://github.com/taiyang-li)). -* Slightly improve happy path optimisation in filtering (WHERE clause). [#45289](https://github.com/ClickHouse/ClickHouse/pull/45289) ([Nikita Taranov](https://github.com/nickitat)). -* Provide monotonicity info for `toUnixTimestamp64*` to enable more algebraic optimizations for index analysis. [#44116](https://github.com/ClickHouse/ClickHouse/pull/44116) ([Nikita Taranov](https://github.com/nickitat)). 
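As an illustration of the OR-chain rewrite described above, with a hypothetical table and a `LowCardinality` column:

```sql
CREATE TABLE visits (id UInt64, browser LowCardinality(String))
ENGINE = MergeTree ORDER BY id;

-- This OR chain is now treated like `browser IN ('Chrome', 'Firefox', 'Safari')`,
-- regardless of optimize_min_equality_disjunction_chain_length.
SELECT count()
FROM visits
WHERE browser = 'Chrome' OR browser = 'Firefox' OR browser = 'Safari';
```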
-* Allow the configuration of temporary data for query processing (spilling to disk) to cooperate with the filesystem cache (taking up the space from the cache disk) [#43972](https://github.com/ClickHouse/ClickHouse/pull/43972) ([Vladimir C](https://github.com/vdimir)). This mainly improves [ClickHouse Cloud](https://clickhouse.cloud/), but can be used for self-managed setups as well, if you know what to do. -* Make `system.replicas` table do parallel fetches of replicas statuses. Closes [#43918](https://github.com/ClickHouse/ClickHouse/issues/43918). [#43998](https://github.com/ClickHouse/ClickHouse/pull/43998) ([Nikolay Degterinsky](https://github.com/evillique)). -* Optimize memory consumption during backup to S3: files to S3 now will be copied directly without using `WriteBufferFromS3` (which could use a lot of memory). [#45188](https://github.com/ClickHouse/ClickHouse/pull/45188) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add a cache for async block ids. This will reduce the number of requests of ZooKeeper when we enable async inserts deduplication. [#45106](https://github.com/ClickHouse/ClickHouse/pull/45106) ([Han Fei](https://github.com/hanfei1991)). - -#### Improvement - -* Use structure from insertion table in generateRandom without arguments. [#45239](https://github.com/ClickHouse/ClickHouse/pull/45239) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to implicitly convert floats stored in string fields of JSON to integers in `JSONExtract` functions. E.g. `JSONExtract('{"a": "1000.111"}', 'a', 'UInt64')` -> `1000`, previously it returned 0. [#45432](https://github.com/ClickHouse/ClickHouse/pull/45432) ([Anton Popov](https://github.com/CurtizJ)). -* Added fields `supports_parallel_parsing` and `supports_parallel_formatting` to table `system.formats` for better introspection. [#45499](https://github.com/ClickHouse/ClickHouse/pull/45499) ([Anton Popov](https://github.com/CurtizJ)). -* Improve reading CSV field in CustomSeparated/Template format. Closes [#42352](https://github.com/ClickHouse/ClickHouse/issues/42352) Closes [#39620](https://github.com/ClickHouse/ClickHouse/issues/39620). [#43332](https://github.com/ClickHouse/ClickHouse/pull/43332) ([Kruglov Pavel](https://github.com/Avogar)). -* Unify query elapsed time measurements. [#43455](https://github.com/ClickHouse/ClickHouse/pull/43455) ([Raúl Marín](https://github.com/Algunenano)). -* Improve automatic usage of structure from insertion table in table functions file/hdfs/s3 when virtual columns are present in a select query, it fixes the possible error `Block structure mismatch` or `number of columns mismatch`. [#43695](https://github.com/ClickHouse/ClickHouse/pull/43695) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for signed arguments in the function `range`. Fixes [#43333](https://github.com/ClickHouse/ClickHouse/issues/43333). [#43733](https://github.com/ClickHouse/ClickHouse/pull/43733) ([sanyu](https://github.com/wineternity)). -* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43905](https://github.com/ClickHouse/ClickHouse/pull/43905) ([Igor Nikonov](https://github.com/devcrafter)). 
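The first entry of the Improvement list above (structure from the insertion table in `generateRandom`) allows, as a sketch with an illustrative table:

```sql
CREATE TABLE sample_events (id UInt64, name String, created DateTime)
ENGINE = MergeTree ORDER BY id;

-- generateRandom() without arguments picks up the structure of sample_events.
INSERT INTO sample_events SELECT * FROM generateRandom() LIMIT 10;
```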
-* Add the ability to disable deduplication of files for BACKUP (for backups without deduplication ATTACH can be used instead of full RESTORE). For example `BACKUP foo TO S3(...) SETTINGS deduplicate_files=0` (default `deduplicate_files=1`). [#43947](https://github.com/ClickHouse/ClickHouse/pull/43947) ([Azat Khuzhin](https://github.com/azat)). -* Refactor and improve schema inference for text formats. Add new setting `schema_inference_make_columns_nullable` that controls making result types `Nullable` (enabled by default);. [#44019](https://github.com/ClickHouse/ClickHouse/pull/44019) ([Kruglov Pavel](https://github.com/Avogar)). -* Better support for `PROXYv1` protocol. [#44135](https://github.com/ClickHouse/ClickHouse/pull/44135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add information about the latest part check by cleanup threads into `system.parts` table. [#44244](https://github.com/ClickHouse/ClickHouse/pull/44244) ([Dmitry Novik](https://github.com/novikd)). -* Disable table functions in readonly mode for inserts. [#44290](https://github.com/ClickHouse/ClickHouse/pull/44290) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add a setting `simultaneous_parts_removal_limit` to allow limiting the number of parts being processed by one iteration of CleanupThread. [#44461](https://github.com/ClickHouse/ClickHouse/pull/44461) ([Dmitry Novik](https://github.com/novikd)). -* Do not initialize ReadBufferFromS3 when only virtual columns are needed in a query. This may be helpful to [#44246](https://github.com/ClickHouse/ClickHouse/issues/44246). [#44493](https://github.com/ClickHouse/ClickHouse/pull/44493) ([chen](https://github.com/xiedeyantu)). -* Prevent duplicate column names hints. Closes [#44130](https://github.com/ClickHouse/ClickHouse/issues/44130). [#44519](https://github.com/ClickHouse/ClickHouse/pull/44519) ([Joanna Hulboj](https://github.com/jh0x)). -* Allow macro substitution in endpoint of disks. Resolve [#40951](https://github.com/ClickHouse/ClickHouse/issues/40951). [#44533](https://github.com/ClickHouse/ClickHouse/pull/44533) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Improve schema inference when `input_format_json_read_object_as_string` is enabled. [#44546](https://github.com/ClickHouse/ClickHouse/pull/44546) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a user-level setting `database_replicated_allow_replicated_engine_arguments` which allows banning the creation of `ReplicatedMergeTree` tables with arguments in `DatabaseReplicated`. [#44566](https://github.com/ClickHouse/ClickHouse/pull/44566) ([alesapin](https://github.com/alesapin)). -* Prevent users from mistakenly specifying zero (invalid) value for `index_granularity`. This closes [#44536](https://github.com/ClickHouse/ClickHouse/issues/44536). [#44578](https://github.com/ClickHouse/ClickHouse/pull/44578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added possibility to set path to service keytab file in `keytab` parameter in `kerberos` section of config.xml. [#44594](https://github.com/ClickHouse/ClickHouse/pull/44594) ([Roman Vasin](https://github.com/rvasin)). -* Use already written part of the query for fuzzy search (pass to the `skim` library, which is written in Rust and linked statically to ClickHouse). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). 
-* Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). -* Improvement for deduplication of async inserts: when users do duplicate async inserts, we should deduplicate inside the memory before we query Keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). -* Input/output `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). -* Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't greedily parse beyond the quotes when reading UUIDs - it may lead to mistakenly successful parsing of incorrect data. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). -* Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). -* Previously dependency resolving inside `Replicated` database was done in a hacky way, and now it's done right using an explicit graph. [#44697](https://github.com/ClickHouse/ClickHouse/pull/44697) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix `output_format_pretty_row_numbers` does not preserve the counter across the blocks. Closes [#44815](https://github.com/ClickHouse/ClickHouse/issues/44815). [#44832](https://github.com/ClickHouse/ClickHouse/pull/44832) ([flynn](https://github.com/ucasfl)). -* Don't report errors in `system.errors` due to parts being merged concurrently with the background cleanup process. [#44874](https://github.com/ClickHouse/ClickHouse/pull/44874) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize and fix metrics for Distributed async INSERT. [#44922](https://github.com/ClickHouse/ClickHouse/pull/44922) ([Azat Khuzhin](https://github.com/azat)). -* Added settings to disallow concurrent backups and restores resolves [#43891](https://github.com/ClickHouse/ClickHouse/issues/43891) Implementation: * Added server-level settings to disallow concurrent backups and restores, which are read and set when BackupWorker is created in Context. * Settings are set to true by default. * Before starting backup or restores, added a check to see if any other backups/restores are running. For internal requests, it checks if it is from the self node using backup_uuid. [#45072](https://github.com/ClickHouse/ClickHouse/pull/45072) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `` config parameter for system logs. [#45320](https://github.com/ClickHouse/ClickHouse/pull/45320) ([Stig Bakken](https://github.com/stigsb)). - -#### Build/Testing/Packaging Improvement -* Statically link with the `skim` library (it is written in Rust) for fuzzy search in clickhouse client/local history. [#44239](https://github.com/ClickHouse/ClickHouse/pull/44239) ([Azat Khuzhin](https://github.com/azat)). -* We removed support for shared linking because of Rust. Actually, Rust is only an excuse for this removal, and we wanted to remove it nevertheless. 
[#44828](https://github.com/ClickHouse/ClickHouse/pull/44828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `SQLite` library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). [#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* CRC-32 changes to address the WeakHash collision issue in PowerPC. [#45144](https://github.com/ClickHouse/ClickHouse/pull/45144) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). -* Update aws-c* submodules [#43020](https://github.com/ClickHouse/ClickHouse/pull/43020) ([Vitaly Baranov](https://github.com/vitlibar)). -* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Introduce a [website](https://aretestsgreenyet.com/) for the status of ClickHouse CI. [Source](https://github.com/ClickHouse/aretestsgreenyet). - -#### Bug Fix - -* Replace domain IP types (IPv4, IPv6) with native. [#43221](https://github.com/ClickHouse/ClickHouse/pull/43221) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). It automatically fixes some missing implementations in the code. -* Fix the backup process if mutations get killed during the backup process. [#45351](https://github.com/ClickHouse/ClickHouse/pull/45351) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix the `Invalid number of rows in Chunk` exception message. [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). -* Fix possible use of an uninitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). -* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using an obscure optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix CREATE USER/ROLE query settings constraints. [#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fixed bug with non-parsable default value for `EPHEMERAL` column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). -* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)). -* Remove limits on the maximum size of the result for view. 
[#44261](https://github.com/ClickHouse/ClickHouse/pull/44261) ([lizhuoyu5](https://github.com/lzydmxy)). -* Fix possible logical error in cache if `do_not_evict_index_and_mrk_files=1`. Closes [#42142](https://github.com/ClickHouse/ClickHouse/issues/42142). [#44268](https://github.com/ClickHouse/ClickHouse/pull/44268) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible too early cache write interruption in write-through cache (caching could be stopped due to false assumption when it shouldn't have). [#44289](https://github.com/ClickHouse/ClickHouse/pull/44289) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible crash in the case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix support for complex parameters (like arrays) of parametric aggregate functions. This closes [#30975](https://github.com/ClickHouse/ClickHouse/issues/30975). The aggregate function `sumMapFiltered` was unusable in distributed queries before this change. [#44358](https://github.com/ClickHouse/ClickHouse/pull/44358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix reading ObjectId in BSON schema inference. [#44382](https://github.com/ClickHouse/ClickHouse/pull/44382) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix race which can lead to premature temp parts removal before merge finishes in ReplicatedMergeTree. This issue could lead to errors like `No such file or directory: xxx`. Fixes [#43983](https://github.com/ClickHouse/ClickHouse/issues/43983). [#44383](https://github.com/ClickHouse/ClickHouse/pull/44383) ([alesapin](https://github.com/alesapin)). -* Some invalid `SYSTEM ... ON CLUSTER` queries worked in an unexpected way if a cluster name was not specified. It's fixed, now invalid queries throw `SYNTAX_ERROR` as they should. Fixes [#44264](https://github.com/ClickHouse/ClickHouse/issues/44264). [#44387](https://github.com/ClickHouse/ClickHouse/pull/44387) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix reading Map type in ORC format. [#44400](https://github.com/ClickHouse/ClickHouse/pull/44400) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading columns that are not presented in input data in Parquet/ORC formats. Previously it could lead to error `INCORRECT_NUMBER_OF_COLUMNS`. Closes [#44333](https://github.com/ClickHouse/ClickHouse/issues/44333). [#44405](https://github.com/ClickHouse/ClickHouse/pull/44405) ([Kruglov Pavel](https://github.com/Avogar)). -* Previously the `bar` function used the same '▋' (U+258B "Left five eighths block") character to display both 5/8 and 6/8 bars. This change corrects this behavior by using '▊' (U+258A "Left three quarters block") for displaying 6/8 bar. [#44410](https://github.com/ClickHouse/ClickHouse/pull/44410) ([Alexander Gololobov](https://github.com/davenger)). -* Placing profile settings after profile settings constraints in the configuration file made constraints ineffective. [#44411](https://github.com/ClickHouse/ClickHouse/pull/44411) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix `SYNTAX_ERROR` while running `EXPLAIN AST INSERT` queries with data. Closes [#44207](https://github.com/ClickHouse/ClickHouse/issues/44207). [#44413](https://github.com/ClickHouse/ClickHouse/pull/44413) ([save-my-heart](https://github.com/save-my-heart)). 
-* Fix reading bool value with CRLF in CSV format. Closes [#44401](https://github.com/ClickHouse/ClickHouse/issues/44401). [#44442](https://github.com/ClickHouse/ClickHouse/pull/44442) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't execute and/or/if/multiIf on a LowCardinality dictionary, so the result type cannot be LowCardinality. It could lead to the error `Illegal column ColumnLowCardinality` in some cases. Fixes [#43603](https://github.com/ClickHouse/ClickHouse/issues/43603). [#44469](https://github.com/ClickHouse/ClickHouse/pull/44469) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix mutations with the setting `max_streams_for_merge_tree_reading`. [#44472](https://github.com/ClickHouse/ClickHouse/pull/44472) ([Anton Popov](https://github.com/CurtizJ)). -* Fix potential null pointer dereference with GROUPING SETS in ASTSelectQuery::formatImpl ([#43049](https://github.com/ClickHouse/ClickHouse/issues/43049)). [#44479](https://github.com/ClickHouse/ClickHouse/pull/44479) ([Robert Schulze](https://github.com/rschu1ze)). -* Validate types in table function arguments, CAST function arguments, JSONAsObject schema inference according to settings. [#44501](https://github.com/ClickHouse/ClickHouse/pull/44501) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix IN function with LowCardinality and const column, close [#44503](https://github.com/ClickHouse/ClickHouse/issues/44503). [#44506](https://github.com/ClickHouse/ClickHouse/pull/44506) ([Duc Canh Le](https://github.com/canhld94)). -* Fixed a bug in the normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of the function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Projections do not work in presence of WITH ROLLUP, WITH CUBE and WITH TOTALS. In previous versions, a query produced an exception instead of skipping the usage of projections. This closes [#44614](https://github.com/ClickHouse/ClickHouse/issues/44614). This closes [#42772](https://github.com/ClickHouse/ClickHouse/issues/42772). [#44615](https://github.com/ClickHouse/ClickHouse/pull/44615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Async blocks were not cleaned because the function `get all blocks sorted by time` didn't get async blocks. [#44651](https://github.com/ClickHouse/ClickHouse/pull/44651) ([Han Fei](https://github.com/hanfei1991)). -* Fix `LOGICAL_ERROR` `The top step of the right pipeline should be ExpressionStep` for JOIN with subquery, UNION, and TOTALS. Fixes [#43687](https://github.com/ClickHouse/ClickHouse/issues/43687). [#44673](https://github.com/ClickHouse/ClickHouse/pull/44673) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Avoid `std::out_of_range` exception in the Executable table engine. [#44681](https://github.com/ClickHouse/ClickHouse/pull/44681) ([Kruglov Pavel](https://github.com/Avogar)). -* Do not apply `optimize_syntax_fuse_functions` to quantiles on AST, close [#44712](https://github.com/ClickHouse/ClickHouse/issues/44712). [#44713](https://github.com/ClickHouse/ClickHouse/pull/44713) ([Vladimir C](https://github.com/vdimir)). -* Fix bug with wrong type in Merge table and PREWHERE, close [#43324](https://github.com/ClickHouse/ClickHouse/issues/43324). 
[#44716](https://github.com/ClickHouse/ClickHouse/pull/44716) ([Vladimir C](https://github.com/vdimir)). -* Fix a possible crash during shutdown (while destroying TraceCollector). Fixes [#44757](https://github.com/ClickHouse/ClickHouse/issues/44757). [#44758](https://github.com/ClickHouse/ClickHouse/pull/44758) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a possible crash in distributed query processing. The crash could happen if a query with totals or extremes returned an empty result and there are mismatched types in the Distributed and the local tables. Fixes [#44738](https://github.com/ClickHouse/ClickHouse/issues/44738). [#44760](https://github.com/ClickHouse/ClickHouse/pull/44760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix fsync for fetches (`min_compressed_bytes_to_fsync_after_fetch`)/small files (ttl.txt, columns.txt) in mutations (`min_rows_to_fsync_after_merge`/`min_compressed_bytes_to_fsync_after_merge`). [#44781](https://github.com/ClickHouse/ClickHouse/pull/44781) ([Azat Khuzhin](https://github.com/azat)). -* A rare race condition was possible when querying the `system.parts` or `system.parts_columns` tables in the presence of parts being moved between disks. Introduced in [#41145](https://github.com/ClickHouse/ClickHouse/issues/41145). [#44809](https://github.com/ClickHouse/ClickHouse/pull/44809) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix the error `Context has expired` which could appear with enabled projections optimization. Can be reproduced for queries with specific functions, like `dictHas/dictGet` which use context in runtime. Fixes [#44844](https://github.com/ClickHouse/ClickHouse/issues/44844). [#44850](https://github.com/ClickHouse/ClickHouse/pull/44850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* A fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Ignore cases when hardware monitor sensors cannot be read instead of showing a full exception message in logs. [#44895](https://github.com/ClickHouse/ClickHouse/pull/44895) ([Raúl Marín](https://github.com/Algunenano)). -* Use `max_delay_to_insert` value in case the calculated time to delay INSERT exceeds the setting value. Related to [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44916](https://github.com/ClickHouse/ClickHouse/pull/44916) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix error `Different order of columns in UNION subquery` for queries with `UNION`. Fixes [#44866](https://github.com/ClickHouse/ClickHouse/issues/44866). [#44920](https://github.com/ClickHouse/ClickHouse/pull/44920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Delay for INSERT can be calculated incorrectly, which can lead to always using `max_delay_to_insert` setting as delay instead of a correct value. Using simple formula `max_delay_to_insert * (parts_over_threshold/max_allowed_parts_over_threshold)` i.e. delay grows proportionally to parts over threshold. Closes [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix alter table TTL error when a wide part has the lightweight delete mask. 
[#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/issues/43221). [#45024](https://github.com/ClickHouse/ClickHouse/pull/45024) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#45043](https://github.com/ClickHouse/ClickHouse/pull/45043) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* A buffer overflow was possible in the parser. Found by fuzzer. [#45047](https://github.com/ClickHouse/ClickHouse/pull/45047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix possible cannot-read-all-data error in storage FileLog. Closes [#45051](https://github.com/ClickHouse/ClickHouse/issues/45051), [#38257](https://github.com/ClickHouse/ClickHouse/issues/38257). [#45057](https://github.com/ClickHouse/ClickHouse/pull/45057) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Memory efficient aggregation (setting `distributed_aggregation_memory_efficient`) is disabled when grouping sets are present in the query. [#45058](https://github.com/ClickHouse/ClickHouse/pull/45058) ([Nikita Taranov](https://github.com/nickitat)). -* Fix `RANGE_HASHED` dictionary to count range columns as part of the primary key during updates when `update_field` is specified. Closes [#44588](https://github.com/ClickHouse/ClickHouse/issues/44588). [#45061](https://github.com/ClickHouse/ClickHouse/pull/45061) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix error `Cannot capture column` for `LowCardinality` captured argument of nested lambda. Fixes [#45028](https://github.com/ClickHouse/ClickHouse/issues/45028). [#45065](https://github.com/ClickHouse/ClickHouse/pull/45065) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix the wrong query result of `additional_table_filters` (additional filter was not applied) in case the minmax/count projection is used. [#45133](https://github.com/ClickHouse/ClickHouse/pull/45133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed bug in `histogram` function accepting negative values. [#45147](https://github.com/ClickHouse/ClickHouse/pull/45147) ([simpleton](https://github.com/rgzntrade)). -* Fix wrong column nullability in StoreageJoin, close [#44940](https://github.com/ClickHouse/ClickHouse/issues/44940). [#45184](https://github.com/ClickHouse/ClickHouse/pull/45184) ([Vladimir C](https://github.com/vdimir)). -* Fix `background_fetches_pool_size` settings reload (increase at runtime). [#45189](https://github.com/ClickHouse/ClickHouse/pull/45189) ([Raúl Marín](https://github.com/Algunenano)). -* Correctly process `SELECT` queries on KV engines (e.g. KeeperMap, EmbeddedRocksDB) using `IN` on the key with subquery producing different type. [#45215](https://github.com/ClickHouse/ClickHouse/pull/45215) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix logical error in SEMI JOIN & join_use_nulls in some cases, close [#45163](https://github.com/ClickHouse/ClickHouse/issues/45163), close [#45209](https://github.com/ClickHouse/ClickHouse/issues/45209). [#45230](https://github.com/ClickHouse/ClickHouse/pull/45230) ([Vladimir C](https://github.com/vdimir)). -* Fix heap-use-after-free in reading from s3. [#45253](https://github.com/ClickHouse/ClickHouse/pull/45253) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix bug when the Avro Union type is ['null', Nested type], closes [#45275](https://github.com/ClickHouse/ClickHouse/issues/45275). Fix bug that incorrectly infers `bytes` type to `Float`. [#45276](https://github.com/ClickHouse/ClickHouse/pull/45276) ([flynn](https://github.com/ucasfl)). -* Throw a correct exception when explicit PREWHERE cannot be used with a table using the storage engine `Merge`. [#45319](https://github.com/ClickHouse/ClickHouse/pull/45319) ([Antonio Andelic](https://github.com/antonio2368)). -* Under WSL1 Ubuntu self-extracting ClickHouse fails to decompress due to inconsistency - /proc/self/maps reporting 32bit file's inode, while stat reporting 64bit inode. [#45339](https://github.com/ClickHouse/ClickHouse/pull/45339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix race in Distributed table startup (that could lead to processing file of async INSERT multiple times). [#45360](https://github.com/ClickHouse/ClickHouse/pull/45360) ([Azat Khuzhin](https://github.com/azat)). -* Fix a possible crash while reading from storage `S3` and table function `s3` in the case when `ListObject` request has failed. [#45371](https://github.com/ClickHouse/ClickHouse/pull/45371) ([Anton Popov](https://github.com/CurtizJ)). -* Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in XML config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Fix s3Cluster schema inference when structure from insertion table is used in `INSERT INTO ... SELECT * FROM s3Cluster` queries. [#45422](https://github.com/ClickHouse/ClickHouse/pull/45422) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix bug in JSON/BSONEachRow parsing with HTTP that could lead to using default values for some columns instead of values from data. [#45424](https://github.com/ClickHouse/ClickHouse/pull/45424) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed bug (Code: 632. DB::Exception: Unexpected data ... after parsed IPv6 value ...) with typed parsing of IP types from text source. [#45425](https://github.com/ClickHouse/ClickHouse/pull/45425) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* close [#45297](https://github.com/ClickHouse/ClickHouse/issues/45297) Add check for empty regular expressions. [#45428](https://github.com/ClickHouse/ClickHouse/pull/45428) ([Han Fei](https://github.com/hanfei1991)). -* Fix possible (likely distributed) query hung. [#45448](https://github.com/ClickHouse/ClickHouse/pull/45448) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible deadlock with `allow_asynchronous_read_from_io_pool_for_merge_tree` enabled in case of exception from `ThreadPool::schedule`. [#45481](https://github.com/ClickHouse/ClickHouse/pull/45481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix possible in-use table after DETACH. [#45493](https://github.com/ClickHouse/ClickHouse/pull/45493) ([Azat Khuzhin](https://github.com/azat)). -* Fix rare abort in the case when a query is canceled and parallel parsing was used during its execution. [#45498](https://github.com/ClickHouse/ClickHouse/pull/45498) ([Anton Popov](https://github.com/CurtizJ)). -* Fix a race between Distributed table creation and INSERT into it (could lead to CANNOT_LINK during INSERT into the table). [#45502](https://github.com/ClickHouse/ClickHouse/pull/45502) ([Azat Khuzhin](https://github.com/azat)). -* Add proper default (SLRU) to cache policy getter. 
Closes [#45514](https://github.com/ClickHouse/ClickHouse/issues/45514). [#45524](https://github.com/ClickHouse/ClickHouse/pull/45524) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disallow array join in mutations closes [#42637](https://github.com/ClickHouse/ClickHouse/issues/42637) [#44447](https://github.com/ClickHouse/ClickHouse/pull/44447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix for qualified asterisks with alias table name and column transformer. Resolves [#44736](https://github.com/ClickHouse/ClickHouse/issues/44736). [#44755](https://github.com/ClickHouse/ClickHouse/pull/44755) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). - -## [Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022) +* Add join keys conversion for nested LowCardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). +* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). +* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). +* Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). +* Fix the expressions order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). +* A fix for the obsolete "zero-copy" replication feature: Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow users to work with symlinks in user_files_path [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a crash when graphite table does not have an agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). +* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). +* Fix Apache Hive threadpool reading for ORC/Parquet/... 
[#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). +* Hide credentials in `system.backup_log`'s `base_backup_name` column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Fix `toStartOfInterval` rounding for milli- and microsecond values [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Disable `max_joined_block_rows` in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). +* Fix join using nullable in the old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). +* `makeDateTime64`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). +* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a null pointer dereference in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* A fix for unexpected accumulation of memory usage while creating a huge number of tables by CREATE and DROP. [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). +* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when `indexHint` is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). +* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)).
+* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)). +* A fix for experimental inverted indices (don't use in production): `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). +* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). +* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). +* Fix a logical error about "not-ready set" for querying from `system.tables` with a subquery in the IN clause. [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ +## [Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023) diff --git a/SECURITY.md b/SECURITY.md index a200e172a3b5..79ca0269838b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 24.1 | ✔️ | | 23.12 | ✔️ | | 23.11 | ✔️ | -| 23.10 | ✔️ | +| 23.10 | ❌ | | 23.9 | ❌ | | 23.8 | ✔️ | | 23.7 | ❌ | diff --git a/base/base/sort.h b/base/base/sort.h index 1a8145877632..99bf8a0830e1 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -64,19 +64,14 @@ using ComparatorWrapper = Comparator; #include -template -void nth_element(RandomIt first, RandomIt nth, RandomIt last) +template +void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare) { - using value_type = typename std::iterator_traits::value_type; - using comparator = std::less; - - comparator compare; - ComparatorWrapper compare_wrapper = compare; - #ifndef NDEBUG ::shuffle(first, last); #endif + ComparatorWrapper compare_wrapper = compare; ::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper); #ifndef NDEBUG @@ -87,6 +82,15 @@ void nth_element(RandomIt first, RandomIt nth, RandomIt last) #endif } +template +void nth_element(RandomIt first, RandomIt nth, RandomIt last) +{ + using value_type = typename std::iterator_traits::value_type; + using comparator = std::less; + + ::nth_element(first, nth, last, comparator()); +} + template void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) { diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index ffe3766dfec7..cf2027186625 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -33,7 +33,8 @@ namespace Poco class Exception; - +class Logger; +using LoggerPtr = std::shared_ptr; class Foundation_API Logger : public Channel /// Logger is a special Channel that acts as the main @@ -870,6 +871,11 @@ class Foundation_API Logger : public Channel /// If the Logger does not yet exist, it is created, based /// on its parent logger. + static LoggerPtr getShared(const std::string & name); + /// Returns a shared pointer to the Logger with the given name. + /// If the Logger does not yet exist, it is created, based + /// on its parent logger. + static Logger & unsafeGet(const std::string & name); /// Returns a reference to the Logger with the given name. /// If the Logger does not yet exist, it is created, based @@ -885,6 +891,11 @@ class Foundation_API Logger : public Channel /// given name. The Logger's Channel and log level as set as /// specified. + static LoggerPtr createShared(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION); + /// Creates and returns a shared pointer to a Logger with the + /// given name. The Logger's Channel and log level as set as + /// specified. + static Logger & root(); /// Returns a reference to the root logger, which is the ultimate /// ancestor of all Loggers. @@ -893,13 +904,6 @@ class Foundation_API Logger : public Channel /// Returns a pointer to the Logger with the given name if it /// exists, or a null pointer otherwise. - static void destroy(const std::string & name); - /// Destroys the logger with the specified name. Does nothing - /// if the logger is not found. - /// - /// After a logger has been destroyed, all references to it - /// become invalid. 
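The Logger.h hunk above adds `LoggerPtr` (a `std::shared_ptr<Logger>`) together with `Logger::getShared` and `Logger::createShared`, and drops the old `Logger::destroy`. A minimal usage sketch, assuming the patched Poco vendored under `base/poco` from this diff; the channel setup and logger names are illustrative only, not part of the patch:

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>

int main()
{
    // Route all loggers to the console so the messages below are visible.
    Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel);
    Poco::Logger::root().setChannel(channel);

    // Classic API: a reference owned by the global logger registry.
    Poco::Logger & by_ref = Poco::Logger::get("app.main");
    by_ref.information("logger obtained by reference");

    // API added by this patch: a LoggerPtr whose custom deleter removes the
    // registry entry once no other owner of the logger remains.
    Poco::LoggerPtr by_ptr = Poco::Logger::getShared("app.worker");
    by_ptr->information("logger obtained as LoggerPtr");

    return 0;
}
```

The deleter that makes this lifetime handling safe is the `LoggerDeleter` added to `Logger.cpp` further down in this diff.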
- static void shutdown(); /// Shuts down the logging framework and releases all /// Loggers. @@ -929,8 +933,6 @@ class Foundation_API Logger : public Channel static const std::string ROOT; /// The name of the root logger (""). protected: - typedef std::map LoggerMap; - Logger(const std::string & name, Channel * pChannel, int level); ~Logger(); @@ -938,6 +940,7 @@ class Foundation_API Logger : public Channel void log(const std::string & text, Message::Priority prio, const char * file, int line); static std::string format(const std::string & fmt, int argc, std::string argv[]); + static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION); static Logger & parent(const std::string & name); static void add(Logger * pLogger); static Logger * find(const std::string & name); @@ -950,9 +953,6 @@ class Foundation_API Logger : public Channel std::string _name; Channel * _pChannel; std::atomic_int _level; - - static LoggerMap * _pLoggerMap; - static Mutex _mapMtx; }; diff --git a/base/poco/Foundation/include/Poco/RefCountedObject.h b/base/poco/Foundation/include/Poco/RefCountedObject.h index 4ad32e30cad9..db966089e006 100644 --- a/base/poco/Foundation/include/Poco/RefCountedObject.h +++ b/base/poco/Foundation/include/Poco/RefCountedObject.h @@ -38,15 +38,15 @@ class Foundation_API RefCountedObject /// Creates the RefCountedObject. /// The initial reference count is one. - void duplicate() const; - /// Increments the object's reference count. + size_t duplicate() const; + /// Increments the object's reference count, returns reference count before call. - void release() const throw(); + size_t release() const throw(); /// Decrements the object's reference count /// and deletes the object if the count - /// reaches zero. + /// reaches zero, returns reference count before call. - int referenceCount() const; + size_t referenceCount() const; /// Returns the reference count. protected: @@ -57,36 +57,40 @@ class Foundation_API RefCountedObject RefCountedObject(const RefCountedObject &); RefCountedObject & operator=(const RefCountedObject &); - mutable AtomicCounter _counter; + mutable std::atomic _counter; }; // // inlines // -inline int RefCountedObject::referenceCount() const +inline size_t RefCountedObject::referenceCount() const { - return _counter.value(); + return _counter.load(std::memory_order_acquire); } -inline void RefCountedObject::duplicate() const +inline size_t RefCountedObject::duplicate() const { - ++_counter; + return _counter.fetch_add(1, std::memory_order_acq_rel); } -inline void RefCountedObject::release() const throw() +inline size_t RefCountedObject::release() const throw() { + size_t reference_count_before = _counter.fetch_sub(1, std::memory_order_acq_rel); + try { - if (--_counter == 0) + if (reference_count_before == 1) delete this; } catch (...) 
{ poco_unexpected(); } + + return reference_count_before; } diff --git a/base/poco/Foundation/src/Logger.cpp b/base/poco/Foundation/src/Logger.cpp index 3d5de585b4f9..cfc063c89791 100644 --- a/base/poco/Foundation/src/Logger.cpp +++ b/base/poco/Foundation/src/Logger.cpp @@ -20,12 +20,38 @@ #include "Poco/NumberParser.h" #include "Poco/String.h" +#include +#include + +namespace +{ + +std::mutex & getLoggerMutex() +{ + auto get_logger_mutex_placeholder_memory = []() + { + static char buffer[sizeof(std::mutex)]{}; + return buffer; + }; + + static std::mutex * logger_mutex = new (get_logger_mutex_placeholder_memory()) std::mutex(); + return *logger_mutex; +} + +struct LoggerEntry +{ + Poco::Logger * logger; + bool owned_by_shared_ptr = false; +}; + +using LoggerMap = std::unordered_map; +LoggerMap * _pLoggerMap = nullptr; + +} namespace Poco { -Logger::LoggerMap* Logger::_pLoggerMap = 0; -Mutex Logger::_mapMtx; const std::string Logger::ROOT; @@ -73,7 +99,7 @@ void Logger::setProperty(const std::string& name, const std::string& value) setChannel(LoggingRegistry::defaultRegistry().channelForName(value)); else if (name == "level") setLevel(value); - else + else Channel::setProperty(name, value); } @@ -112,17 +138,17 @@ void Logger::dump(const std::string& msg, const void* buffer, std::size_t length void Logger::setLevel(const std::string& name, int level) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = name.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { - if (len == 0 || - (it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) + if (len == 0 || + (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setLevel(level); + it.second.logger->setLevel(level); } } } @@ -131,17 +157,17 @@ void Logger::setLevel(const std::string& name, int level) void Logger::setChannel(const std::string& name, Channel* pChannel) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = name.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { if (len == 0 || - (it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) + (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setChannel(pChannel); + it.second.logger->setChannel(pChannel); } } } @@ -150,17 +176,17 @@ void Logger::setChannel(const std::string& name, Channel* pChannel) void Logger::setProperty(const std::string& loggerName, const std::string& propertyName, const std::string& value) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = loggerName.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { if (len == 0 || - (it->first.compare(0, len, loggerName) == 0 && (it->first.length() == len || it->first[len] == '.'))) + (it.first.compare(0, len, loggerName) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setProperty(propertyName, value); + it.second.logger->setProperty(propertyName, value); } } } @@ -280,11 +306,88 @@ void Logger::formatDump(std::string& message, const void* buffer, std::size_t le 
} +namespace +{ + +struct LoggerDeleter +{ + void operator()(Poco::Logger * logger) + { + std::lock_guard lock(getLoggerMutex()); + + /// If logger infrastructure is destroyed just decrement logger reference count + if (!_pLoggerMap) + { + logger->release(); + return; + } + + auto it = _pLoggerMap->find(logger->name()); + assert(it != _pLoggerMap->end()); + + /** If reference count is 1, this means this shared pointer owns logger + * and need destroy it. + */ + size_t reference_count_before_release = logger->release(); + if (reference_count_before_release == 1) + { + assert(it->second.owned_by_shared_ptr); + _pLoggerMap->erase(it); + } + } +}; + + +inline LoggerPtr makeLoggerPtr(Logger & logger) +{ + return std::shared_ptr(&logger, LoggerDeleter()); +} + +} + + Logger& Logger::get(const std::string& name) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); + + Logger & logger = unsafeGet(name); + + /** If there are already shared pointer created for this logger + * we need to increment Logger reference count and now logger + * is owned by logger infrastructure. + */ + auto it = _pLoggerMap->find(name); + if (it->second.owned_by_shared_ptr) + { + it->second.logger->duplicate(); + it->second.owned_by_shared_ptr = false; + } + + return logger; +} + + +LoggerPtr Logger::getShared(const std::string & name) +{ + std::lock_guard lock(getLoggerMutex()); + bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name); + + Logger & logger = unsafeGet(name); + + /** If logger already exists, then this shared pointer does not own it. + * If logger does not exists, logger infrastructure could be already destroyed + * or logger was created. + */ + if (logger_exists) + { + logger.duplicate(); + } + else if (_pLoggerMap) + { + _pLoggerMap->find(name)->second.owned_by_shared_ptr = true; + } - return unsafeGet(name); + return makeLoggerPtr(logger); } @@ -310,18 +413,24 @@ Logger& Logger::unsafeGet(const std::string& name) Logger& Logger::create(const std::string& name, Channel* pChannel, int level) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); - if (find(name)) throw ExistsException(); - Logger* pLogger = new Logger(name, pChannel, level); - add(pLogger); - return *pLogger; + return unsafeCreate(name, pChannel, level); } +LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level) +{ + std::lock_guard lock(getLoggerMutex()); + + Logger & logger = unsafeCreate(name, pChannel, level); + _pLoggerMap->find(name)->second.owned_by_shared_ptr = true; + + return makeLoggerPtr(logger); +} Logger& Logger::root() { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); return unsafeGet(ROOT); } @@ -329,7 +438,7 @@ Logger& Logger::root() Logger* Logger::has(const std::string& name) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); return find(name); } @@ -337,14 +446,18 @@ Logger* Logger::has(const std::string& name) void Logger::shutdown() { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { - it->second->release(); + if (it.second.owned_by_shared_ptr) + continue; + + it.second.logger->release(); } + delete _pLoggerMap; _pLoggerMap = 0; } @@ -357,31 +470,15 @@ Logger* Logger::find(const std::string& name) { LoggerMap::iterator it = _pLoggerMap->find(name); if (it != _pLoggerMap->end()) - return it->second; + return 
it->second.logger; } return 0; } -void Logger::destroy(const std::string& name) -{ - Mutex::ScopedLock lock(_mapMtx); - - if (_pLoggerMap) - { - LoggerMap::iterator it = _pLoggerMap->find(name); - if (it != _pLoggerMap->end()) - { - it->second->release(); - _pLoggerMap->erase(it); - } - } -} - - void Logger::names(std::vector& names) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); names.clear(); if (_pLoggerMap) @@ -393,6 +490,14 @@ void Logger::names(std::vector& names) } } +Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level) +{ + if (find(name)) throw ExistsException(); + Logger* pLogger = new Logger(name, pChannel, level); + add(pLogger); + + return *pLogger; +} Logger& Logger::parent(const std::string& name) { @@ -478,7 +583,8 @@ void Logger::add(Logger* pLogger) { if (!_pLoggerMap) _pLoggerMap = new LoggerMap; - _pLoggerMap->insert(LoggerMap::value_type(pLogger->name(), pLogger)); + + _pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/}); } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index e5a8c0648089..885080a3e384 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54482) +SET(VERSION_REVISION 54483) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 1) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8) -SET(VERSION_DESCRIBE v24.1.1.1-testing) -SET(VERSION_STRING 24.1.1.1) +SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17) +SET(VERSION_DESCRIBE v24.2.1.1-testing) +SET(VERSION_STRING 24.2.1.1) # end of autochange diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3882b51227e0..bc4a029721df 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -79,7 +79,10 @@ if (SANITIZE_COVERAGE) # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party. 
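For reference, the contract the `RefCountedObject` and `Logger.cpp` hunks above rely on: `duplicate()` and `release()` now return the reference count as it was before the call, which is how `LoggerDeleter` detects that it just dropped the last reference and should erase the logger from the map. A self-contained mock of that contract (not the Poco class itself, just the counting behavior):

```cpp
#include <atomic>
#include <cassert>
#include <cstddef>

struct RefCountedMock
{
    mutable std::atomic<std::size_t> counter{1};

    // Both calls report the value the counter had *before* the update,
    // mirroring the new return values in Poco::RefCountedObject.
    std::size_t duplicate() const { return counter.fetch_add(1, std::memory_order_acq_rel); }
    std::size_t release() const { return counter.fetch_sub(1, std::memory_order_acq_rel); }
};

int main()
{
    RefCountedMock object;
    assert(object.duplicate() == 1);  // count was 1 before the increment
    assert(object.release() == 2);    // count was 2 before the decrement
    assert(object.release() == 1);    // last reference gone; a real deleter would free the object here
    return 0;
}
```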
set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table") -endif() -set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") -set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) + set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") + set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) +else() + set (WITHOUT_COVERAGE_FLAGS "") + set (WITHOUT_COVERAGE_FLAGS_LIST "") +endif() diff --git a/contrib/aws b/contrib/aws index ca02358dcc7c..4ec215f3607c 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3 +Subproject commit 4ec215f3607c2111bf2cc91ba842046a6b5eb0c4 diff --git a/contrib/aws-c-auth b/contrib/aws-c-auth index 97133a2b5dbc..baeffa791d9d 160000 --- a/contrib/aws-c-auth +++ b/contrib/aws-c-auth @@ -1 +1 @@ -Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12 +Subproject commit baeffa791d9d1cf61460662a6d9ac2186aaf05df diff --git a/contrib/aws-c-cal b/contrib/aws-c-cal index 85dd7664b786..9453687ff549 160000 --- a/contrib/aws-c-cal +++ b/contrib/aws-c-cal @@ -1 +1 @@ -Subproject commit 85dd7664b786a389c6fb1a6f031ab4bb2282133d +Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77 diff --git a/contrib/aws-c-common b/contrib/aws-c-common index 45dcb2849c89..80f21b3cac5a 160000 --- a/contrib/aws-c-common +++ b/contrib/aws-c-common @@ -1 +1 @@ -Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff +Subproject commit 80f21b3cac5ac51c6b8a62c7d2a5ef58a75195ee diff --git a/contrib/aws-c-compression b/contrib/aws-c-compression index b517b7decd0d..99ec79ee2970 160000 --- a/contrib/aws-c-compression +++ b/contrib/aws-c-compression @@ -1 +1 @@ -Subproject commit b517b7decd0dac30be2162f5186c250221c53aff +Subproject commit 99ec79ee2970f1a045d4ced1501b97ee521f2f85 diff --git a/contrib/aws-c-event-stream b/contrib/aws-c-event-stream index 2f9b60c42f90..08f24e384e5b 160000 --- a/contrib/aws-c-event-stream +++ b/contrib/aws-c-event-stream @@ -1 +1 @@ -Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d +Subproject commit 08f24e384e5be20bcffa42b49213d24dad7881ae diff --git a/contrib/aws-c-http b/contrib/aws-c-http index dd3446198794..a082f8a2067e 160000 --- a/contrib/aws-c-http +++ b/contrib/aws-c-http @@ -1 +1 @@ -Subproject commit dd34461987947672444d0bc872c5a733dfdb9711 +Subproject commit a082f8a2067e4a31db73f1d4ffd702a8dc0f7089 diff --git a/contrib/aws-c-io b/contrib/aws-c-io index d58ed4f272b1..11ce3c750a1d 160000 --- a/contrib/aws-c-io +++ b/contrib/aws-c-io @@ -1 +1 @@ -Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89 +Subproject commit 11ce3c750a1dac7b04069fc5bff89e97e91bad4d diff --git a/contrib/aws-c-mqtt b/contrib/aws-c-mqtt index 33c3455cec82..6d36cd372623 160000 --- a/contrib/aws-c-mqtt +++ b/contrib/aws-c-mqtt @@ -1 +1 @@ -Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194 +Subproject commit 6d36cd3726233cb757468d0ea26f6cd8dad151ec diff --git a/contrib/aws-c-s3 b/contrib/aws-c-s3 index d7bfe602d692..de36fee8fe7a 160000 --- a/contrib/aws-c-s3 +++ b/contrib/aws-c-s3 @@ -1 +1 @@ -Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900 +Subproject commit de36fee8fe7ab02f10987877ae94a805bf440c1f diff --git a/contrib/aws-c-sdkutils b/contrib/aws-c-sdkutils index 208a701fa01e..fd8c0ba2e233 
160000 --- a/contrib/aws-c-sdkutils +++ b/contrib/aws-c-sdkutils @@ -1 +1 @@ -Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972 +Subproject commit fd8c0ba2e233997eaaefe82fb818b8b444b956d3 diff --git a/contrib/aws-checksums b/contrib/aws-checksums index ad53be196a25..321b805559c8 160000 --- a/contrib/aws-checksums +++ b/contrib/aws-checksums @@ -1 +1 @@ -Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0 +Subproject commit 321b805559c8e911be5bddba13fcbd222a3e2d3a diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index 950a0e06cd0e..abde20addafc 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -25,6 +25,7 @@ include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsFeatureTests.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadAffinity.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadName.cmake") include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsSIMD.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/aws-crt-cpp/cmake/AwsGetVersion.cmake") # Gather sources and options. @@ -35,6 +36,8 @@ set(AWS_PUBLIC_COMPILE_DEFS) set(AWS_PRIVATE_COMPILE_DEFS) set(AWS_PRIVATE_LIBS) +list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DINTEL_NO_ITTNOTIFY_API") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DDEBUG_BUILD") endif() @@ -85,14 +88,20 @@ file(GLOB AWS_SDK_CORE_SRC "${AWS_SDK_CORE_DIR}/source/external/cjson/*.cpp" "${AWS_SDK_CORE_DIR}/source/external/tinyxml2/*.cpp" "${AWS_SDK_CORE_DIR}/source/http/*.cpp" + "${AWS_SDK_CORE_DIR}/source/http/crt/*.cpp" "${AWS_SDK_CORE_DIR}/source/http/standard/*.cpp" "${AWS_SDK_CORE_DIR}/source/internal/*.cpp" "${AWS_SDK_CORE_DIR}/source/monitoring/*.cpp" + "${AWS_SDK_CORE_DIR}/source/net/*.cpp" + "${AWS_SDK_CORE_DIR}/source/net/linux-shared/*.cpp" + "${AWS_SDK_CORE_DIR}/source/platform/linux-shared/*.cpp" + "${AWS_SDK_CORE_DIR}/source/smithy/tracing/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/base64/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/component-registry/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/crypto/*.cpp" - "${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/crypto/factory/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/event/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/json/*.cpp" "${AWS_SDK_CORE_DIR}/source/utils/logging/*.cpp" @@ -115,9 +124,8 @@ OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF) configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1") -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10") -list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36") +aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) @@ -176,6 +184,7 @@ file(GLOB AWS_COMMON_SRC "${AWS_COMMON_DIR}/source/*.c" "${AWS_COMMON_DIR}/source/external/*.c" "${AWS_COMMON_DIR}/source/posix/*.c" + "${AWS_COMMON_DIR}/source/linux/*.c" ) file(GLOB AWS_COMMON_ARCH_SRC diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index 8a301b7e842f..f532d6abc0d2 160000 --- 
a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit 8a301b7e842f1daed478090c869207300972379f +Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 diff --git a/contrib/aws-s2n-tls b/contrib/aws-s2n-tls index 71f4794b7580..9a1e75454023 160000 --- a/contrib/aws-s2n-tls +++ b/contrib/aws-s2n-tls @@ -1 +1 @@ -Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4 +Subproject commit 9a1e75454023e952b366ce1eab9c54007250119f diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 8adc2c0b23a3..4f60304d74d4 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -1,8 +1,5 @@ if (NOT ENABLE_LIBRARIES) set(DEFAULT_ENABLE_RUST FALSE) -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) - message(STATUS "Rust is not available on aarch64-apple-darwin") - set(DEFAULT_ENABLE_RUST FALSE) else() list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") find_package(Rust) @@ -19,27 +16,30 @@ message(STATUS "Checking Rust toolchain for current target") # See https://doc.rust-lang.org/nightly/rustc/platform-support.html -if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) - set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") - set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) - set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") - set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) - set(Rust_CARGO_TARGET "x86_64-apple-darwin") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) - set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") - set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") -endif() - -if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") - set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") -endif() - -message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") +if(DEFINED CMAKE_TOOLCHAIN_FILE) + if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") + set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") + set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") + set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-apple-darwin") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) + set(Rust_CARGO_TARGET "aarch64-apple-darwin") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") + 
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") + else() + message(FATAL_ERROR "Unsupported rust target") + endif() + message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") +endif () # FindRust.cmake list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") diff --git a/contrib/curl b/contrib/curl index d755a5f7c009..7161cb17c01d 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit d755a5f7c009dd63a61b2c745180d8ba937cbfeb +Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index 7a3816d4dce7..eee3df832fa4 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ b/contrib/libssh-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -option (ENABLE_SSH "Enable support for SSH keys and protocol" ON) +option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES}) if (NOT ENABLE_SSH) message(STATUS "Not using SSH") diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index d09060912d84..76e620314a2d 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,5 +1,6 @@ -if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory") - # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. +if (APPLE OR SANITIZE STREQUAL "memory") + # llvm-tblgen, that is used during LLVM build, will throw MSAN errors when running (breaking the build) + # TODO: Retest when upgrading LLVM or build only llvm-tblgen without sanitizers set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) set (ENABLE_DWARF_PARSER_DEFAULT OFF) else() diff --git a/contrib/simdjson b/contrib/simdjson index 1075e8609c4a..6060be2fdf62 160000 --- a/contrib/simdjson +++ b/contrib/simdjson @@ -1 +1 @@ -Subproject commit 1075e8609c4afa253162d441437af929c29e31bb +Subproject commit 6060be2fdf62edf4a8f51a8b0883d57d09397b30 diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh index 7195de020bd7..072d7a5dc2f9 100755 --- a/contrib/update-submodules.sh +++ b/contrib/update-submodules.sh @@ -24,7 +24,7 @@ git config --file .gitmodules --get-regexp '.*path' | sed 's/[^ ]* //' | xargs - # We don't want to depend on any third-party CMake files. # To check it, find and delete them. 
grep -o -P '"contrib/[^"]+"' .gitmodules | - grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion)' | + grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion|aws-crt-cpp)' | xargs -I@ find @ \ -'(' -name 'CMakeLists.txt' -or -name '*.cmake' -')' -and -not -name '*.h.cmake' \ -delete diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 4b5e8cd3970a..fe33bf9e0eaf 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.2.5" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 452d8539a487..f0adadd2d591 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.2.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 0cefa3c14cb9..bc15c99a0346 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.2.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a38f59dacac4..56ec01998496 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -22,7 +22,7 @@ RUN apt-get update \ zstd \ --yes --no-install-recommends -RUN pip3 install numpy scipy pandas Jinja2 +RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 5af050344151..d78c52f1fe69 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -211,6 +211,17 @@ function build echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \ | ts '%Y-%m-%d %H:%M:%S' \ | tee "$FASTTEST_OUTPUT/test_result.txt" + + ( + # This query should fail, and print stacktrace with proper symbol names (even on a stripped binary) + clickhouse_output=$(programs/clickhouse-stripped --stacktrace -q 'select' 2>&1 || :) + if [[ $clickhouse_output =~ DB::LocalServer::main ]]; then + echo "stripped_clickhouse_shows_symbols_names: [ OK ] 0 sec." + else + echo -e "stripped_clickhouse_shows_symbols_names: [ FAIL ] 0 sec. 
- clickhouse output:\n\n$clickhouse_output\n" + fi + ) | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_result.txt" + if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then mkdir -p "$FASTTEST_OUTPUT/binaries/" cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse" diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index b05aef76faf8..8882daa38ea3 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -23,13 +23,15 @@ if [ -f /sys/fs/cgroup/cgroup.controllers ]; then > /sys/fs/cgroup/cgroup.subtree_control fi -# In case of test hung it is convenient to use pytest --pdb to debug it, -# and on hung you can simply press Ctrl-C and it will spawn a python pdb, -# but on SIGINT dockerd will exit, so ignore it to preserve the daemon. -trap '' INT # Binding to an IP address without --tlsverify is deprecated. Startup is intentionally being slowed # unless --tls=false or --tlsverify=false is set -dockerd --host=unix:///var/run/docker.sock --tls=false --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & +# +# In case of test hung it is convenient to use pytest --pdb to debug it, +# and on hung you can simply press Ctrl-C and it will spawn a python pdb, +# but on SIGINT dockerd will exit, so we spawn new session to ignore SIGINT by +# docker. +# Note, that if you will run it via runner, it will send SIGINT to docker anyway. +setsid dockerd --host=unix:///var/run/docker.sock --tls=false --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & set +e reties=0 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 638a2408748b..ea76447aef27 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -246,16 +246,19 @@ clickhouse-client -q "system flush logs" ||: stop_logs_replication # Try to get logs while server is running -successfuly_saved=0 -for table in query_log zookeeper_log trace_log transactions_info_log +failed_to_save_logs=0 +for table in query_log zookeeper_log trace_log transactions_info_log metric_log do - clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst - successfuly_saved=$? 
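The dockerd-entrypoint change above swaps the old `trap '' INT` for `setsid`, so dockerd runs in its own session and never receives the SIGINT that Ctrl-C sends to the foreground process group during `pytest --pdb` debugging. A small self-contained sketch of the same effect (`sleep` stands in for dockerd):

```bash
# Sketch only: a process launched via setsid gets a new session and process
# group, so signals addressed to our own process group never reach it.
trap 'echo "script received SIGINT and keeps running"' INT
setsid sleep 600 >/tmp/fake-daemon.log 2>&1 &   # stand-in for dockerd
daemon_pid=$!
kill -INT 0                                     # SIGINT to the current process group
kill -0 "$daemon_pid" && echo "daemon survived SIGINT"
kill "$daemon_pid"                              # clean up
```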
+ err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst; } 2>&1 ) + echo "$err" + [[ "0" != "${#err}" ]] && failed_to_save_logs=1 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst - successfuly_saved=$((successfuly_saved | $?)) - clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst - successfuly_saved=$((successfuly_saved | $?)) + err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) + echo "$err" + [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) + echo "$err" + [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi done @@ -280,7 +283,7 @@ fi # If server crashed dump system logs with clickhouse-local -if [ $successfuly_saved -ne 0 ]; then +if [ $failed_to_save_logs -ne 0 ]; then # Compress tables. # # NOTE: @@ -288,12 +291,12 @@ if [ $successfuly_saved -ne 0 ]; then # directly # - even though ci auto-compress some files (but not *.tsv) it does this only # for files >64MB, we want this files to be compressed explicitly - for table in query_log zookeeper_log trace_log transactions_info_log + for table in query_log zookeeper_log trace_log transactions_info_log metric_log do - clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: - clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi done fi diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 6f0dabb52071..6e1834d6cded 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -78,6 +78,8 @@ function configure() randomize_config_boolean_value use_compression zookeeper fi + randomize_config_boolean_value allow_experimental_block_number_column block_number + # for clickhouse-server (via service) echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment # for clickhouse-client diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 
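The rewritten log-saving loop above stops relying on the pipeline's exit status (which, without `pipefail`, only reflects the final `zstd`) and instead captures the pipeline's stderr, treating any stderr output as a failed save. The shape of that capture, as a standalone hedged sketch (`seq` stands in for `clickhouse-client`):

```bash
# Sketch only: stdout still flows through the pipe into the compressed file,
# while stderr of both commands is captured into $err for inspection.
failed_to_save_logs=0
err=$( { seq 1000000 | zstd --threads=0 > /tmp/table.tsv.zst; } 2>&1 )
echo "$err"
[[ -n "$err" ]] && failed_to_save_logs=1
echo "failed_to_save_logs=$failed_to_save_logs"
```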
9c0082093160..aaba5cc6a8c7 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -122,6 +122,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/config.d/storage_conf_02963.xml +rm /etc/clickhouse-server/config.d/block_number.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml diff --git a/docs/changelogs/v23.11.5.29-stable.md b/docs/changelogs/v23.11.5.29-stable.md new file mode 100644 index 000000000000..f73a21c20951 --- /dev/null +++ b/docs/changelogs/v23.11.5.29-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.11.5.29-stable (d83b108deca) FIXME as compared to v23.11.4.24-stable (e79d840d7fe) + +#### Improvement +* Backported in [#58815](https://github.com/ClickHouse/ClickHouse/issues/58815): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#59234](https://github.com/ClickHouse/ClickHouse/issues/59234): Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). 
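A quick usage sketch for the backported jemalloc controls listed in the Improvement entries above; the host, the default Keeper client port 9181, and the use of `nc` for four-letter-word commands are assumptions, and `jmst` may additionally need to be allowed in `four_letter_word_white_list`:

```bash
# Hedged sketch; SYSTEM JEMALLOC PURGE and the jmst 4LW command come from the
# changelog entry above, everything else here is an assumption.
clickhouse-client -q "SYSTEM JEMALLOC PURGE"   # release unused jemalloc pages
echo jmst | nc localhost 9181                  # dump jemalloc stats from Keeper
```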
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* refine error message [#57991](https://github.com/ClickHouse/ClickHouse/pull/57991) ([Han Fei](https://github.com/hanfei1991)). +* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)). +* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.12.3.40-stable.md b/docs/changelogs/v23.12.3.40-stable.md new file mode 100644 index 000000000000..e2a9e3af4076 --- /dev/null +++ b/docs/changelogs/v23.12.3.40-stable.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.12.3.40-stable (a594704ae75) FIXME as compared to v23.12.2.59-stable (17ab210e761) + +#### Improvement +* Backported in [#58660](https://github.com/ClickHouse/ClickHouse/issues/58660): When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Backported in [#58817](https://github.com/ClickHouse/ClickHouse/issues/58817): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#59235](https://github.com/ClickHouse/ClickHouse/issues/59235): Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). 
+* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Change log level for super imporant message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)). +* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)). +* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v24.1.1.2048-stable.md b/docs/changelogs/v24.1.1.2048-stable.md new file mode 100644 index 000000000000..8e4647da86e7 --- /dev/null +++ b/docs/changelogs/v24.1.1.2048-stable.md @@ -0,0 +1,438 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.1.2048-stable (5a024dfc093) FIXME as compared to v23.12.1.1368-stable (a2faa65b080) + +#### Backward Incompatible Change +* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix sumMapFiltered with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)). +* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Kusto dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/ split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)). +* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)). +* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)). +* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). +* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). 
+* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)). +* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptograpy. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Inte's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)). +* Implementing the new way how object storage keys are generated for s3 disks. Now the format could be defined in terms of `re2` regex syntax with `key_template` option in disc description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)). +* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). +* Add settings `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)). +* `clickhouse-format` improvements: * support INSERT queries with `VALUES` * support comments (use `--comments` to output them) * support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)). +* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)). +* Added FROM modifier for SYSTEM SYNC REPLICA LIGHTWEIGHT query. The FROM modifier ensures we wait for for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)). +* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). 
+* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). +* Added string similarity functions `dramerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). +* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)). +* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)). +* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads=1`) produced sorted output without explicitly provided `ORDER BY` clause. This behaviour no longer exists when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)). +* Optimize array element function when input is array(map)/array(array(num)/array(array(string))/array(bigint)/array(decimal). Current implementation causes too many reallocs. The optimization speed up by ~6x especially when input type is array(map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)). +* Bypass `Poco::BasicBufferedStreamBuf` abstraction when reading from S3 (namely `ReadBufferFromIStream`) to avoid extra copying of data. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)). +* Read column once while reading more that one subcolumn from it in Compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite the AST of sum(column + literal) function. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)). +* The evaluation of function `match()` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)). +* Default coordinator for parallel replicas is rewritten for better cache locality (same mark ranges are almost always assigned to the same replicas). 
Consistent hashing is used also during work stealing, so better tail latency is expected. It has been tested for linear scalability on a hundred of replicas. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)). +* MergeTree FINAL to not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)). +* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)). +* The evaluation of function `match()` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)). +* Speedup MIN/MAX for non numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)). +* Enable JIT compilation for aggregation without a key. Closes [#41461](https://github.com/ClickHouse/ClickHouse/issues/41461). Originally [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757). [#58440](https://github.com/ClickHouse/ClickHouse/pull/58440) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The performance experiments of **OnTime** on the Intel server with up to AVX2 (and BMI2) support show that this change could effectively improve the QPS of **Q2** and **Q3** by **5.0%** and **3.7%** through reducing the cycle ratio of the hotspot, **_DB::MergeTreeRangeReader::ReadResult::optimize_**, **from 11.48% to 1.09%** and **from 8.09% to 0.67%** respectively while having no impact on others. [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Large aggregation states of `uniqExact` will be merged in parallel in distrubuted queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)). +* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)). +* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Enable MySQL/MariaDB on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not interpret numbers with leading zeroes as octals. [#55575](https://github.com/ClickHouse/ClickHouse/pull/55575) ([Joanna Hulboj](https://github.com/jh0x)). +* Replace HTTP outgoing buffering based on std ostream with CH Buffer. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)). +* Add `` config parameter that allows avoiding resolving hostnames in DDLWorker. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). 
[#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)). +* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)). +* ClusterDiscovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)). +* Support query parameters in ALTER TABLE ... PART. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)). +* Create consumers for Kafka tables on fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used), this should fix problem with statistics for `system.kafka_consumers` (that does not consumed when nobody reads from Kafka table, which leads to live memory leak and slow table detach) and also this PR enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)). +* Sparkbar as an alias to sparkbar. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)). +* Avoid sending ComposeObject requests after upload to GCS. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)). +* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)). +* Added comments (brief descriptions) to all columns of system tables. The are several reasons fro this: - We use system tables a lot and sometimes is could be very difficult for developer to understand the purpose and the meaning of a particular column. - We change (add new ones or modify existing) system tables a lot and the documentation for them is always outdated. For example take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns - We would like to eventually generate documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). 
[#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow registering database engines independently. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)). +* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)). +* Provide hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)). +* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)). +* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)). +* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575) Support `CLICKHOUSE_PASSWORD_FILE ` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)). +* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Better message for INVALID_IDENTIFIER error. 
[#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)). +* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)). +* Improve S3 compatible, add Ecloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)). +* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avro format support Zstd codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)). +* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fixing a problem described in [#58719](https://github.com/ClickHouse/ClickHouse/issues/58719). [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Make sure that for custom (created from SQL) disks ether `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise exception will be thrown preventing disk to be created. This will not affect disks created on an older version and server was upgraded - then the exception will not be thrown to allow the server to successfully start). `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)). +* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. 
[#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)). +* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)). +* Function `seriesDecomposeSTL()` now returns a baseline component as season + trend components. [#58961](https://github.com/ClickHouse/ClickHouse/pull/58961) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Fix memory management in copyDataToS3File. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)). +* Change working directory to data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)). +* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries` , can also be set from config under azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes https://github.com/ClickHouse/ClickHouse/pull/59120#issuecomment-1906177350. [#59122](https://github.com/ClickHouse/ClickHouse/pull/59122) ([Arthur Passos](https://github.com/arthurpassos)). +* The state of URL's #hash in the dashboard is now compressed using [lz-string](https://github.com/pieroxy/lz-string). The default size of the state string is compressed from 6856B to 2823B. [#59124](https://github.com/ClickHouse/ClickHouse/pull/59124) ([Amos Bird](https://github.com/amosbird)). +* Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). +* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (so as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)). 
+* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#59293](https://github.com/ClickHouse/ClickHouse/pull/59293) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fix bug in experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). CC @SmitaRKulkarni. [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)). +* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve exception message of function regexp_extract, close [#56393](https://github.com/ClickHouse/ClickHouse/issues/56393). [#59319](https://github.com/ClickHouse/ClickHouse/pull/59319) ([李扬](https://github.com/taiyang-li)). +* Support the FORMAT clause in BACKUP and RESTORE queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function `concatWithSeparator()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build/Testing/Packaging Improvement +* Improve aliases for clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depends on the arguments) and add bash completion for new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)). +* Add settings changes check to CI to check that all settings changes are reflected in settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)). +* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. Example:. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)). +* Enable Rust on OSX ARM64 (this will add fuzzy search in client with skim and prql language, though I don't think that are people who hosts ClickHouse on darwin, so it is mostly for fuzzy search in client I would say). 
[#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). +* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). +* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). +* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). +* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). +* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). +* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). +* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). +* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). +* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* toStartOfInterval for milli- microsencods values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). 
+* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). +* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). +* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). +* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)). +* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). 
+* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). +* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). +* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix an error in the release script - it didn't allow to make 23.12."'. [#58381](https://github.com/ClickHouse/ClickHouse/pull/58381) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Use CH Buffer for HTTP out stream, add metrics for interfaces"'. [#58450](https://github.com/ClickHouse/ClickHouse/pull/58450) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Second attempt: Use CH Buffer for HTTP out stream, add metrics for interfaces'. [#58475](https://github.com/ClickHouse/ClickHouse/pull/58475) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* NO CL ENTRY: 'Revert "Merging [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757)"'. [#58542](https://github.com/ClickHouse/ClickHouse/pull/58542) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add support for MySQL `net_write_timeout` and `net_read_timeout` settings"'. [#58872](https://github.com/ClickHouse/ClickHouse/pull/58872) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Extend performance test norm_dist.xml"'. [#58989](https://github.com/ClickHouse/ClickHouse/pull/58989) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"'. [#58990](https://github.com/ClickHouse/ClickHouse/pull/58990) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Allow parallel replicas for JOIN with analyzer [part 1]."'. [#59059](https://github.com/ClickHouse/ClickHouse/pull/59059) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Consume leading zeroes when parsing a number in ConstantExpressionTemplate"'. 
[#59070](https://github.com/ClickHouse/ClickHouse/pull/59070) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "Allow parallel replicas for JOIN with analyzer [part 1].""'. [#59076](https://github.com/ClickHouse/ClickHouse/pull/59076) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition"'. [#59120](https://github.com/ClickHouse/ClickHouse/pull/59120) ([Robert Schulze](https://github.com/rschu1ze)). +* NO CL ENTRY: 'DisksApp.cpp: fix typo (specifiged → specified)'. [#59140](https://github.com/ClickHouse/ClickHouse/pull/59140) ([Nikolay Edigaryev](https://github.com/edigaryev)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Analyzer: Fix resolving subcolumns in JOIN [#49703](https://github.com/ClickHouse/ClickHouse/pull/49703) ([vdimir](https://github.com/vdimir)). +* Analyzer: always qualify execution names [#53705](https://github.com/ClickHouse/ClickHouse/pull/53705) ([Dmitry Novik](https://github.com/novikd)). +* Insert quorum: check host node version in addition [#55528](https://github.com/ClickHouse/ClickHouse/pull/55528) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove more old code of projection analysis [#55579](https://github.com/ClickHouse/ClickHouse/pull/55579) ([Anton Popov](https://github.com/CurtizJ)). +* Better exception messages in input formats [#57053](https://github.com/ClickHouse/ClickHouse/pull/57053) ([Kruglov Pavel](https://github.com/Avogar)). +* Parallel replicas custom key: skip unavailable replicas [#57235](https://github.com/ClickHouse/ClickHouse/pull/57235) ([Igor Nikonov](https://github.com/devcrafter)). +* Small change in log message in MergeTreeDataMergerMutator [#57550](https://github.com/ClickHouse/ClickHouse/pull/57550) ([Nikita Taranov](https://github.com/nickitat)). +* fs cache: small optimization [#57615](https://github.com/ClickHouse/ClickHouse/pull/57615) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Customizable dependency failure handling for AsyncLoader [#57697](https://github.com/ClickHouse/ClickHouse/pull/57697) ([Sergei Trifonov](https://github.com/serxa)). +* Bring test back [#57700](https://github.com/ClickHouse/ClickHouse/pull/57700) ([Nikita Taranov](https://github.com/nickitat)). +* Change default database name in clickhouse-local to 'default' [#57774](https://github.com/ClickHouse/ClickHouse/pull/57774) ([Kruglov Pavel](https://github.com/Avogar)). +* Add option `--show-whitespaces-in-diff` to clickhouse-test [#57870](https://github.com/ClickHouse/ClickHouse/pull/57870) ([vdimir](https://github.com/vdimir)). +* Update `query_masking_rules` when reloading the config, attempt 2 [#57993](https://github.com/ClickHouse/ClickHouse/pull/57993) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Remove unneeded parameter `use_external_buffer` from `AsynchronousReadBuffer*` [#58077](https://github.com/ClickHouse/ClickHouse/pull/58077) ([Nikita Taranov](https://github.com/nickitat)). +* Print another message in Bugfix check if internal check had been failed [#58091](https://github.com/ClickHouse/ClickHouse/pull/58091) ([vdimir](https://github.com/vdimir)). +* Refactor StorageMerge virtual columns filtering. [#58255](https://github.com/ClickHouse/ClickHouse/pull/58255) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Analyzer: fix tuple comparison when result is always null [#58266](https://github.com/ClickHouse/ClickHouse/pull/58266) ([vdimir](https://github.com/vdimir)). +* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix timeout in 01732_race_condition_storage_join_long [#58298](https://github.com/ClickHouse/ClickHouse/pull/58298) ([vdimir](https://github.com/vdimir)). +* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable max_bytes_before_external* in 00172_hits_joins [#58309](https://github.com/ClickHouse/ClickHouse/pull/58309) ([vdimir](https://github.com/vdimir)). +* Analyzer: support functional arguments in USING clause [#58317](https://github.com/ClickHouse/ClickHouse/pull/58317) ([Dmitry Novik](https://github.com/novikd)). +* Fixed logical error in CheckSortedTransform [#58318](https://github.com/ClickHouse/ClickHouse/pull/58318) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Refreshable materialized views again [#58320](https://github.com/ClickHouse/ClickHouse/pull/58320) ([Michael Kolupaev](https://github.com/al13n321)). +* Organize symbols from src/* into DB namespace [#58336](https://github.com/ClickHouse/ClickHouse/pull/58336) ([Amos Bird](https://github.com/amosbird)). +* Add a style check against DOS and Windows [#58345](https://github.com/ClickHouse/ClickHouse/pull/58345) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check what happen if remove array joined columns from KeyCondition [#58346](https://github.com/ClickHouse/ClickHouse/pull/58346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Upload time of the perf tests into artifacts as test_duration_ms [#58348](https://github.com/ClickHouse/ClickHouse/pull/58348) ([Azat Khuzhin](https://github.com/azat)). +* Keep exception format string in retries ctl [#58351](https://github.com/ClickHouse/ClickHouse/pull/58351) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix replication.lib helper (system.mutations has database not current_database) [#58352](https://github.com/ClickHouse/ClickHouse/pull/58352) ([Azat Khuzhin](https://github.com/azat)). +* Refactor StorageHDFS and StorageFile virtual columns filtering [#58353](https://github.com/ClickHouse/ClickHouse/pull/58353) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix suspended workers for AsyncLoader [#58362](https://github.com/ClickHouse/ClickHouse/pull/58362) ([Sergei Trifonov](https://github.com/serxa)). +* Remove stale events from README [#58364](https://github.com/ClickHouse/ClickHouse/pull/58364) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not fail the CI on an expired token [#58384](https://github.com/ClickHouse/ClickHouse/pull/58384) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Add a test for [#38534](https://github.com/ClickHouse/ClickHouse/issues/38534) [#58391](https://github.com/ClickHouse/ClickHouse/pull/58391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix database engine validation inside database factory [#58395](https://github.com/ClickHouse/ClickHouse/pull/58395) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix bad formatting of the `timeDiff` compatibility alias [#58398](https://github.com/ClickHouse/ClickHouse/pull/58398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a comment; remove unused method; stop using pointers [#58399](https://github.com/ClickHouse/ClickHouse/pull/58399) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Make a test not depend on the lack of floating point associativity [#58439](https://github.com/ClickHouse/ClickHouse/pull/58439) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `02944_dynamically_change_filesystem_cache_size` [#58445](https://github.com/ClickHouse/ClickHouse/pull/58445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Analyzer: Fix LOGICAL_ERROR with LowCardinality [#58457](https://github.com/ClickHouse/ClickHouse/pull/58457) ([Dmitry Novik](https://github.com/novikd)). +* Replace `std::regex` by re2 [#58458](https://github.com/ClickHouse/ClickHouse/pull/58458) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve perf tests [#58478](https://github.com/ClickHouse/ClickHouse/pull/58478) ([Raúl Marín](https://github.com/Algunenano)). +* Check if I can remove KeyCondition analysis on AST. [#58480](https://github.com/ClickHouse/ClickHouse/pull/58480) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some thread pool settings not updating at runtime [#58485](https://github.com/ClickHouse/ClickHouse/pull/58485) ([Michael Kolupaev](https://github.com/al13n321)). +* Lower log levels for some Raft logs to new test level [#58487](https://github.com/ClickHouse/ClickHouse/pull/58487) ([Antonio Andelic](https://github.com/antonio2368)). +* PartsSplitter small refactoring [#58506](https://github.com/ClickHouse/ClickHouse/pull/58506) ([Maksim Kita](https://github.com/kitaisreal)). +* Sync content of the docker test images [#58507](https://github.com/ClickHouse/ClickHouse/pull/58507) ([Max K.](https://github.com/maxknv)). +* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)). +* Minor fixups for `sqid()` [#58517](https://github.com/ClickHouse/ClickHouse/pull/58517) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.12.2.59-stable [#58545](https://github.com/ClickHouse/ClickHouse/pull/58545) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.11.4.24-stable [#58546](https://github.com/ClickHouse/ClickHouse/pull/58546) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.9.54-lts [#58547](https://github.com/ClickHouse/ClickHouse/pull/58547) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.10.6.60-stable [#58548](https://github.com/ClickHouse/ClickHouse/pull/58548) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v23.3.19.32-lts [#58549](https://github.com/ClickHouse/ClickHouse/pull/58549) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update CHANGELOG.md [#58559](https://github.com/ClickHouse/ClickHouse/pull/58559) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix test 02932_kill_query_sleep [#58560](https://github.com/ClickHouse/ClickHouse/pull/58560) ([Vitaly Baranov](https://github.com/vitlibar)). +* CI fix. Add packager script to build digest [#58571](https://github.com/ClickHouse/ClickHouse/pull/58571) ([Max K.](https://github.com/maxknv)). +* fix and test that S3Clients are reused [#58573](https://github.com/ClickHouse/ClickHouse/pull/58573) ([Sema Checherinda](https://github.com/CheSema)). +* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not load database engines in suggest [#58586](https://github.com/ClickHouse/ClickHouse/pull/58586) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong message in Keeper [#58588](https://github.com/ClickHouse/ClickHouse/pull/58588) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some missing LLVM includes [#58594](https://github.com/ClickHouse/ClickHouse/pull/58594) ([Raúl Marín](https://github.com/Algunenano)). +* Small fix in Keeper [#58598](https://github.com/ClickHouse/ClickHouse/pull/58598) ([Antonio Andelic](https://github.com/antonio2368)). +* Update analyzer_tech_debt.txt [#58599](https://github.com/ClickHouse/ClickHouse/pull/58599) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Simplify release.py script [#58600](https://github.com/ClickHouse/ClickHouse/pull/58600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update analyzer_tech_debt.txt [#58602](https://github.com/ClickHouse/ClickHouse/pull/58602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Refactor stacktrace symbolizer to avoid copy-paste [#58610](https://github.com/ClickHouse/ClickHouse/pull/58610) ([Azat Khuzhin](https://github.com/azat)). +* Add intel AMX checking [#58617](https://github.com/ClickHouse/ClickHouse/pull/58617) ([Roman Glinskikh](https://github.com/omgronny)). +* Optional `client` argument for `S3Helper` [#58619](https://github.com/ClickHouse/ClickHouse/pull/58619) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add sorting to 02366_kql_summarize.sql [#58621](https://github.com/ClickHouse/ClickHouse/pull/58621) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove more projections code [#58628](https://github.com/ClickHouse/ClickHouse/pull/58628) ([Anton Popov](https://github.com/CurtizJ)). +* Remove finalize() from ~WriteBufferFromEncryptedFile [#58629](https://github.com/ClickHouse/ClickHouse/pull/58629) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update test_replicated_database/test.py [#58647](https://github.com/ClickHouse/ClickHouse/pull/58647) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try disabling `muzzy_decay_ms` in jemalloc [#58648](https://github.com/ClickHouse/ClickHouse/pull/58648) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix test_replicated_database::test_startup_without_zk flakiness [#58649](https://github.com/ClickHouse/ClickHouse/pull/58649) ([Azat Khuzhin](https://github.com/azat)). +* Fix 01600_remerge_sort_lowered_memory_bytes_ratio flakiness (due to settings randomization) [#58650](https://github.com/ClickHouse/ClickHouse/pull/58650) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Fix assertion in HashJoin with duplicate columns [#58652](https://github.com/ClickHouse/ClickHouse/pull/58652) ([vdimir](https://github.com/vdimir)). +* Document that `match()` can use `ngrambf_v1` and `tokenbf_v1` indexes [#58655](https://github.com/ClickHouse/ClickHouse/pull/58655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix perf tests duration (checks.test_duration_ms) [#58656](https://github.com/ClickHouse/ClickHouse/pull/58656) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Correctly handle constant set in index [#58657](https://github.com/ClickHouse/ClickHouse/pull/58657) ([Dmitry Novik](https://github.com/novikd)). +* fix a typo in stress randomization setting [#58658](https://github.com/ClickHouse/ClickHouse/pull/58658) ([Sema Checherinda](https://github.com/CheSema)). +* Small follow-up to `std::regex` --> `re2` conversion ([#58458](https://github.com/ClickHouse/ClickHouse/issues/58458)) [#58678](https://github.com/ClickHouse/ClickHouse/pull/58678) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `` from libcxx [#58681](https://github.com/ClickHouse/ClickHouse/pull/58681) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bad log message [#58698](https://github.com/ClickHouse/ClickHouse/pull/58698) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some small improvements to version_helper from [#57203](https://github.com/ClickHouse/ClickHouse/issues/57203) [#58712](https://github.com/ClickHouse/ClickHouse/pull/58712) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Small fixes in different helpers [#58717](https://github.com/ClickHouse/ClickHouse/pull/58717) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix bug in new (not released yet) parallel replicas coordinator [#58722](https://github.com/ClickHouse/ClickHouse/pull/58722) ([Nikita Taranov](https://github.com/nickitat)). +* Analyzer: Fix LOGICAL_ERROR in CountDistinctPass [#58723](https://github.com/ClickHouse/ClickHouse/pull/58723) ([Dmitry Novik](https://github.com/novikd)). +* Fix reading of offsets subcolumn (`size0`) from `Nested` [#58729](https://github.com/ClickHouse/ClickHouse/pull/58729) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Mac OS X [#58733](https://github.com/ClickHouse/ClickHouse/pull/58733) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix stress with generate-template-key [#58740](https://github.com/ClickHouse/ClickHouse/pull/58740) ([Sema Checherinda](https://github.com/CheSema)). +* more relaxed check [#58751](https://github.com/ClickHouse/ClickHouse/pull/58751) ([Sema Checherinda](https://github.com/CheSema)). +* Fix usage of small buffers for remote reading [#58768](https://github.com/ClickHouse/ClickHouse/pull/58768) ([Nikita Taranov](https://github.com/nickitat)). +* Add missing includes when _LIBCPP_REMOVE_TRANSITIVE_INCLUDES enabled [#58770](https://github.com/ClickHouse/ClickHouse/pull/58770) ([Artem Alperin](https://github.com/hdnpth)). +* Remove some code [#58772](https://github.com/ClickHouse/ClickHouse/pull/58772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove some code [#58790](https://github.com/ClickHouse/ClickHouse/pull/58790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix trash in performance tests [#58794](https://github.com/ClickHouse/ClickHouse/pull/58794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in Keeper [#58806](https://github.com/ClickHouse/ClickHouse/pull/58806) ([Antonio Andelic](https://github.com/antonio2368)). +* Increase log level to trace to help debug `00993_system_parts_race_condition_drop_zookeeper` [#58809](https://github.com/ClickHouse/ClickHouse/pull/58809) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* DatabaseCatalog background tasks add log names [#58832](https://github.com/ClickHouse/ClickHouse/pull/58832) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer: Resolve GROUPING function on shards [#58833](https://github.com/ClickHouse/ClickHouse/pull/58833) ([Dmitry Novik](https://github.com/novikd)). +* Allow parallel replicas for JOIN with analyzer [part 1]. [#58838](https://github.com/ClickHouse/ClickHouse/pull/58838) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `isRetry` method [#58839](https://github.com/ClickHouse/ClickHouse/pull/58839) ([alesapin](https://github.com/alesapin)). +* fs cache: fix data race in slru [#58842](https://github.com/ClickHouse/ClickHouse/pull/58842) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix reading from an invisible part in new (not released yet) parallel replicas coordinator [#58844](https://github.com/ClickHouse/ClickHouse/pull/58844) ([Nikita Taranov](https://github.com/nickitat)). +* Fix bad log message [#58849](https://github.com/ClickHouse/ClickHouse/pull/58849) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set max_bytes_before_external_group_by in 01961_roaring_memory_tracking [#58863](https://github.com/ClickHouse/ClickHouse/pull/58863) ([vdimir](https://github.com/vdimir)). +* Fix `00089_group_by_arrays_of_fixed` with external aggregation [#58873](https://github.com/ClickHouse/ClickHouse/pull/58873) ([Antonio Andelic](https://github.com/antonio2368)). +* DiskWeb minor improvement in loading [#58874](https://github.com/ClickHouse/ClickHouse/pull/58874) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix RPN construction for indexHint [#58875](https://github.com/ClickHouse/ClickHouse/pull/58875) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: add test with GROUP BY on shards [#58876](https://github.com/ClickHouse/ClickHouse/pull/58876) ([Dmitry Novik](https://github.com/novikd)). +* Jepsen job to reuse builds [#58881](https://github.com/ClickHouse/ClickHouse/pull/58881) ([Max K.](https://github.com/maxknv)). +* Fix ambiguity in the setting description [#58883](https://github.com/ClickHouse/ClickHouse/pull/58883) ([Denny Crane](https://github.com/den-crane)). +* Less error prone interface of read buffers [#58886](https://github.com/ClickHouse/ClickHouse/pull/58886) ([Anton Popov](https://github.com/CurtizJ)). +* Add metric for keeper memory soft limit [#58890](https://github.com/ClickHouse/ClickHouse/pull/58890) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Add a test for [#47988](https://github.com/ClickHouse/ClickHouse/issues/47988) [#58893](https://github.com/ClickHouse/ClickHouse/pull/58893) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Whitespaces [#58894](https://github.com/ClickHouse/ClickHouse/pull/58894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix data race in `AggregatingTransform` [#58896](https://github.com/ClickHouse/ClickHouse/pull/58896) ([Antonio Andelic](https://github.com/antonio2368)). +* Update SLRUFileCachePriority.cpp [#58898](https://github.com/ClickHouse/ClickHouse/pull/58898) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add tests for [#57193](https://github.com/ClickHouse/ClickHouse/issues/57193) [#58899](https://github.com/ClickHouse/ClickHouse/pull/58899) ([Raúl Marín](https://github.com/Algunenano)). +* Add log for already download binary in Jepsen [#58901](https://github.com/ClickHouse/ClickHouse/pull/58901) ([Antonio Andelic](https://github.com/antonio2368)). +* fs cache: minor refactoring [#58902](https://github.com/ClickHouse/ClickHouse/pull/58902) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Checking on flaky test_parallel_replicas_custom_key_failover [#58909](https://github.com/ClickHouse/ClickHouse/pull/58909) ([Igor Nikonov](https://github.com/devcrafter)). +* Style fix [#58913](https://github.com/ClickHouse/ClickHouse/pull/58913) ([Dmitry Novik](https://github.com/novikd)). +* Opentelemetry spans to analyze CPU and S3 bottlenecks on inserts [#58914](https://github.com/ClickHouse/ClickHouse/pull/58914) ([Alexander Gololobov](https://github.com/davenger)). +* Fix fault handler in case of thread (for fault handler) cannot be spawned [#58917](https://github.com/ClickHouse/ClickHouse/pull/58917) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Support GROUP BY injective function elimination [#58919](https://github.com/ClickHouse/ClickHouse/pull/58919) ([Dmitry Novik](https://github.com/novikd)). +* Cancel MasterCI in PRs [#58920](https://github.com/ClickHouse/ClickHouse/pull/58920) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix and test for azure [#58697](https://github.com/ClickHouse/ClickHouse/issues/58697) [#58921](https://github.com/ClickHouse/ClickHouse/pull/58921) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Extend performance test norm_dist.xml [#58922](https://github.com/ClickHouse/ClickHouse/pull/58922) ([Robert Schulze](https://github.com/rschu1ze)). +* Add regression test for parallel replicas (follow up [#58722](https://github.com/ClickHouse/ClickHouse/issues/58722), [#58844](https://github.com/ClickHouse/ClickHouse/issues/58844)) [#58923](https://github.com/ClickHouse/ClickHouse/pull/58923) ([Nikita Taranov](https://github.com/nickitat)). +* Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892) [#58927](https://github.com/ClickHouse/ClickHouse/pull/58927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `FunctionToSubcolumnsPass` in debug build [#58930](https://github.com/ClickHouse/ClickHouse/pull/58930) ([Anton Popov](https://github.com/CurtizJ)). +* Call `getMaxFileDescriptorCount` once in Keeper [#58938](https://github.com/ClickHouse/ClickHouse/pull/58938) ([Antonio Andelic](https://github.com/antonio2368)). +* Add missing files to digests [#58942](https://github.com/ClickHouse/ClickHouse/pull/58942) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer: fix join column not found with compound identifiers [#58943](https://github.com/ClickHouse/ClickHouse/pull/58943) ([vdimir](https://github.com/vdimir)). +* CI: pr_info to provide event_type for job scripts [#58947](https://github.com/ClickHouse/ClickHouse/pull/58947) ([Max K.](https://github.com/maxknv)). +* Using the destination object for paths generation in S3copy. 
[#58949](https://github.com/ClickHouse/ClickHouse/pull/58949) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix data race in slru (2) [#58950](https://github.com/ClickHouse/ClickHouse/pull/58950) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky test_postgresql_replica_database_engine_2/test.py::test_dependent_loading [#58951](https://github.com/ClickHouse/ClickHouse/pull/58951) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* More safe way to dump system logs in tests [#58955](https://github.com/ClickHouse/ClickHouse/pull/58955) ([alesapin](https://github.com/alesapin)).
+* Add a comment about sparse checkout [#58960](https://github.com/ClickHouse/ClickHouse/pull/58960) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Follow up to [#58357](https://github.com/ClickHouse/ClickHouse/issues/58357) [#58963](https://github.com/ClickHouse/ClickHouse/pull/58963) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Better error message about tuples [#58971](https://github.com/ClickHouse/ClickHouse/pull/58971) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive) [#58981](https://github.com/ClickHouse/ClickHouse/pull/58981) ([Azat Khuzhin](https://github.com/azat)).
+* Fix 02891_array_shingles with analyzer [#58982](https://github.com/ClickHouse/ClickHouse/pull/58982) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix script name in SQL example in executable.md [#58984](https://github.com/ClickHouse/ClickHouse/pull/58984) ([Lino Uruñuela](https://github.com/Wachynaky)).
+* Fix typo [#58986](https://github.com/ClickHouse/ClickHouse/pull/58986) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Revert flaky [#58992](https://github.com/ClickHouse/ClickHouse/pull/58992) ([Raúl Marín](https://github.com/Algunenano)).
+* Revive: Parallel replicas custom key: skip unavailable replicas [#58993](https://github.com/ClickHouse/ClickHouse/pull/58993) ([Igor Nikonov](https://github.com/devcrafter)).
+* Make performance test `test norm_dist.xml` more realistic [#58995](https://github.com/ClickHouse/ClickHouse/pull/58995) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix 02404_memory_bound_merging with analyzer (follow up [#56419](https://github.com/ClickHouse/ClickHouse/issues/56419)) [#58996](https://github.com/ClickHouse/ClickHouse/pull/58996) ([Nikita Taranov](https://github.com/nickitat)).
+* Add test for [#58930](https://github.com/ClickHouse/ClickHouse/issues/58930) [#58999](https://github.com/ClickHouse/ClickHouse/pull/58999) ([Anton Popov](https://github.com/CurtizJ)).
+* initialization ConnectionTimeouts [#59000](https://github.com/ClickHouse/ClickHouse/pull/59000) ([Sema Checherinda](https://github.com/CheSema)).
+* DiskWeb fix loading [#59006](https://github.com/ClickHouse/ClickHouse/pull/59006) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update log level for http buffer [#59008](https://github.com/ClickHouse/ClickHouse/pull/59008) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Change log level for super important message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)).
+* Fix async loader stress test [#59011](https://github.com/ClickHouse/ClickHouse/pull/59011) ([Sergei Trifonov](https://github.com/serxa)).
+* Remove `StaticResourceManager` [#59013](https://github.com/ClickHouse/ClickHouse/pull/59013) ([Sergei Trifonov](https://github.com/serxa)).
+* preserve 'amz-sdk-invocation-id' and 'amz-sdk-request' headers with gcp [#59015](https://github.com/ClickHouse/ClickHouse/pull/59015) ([Sema Checherinda](https://github.com/CheSema)).
+* Update rename.md [#59017](https://github.com/ClickHouse/ClickHouse/pull/59017) ([filimonov](https://github.com/filimonov)).
+* Fix a typo [#59024](https://github.com/ClickHouse/ClickHouse/pull/59024) ([edpyt](https://github.com/edpyt)).
+* Split resource scheduler off `IO/` into `Common/Scheduler/` [#59025](https://github.com/ClickHouse/ClickHouse/pull/59025) ([Sergei Trifonov](https://github.com/serxa)).
+* Add a parameter for testing purposes [#59027](https://github.com/ClickHouse/ClickHouse/pull/59027) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix test 02932_kill_query_sleep when running with query cache [#59041](https://github.com/ClickHouse/ClickHouse/pull/59041) ([Vitaly Baranov](https://github.com/vitlibar)).
+* CI: Jepsen: fix sanity check in ci.py [#59043](https://github.com/ClickHouse/ClickHouse/pull/59043) ([Max K.](https://github.com/maxknv)).
+* CI: add ci_config classes for job and build names [#59046](https://github.com/ClickHouse/ClickHouse/pull/59046) ([Max K.](https://github.com/maxknv)).
+* remove flaky test [#59066](https://github.com/ClickHouse/ClickHouse/pull/59066) ([Sema Checherinda](https://github.com/CheSema)).
+* Followup to 57853 [#59068](https://github.com/ClickHouse/ClickHouse/pull/59068) ([Dmitry Novik](https://github.com/novikd)).
+* Follow-up to [#59027](https://github.com/ClickHouse/ClickHouse/issues/59027) [#59075](https://github.com/ClickHouse/ClickHouse/pull/59075) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `test_parallel_replicas_invisible_parts` [#59077](https://github.com/ClickHouse/ClickHouse/pull/59077) ([Nikita Taranov](https://github.com/nickitat)).
+* Increase max_bytes_before_external_group_by for 00165_jit_aggregate_functions [#59078](https://github.com/ClickHouse/ClickHouse/pull/59078) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix stateless/run.sh [#59079](https://github.com/ClickHouse/ClickHouse/pull/59079) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* CI: hot fix for reuse [#59081](https://github.com/ClickHouse/ClickHouse/pull/59081) ([Max K.](https://github.com/maxknv)).
+* Fix server shutdown due to exception while loading metadata [#59083](https://github.com/ClickHouse/ClickHouse/pull/59083) ([Sergei Trifonov](https://github.com/serxa)).
+* Coordinator returns ranges for reading in sorted order [#59089](https://github.com/ClickHouse/ClickHouse/pull/59089) ([Nikita Taranov](https://github.com/nickitat)).
+* Raise timeout in 02294_decimal_second_errors [#59090](https://github.com/ClickHouse/ClickHouse/pull/59090) ([Raúl Marín](https://github.com/Algunenano)).
+* Add `[[nodiscard]]` to a couple of methods [#59093](https://github.com/ClickHouse/ClickHouse/pull/59093) ([Nikita Taranov](https://github.com/nickitat)).
+* Docs: Update integer and float aliases [#59100](https://github.com/ClickHouse/ClickHouse/pull/59100) ([Robert Schulze](https://github.com/rschu1ze)).
+* Avoid election timeouts during startup in Keeper [#59102](https://github.com/ClickHouse/ClickHouse/pull/59102) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add missing setting max_estimated_execution_time in SettingsChangesHistory [#59104](https://github.com/ClickHouse/ClickHouse/pull/59104) ([Kruglov Pavel](https://github.com/Avogar)).
+* Rename some inverted index test files [#59106](https://github.com/ClickHouse/ClickHouse/pull/59106) ([Robert Schulze](https://github.com/rschu1ze)). +* Further reduce runtime of `norm_distance.xml` [#59108](https://github.com/ClickHouse/ClickHouse/pull/59108) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor follow-up to [#53710](https://github.com/ClickHouse/ClickHouse/issues/53710) [#59109](https://github.com/ClickHouse/ClickHouse/pull/59109) ([Robert Schulze](https://github.com/rschu1ze)). +* Update stateless/run.sh [#59116](https://github.com/ClickHouse/ClickHouse/pull/59116) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Followup 57875 [#59117](https://github.com/ClickHouse/ClickHouse/pull/59117) ([Dmitry Novik](https://github.com/novikd)). +* Fixing build [#59130](https://github.com/ClickHouse/ClickHouse/pull/59130) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Capability check for `s3_plain` [#59145](https://github.com/ClickHouse/ClickHouse/pull/59145) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `02015_async_inserts_stress_long` [#59146](https://github.com/ClickHouse/ClickHouse/pull/59146) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix AggregateFunctionNothing result type issues introducing it with different names [#59147](https://github.com/ClickHouse/ClickHouse/pull/59147) ([vdimir](https://github.com/vdimir)). +* Fix url encoding issue [#59162](https://github.com/ClickHouse/ClickHouse/pull/59162) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Upgrade simdjson to v3.6.3 [#59166](https://github.com/ClickHouse/ClickHouse/pull/59166) ([Robert Schulze](https://github.com/rschu1ze)). +* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix broken cache for non-existing temp_path [#59172](https://github.com/ClickHouse/ClickHouse/pull/59172) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move some headers [#59175](https://github.com/ClickHouse/ClickHouse/pull/59175) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer: Fix CTE name clash resolution [#59177](https://github.com/ClickHouse/ClickHouse/pull/59177) ([Dmitry Novik](https://github.com/novikd)). +* Fix another place with special symbols in the URL [#59184](https://github.com/ClickHouse/ClickHouse/pull/59184) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Actions dag build filter actions refactoring [#59228](https://github.com/ClickHouse/ClickHouse/pull/59228) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor cleanup of msan usage [#59229](https://github.com/ClickHouse/ClickHouse/pull/59229) ([Robert Schulze](https://github.com/rschu1ze)). +* Load server configs in clickhouse local [#59231](https://github.com/ClickHouse/ClickHouse/pull/59231) ([pufit](https://github.com/pufit)). +* Make libssh build dependent on `-DENABLE_LIBRARIES` [#59242](https://github.com/ClickHouse/ClickHouse/pull/59242) ([Robert Schulze](https://github.com/rschu1ze)). +* Disable copy constructor for MultiVersion [#59244](https://github.com/ClickHouse/ClickHouse/pull/59244) ([Vitaly Baranov](https://github.com/vitlibar)). +* CI: fix ci configuration for nightly job [#59252](https://github.com/ClickHouse/ClickHouse/pull/59252) ([Max K.](https://github.com/maxknv)). +* Fix 02475_bson_each_row_format flakiness (due to small parsing block) [#59253](https://github.com/ClickHouse/ClickHouse/pull/59253) ([Azat Khuzhin](https://github.com/azat)). 
+* Improve pytest --pdb experience by preserving dockerd on SIGINT (v2) [#59255](https://github.com/ClickHouse/ClickHouse/pull/59255) ([Azat Khuzhin](https://github.com/azat)). +* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)). +* Added AtomicLogger [#59273](https://github.com/ClickHouse/ClickHouse/pull/59273) ([Maksim Kita](https://github.com/kitaisreal)). +* Update test_reload_after_fail_in_cache_dictionary for analyzer [#59274](https://github.com/ClickHouse/ClickHouse/pull/59274) ([vdimir](https://github.com/vdimir)). +* Update run.sh [#59280](https://github.com/ClickHouse/ClickHouse/pull/59280) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add missing setting optimize_injective_functions_in_group_by to SettingsChangesHistory [#59283](https://github.com/ClickHouse/ClickHouse/pull/59283) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix perf tests (after sumMap starts to filter out -0.) [#59287](https://github.com/ClickHouse/ClickHouse/pull/59287) ([Azat Khuzhin](https://github.com/azat)). +* Use fresh ZooKeeper client on DROP (to have higher chances on success) [#59288](https://github.com/ClickHouse/ClickHouse/pull/59288) ([Azat Khuzhin](https://github.com/azat)). +* Additional check [#59292](https://github.com/ClickHouse/ClickHouse/pull/59292) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlock in `AsyncLoader::stop()` [#59308](https://github.com/ClickHouse/ClickHouse/pull/59308) ([Sergei Trifonov](https://github.com/serxa)). +* Speed up `00165_jit_aggregate_functions` [#59312](https://github.com/ClickHouse/ClickHouse/pull/59312) ([Nikita Taranov](https://github.com/nickitat)). +* CI: WA for issue with perf test with artifact reuse [#59325](https://github.com/ClickHouse/ClickHouse/pull/59325) ([Max K.](https://github.com/maxknv)). +* Fix typo [#59329](https://github.com/ClickHouse/ClickHouse/pull/59329) ([Raúl Marín](https://github.com/Algunenano)). +* Simplify query_run_metric_arrays in perf tests [#59333](https://github.com/ClickHouse/ClickHouse/pull/59333) ([Raúl Marín](https://github.com/Algunenano)). +* IVolume constructor improve exception message [#59335](https://github.com/ClickHouse/ClickHouse/pull/59335) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix upgrade check for new setting [#59343](https://github.com/ClickHouse/ClickHouse/pull/59343) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix sccache when building without coverage [#59345](https://github.com/ClickHouse/ClickHouse/pull/59345) ([Raúl Marín](https://github.com/Algunenano)). +* Loggers initialization fix [#59347](https://github.com/ClickHouse/ClickHouse/pull/59347) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting update_insert_deduplication_token_in_dependent_materialized_views to settings changes history [#59349](https://github.com/ClickHouse/ClickHouse/pull/59349) ([Maksim Kita](https://github.com/kitaisreal)). +* Slightly better memory usage in `AsynchronousBoundedReadBuffer` [#59354](https://github.com/ClickHouse/ClickHouse/pull/59354) ([Anton Popov](https://github.com/CurtizJ)). +* Try to make variant tests a bit faster [#59355](https://github.com/ClickHouse/ClickHouse/pull/59355) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Minor typos in Settings.h [#59371](https://github.com/ClickHouse/ClickHouse/pull/59371) ([Jordi Villar](https://github.com/jrdi)).
+* Rename `quantileDDSketch` to `quantileDD` [#59372](https://github.com/ClickHouse/ClickHouse/pull/59372) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
diff --git a/docs/changelogs/v24.1.2.5-stable.md b/docs/changelogs/v24.1.2.5-stable.md
new file mode 100644
index 000000000000..bac25c9b9ed6
--- /dev/null
+++ b/docs/changelogs/v24.1.2.5-stable.md
@@ -0,0 +1,14 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.1.2.5-stable (b2605dd4a5a) FIXME as compared to v24.1.1.2048-stable (5a024dfc093)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)).
+
diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 1d3e7d4964e6..efbce54d44b5 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -109,6 +109,9 @@ Do not check for a particular wording of error message, it may change in the fut
 If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.
 
+### Working with Temporary Files
+
+Sometimes in a shell test you may need to create a file on the fly to work with. Keep in mind that some CI checks run tests in parallel, so if your script creates or removes a temporary file without a unique name, this can cause some CI checks, such as the Flaky check, to fail. To avoid this, use the environment variable `$CLICKHOUSE_TEST_UNIQUE_NAME` to give temporary files a name that is unique to the running test. That way you can be sure that the file you create during setup, and remove during cleanup, is only in use by that test and not by another test running in parallel.
 
 ## Known Bugs {#known-bugs}
diff --git a/docs/en/getting-started/example-datasets/noaa.md b/docs/en/getting-started/example-datasets/noaa.md
new file mode 100644
index 000000000000..9a3ec7791b6f
--- /dev/null
+++ b/docs/en/getting-started/example-datasets/noaa.md
@@ -0,0 +1,342 @@
+---
+slug: /en/getting-started/example-datasets/noaa
+sidebar_label: NOAA Global Historical Climatology Network
+sidebar_position: 1
+description: 2.5 billion rows of climate data for the last 120 years
+---
+
+# NOAA Global Historical Climatology Network
+
+This dataset contains weather measurements for the last 120 years. Each row is a measurement for a point in time and station.
+
+More precisely, according to the [origin of this data](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn):
+
+> GHCN-Daily is a dataset that contains daily observations over global land areas. It contains station-based measurements from land-based stations worldwide, about two-thirds of which are for precipitation measurements only (Menne et al., 2012). GHCN-Daily is a composite of climate records from numerous sources that were merged together and subjected to a common suite of quality assurance reviews (Durre et al., 2010). The archive includes the following meteorological elements:
+
+ - Daily maximum temperature
+ - Daily minimum temperature
+ - Temperature at the time of observation
+ - Precipitation (i.e., rain, melted snow)
+ - Snowfall
+ - Snow depth
+ - Other elements where available
+
+## Downloading the data
+
+- A [pre-prepared version](#pre-prepared-data) of the data for ClickHouse, which has been cleansed, re-structured, and enriched. This data covers the years 1900 to 2022.
+- [Download the original data](#original-data) and convert it to the format required by ClickHouse. Users wanting to add their own columns may wish to explore this approach.
+
+### Pre-prepared data
+
+More specifically, rows that failed any of NOAA's quality assurance checks have been removed. The data has also been restructured from a measurement per line to a row per station id and date, i.e.
+
+```csv
+"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
+"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
+"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
+"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
+"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
+```
+
+This is simpler to query and ensures the resulting table is less sparse. Finally, the data has also been enriched with latitude and longitude.
+
+This data is available in the following S3 location. Either download the data to your local filesystem (and insert using the ClickHouse client) or insert directly into ClickHouse (see [Inserting from S3](#inserting-from-s3)).
+
+To download:
+
+```bash
+wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet
+```
+
+### Original data
+
+The following details the steps to download and transform the original data in preparation for loading into ClickHouse.
+
+#### Download
+
+To download the original data:
+
+```bash
+for i in {1900..2023}; do wget https://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/${i}.csv.gz; done
+```
+
+#### Sampling the data
+
+```bash
+$ clickhouse-local --query "SELECT * FROM '2021.csv.gz' LIMIT 10" --format PrettyCompact
+┌─c1──────────┬───────c2─┬─c3───┬──c4─┬─c5───┬─c6───┬─c7─┬───c8─┐
+│ AE000041196 │ 20210101 │ TMAX │ 278 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AE000041196 │ 20210101 │ PRCP │ 0 │ D │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AE000041196 │ 20210101 │ TAVG │ 214 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041194 │ 20210101 │ TMAX │ 266 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041194 │ 20210101 │ TMIN │ 178 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041194 │ 20210101 │ PRCP │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041194 │ 20210101 │ TAVG │ 217 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041217 │ 20210101 │ TMAX │ 262 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041217 │ 20210101 │ TMIN │ 155 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+│ AEM00041217 │ 20210101 │ TAVG │ 202 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
+└─────────────┴──────────┴──────┴─────┴──────┴──────┴────┴──────┘
+```
+
+Summarizing the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn) and the columns in order:
+
+ - An 11-character station identification code. This itself encodes some useful information.
+ - YEAR/MONTH/DAY = 8 character date in YYYYMMDD format (e.g.
19860529 = May 29, 1986) + - ELEMENT = 4 character indicator of element type. Effectively the measurement type. While there are many measurements available, we select the following: + - PRCP - Precipitation (tenths of mm) + - SNOW - Snowfall (mm) + - SNWD - Snow depth (mm) + - TMAX - Maximum temperature (tenths of degrees C) + - TAVG - Average temperature (tenths of a degree C) + - TMIN - Minimum temperature (tenths of degrees C) + - PSUN - Daily percent of possible sunshine (percent) + - AWND - Average daily wind speed (tenths of meters per second) + - WSFG - Peak gust wind speed (tenths of meters per second) + - WT** = Weather Type where ** defines the weather type. Full list of weather types here. +- DATA VALUE = 5 character data value for ELEMENT i.e. the value of the measurement. +- M-FLAG = 1 character Measurement Flag. This has 10 possible values. Some of these values indicate questionable data accuracy. We accept data where this is set to “P” - identified as missing presumed zero, as this is only relevant to the PRCP, SNOW and SNWD measurements. +- Q-FLAG is the measurement quality flag with 14 possible values. We are only interested in data with an empty value i.e. it did not fail any quality assurance checks. +- S-FLAG is the source flag for the observation. Not useful for our analysis and ignored. +- OBS-TIME = 4-character time of observation in hour-minute format (i.e. 0700 =7:00 am). Typically not present in older data. We ignore this for our purposes. + +A measurement per line would result in a sparse table structure in ClickHouse. We should transform to a row per time and station, with measurements as columns. First, we limit the dataset to those rows without issues i.e. where `qFlag` is equal to an empty string. + +#### Clean the data + +Using [ClickHouse local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) we can filter rows that represent measurements of interest and pass our quality requirements: + +```bash +clickhouse local --query "SELECT count() +FROM file('*.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))" + +2679264563 +``` + +With over 2.6 billion rows, this isn’t a fast query since it involves parsing all the files. On our 8 core machine, this takes around 160 seconds. + + +### Pivot data + +While the measurement per line structure can be used with ClickHouse, it will unnecessarily complicate future queries. Ideally, we need a row per station id and date, where each measurement type and associated value are a column i.e. + +```csv +"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType" +"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0 +"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0 +"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0 +"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0 +``` + +Using ClickHouse local and a simple `GROUP BY`, we can repivot our data to this structure. To limit memory overhead, we do this one file at a time. 
+ +```bash +for i in {1900..2022} +do +clickhouse-local --query "SELECT station_id, + toDate32(date) as date, + anyIf(value, measurement = 'TAVG') as tempAvg, + anyIf(value, measurement = 'TMAX') as tempMax, + anyIf(value, measurement = 'TMIN') as tempMin, + anyIf(value, measurement = 'PRCP') as precipitation, + anyIf(value, measurement = 'SNOW') as snowfall, + anyIf(value, measurement = 'SNWD') as snowDepth, + anyIf(value, measurement = 'PSUN') as percentDailySun, + anyIf(value, measurement = 'AWND') as averageWindSpeed, + anyIf(value, measurement = 'WSFG') as maxWindSpeed, + toUInt8OrZero(replaceOne(anyIf(measurement, startsWith(measurement, 'WT') AND value = 1), 'WT', '')) as weatherType +FROM file('$i.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') + WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT')) +GROUP BY station_id, date +ORDER BY station_id, date FORMAT CSV" >> "noaa.csv"; +done +``` + +This query produces a single 50GB file `noaa.csv`. + +### Enriching the data + +The data has no indication of location aside from a station id, which includes a prefix country code. Ideally, each station would have a latitude and longitude associated with it. To achieve this, NOAA conveniently provides the details of each station as a separate [ghcnd-stations.txt](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file). This file has [several columns](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file), of which five are useful to our future analysis: id, latitude, longitude, elevation, and name. + +```bash +wget http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt +``` + +```bash +clickhouse local --query "WITH stations AS (SELECT id, lat, lon, elevation, splitByString(' GSN ',name)[1] as name FROM file('ghcnd-stations.txt', Regexp, 'id String, lat Float64, lon Float64, elevation Float32, name String')) +SELECT station_id, + date, + tempAvg, + tempMax, + tempMin, + precipitation, + snowfall, + snowDepth, + percentDailySun, + averageWindSpeed, + maxWindSpeed, + weatherType, + tuple(lon, lat) as location, + elevation, + name +FROM file('noaa.csv', CSV, + 'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8') as noaa LEFT OUTER + JOIN stations ON noaa.station_id = stations.id INTO OUTFILE 'noaa_enriched.parquet' FORMAT Parquet SETTINGS format_regexp='^(.{11})\s+(\-?\d{1,2}\.\d{4})\s+(\-?\d{1,3}\.\d{1,4})\s+(\-?\d*\.\d*)\s+(.*)\s+(?:[\d]*)'" +``` +This query takes a few minutes to run and produces a 6.4 GB file, `noaa_enriched.parquet`. + +## Create table + +Create a MergeTree table in ClickHouse (from the ClickHouse client). 
+ +```sql +CREATE TABLE noaa +( + `station_id` LowCardinality(String), + `date` Date32, + `tempAvg` Int32 COMMENT 'Average temperature (tenths of a degrees C)', + `tempMax` Int32 COMMENT 'Maximum temperature (tenths of degrees C)', + `tempMin` Int32 COMMENT 'Minimum temperature (tenths of degrees C)', + `precipitation` UInt32 COMMENT 'Precipitation (tenths of mm)', + `snowfall` UInt32 COMMENT 'Snowfall (mm)', + `snowDepth` UInt32 COMMENT 'Snow depth (mm)', + `percentDailySun` UInt8 COMMENT 'Daily percent of possible sunshine (percent)', + `averageWindSpeed` UInt32 COMMENT 'Average daily wind speed (tenths of meters per second)', + `maxWindSpeed` UInt32 COMMENT 'Peak gust wind speed (tenths of meters per second)', + `weatherType` Enum8('Normal' = 0, 'Fog' = 1, 'Heavy Fog' = 2, 'Thunder' = 3, 'Small Hail' = 4, 'Hail' = 5, 'Glaze' = 6, 'Dust/Ash' = 7, 'Smoke/Haze' = 8, 'Blowing/Drifting Snow' = 9, 'Tornado' = 10, 'High Winds' = 11, 'Blowing Spray' = 12, 'Mist' = 13, 'Drizzle' = 14, 'Freezing Drizzle' = 15, 'Rain' = 16, 'Freezing Rain' = 17, 'Snow' = 18, 'Unknown Precipitation' = 19, 'Ground Fog' = 21, 'Freezing Fog' = 22), + `location` Point, + `elevation` Float32, + `name` LowCardinality(String) +) ENGINE = MergeTree() ORDER BY (station_id, date); + +``` + +## Inserting into ClickHouse + +### Inserting from local file + +Data can be inserted from a local file as follows (from the ClickHouse client): + +```sql +INSERT INTO noaa FROM INFILE '/noaa_enriched.parquet' +``` + +where `` represents the full path to the local file on disk. + +See [here](https://clickhouse.com/blog/real-world-data-noaa-climate-data#load-the-data) for how to speed this load up. + +### Inserting from S3 + +```sql +INSERT INTO noaa SELECT * +FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet') + +``` +For how to speed this up, see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2). + +## Sample queries + +### Highest temperature ever + +```sql +SELECT + tempMax / 10 AS maxTemp, + location, + name, + date +FROM blogs.noaa +WHERE tempMax > 500 +ORDER BY + tempMax DESC, + date ASC +LIMIT 5 + +┌─maxTemp─┬─location──────────┬─name───────────────────────────────────────────┬───────date─┐ +│ 56.7 │ (-116.8667,36.45) │ CA GREENLAND RCH │ 1913-07-10 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-08-20 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-09-18 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-07-17 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-09-04 │ +└─────────┴───────────────────┴────────────────────────────────────────────────┴────────────┘ + +5 rows in set. Elapsed: 0.514 sec. Processed 1.06 billion rows, 4.27 GB (2.06 billion rows/s., 8.29 GB/s.) +``` + +Reassuringly consistent with the [documented record](https://en.wikipedia.org/wiki/List_of_weather_records#Highest_temperatures_ever_recorded) at [Furnace Creek](https://www.google.com/maps/place/36%C2%B027'00.0%22N+116%C2%B052'00.1%22W/@36.1329666,-116.1104099,8.95z/data=!4m5!3m4!1s0x0:0xf2ed901b860f4446!8m2!3d36.45!4d-116.8667) as of 2023. + +### Best ski resorts + +Using a [list of ski resorts](https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv) in the united states and their respective locations, we join these against the top 1000 weather stations with the most in any month in the last 5 yrs. 
Sorting this join by [geoDistance](https://clickhouse.com/docs/en/sql-reference/functions/geo/coordinates/#geodistance) and restricting the results to those where the distance is less than 20km, we select the top result per resort and sort this by total snow. Note we also restrict resorts to those above 1800m, as a broad indicator of good skiing conditions. + +```sql +SELECT + resort_name, + total_snow / 1000 AS total_snow_m, + resort_location, + month_year +FROM +( + WITH resorts AS + ( + SELECT + resort_name, + state, + (lon, lat) AS resort_location, + 'US' AS code + FROM url('https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv', CSVWithNames) + ) + SELECT + resort_name, + highest_snow.station_id, + geoDistance(resort_location.1, resort_location.2, station_location.1, station_location.2) / 1000 AS distance_km, + highest_snow.total_snow, + resort_location, + station_location, + month_year + FROM + ( + SELECT + sum(snowfall) AS total_snow, + station_id, + any(location) AS station_location, + month_year, + substring(station_id, 1, 2) AS code + FROM noaa + WHERE (date > '2017-01-01') AND (code = 'US') AND (elevation > 1800) + GROUP BY + station_id, + toYYYYMM(date) AS month_year + ORDER BY total_snow DESC + LIMIT 1000 + ) AS highest_snow + INNER JOIN resorts ON highest_snow.code = resorts.code + WHERE distance_km < 20 + ORDER BY + resort_name ASC, + total_snow DESC + LIMIT 1 BY + resort_name, + station_id +) +ORDER BY total_snow DESC +LIMIT 5 + +┌─resort_name──────────┬─total_snow_m─┬─resort_location─┬─month_year─┐ +│ Sugar Bowl, CA │ 7.799 │ (-120.3,39.27) │ 201902 │ +│ Donner Ski Ranch, CA │ 7.799 │ (-120.34,39.31) │ 201902 │ +│ Boreal, CA │ 7.799 │ (-120.35,39.33) │ 201902 │ +│ Homewood, CA │ 4.926 │ (-120.17,39.08) │ 201902 │ +│ Alpine Meadows, CA │ 4.926 │ (-120.22,39.17) │ 201902 │ +└──────────────────────┴──────────────┴─────────────────┴────────────┘ + +5 rows in set. Elapsed: 0.750 sec. Processed 689.10 million rows, 3.20 GB (918.20 million rows/s., 4.26 GB/s.) +Peak memory usage: 67.66 MiB. +``` + +## Credits + +We would like to acknowledge the efforts of the Global Historical Climatology Network for preparing, cleansing, and distributing this data. We appreciate your efforts. + +Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray, R.S. Vose, B.E.Gleason, and T.G. Houston, 2012: Global Historical Climatology Network - Daily (GHCN-Daily), Version 3. [indicate subset used following decimal, e.g. Version 3.25]. NOAA National Centers for Environmental Information. http://doi.org/10.7289/V5D21VHZ [17/08/2020] diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index a53844e792f5..518037a2c7cd 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -197,6 +197,29 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section). +## Aliases {#cli_aliases} + +- `\l` - SHOW DATABASES +- `\d` - SHOW TABLES +- `\c ` - USE DATABASE +- `.` - repeat the last query + + +## Shortkeys {#shortkeys_aliases} + +- `Alt (Option) + Shift + e` - open editor with current query. It is possible to set up an environment variable - `EDITOR`, by default vim is used. +- `Alt (Option) + #` - comment line. +- `Ctrl + r` - fuzzy history search. 
+ +:::tip +To make the meta key (Option) work correctly on macOS: + +iTerm2: Go to Preferences -> Profile -> Keys -> Left Option key and click Esc+ +::: + +The full list of all available shortkeys is in [replxx](https://github.com/AmokHuginnsson/replxx/blob/1f149bf/src/replxx_impl.cxx#L262). + + ## Connection string {#connection_string} clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax: diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index dfe62d591e32..005c7818eb15 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -163,7 +163,7 @@ key: value Corresponding XML: ``` xml -value +value ``` A nested XML node is represented by a YAML map: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 513e4d1cb789..a6d66d952cd8 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1935,7 +1935,7 @@ Possible values: Default value: `450`. -### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} +### async_insert_busy_timeout_max_ms {#async-insert-busy-timeout-max-ms} The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data. @@ -1946,6 +1946,61 @@ Possible values: Default value: `200`. +### async_insert_poll_timeout_ms {#async-insert-poll-timeout-ms} + +Timeout in milliseconds for polling data from the asynchronous insert queue. + +Possible values: + +- Positive integer. + +Default value: `10`. + +### async_insert_use_adaptive_busy_timeout {#allow-experimental-async-insert-adaptive-busy-timeout} + +Use adaptive asynchronous insert timeout. + +Possible values: + +- 0 - Disabled. +- 1 - Enabled. + +Default value: `0`. + +### async_insert_busy_timeout_min_ms {#async-insert-busy-timeout-min-ms} + +If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the minimum value of the asynchronous insert timeout in milliseconds. It also serves as the initial value, which may be increased later by the adaptive algorithm, up to the [async_insert_busy_timeout_ms](#async_insert_busy_timeout_ms). + +Possible values: + +- Positive integer. + +Default value: `50`. + +### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} + +Alias for [`async_insert_busy_timeout_max_ms`](#async_insert_busy_timeout_max_ms). + +### async_insert_busy_timeout_increase_rate {#async-insert-busy-timeout-increase-rate} + +If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the exponential growth rate at which the adaptive asynchronous insert timeout increases. + +Possible values: + +- A positive floating-point number. + +Default value: `0.2`.
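To show how the adaptive busy timeout settings documented above fit together, here is a minimal, hedged sketch; the `events` table is hypothetical and the values are illustrative rather than recommended defaults:

```sql
-- Hypothetical table used only for this illustration.
CREATE TABLE events (ts DateTime, message String) ENGINE = MergeTree ORDER BY ts;

-- Enable asynchronous inserts and the adaptive busy timeout described above.
-- The timeout starts at async_insert_busy_timeout_min_ms and may grow towards
-- async_insert_busy_timeout_max_ms at the configured increase rate.
SET async_insert = 1,
    wait_for_async_insert = 1,
    async_insert_use_adaptive_busy_timeout = 1,
    async_insert_busy_timeout_min_ms = 50,
    async_insert_busy_timeout_max_ms = 200,
    async_insert_busy_timeout_increase_rate = 0.2;

-- Subsequent inserts are collected and flushed according to the adaptive timeout.
INSERT INTO events VALUES (now(), 'ping');
```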
+ +### async_insert_busy_timeout_decrease_rate {#async-insert-busy-timeout-decrease-rate} + +If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the exponential growth rate at which the adaptive asynchronous insert timeout decreases. + +Possible values: + +- A positive floating-point number. + +Default value: `0.2`. + ### async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms} The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the settings prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded. @@ -2040,6 +2095,32 @@ SELECT * FROM test_table └───┘ ``` +## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views} + +Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set. + +Possible values: + + 0 — Disabled. + 1 — Enabled. + +Default value: 0. + +Usage: + +If setting `deduplicate_blocks_in_dependent_materialized_views` is enabled, `insert_deduplication_token` is passed to dependent materialized views. But in complex INSERT flows it is possible that we want to avoid deduplication for dependent materialized views. + +Example: +``` +landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + | | + └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ +``` + +In this example we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will be inserted into `ds_2_1`. Without `update_insert_deduplication_token_in_dependent_materialized_views` setting enabled, those two different blocks will be deduplicated, because different blocks from `mv_2_1` and `mv_2_2` will have the same `insert_deduplication_token`. + +If setting `update_insert_deduplication_token_in_dependent_materialized_views` is enabled, during each insert into dependent materialized views `insert_deduplication_token` is updated with table identifier, so block from `mv_2_1` and block from `mv_2_2` will have different `insert_deduplication_token` and will not be deduplicated. + ## insert_keeper_max_retries The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries. @@ -5165,7 +5246,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8); ## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key} When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files. -When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. +When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. 
With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. Default value: `false`. @@ -5176,6 +5257,95 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. +## allow_experimental_variant_type {#allow_experimental_variant_type} + +Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). + +Default value: `false`. + +## use_variant_as_common_type {#use_variant_as_common_type} + +Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types. + +Example: + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1); +SELECT if(number % 2, number, range(number)) as variant FROM numbers(5); +``` + +```text +┌─variant_type───────────────────┐ +│ Variant(Array(UInt64), UInt64) │ +└────────────────────────────────┘ +┌─variant───┐ +│ [] │ +│ 1 │ +│ [0,1] │ +│ 3 │ +│ [0,1,2,3] │ +└───────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1); +SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4); +``` + +```text +─variant_type─────────────────────────┐ +│ Variant(Array(UInt8), String, UInt8) │ +└──────────────────────────────────────┘ + +┌─variant───────┐ +│ 42 │ +│ [1,2,3] │ +│ Hello, World! │ +│ ᴺᵁᴸᴸ │ +└───────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1); +SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3); +``` + +```text +┌─array_of_variants_type────────────────────────┐ +│ Array(Variant(Array(UInt64), String, UInt64)) │ +└───────────────────────────────────────────────┘ + +┌─array_of_variants─┐ +│ [[],0,'str_0'] │ +│ [[0],1,'str_1'] │ +│ [[0,1],2,'str_2'] │ +└───────────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1); +SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3); +``` + +```text +┌─map_of_variants_type────────────────────────────────┐ +│ Map(String, Variant(Array(UInt64), String, UInt64)) │ +└─────────────────────────────────────────────────────┘ + +┌─map_of_variants───────────────┐ +│ {'a':[],'b':0,'c':'str_0'} │ +│ {'a':[0],'b':1,'c':'str_1'} │ +│ {'a':[0,1],'b':2,'c':'str_2'} │ +└───────────────────────────────┘ +``` + + +Default value: `false`. + ## max_partition_size_to_drop Restriction on dropping partitions in query time. @@ -5206,4 +5376,4 @@ Allow to ignore schema evolution in Iceberg table engine and read all data using Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. 
:::  -Default value: 'false'. \ No newline at end of file +Default value: 'false'. diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 3dec6345eb6f..898e6ae2e2cd 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -287,7 +287,7 @@ Number of threads in the HashedDictionary thread pool running a task. ### IOPrefetchThreads -Number of threads in the IO prefertch thread pool. +Number of threads in the IO prefetch thread pool. ### IOPrefetchThreadsActive diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0dca7c525f27..3d4b8f62d2d2 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -25,6 +25,8 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total count of sequential authentication failures. If the user enters the correct password before the `max_failed_sequential_authentications` threshold is exceeded, the counter is reset. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum count of sequential authentication failures. - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index a04018ac2c8a..960903fa25fc 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -28,8 +28,10 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. -- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
-- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total count of sequential authentication failures. If the user entered the correct password before exceed `failed_sequential_authentications` threshold then the counter will be reset. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum count of sequential authentication failures. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. ## See Also {#see-also} diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 8b7d7f85552b..6d5148ad9659 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -45,11 +45,11 @@ clickhouse-benchmark [keys] < queries_file; - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. - `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). - `-r`, `--randomize` — Random order of queries execution if there is more than one input query. - `-s`, `--secure` — Using `TLS` connection. - `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). +- `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `--port` keys. - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md new file mode 100644 index 000000000000..cc601c097fe2 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -0,0 +1,48 @@ + --- + toc_priority: 112 + --- + + # groupArraySorted {#groupArraySorted} + + Returns an array with the first N items in ascending order. + + ``` sql + groupArraySorted(N)(column) + ``` + + **Arguments** + + - `N` – The number of elements to return. + + If the parameter is omitted, default value is the size of input. + + - `column` – The value (Integer, String, Float and other Generic types). + + **Example** + + Gets the first 10 numbers: + + ``` sql + SELECT groupArraySorted(10)(number) FROM numbers(100) + ``` + + ``` text + ┌─groupArraySorted(10)(number)─┐ + │ [0,1,2,3,4,5,6,7,8,9] │ + └──────────────────────────────┘ + ``` + + + Gets all the String implementations of all numbers in column: + + ``` sql +SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); + + ``` + + ``` text + ┌─groupArraySorted(str)────────┐ + │ ['0','1','2','3','4'] │ + └──────────────────────────────┘ + ``` + \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 10bd3e11064e..93d4282c32bf 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions: - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArraySample](./grouparraysample.md) +- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) @@ -88,7 +89,7 @@ ClickHouse-specific aggregate functions: - [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) - [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) - [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) +- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) - [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) - [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) - [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) @@ -105,4 +106,3 @@ ClickHouse-specific aggregate functions: - [sparkBar](./sparkbar.md) - [sumCount](./sumcount.md) - [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md) - diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 
7467a47cf5fd..2a166c83dadb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -18,7 +18,7 @@ Functions: - `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). - `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). - `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). -- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). +- `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md index 9cb73dfc9d8e..f9acd2e20cba 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch sidebar_position: 211 -title: quantileDDSketch +title: quantileDD --- -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DD](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). **Syntax** @@ -44,13 +44,13 @@ Input table has an integer and a float columns: Query to calculate 0.75-quantile (third quartile): ``` sql -SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table; +SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table; ``` Result: ``` text -┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐ +┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐ │ 2.974233423476717 │ 1.01 │ └─────────────────────────────────┴─────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e5da6a9c1de9..e2a5bc53e321 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -9,7 +9,7 @@ sidebar_position: 201 Syntax: `quantiles(level1, level2, …)(x)` -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. 
These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. ## quantilesExactExclusive diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index f727f0d75f7c..fd548a0d5a28 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -7,7 +7,7 @@ sidebar_label: JSON # JSON :::note -This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. +This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. ::: Stores JavaScript Object Notation (JSON) documents in a single column. @@ -15,7 +15,8 @@ Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. :::note -The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. +The JSON data type is an obsolete feature. Do not use it. +If you want to use it, set `allow_experimental_object_type = 1`. ::: ## Example diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md new file mode 100644 index 000000000000..17d518784201 --- /dev/null +++ b/docs/en/sql-reference/data-types/variant.md @@ -0,0 +1,274 @@ +--- +slug: /en/sql-reference/data-types/json +sidebar_position: 55 +sidebar_label: Variant +--- + +# Variant(T1, T2, T3, ...) + +This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type +has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). + +The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1). +Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types. + +:::note +The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`. +::: + +## Creating Variant + +Using `Variant` type in table column definition: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v FROM test; +``` + +```text +┌─v─────────────┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ Hello, World! │ +│ [1,2,3] │ +└───────────────┘ +``` + +Using CAST from ordinary columns: + +```sql +SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; +``` + +```text +┌─type_name──────────────────────────────┬─variant───────┐ +│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │ +└────────────────────────────────────────┴───────────────┘ +``` + +Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it): + +```sql +SET use_variant_as_common_type = 1; +SELECT if(number % 2, number, range(number)) as variant FROM numbers(5); +``` + +```text +┌─variant───┐ +│ [] │ +│ 1 │ +│ [0,1] │ +│ 3 │ +│ [0,1,2,3] │ +└───────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4); +``` + +```text +┌─variant───────┐ +│ 42 │ +│ [1,2,3] │ +│ Hello, World! 
│ +│ ᴺᵁᴸᴸ │ +└───────────────┘ +``` + +Using functions 'array/map' if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it): + +```sql +SET use_variant_as_common_type = 1; +SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3); +``` + +```text +┌─array_of_variants─┐ +│ [[],0,'str_0'] │ +│ [[0],1,'str_1'] │ +│ [[0,1],2,'str_2'] │ +└───────────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3); +``` + +```text +┌─map_of_variants───────────────┐ +│ {'a':[],'b':0,'c':'str_0'} │ +│ {'a':[0],'b':1,'c':'str_1'} │ +│ {'a':[0,1],'b':2,'c':'str_2'} │ +└───────────────────────────────┘ +``` + +## Reading Variant nested types as subcolumns + +Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn. +So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`, +this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will +be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`) +in all rows in which original `Variant` column doesn't have type `T2`. + +Variant subcolumns can be also read using function `variantElement(variant_column, type_name)`. + +Examples: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test; +``` + +```text +┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴───────────────┴──────────┴─────────────────┘ +``` + +```sql +SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐ +│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │ +└──────────────────────┴──────────────────────┴─────────────────────────────┘ +``` + +```sql +SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test; +``` + +```text +┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +``` + +To know what variant is stored in each row function `variantType(variant_column)` can be used. It returns `Enum` with variant type name for each row (or `'None'` if row is `NULL`). 
+ +Example: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT variantType(v) from test; +``` + +```text +┌─variantType(v)─┐ +│ None │ +│ UInt64 │ +│ String │ +│ Array(UInt64) │ +└────────────────┘ +``` + +```sql +SELECT toTypeName(variantType(v)) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(variantType(v))──────────────────────────────────────────┐ +│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Conversion between Variant column and other columns + +There are 3 possible conversions that can be performed with Variant column. + +### Converting an ordinary column to a Variant column + +It is possible to convert ordinary column with type `T` to a `Variant` column containing this type: + +```sql +SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; +``` + +```text +┌─type_name──────────────────────────────┬─variant───────┐ +│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │ +└────────────────────────────────────────┴───────────────┘ +``` + +### Converting a Variant column to an ordinary column + +It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type: + +```sql +CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('42.42'); +SELECT v::Nullable(Float64) FROM test; +``` + +```text +┌─CAST(v, 'Nullable(Float64)')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ 42.42 │ +└──────────────────────────────┘ +``` + +### Converting a Variant to another Variant + +It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`: + +```sql +CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('String'); +SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test; +``` + +```text +┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ String │ +└───────────────────────────────────────────────────┘ +``` + + +## Reading Variant type from the data + +All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Variant` type. During data parsing ClickHouse tries to insert value into most appropriate variant type. + +Example: + +```sql +SELECT + v, + variantElement(v, 'String') AS str, + variantElement(v, 'UInt64') AS num, + variantElement(v, 'Float64') AS float, + variantElement(v, 'DateTime') AS date, + variantElement(v, 'Array(UInt64)') AS arr +FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$ +{"v" : "Hello, World!"}, +{"v" : 42}, +{"v" : 42.42}, +{"v" : "2020-01-01 00:00:00"}, +{"v" : [1, 2, 3]} +$$) +``` + +```text +┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐ +│ Hello, World! │ Hello, World! 
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ +│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘ +``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 9f86aaf25027..080de94f8b76 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1805,6 +1805,7 @@ Example of settings: ``` xml + postgresql-hostname 5432 clickhouse qwerty diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 35f9c7af2cef..d05e7bbfe51e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2832,6 +2832,88 @@ Result: └─────────────────────────────────────────────────────────────────────────┘ ``` +## variantElement + +Extracts a column with specified type from a `Variant` column. + +**Syntax** + +```sql +variantElement(variant, type_name, [, default_value]) +``` + +**Arguments** + +- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. + +**Returned value** + +- Subcolumn of a `Variant` column with specified type. + +**Example** + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test; +``` + +```text +┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +``` + +## variantType + +Returns the variant type name for each row of `Variant` column. If row contains NULL, it returns `'None'` for it. + +**Syntax** + +```sql +variantType(variant) +``` + +**Arguments** + +- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). + +**Returned value** + +- Enum8 column with variant type name for each row. + +**Example** + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT variantType(v) FROM test; +``` + +```text +┌─variantType(v)─┐ +│ None │ +│ UInt64 │ +│ String │ +│ Array(UInt64) │ +└────────────────┘ +``` + +```sql +SELECT toTypeName(variantType(v)) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(variantType(v))──────────────────────────────────────────┐ +│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │ +└─────────────────────────────────────────────────────────────────────┘ +``` + ## minSampleSizeConversion Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. 
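Complementing the `variantElement` reference above, a hedged sketch of its optional `default_value` argument; the table mirrors the earlier example, and the chosen default `0` is only an illustrative assumption:

```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);

-- For rows that do not hold the UInt64 variant, the supplied default (0)
-- should be returned instead of NULL, per the description of default_value above.
SELECT v, variantElement(v, 'UInt64', 0) AS num FROM test;
```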
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a2f1b0d77529..60cb3ac4ac4e 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -515,7 +515,7 @@ Alias: `concat_ws` **Arguments** - sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index 144d832b36a7..016c3410944e 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -77,8 +77,8 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** -- An array of three arrays where the first array include seasonal components, the second array - trend, -and the third array - residue component. +- An array of four arrays where the first array include seasonal components, the second array - trend, +the third array - residue component, and the fourth array - baseline(seasonal + trend) component. Type: [Array](../../sql-reference/data-types/array.md). @@ -107,6 +107,10 @@ Result: [ 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, 0 + ], + [ + 10.1, 20.449999, 40.340004, 10.100001, 20.45, 40.34, 10.100001, 20.45, 40.34, 10.1, 20.45, 40.34, + 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.100002, 20.45, 40.34 ]] │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 114b8d5ffe32..5659a0565c54 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -112,7 +112,7 @@ Note that: For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same order by key and the same primary key. - Both tables must have the same indices and projections. - Both tables must have the same storage policy. diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 517e64e3e5b8..59045afdeb60 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -8,8 +8,6 @@ sidebar_label: VIEW You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. 
-The `allow_experimental_alter_materialized_view_structure` setting must be enabled. - This command is created to change a materialized view created with the `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view; because of this, the application of this command is very limited for materialized views created without the `TO [db.]name` clause. **Example with TO table** diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index a6ced870c18f..d16b40876c7c 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -21,7 +21,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. -Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index f6158acd9a45..028d0b09a1a2 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This feature is deprecated and will be removed in the future. For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md) -## Refreshable Materialized View {#refreshable-materialized-view} +## Refreshable Materialized View [Experimental] {#refreshable-materialized-view} ```sql CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name @@ -120,7 +120,8 @@ Differences from regular non-refreshable materialized views: :::note Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations: - * not compatible with Replicated database or table engines, + * not compatible with Replicated database or table engines, + * not supported in ClickHouse Cloud, + * require [Atomic database engine](../../../engines/database-engines/atomic.md), * no retries for failed refresh - we just skip to the next scheduled refresh time, * no limit on number of concurrent refreshes. diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index bb62cc3af1c7..667ccbc6c930 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -9,10 +9,6 @@ sidebar_label: RENAME Renames databases, tables, or dictionaries. Several entities can be renamed in a single query. Note that the `RENAME` query with several entities is a non-atomic operation. To swap entity names atomically, use the [EXCHANGE](./exchange.md) statement.
-:::note -The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. -::: - **Syntax** ```sql diff --git a/docs/ru/operations/system-tables/quota_usage.md b/docs/ru/operations/system-tables/quota_usage.md index 96f6debd24e6..46305e59da6c 100644 --- a/docs/ru/operations/system-tables/quota_usage.md +++ b/docs/ru/operations/system-tables/quota_usage.md @@ -26,8 +26,11 @@ slug: /ru/operations/system-tables/quota_usage - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. -- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. -- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. + ## Смотрите также {#see-also} diff --git a/docs/ru/operations/system-tables/quotas_usage.md b/docs/ru/operations/system-tables/quotas_usage.md index 27e7cdf8abea..4bc0f2e81ca1 100644 --- a/docs/ru/operations/system-tables/quotas_usage.md +++ b/docs/ru/operations/system-tables/quotas_usage.md @@ -29,9 +29,10 @@ slug: /ru/operations/system-tables/quotas_usage - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. 
+- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль. - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. - ## Смотрите также {#see-also} - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md index 709baea6af00..c14b81c9bf34 100644 --- a/docs/ru/sql-reference/statements/alter/quota.md +++ b/docs/ru/sql-reference/statements/alter/quota.md @@ -22,7 +22,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). +Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md index 18eba6b5b1ae..398c52fdc733 100644 --- a/docs/ru/sql-reference/statements/create/quota.md +++ b/docs/ru/sql-reference/statements/create/quota.md @@ -20,7 +20,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] ``` Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). +Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). 
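To connect the new `failed_sequential_authentications` quota parameter documented above with a concrete statement, a hedged sketch follows; the quota name `auth_guard`, the user `some_user`, and the limit of 5 are hypothetical:

```sql
-- Hypothetical quota: at most 5 consecutive failed logins per hour for some_user.
CREATE QUOTA IF NOT EXISTS auth_guard
    FOR INTERVAL 1 hour MAX failed_sequential_authentications = 5
    TO some_user;

-- The corresponding counters should then be visible in the new columns of system.quotas_usage.
SELECT quota_name, failed_sequential_authentications, max_failed_sequential_authentications
FROM system.quotas_usage;
```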
diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h index 063b13e90780..01f8b30f5463 100644 --- a/programs/copier/ClusterCopier.h +++ b/programs/copier/ClusterCopier.h @@ -20,7 +20,7 @@ class ClusterCopier : WithMutableContext const String & host_id_, const String & proxy_database_name_, ContextMutablePtr context_, - Poco::Logger * log_) + LoggerRawPtr log_) : WithMutableContext(context_), task_zookeeper_path(task_path_), host_id(host_id_), @@ -230,7 +230,7 @@ class ClusterCopier : WithMutableContext bool experimental_use_sample_offset{false}; - Poco::Logger * log; + LoggerRawPtr log; UInt64 max_table_tries = 3; UInt64 max_shard_partition_tries = 3; diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index 36dcfa508425..bbdec230d2df 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -177,7 +177,7 @@ class CleanStateClock auto watch_callback = [my_stale = stale] (const Coordination::WatchResponse & rsp) { - auto logger = &Poco::Logger::get("ClusterCopier"); + auto logger = getLogger("ClusterCopier"); if (rsp.error == Coordination::Error::ZOK) { switch (rsp.type) diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 7ed4499efbd2..fa66a69687c2 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -375,7 +375,7 @@ int KeeperClient::main(const std::vector & /* args */) if (!config().has("host") && !config().has("port") && !keys.empty()) { - LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); + LOG_INFO(getLogger("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); for (const auto & key : keys) { diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 20448aafa2f1..2b2759412ab3 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -28,7 +28,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) po::store(po::command_line_parser(argc, argv).options(desc).run(), options); Poco::AutoPtr console_channel(new Poco::ConsoleChannel); - Poco::Logger * logger = &Poco::Logger::get("KeeperConverter"); + LoggerPtr logger = getLogger("KeeperConverter"); logger->setChannel(console_channel); if (options.count("help")) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 109884ec899b..c751702dc6fb 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -624,7 +624,7 @@ catch (...) 
void Keeper::logRevision() const { - LOG_INFO(&Poco::Logger::get("Application"), + LOG_INFO(getLogger("Application"), "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}", VERSION_STRING, ClickHouseRevision::getVersionRevision(), diff --git a/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp b/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp index 6ee078f6c5cf..7ce896636e70 100644 --- a/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp +++ b/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp @@ -13,7 +13,7 @@ CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance() } CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory() - : log(&Poco::Logger::get("CatBoostLibraryHandlerFactory")) + : log(getLogger("CatBoostLibraryHandlerFactory")) { } diff --git a/programs/library-bridge/CatBoostLibraryHandlerFactory.h b/programs/library-bridge/CatBoostLibraryHandlerFactory.h index 6ba3fe84ec9e..e29834cbe791 100644 --- a/programs/library-bridge/CatBoostLibraryHandlerFactory.h +++ b/programs/library-bridge/CatBoostLibraryHandlerFactory.h @@ -31,7 +31,7 @@ class CatBoostLibraryHandlerFactory final : private boost::noncopyable /// map: model path --> catboost library handler std::unordered_map library_handlers TSA_GUARDED_BY(mutex); std::mutex mutex; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp b/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp index 70cd6fca3751..4fa5c991f0f8 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp @@ -9,40 +9,40 @@ const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal"; void ExternalDictionaryLibraryAPI::log(LogLevel level, CString msg) { - auto & logger = Poco::Logger::get(DICT_LOGGER_NAME); + auto logger = getLogger(DICT_LOGGER_NAME); switch (level) { case LogLevel::TRACE: - if (logger.trace()) - logger.trace(msg); + if (logger->trace()) + logger->trace(msg); break; case LogLevel::DEBUG: - if (logger.debug()) - logger.debug(msg); + if (logger->debug()) + logger->debug(msg); break; case LogLevel::INFORMATION: - if (logger.information()) - logger.information(msg); + if (logger->information()) + logger->information(msg); break; case LogLevel::NOTICE: - if (logger.notice()) - logger.notice(msg); + if (logger->notice()) + logger->notice(msg); break; case LogLevel::WARNING: - if (logger.warning()) - logger.warning(msg); + if (logger->warning()) + logger->warning(msg); break; case LogLevel::ERROR: - if (logger.error()) - logger.error(msg); + if (logger->error()) + logger->error(msg); break; case LogLevel::CRITICAL: - if (logger.critical()) - logger.critical(msg); + if (logger->critical()) + logger->critical(msg); break; case LogLevel::FATAL: - if (logger.fatal()) - logger.fatal(msg); + if (logger->fatal()) + logger->fatal(msg); break; } } diff --git a/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp b/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp index 6acd9af20ed8..1b2b57beeb11 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp @@ -26,7 +26,7 @@ void ExternalDictionaryLibraryHandlerFactory::create( if (library_handlers.contains(dictionary_id)) { - LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", 
dictionary_id); + LOG_WARNING(getLogger("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id); return; } diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp index 4af1f8355e80..e5ab22f2d40d 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp @@ -12,7 +12,7 @@ LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory( size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get(name_)) + , log(getLogger(name_)) , name(name_) , keep_alive_timeout(keep_alive_timeout_) { diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.h b/programs/library-bridge/LibraryBridgeHandlerFactory.h index 7565052c4cbe..5b0f088bc296 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.h +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.h @@ -19,7 +19,7 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; const std::string name; const size_t keep_alive_timeout; }; diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index b0b465460e08..ab146f458df9 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -47,7 +47,7 @@ namespace if (!response.sent()) *response.send() << message << '\n'; - LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message)); + LOG_WARNING(getLogger("LibraryBridge"), fmt::runtime(message)); } std::shared_ptr parseColumns(String && column_string) @@ -92,7 +92,7 @@ static void writeData(Block data, OutputFormatPtr format) ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler")) + , log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler")) { } @@ -380,7 +380,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeExistsHandler")) + , log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler")) { } @@ -419,7 +419,7 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler( size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler")) + , log(getLogger("CatBoostLibraryBridgeRequestHandler")) { } @@ -623,7 +623,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler")) + , log(getLogger("CatBoostLibraryBridgeExistsHandler")) { } diff --git a/programs/library-bridge/LibraryBridgeHandlers.h 
b/programs/library-bridge/LibraryBridgeHandlers.h index 4f08d7a60840..1db71eb24cb8 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -26,7 +26,7 @@ class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, static constexpr inline auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -40,7 +40,7 @@ class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -69,7 +69,7 @@ class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithConte private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -83,7 +83,7 @@ class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContex private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4e0b9eeb731a..443d4a52fa3e 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -221,7 +221,7 @@ void LocalServer::tryInitPath() { // The path is not provided explicitly - use a unique path in the system temporary directory // (or in the current dir if temporary don't exist) - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); std::filesystem::path parent_folder; std::filesystem::path default_path; @@ -631,7 +631,7 @@ void LocalServer::processConfig() tryInitPath(); - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); /// Maybe useless if (config().has("macros")) diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index e3087701182c..ca7044fdf328 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -18,7 +18,7 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get("ODBCColumnsInfoHandler")) + , log(getLogger("ODBCColumnsInfoHandler")) , keep_alive_timeout(keep_alive_timeout_) { } @@ -26,7 +26,7 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index ff5c02ca07b3..7b78c5b4b93c 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -16,7 +16,7 @@ class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext public: IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get("IdentifierQuoteHandler")) + , log(getLogger("IdentifierQuoteHandler")) , keep_alive_timeout(keep_alive_timeout_) { } @@ -24,7 +24,7 @@ class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index 7977245ff821..ed0c6b2e28c3 
100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -24,7 +24,7 @@ class ODBCHandler : public HTTPRequestHandler, WithContext ContextPtr context_, const String & mode_) : WithContext(context_) - , log(&Poco::Logger::get("ODBCHandler")) + , log(getLogger("ODBCHandler")) , keep_alive_timeout(keep_alive_timeout_) , mode(mode_) { @@ -33,7 +33,7 @@ class ODBCHandler : public HTTPRequestHandler, WithContext void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; String mode; diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 3aa3d9a652b6..c46144c3dc83 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes ODBCSource::ODBCSource( nanodbc::ConnectionHolderPtr connection_holder, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) : ISource(sample_block) - , log(&Poco::Logger::get("ODBCSource")) + , log(getLogger("ODBCSource")) , max_block_size{max_block_size_} , query(query_str) { diff --git a/programs/odbc-bridge/ODBCBlockInputStream.h b/programs/odbc-bridge/ODBCBlockInputStream.h index 79d5816ad014..dedd98f930f5 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.h +++ b/programs/odbc-bridge/ODBCBlockInputStream.h @@ -30,7 +30,7 @@ class ODBCSource final : public ISource column.insertFrom(sample_column, 0); } - Poco::Logger * log; + LoggerPtr log; const UInt64 max_block_size; ExternalResultDescription description; diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index eb5901ad3e1f..87c09d1e7571 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -19,7 +19,7 @@ ODBCSink::ODBCSink( ContextPtr local_context_, IdentifierQuotingStyle quoting_) : ISink(sample_block_) - , log(&Poco::Logger::get("ODBCSink")) + , log(getLogger("ODBCSink")) , connection_holder(std::move(connection_holder_)) , db_name(remote_database_name_) , table_name(remote_table_name_) diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h index f5e7b4e3a2d5..06edce92e1a6 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.h +++ b/programs/odbc-bridge/ODBCBlockOutputStream.h @@ -30,7 +30,7 @@ using ValueType = ExternalResultDescription::ValueType; void consume(Chunk chunk) override; private: - Poco::Logger * log; + LoggerPtr log; nanodbc::ConnectionHolderPtr connection_holder; std::string db_name; diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index dd21358df8c3..eebb0c24c7a8 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -11,7 +11,7 @@ namespace DB ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get(name_)) + , log(getLogger(name_)) , name(name_) , keep_alive_timeout(keep_alive_timeout_) { diff --git a/programs/odbc-bridge/ODBCHandlerFactory.h b/programs/odbc-bridge/ODBCHandlerFactory.h index 3e3da7c9f246..4aaf1b55453c 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.h +++ b/programs/odbc-bridge/ODBCHandlerFactory.h @@ -22,7 +22,7 @@ 
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; std::string name; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index a10055c66598..b70e45f2b9db 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -97,7 +97,7 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::functiongetAsyncLoader().stop(); ); @@ -2491,7 +2490,7 @@ void Server::stopServers( const ServerType & server_type ) const { - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); /// Remove servers once all their connections are closed auto check_server = [&log](const char prefix[], auto & server) @@ -2530,7 +2529,7 @@ void Server::updateServers( std::vector & servers, std::vector & servers_to_start_before_tables) { - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); const auto listen_hosts = getListenHosts(config); const auto interserver_listen_hosts = getInterserverListenHosts(config); diff --git a/programs/server/binary.html b/programs/server/binary.html index 988dd33a72a0..eec39cd44638 100644 --- a/programs/server/binary.html +++ b/programs/server/binary.html @@ -60,10 +60,29 @@ /// If it is hosted on server, assume that it is the address of ClickHouse. if (location.protocol != 'file:') { host = location.origin; - user = 'default'; add_http_cors_header = false; } + if (window.location.search) { + const params = new URLSearchParams(window.location.search); + if (params.has('host')) { host = params.get('host'); } + if (params.has('user')) { user = params.get('user'); } + if (params.has('password')) { password = params.get('password'); } + } + + let url = `${host}?allow_introspection_functions=1`; + + if (add_http_cors_header) { + url += '&add_http_cors_header=1'; + } + + if (user) { + url += `&user=${encodeURIComponent(user)}`; + } + if (password) { + url += `&password=${encodeURIComponent(password)}`; + } + let map = L.map('space', { crs: L.CRS.Simple, center: [-512, 512], @@ -97,24 +116,11 @@ const key = `${coords.z}-${coords.x}-${coords.y}`; let buf = cached_tiles[key]; if (!buf) { - let url = `${host}?default_format=RowBinary&allow_introspection_functions=1`; - - if (add_http_cors_header) { - // For debug purposes, you may set add_http_cors_header from a browser console - url += '&add_http_cors_header=1'; - } - - if (user) { - url += `&user=${encodeURIComponent(user)}`; - } - if (password) { - url += `&password=${encodeURIComponent(password)}`; - } - - url += `¶m_z=${coords.z}¶m_x=${coords.x}¶m_y=${coords.y}`; - url += `&enable_http_compression=1&network_compression_method=zstd&network_zstd_compression_level=6`; + let request_url = `${url}&default_format=RowBinary` + + `¶m_z=${coords.z}¶m_x=${coords.x}¶m_y=${coords.y}` + + `&enable_http_compression=1&network_compression_method=zstd&network_zstd_compression_level=6`; - const response = await fetch(url, { method: 'POST', body: sql }); + const response = await fetch(request_url, { method: 'POST', body: sql }); if (!response.ok) { const text = await response.text(); @@ -232,7 +238,7 @@ const addr_hex = '0x' + addr_int.toString(16); const response = fetch( - `http://localhost:8123/?default_format=JSON`, + `${url}&default_format=JSON`, { method: 'POST', body: `SELECT 
encodeXMLComponent(demangle(addressToSymbol(${addr_int}::UInt64))) AS name, diff --git a/programs/server/config.xml b/programs/server/config.xml index 0dc271692b88..6a40818332b2 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1562,6 +1562,10 @@ 30000000 + + backups + + !!cond * a + !cond * b), except floating point types because of Inf or NaN. + * For better performance, try to use branch free code for numeric types(i.e. cond ? a : b --> !!cond * a + !cond * b) */ +template +concept is_native_int_or_decimal_v + = std::is_integral_v || (is_decimal && sizeof(ResultType) <= 8); + +// This macro performs a branch-free conditional assignment for floating point types. +// It uses bitwise operations to avoid branching, which can be beneficial for performance. +#define BRANCHFREE_IF_FLOAT(TYPE, vc, va, vb, vr) \ + using UIntType = typename NumberTraits::Construct::Type; \ + using IntType = typename NumberTraits::Construct::Type; \ + auto mask = static_cast(static_cast(vc) - 1); \ + auto new_a = static_cast(va); \ + auto new_b = static_cast(vb); \ + UIntType uint_a; \ + std::memcpy(&uint_a, &new_a, sizeof(UIntType)); \ + UIntType uint_b; \ + std::memcpy(&uint_b, &new_b, sizeof(UIntType)); \ + UIntType tmp = (~mask & uint_a) | (mask & uint_b); \ + (vr) = *(reinterpret_cast(&tmp)); + template inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res) { + size_t size = cond.size(); bool a_is_short = a.size() < size; bool b_is_short = b.size() < size; @@ -57,47 +81,68 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr size_t a_index = 0, b_index = 0; for (size_t i = 0; i < size; ++i) { - if constexpr (std::is_integral_v) - { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[b_index]); - a_index += !!cond[i]; - b_index += !cond[i]; + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[b_index], res[i]) } else - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[b_index++]); + res[i] = cond[i] ? static_cast(a[a_index]) : static_cast(b[b_index]); + + a_index += !!cond[i]; + b_index += !cond[i]; } } else if (a_is_short) { size_t a_index = 0; for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) - { + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[i]); - a_index += !!cond[i]; + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[i], res[i]) } else - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b[i]); + res[i] = cond[i] ? static_cast(a[a_index]) : static_cast(b[i]); + + a_index += !!cond[i]; + } } else if (b_is_short) { size_t b_index = 0; for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) - { + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[b_index]); - b_index += !cond[i]; + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[b_index], res[i]) } else - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[b_index++]); + res[i] = cond[i] ? 
static_cast(a[i]) : static_cast(b[b_index]); + + b_index += !cond[i]; + } } else { for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[i]); + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[i], res[i]) + } else + { res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[i]); + } + } } } @@ -110,21 +155,32 @@ inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, Ar { size_t a_index = 0; for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) - { + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b); - a_index += !!cond[i]; + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b, res[i]) } else - res[i] = cond[i] ? static_cast(a[a_index++]) : static_cast(b); + res[i] = cond[i] ? static_cast(a[a_index]) : static_cast(b); + + a_index += !!cond[i]; + } } else { for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b); + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b, res[i]) + } else res[i] = cond[i] ? static_cast(a[i]) : static_cast(b); + } } } @@ -137,21 +193,68 @@ inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, Ar { size_t b_index = 0; for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) - { + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[b_index]); - b_index += !cond[i]; + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[b_index], res[i]) } else - res[i] = cond[i] ? static_cast(a) : static_cast(b[b_index++]); + res[i] = cond[i] ? static_cast(a) : static_cast(b[b_index]); + + b_index += !cond[i]; + } } else { for (size_t i = 0; i < size; ++i) - if constexpr (std::is_integral_v) + { + if constexpr (is_native_int_or_decimal_v) res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[i]); + else if constexpr (std::is_floating_point_v) + { + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[i], res[i]) + } else res[i] = cond[i] ? static_cast(a) : static_cast(b[i]); + } + } +} + +template +inline void fillConstantConstant(const ArrayCond & cond, A a, B b, ArrayResult & res) +{ + size_t size = cond.size(); + + /// Int8(alias type of uint8_t) has special aliasing properties that prevents compiler from auto-vectorizing for below codes, refer to https://gist.github.com/alexei-zaripov/dcc14c78819c5f1354afe8b70932007c + /// + /// for (size_t i = 0; i < size; ++i) + /// res[i] = cond[i] ? static_cast(a) : static_cast(b); + /// + /// Therefore, we manually optimize it by avoiding branch miss when ResultType is Int8. Other types like (U)Int128|256 or Decimal128/256 also benefit from this optimization. 
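/// A simplified, standalone illustration of the mask trick used by BRANCHFREE_IF_FLOAT above,
/// reduced to a single double value (a hedged sketch, not the macro's exact code): for cond in
/// {0, 1}, `cond - 1` is all-zero bits when cond == 1 and all-one bits when cond == 0, so a
/// bitwise select between the raw bit patterns picks `a` or `b` without any branch.
#include <cstdint>
#include <cstring>

inline double selectBranchFree(bool cond, double a, double b)
{
    const uint64_t mask = static_cast<uint64_t>(cond) - 1; /// 0 if cond, all ones if !cond
    uint64_t ua;
    uint64_t ub;
    std::memcpy(&ua, &a, sizeof(ua));
    std::memcpy(&ub, &b, sizeof(ub));
    const uint64_t ur = (~mask & ua) | (mask & ub);
    double r;
    std::memcpy(&r, &ur, sizeof(r));
    return r; /// a when cond is true, b otherwise
}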
+ if constexpr (std::is_same_v || is_over_big_int) + { + alignas(64) const ResultType ab[2] = {static_cast(a), static_cast(b)}; + for (size_t i = 0; i < size; ++i) + { + res[i] = ab[!cond[i]]; + } + } + else if constexpr (std::is_same_v || std::is_same_v) + { + ResultType new_a = static_cast(a); + ResultType new_b = static_cast(b); + for (size_t i = 0; i < size; ++i) + { + /// Reuse new_a and new_b to achieve auto-vectorization + res[i] = cond[i] ? new_a : new_b; + } + } + else + { + for (size_t i = 0; i < size; ++i) + res[i] = cond[i] ? static_cast(a) : static_cast(b); } } @@ -197,8 +300,7 @@ struct NumIfImpl auto col_res = ColVecResult::create(size); ArrayResult & res = col_res->getData(); - for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a) : static_cast(b); + fillConstantConstant(cond, a, b, res); return col_res; } }; @@ -247,8 +349,7 @@ struct NumIfImpl, Decimal, Decimal> auto col_res = ColVecResult::create(size, scale); ArrayResult & res = col_res->getData(); - for (size_t i = 0; i < size; ++i) - res[i] = cond[i] ? static_cast(a) : static_cast(b); + fillConstantConstant(cond, a, b, res); return col_res; } }; @@ -258,9 +359,16 @@ class FunctionIf : public FunctionIfBase { public: static constexpr auto name = "if"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type); + } + + explicit FunctionIf(bool use_variant_when_no_common_type_ = false) : FunctionIfBase(), use_variant_when_no_common_type(use_variant_when_no_common_type_) {} private: + bool use_variant_when_no_common_type = false; + template static UInt32 decimalScale(const ColumnsWithTypeAndName & arguments [[maybe_unused]]) { @@ -669,13 +777,17 @@ class FunctionIf : public FunctionIfBase } static ColumnPtr executeGeneric( - const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) + const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type) { /// Convert both columns to the common type (if needed). const ColumnWithTypeAndName & arg1 = arguments[1]; const ColumnWithTypeAndName & arg2 = arguments[2]; - DataTypePtr common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type}); + DataTypePtr common_type; + if (use_variant_when_no_common_type) + common_type = getLeastSupertypeOrVariant(DataTypes{arg1.type, arg2.type}); + else + common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type}); ColumnPtr col_then = castColumn(arg1, common_type); ColumnPtr col_else = castColumn(arg2, common_type); @@ -850,6 +962,10 @@ class FunctionIf : public FunctionIfBase ColumnPtr executeForNullableThenElse(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { + /// If result type is Variant, we don't need to remove Nullable. 
+ if (isVariant(result_type)) + return nullptr; + const ColumnWithTypeAndName & arg_cond = arguments[0]; const ColumnWithTypeAndName & arg_then = arguments[1]; const ColumnWithTypeAndName & arg_else = arguments[2]; @@ -955,6 +1071,11 @@ class FunctionIf : public FunctionIfBase assert_cast(*result_column).applyNullMap(assert_cast(*arg_cond.column)); return result_column; } + else if (auto * variant_column = typeid_cast(result_column.get())) + { + variant_column->applyNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column); } @@ -993,6 +1114,11 @@ class FunctionIf : public FunctionIfBase assert_cast(*result_column).applyNegatedNullMap(assert_cast(*arg_cond.column)); return result_column; } + else if (auto * variant_column = typeid_cast(result_column.get())) + { + variant_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else { size_t size = input_rows_count; @@ -1082,6 +1208,9 @@ class FunctionIf : public FunctionIfBase throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. " "Must be UInt8.", arguments[0]->getName()); + if (use_variant_when_no_common_type) + return getLeastSupertypeOrVariant(DataTypes{arguments[1], arguments[2]}); + return getLeastSupertype(DataTypes{arguments[1], arguments[2]}); } @@ -1112,17 +1241,12 @@ class FunctionIf : public FunctionIfBase if (cond_const_col) { - if (arg_then.type->equals(*arg_else.type)) - { - return cond_const_col->getValue() - ? arg_then.column - : arg_else.column; - } + UInt8 value = cond_const_col->getValue(); + const ColumnWithTypeAndName & arg = value ? arg_then : arg_else; + if (arg.type->equals(*result_type)) + return arg.column; else - { - materialized_cond_col = cond_const_col->convertToFullColumn(); - cond_col = typeid_cast(&*materialized_cond_col); - } + return castColumn(arg, result_type); } if (!cond_col) @@ -1159,13 +1283,15 @@ class FunctionIf : public FunctionIfBase TypeIndex left_id = left_type->getTypeId(); TypeIndex right_id = right_type->getTypeId(); + /// TODO optimize for map type + /// TODO optimize for nullable type if (!(callOnBasicTypes(left_id, right_id, call) || (res = executeTyped(cond_col, arguments, result_type, input_rows_count)) || (res = executeString(cond_col, arguments, result_type)) || (res = executeGenericArray(cond_col, arguments, result_type)) || (res = executeTuple(arguments, result_type, input_rows_count)))) { - return executeGeneric(cond_col, arguments, input_rows_count); + return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type); } return res; diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index cbdc08c2fabd..360c2fc7f9fd 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -45,6 +46,18 @@ class FunctionIsNotNull : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & elem = arguments[0]; + + if (isVariant(elem.type)) + { + const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + auto res = DataTypeUInt8().createColumn(); + auto & data = typeid_cast(*res).getData(); + data.reserve(discriminators.size()); + for (auto discr : discriminators) + data.push_back(discr != 
ColumnVariant::NULL_DISCRIMINATOR); + return res; + } + if (elem.type->isLowCardinalityNullable()) { const auto * low_cardinality_column = checkAndGetColumn(*elem.column); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index cdce037088d5..4bf4e44f866e 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -44,6 +45,18 @@ class FunctionIsNull : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override { const ColumnWithTypeAndName & elem = arguments[0]; + + if (isVariant(elem.type)) + { + const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + auto res = DataTypeUInt8().createColumn(); + auto & data = typeid_cast(*res).getData(); + data.reserve(discriminators.size()); + for (auto discr : discriminators) + data.push_back(discr == ColumnVariant::NULL_DISCRIMINATOR); + return res; + } + if (elem.type->isLowCardinalityNullable()) { const auto * low_cardinality_column = checkAndGetColumn(*elem.column); diff --git a/src/Functions/logTrace.cpp b/src/Functions/logTrace.cpp index 55f387cbfeb2..923ea9fd70ef 100644 --- a/src/Functions/logTrace.cpp +++ b/src/Functions/logTrace.cpp @@ -46,7 +46,7 @@ namespace throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Constant string", getName()); - static auto * log = &Poco::Logger::get("FunctionLogTrace"); + static auto log = getLogger("FunctionLogTrace"); LOG_TRACE(log, fmt::runtime(message)); return DataTypeUInt8().createColumnConst(input_rows_count, 0); diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index c950a0491a5b..66cd10a3f0bd 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -30,9 +31,11 @@ class FunctionMap : public IFunction public: static constexpr auto name = "map"; - static FunctionPtr create(ContextPtr) + explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {} + + static FunctionPtr create(ContextPtr context) { - return std::make_shared(); + return std::make_shared(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type); } String getName() const override @@ -77,8 +80,16 @@ class FunctionMap : public IFunction } DataTypes tmp; - tmp.emplace_back(getLeastSupertype(keys)); - tmp.emplace_back(getLeastSupertype(values)); + if (use_variant_as_common_type) + { + tmp.emplace_back(getLeastSupertypeOrVariant(keys)); + tmp.emplace_back(getLeastSupertypeOrVariant(values)); + } + else + { + tmp.emplace_back(getLeastSupertype(keys)); + tmp.emplace_back(getLeastSupertype(values)); + } return std::make_shared(tmp); } @@ -138,6 +149,9 @@ class FunctionMap : public IFunction return ColumnMap::create(nested_column); } + +private: + bool use_variant_as_common_type = false; }; /// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index d0f5a1ce439e..cb946b55c73b 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -117,6 +118,9 @@ class FunctionMultiIf final : public FunctionIfBase types_of_branches.emplace_back(arg); }); + if (context->getSettingsRef().allow_experimental_variant_type && 
context->getSettingsRef().use_variant_as_common_type) + return getLeastSupertypeOrVariant(types_of_branches); + return getLeastSupertype(types_of_branches); } diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index 0502d2fbfdc7..f6bbd2f96f2e 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -124,21 +124,23 @@ class FunctionRegexpExtract : public IFunction res_offsets.push_back(res_offset); } - static void vectorConstant( + void vectorConstant( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, ssize_t index, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets) const { const OptimizedRegularExpression regexp = Regexps::createRegexp(pattern); unsigned capture = regexp.getNumberOfSubpatterns(); if (index < 0 || index >= capture + 1) throw Exception( ErrorCodes::INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE, - "Index value {} is out of range, should be in [0, {})", + "Index value {} for regexp pattern `{}` in function {} is out-of-range, should be in [0, {})", index, + pattern, + getName(), capture + 1); OptimizedRegularExpression::MatchVec matches; @@ -161,13 +163,13 @@ class FunctionRegexpExtract : public IFunction } } - static void vectorVector( + void vectorVector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, const ColumnPtr & column_index, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets) const { res_data.reserve(data.size() / 5); res_offsets.reserve(offsets.size()); @@ -187,8 +189,10 @@ class FunctionRegexpExtract : public IFunction if (index < 0 || index >= capture + 1) throw Exception( ErrorCodes::INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE, - "Index value {} is out of range, should be in [0, {})", + "Index value {} for regexp pattern `{}` in function {} is out-of-range, should be in [0, {})", index, + pattern, + getName(), capture + 1); regexp.match( @@ -202,12 +206,12 @@ class FunctionRegexpExtract : public IFunction } } - static void constantVector( + void constantVector( const std::string & str, const std::string & pattern, const ColumnPtr & column_index, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets) const { size_t rows = column_index->size(); res_data.reserve(str.size() / 5); @@ -230,8 +234,10 @@ class FunctionRegexpExtract : public IFunction if (index < 0 || index >= capture + 1) throw Exception( ErrorCodes::INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE, - "Index value {} is out of range, should be in [0, {})", + "Index value {} for regexp pattern `{}` in function {} is out-of-range, should be in [0, {})", index, + pattern, + getName(), capture + 1); saveMatch(matches, index, padded_str, 0, res_data, res_offsets, res_offset); diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 21e36761213d..e9276c4aefba 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -128,6 +128,10 @@ class FunctionSeriesDecomposeSTL : public IFunction res_data.insert(residue.begin(), residue.end()); res_col_offsets_data.push_back(res_data.size()); + // Create Baseline = seasonal + trend + std::transform(seasonal.begin(), seasonal.end(), trend.begin(), std::back_inserter(res_data), std::plus<>()); + res_col_offsets_data.push_back(res_data.size()); + 
root_offsets_data.push_back(res_col_offsets->size()); prev_src_offset = curr_offset; @@ -201,7 +205,7 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** -- An array of three arrays where the first array include seasonal components, the second array - trend, and the third array - residue component. +- An array of four arrays where the first array include seasonal components, the second array - trend, the third array - residue component, and the fourth array - baseline(seasonal + trend) component. Type: [Array](../../sql-reference/data-types/array.md). @@ -230,6 +234,10 @@ SELECT seriesDecomposeSTL([10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, [ 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, 0 + ], + [ + 10.1, 20.449999, 40.340004, 10.100001, 20.45, 40.34, 10.100001, 20.45, 40.34, 10.1, 20.45, 40.34, + 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.100002, 20.45, 40.34 ]] │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ```)", diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index ad5be7d9dfd7..c71739090293 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -1,12 +1,15 @@ -#include -#include #include +#include +#include +#include #include #include -#include +#include +#include #include #include -#include +#include + #include @@ -298,7 +301,14 @@ class FunctionTranslate : public IFunction throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}", arguments[2]->getName(), getName()); - return std::make_shared(); + if (isString(arguments[0])) + return std::make_shared(); + else + { + const auto * ptr = checkAndGetDataType(arguments[0].get()); + chassert(ptr); + return std::make_shared(ptr->getN()); + } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp new file mode 100644 index 000000000000..2744a0dabb81 --- /dev/null +++ b/src/Functions/variantElement.cpp @@ -0,0 +1,238 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +/** Extract element of Variant by variant type name. + * Also the function looks through Arrays: you can get Array of Variant elements from Array of Variants. 
+ */ +class FunctionVariantElement : public IFunction +{ +public: + static constexpr auto name = "variantElement"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const size_t number_of_arguments = arguments.size(); + + if (number_of_arguments < 2 || number_of_arguments > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), number_of_arguments); + + size_t count_arrays = 0; + const IDataType * input_type = arguments[0].type.get(); + while (const DataTypeArray * array = checkAndGetDataType(input_type)) + { + input_type = array->getNestedType().get(); + ++count_arrays; + } + + const DataTypeVariant * variant_type = checkAndGetDataType(input_type); + if (!variant_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Variant or Array of Variant. Actual {}", + getName(), + arguments[0].type->getName()); + + std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *variant_type, number_of_arguments); + if (variant_global_discr.has_value()) + { + DataTypePtr return_type = makeNullableOrLowCardinalityNullableSafe(variant_type->getVariant(variant_global_discr.value())); + + for (; count_arrays; --count_arrays) + return_type = std::make_shared(return_type); + + return return_type; + } + else + return arguments[2].type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & input_arg = arguments[0]; + const IDataType * input_type = input_arg.type.get(); + const IColumn * input_col = input_arg.column.get(); + + bool input_arg_is_const = false; + if (typeid_cast(input_col)) + { + input_col = assert_cast(input_col)->getDataColumnPtr().get(); + input_arg_is_const = true; + } + + Columns array_offsets; + while (const DataTypeArray * array_type = checkAndGetDataType(input_type)) + { + const ColumnArray * array_col = assert_cast(input_col); + + input_type = array_type->getNestedType().get(); + input_col = &array_col->getData(); + array_offsets.push_back(array_col->getOffsetsPtr()); + } + + const DataTypeVariant * input_type_as_variant = checkAndGetDataType(input_type); + const ColumnVariant * input_col_as_variant = checkAndGetColumn(input_col); + if (!input_type_as_variant || !input_col_as_variant) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Variant or array of Variants. 
Actual {}", getName(), input_arg.type->getName()); + + std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); + + if (!variant_global_discr.has_value()) + return arguments[2].column; + + const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); + const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); + + /// If Variant has only NULLs or our variant doesn't have any real values, + /// just create column with default values and create null mask with 1. + if (input_col_as_variant->hasOnlyNulls() || variant_column->empty()) + { + auto res = variant_type->createColumn(); + + if (variant_type->lowCardinality()) + assert_cast(*res).nestedToNullable(); + + res->insertManyDefaults(input_col_as_variant->size()); + if (!variant_type->canBeInsideNullable()) + return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); + + auto null_map = ColumnUInt8::create(); + auto & null_map_data = null_map->getData(); + null_map_data.resize_fill(input_col_as_variant->size(), 1); + return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); + } + + /// If we extract single non-empty column and have no NULLs, then just return this variant. + if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + /// If we were trying to extract some other variant, + /// it would be empty and we would already processed this case above. + chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr); + return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count); + } + + /// In general case we should calculate null-mask for variant + /// according to the discriminators column and expand + /// variant column by this mask to get a full column (with default values on NULLs) + const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators(); + auto null_map = ColumnUInt8::create(); + auto & null_map_data = null_map->getData(); + null_map_data.reserve(local_discriminators.size()); + auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); + for (auto local_discr : local_discriminators) + null_map_data.push_back(local_discr != variant_local_discr); + + auto expanded_variant_column = IColumn::mutate(variant_column); + if (variant_type->lowCardinality()) + expanded_variant_column = assert_cast(*expanded_variant_column).cloneNullable(); + expanded_variant_column->expand(null_map_data, /*inverted = */ true); + if (variant_type->canBeInsideNullable()) + return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); + return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count); + } +private: + std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const + { + const auto * name_col = checkAndGetColumnConst(index_column.get()); + if (!name_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument to {} with Variant argument must be a 
constant String", + getName()); + + String variant_element_name = name_col->getValue(); + auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name); + if (variant_element_type) + { + const auto & variants = variant_type.getVariants(); + for (size_t i = 0; i != variants.size(); ++i) + { + if (variants[i]->getName() == variant_element_type->getName()) + return i; + } + } + + if (argument_size == 2) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name); + return std::nullopt; + } + + ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const + { + for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) + res = ColumnArray::create(res, *it); + + if (input_arg_is_const) + res = ColumnConst::create(res, input_rows_count); + + return res; + } +}; + +} + +REGISTER_FUNCTION(VariantElement) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Extracts a column with specified type from a `Variant` column. +)", + .syntax{"variantElement(variant, type_name, [, default_value])"}, + .arguments{{ + {"variant", "Variant column"}, + {"type_name", "The name of the variant type to extract"}, + {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}}, + .examples{{{ + "Example", + R"( +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;)", + R"( +┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +)"}}}, + .categories{"Variant"}, + }); +} + +} diff --git a/src/Functions/variantType.cpp b/src/Functions/variantType.cpp new file mode 100644 index 000000000000..e867cb03a23b --- /dev/null +++ b/src/Functions/variantType.cpp @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +/// Return enum with type name for each row in Variant column. 
+class FunctionVariantType : public IFunction +{ +public: + static constexpr auto name = "variantType"; + static constexpr auto enum_name_for_null = "None"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty() || arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.empty()); + + const DataTypeVariant * variant_type = checkAndGetDataType(arguments[0].type.get()); + + if (!variant_type) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Variant, got {} instead", + getName(), arguments[0].type->getName()); + + const auto & variants = variant_type->getVariants(); + std::vector> enum_values; + enum_values.reserve(variants.size() + 1); + for (ColumnVariant::Discriminator i = 0; i != variants.size(); ++i) + enum_values.emplace_back(variants[i]->getName(), i); + enum_values.emplace_back(enum_name_for_null, ColumnVariant::NULL_DISCRIMINATOR); + return std::make_shared>(enum_values); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnVariant * variant_column = checkAndGetColumn(arguments[0].column.get()); + if (!variant_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Variant, got {} instead", + getName(), arguments[0].type->getName()); + + auto res = result_type->createColumn(); + auto & res_data = typeid_cast(res.get())->getData(); + res_data.reserve(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) + res_data.push_back(variant_column->globalDiscriminatorAt(i)); + + return res; + } +}; + +} + +REGISTER_FUNCTION(VariantType) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns the variant type name for each row of `Variant` column. If row contains NULL, it returns 'None' for it. +)", + .syntax = {"variantType(variant)"}, + .arguments = {{"variant", "Variant column"}}, + .examples = {{{ + "Example", + R"( +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT variantType(v) FROM test;)", + R"( +┌─variantType(v)─┐ +│ None │ +│ UInt64 │ +│ String │ +│ Array(UInt64) │ +└────────────────┘ +)"}}}, + .categories{"Variant"}, + }); +} + +} diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index 785a5005f878..8cb4a2e0bd69 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -246,7 +246,7 @@ ZipArchiveWriter::~ZipArchiveWriter() /// However it is suspicious to destroy instance without finalization at the green path. 
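/// The condition below is how the "green path" is detected: std::uncaught_exceptions() reports
/// whether this destructor runs during stack unwinding, and std::current_exception() whether an
/// exception is currently being handled. Only when neither is the case is a missing finalize()
/// treated as a programming error worth logging.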
if (!std::uncaught_exceptions() && std::current_exception() == nullptr) { - Poco::Logger * log = &Poco::Logger::get("ZipArchiveWriter"); + LoggerPtr log = getLogger("ZipArchiveWriter"); LOG_ERROR(log, "ZipArchiveWriter is not finalized when destructor is called. " "The zip archive might not be written at all or might be truncated. " diff --git a/src/IO/CompressedReadBufferWrapper.h b/src/IO/CompressedReadBufferWrapper.h index bb58a7bfeb3e..66e57488434d 100644 --- a/src/IO/CompressedReadBufferWrapper.h +++ b/src/IO/CompressedReadBufferWrapper.h @@ -1,11 +1,12 @@ #pragma once #include #include +#include namespace DB { -class CompressedReadBufferWrapper : public BufferWithOwnMemory +class CompressedReadBufferWrapper : public BufferWithOwnMemory, public ReadBufferWrapperBase { public: CompressedReadBufferWrapper( @@ -16,7 +17,7 @@ class CompressedReadBufferWrapper : public BufferWithOwnMemory : BufferWithOwnMemory(buf_size, existing_memory, alignment) , in(std::move(in_)) {} - const ReadBuffer & getWrappedReadBuffer() const { return *in; } + const ReadBuffer & getWrappedReadBuffer() const override { return *in; } ReadBuffer & getWrappedReadBuffer() { return *in; } void prefetch(Priority priority) override { in->prefetch(priority); } diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index cce394c67c98..c4468a1b896f 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -70,7 +70,7 @@ namespace static_assert(std::has_virtual_destructor_v, "The base class must have a virtual destructor"); public: - HTTPSessionAdapter(const std::string & host, UInt16 port) : Session(host, port), log{&Poco::Logger::get("HTTPSessionAdapter")} { } + HTTPSessionAdapter(const std::string & host, UInt16 port) : Session(host, port), log{getLogger("HTTPSessionAdapter")} { } ~HTTPSessionAdapter() override = default; protected: @@ -132,7 +132,7 @@ namespace } } } - Poco::Logger * log; + LoggerPtr log; }; bool isHTTPS(const Poco::URI & uri) @@ -223,7 +223,7 @@ namespace bool wait_on_pool_size_limit) : Base( static_cast(max_pool_size_), - &Poco::Logger::get("HTTPSessionPool"), + getLogger("HTTPSessionPool"), wait_on_pool_size_limit ? 
BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool) , host(host_) , port(port_) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 8d73f221748f..cdeb8a186351 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -50,7 +50,7 @@ ParallelReadBuffer::ParallelReadBuffer( , file_size(file_size_) , range_step(std::max(1ul, range_step_)) { - LOG_TRACE(&Poco::Logger::get("ParallelReadBuffer"), "Parallel reading is used"); + LOG_TRACE(getLogger("ParallelReadBuffer"), "Parallel reading is used"); try { diff --git a/src/IO/ReadBuffer.cpp b/src/IO/ReadBuffer.cpp index bf054d08425c..0d1cd322fdd2 100644 --- a/src/IO/ReadBuffer.cpp +++ b/src/IO/ReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB @@ -7,7 +8,7 @@ namespace DB namespace { template - class ReadBufferWrapper : public ReadBuffer + class ReadBufferWrapper : public ReadBuffer, public ReadBufferWrapperBase { public: ReadBufferWrapper(ReadBuffer & in_, CustomData && custom_data_) @@ -15,6 +16,8 @@ namespace { } + const ReadBuffer & getWrappedReadBuffer() const override { return in; } + private: ReadBuffer & in; CustomData custom_data; diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 101e25f8b436..f28c23a71d7a 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -39,7 +39,7 @@ class ReadBufferFromS3 : public ReadBufferFromFileBase std::optional read_result; std::unique_ptr impl; - Poco::Logger * log = &Poco::Logger::get("ReadBufferFromS3"); + LoggerPtr log = getLogger("ReadBufferFromS3"); public: ReadBufferFromS3( diff --git a/src/IO/ReadBufferWrapperBase.h b/src/IO/ReadBufferWrapperBase.h new file mode 100644 index 000000000000..1c594e8018a3 --- /dev/null +++ b/src/IO/ReadBufferWrapperBase.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferWrapperBase +{ +public: + virtual const ReadBuffer & getWrappedReadBuffer() const = 0; + virtual ~ReadBufferWrapperBase() = default; +}; + +} diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 05d35a57b125..bcfe5fd52305 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -619,13 +620,16 @@ void readQuotedStringInto(Vector & s, ReadBuffer & buf) readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf); } -template +template bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf) { - return readAnyQuotedStringInto<'\'', false, Vector, bool>(s, buf); + return readAnyQuotedStringInto<'\'', enable_sql_style_quoting, Vector, bool>(s, buf); } -template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf); +template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf); +template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf); +template bool tryReadQuotedStringInto>(PaddedPODArray & s, ReadBuffer & buf); +template bool tryReadQuotedStringInto>(PaddedPODArray & s, ReadBuffer & buf); template void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf) @@ -633,6 +637,16 @@ void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf) readAnyQuotedStringInto<'"', enable_sql_style_quoting>(s, buf); } +template +bool tryReadDoubleQuotedStringInto(Vector & s, ReadBuffer & buf) +{ + return readAnyQuotedStringInto<'"', enable_sql_style_quoting, Vector, bool>(s, buf); +} + +template bool tryReadDoubleQuotedStringInto(String & s, ReadBuffer & buf); +template bool tryReadDoubleQuotedStringInto(String & s, ReadBuffer & buf); + + 
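// Editor's sketch, standalone and hypothetical: the idea behind the new ReadBufferWrapperBase
// interface added in this diff. A buffer that wraps another buffer exposes it through one
// virtual accessor, so generic code (see the WithFileName.cpp hunk further down) can walk
// through arbitrarily nested wrappers with a single dynamic_cast instead of knowing every
// concrete wrapper type. All names below are demo names, not the real classes.
#include <string>

struct DemoBuffer
{
    virtual ~DemoBuffer() = default;
    virtual std::string fileName() const { return {}; }
};

struct DemoFileBuffer : DemoBuffer
{
    std::string name;
    explicit DemoFileBuffer(std::string name_) : name(std::move(name_)) {}
    std::string fileName() const override { return name; }
};

struct DemoWrapperBase
{
    virtual const DemoBuffer & wrapped() const = 0;   /// analogue of getWrappedReadBuffer()
    virtual ~DemoWrapperBase() = default;
};

struct DemoCompressedBuffer : DemoBuffer, DemoWrapperBase
{
    const DemoBuffer & in;
    explicit DemoCompressedBuffer(const DemoBuffer & in_) : in(in_) {}
    const DemoBuffer & wrapped() const override { return in; }
};

/// Analogue of getFileNameFromReadBuffer: recurse through any number of wrapper layers.
inline std::string fileNameOf(const DemoBuffer & buf)
{
    if (const auto * wrapper = dynamic_cast<const DemoWrapperBase *>(&buf))
        return fileNameOf(wrapper->wrapped());
    return buf.fileName();
}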
template void readBackQuotedStringInto(Vector & s, ReadBuffer & buf) { @@ -652,6 +666,18 @@ void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) readQuotedStringInto(s, buf); } +bool tryReadQuotedString(String & s, ReadBuffer & buf) +{ + s.clear(); + return tryReadQuotedStringInto(s, buf); +} + +bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) +{ + s.clear(); + return tryReadQuotedStringInto(s, buf); +} + template void readQuotedStringInto(PaddedPODArray & s, ReadBuffer & buf); template void readQuotedStringInto(String & s, ReadBuffer & buf); @@ -672,6 +698,18 @@ void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) readDoubleQuotedStringInto(s, buf); } +bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf) +{ + s.clear(); + return tryReadDoubleQuotedStringInto(s, buf); +} + +bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) +{ + s.clear(); + return tryReadDoubleQuotedStringInto(s, buf); +} + void readBackQuotedString(String & s, ReadBuffer & buf) { s.clear(); @@ -691,7 +729,7 @@ concept WithResize = requires (T value) { value.size() } -> std::integral<>; }; -template +template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings) { /// Empty string @@ -754,12 +792,20 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & { PeekableReadBuffer * peekable_buf = dynamic_cast(&buf); if (!peekable_buf) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer"); + { + if constexpr (allow_throw) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer"); + return; + } while (true) { if (peekable_buf->eof()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter); + { + if constexpr (allow_throw) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter); + return; + } char * next_pos = reinterpret_cast(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available())); if (!next_pos) @@ -948,6 +994,9 @@ String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const For template void readCSVStringInto>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +template void readCSVStringInto(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +template void readCSVStringInto(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +template void readCSVStringInto, false, false>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template @@ -1069,15 +1118,18 @@ ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf) } template void readJSONObjectPossiblyInvalid(String & s, ReadBuffer & buf); +template bool readJSONObjectPossiblyInvalid(String & s, ReadBuffer & buf); template void readJSONObjectPossiblyInvalid>(PaddedPODArray & s, ReadBuffer & buf); +template bool readJSONObjectPossiblyInvalid, bool>(PaddedPODArray & s, ReadBuffer & buf); -template -void readJSONArrayInto(Vector & s, ReadBuffer & buf) +template +ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf) { - readJSONObjectOrArrayPossiblyInvalid(s, buf); + return 
readJSONObjectOrArrayPossiblyInvalid(s, buf); } -template void readJSONArrayInto>(PaddedPODArray & s, ReadBuffer & buf); +template void readJSONArrayInto, void>(PaddedPODArray & s, ReadBuffer & buf); +template bool readJSONArrayInto, bool>(PaddedPODArray & s, ReadBuffer & buf); template ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) @@ -1217,6 +1269,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D return false; } + if constexpr (!throw_exception) + { + if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) + || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) + return false; + } + UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); @@ -1240,6 +1299,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D return false; } + if constexpr (!throw_exception) + { + if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4]) + || !isNumericASCII(s[6]) || !isNumericASCII(s[7])) + return false; + } + hour = (s[0] - '0') * 10 + (s[1] - '0'); minute = (s[3] - '0') * 10 + (s[4] - '0'); second = (s[6] - '0') * 10 + (s[7] - '0'); @@ -1259,7 +1325,14 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D { /// Not very efficient. for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos) + { + if constexpr (!throw_exception) + { + if (!isNumericASCII(*digit_pos)) + return false; + } datetime = datetime * 10 + *digit_pos - '0'; + } } datetime *= negative_multiplier; @@ -1282,14 +1355,24 @@ template bool readDateTimeTextFallback(time_t &, ReadBuffer &, cons template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -void skipJSONField(ReadBuffer & buf, StringRef name_of_field) +template +ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field) { + static constexpr bool throw_exception = std::is_same_v; + if (buf.eof()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); + return ReturnType(false); + } else if (*buf.position() == '"') /// skip double-quoted string { NullOutput sink; - readJSONStringInto(sink, buf); + if constexpr (throw_exception) + readJSONStringInto(sink, buf); + else if (!tryReadJSONStringInto(sink, buf)) + return ReturnType(false); } else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number { @@ -1298,19 +1381,32 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) double v; if (!tryReadFloatText(v, buf)) - throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString()); + return ReturnType(false); + } } else if (*buf.position() == 'n') /// skip null { - assertString("null", buf); + if constexpr (throw_exception) + assertString("null", buf); + else if (!checkString("null", buf)) + return ReturnType(false); } else if (*buf.position() == 't') /// skip true { - assertString("true", buf); + if 
constexpr (throw_exception) + assertString("true", buf); + else if (!checkString("true", buf)) + return ReturnType(false); } else if (*buf.position() == 'f') /// skip false { - assertString("false", buf); + if constexpr (throw_exception) + assertString("false", buf); + else if (!checkString("false", buf)) + return ReturnType(false); } else if (*buf.position() == '[') { @@ -1320,12 +1416,16 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) if (!buf.eof() && *buf.position() == ']') /// skip empty array { ++buf.position(); - return; + return ReturnType(true); } while (true) { - skipJSONField(buf, name_of_field); + if constexpr (throw_exception) + skipJSONFieldImpl(buf, name_of_field); + else if (!skipJSONFieldImpl(buf, name_of_field)) + return ReturnType(false); + skipWhitespaceIfAny(buf); if (!buf.eof() && *buf.position() == ',') @@ -1339,7 +1439,11 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) break; } else - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + return ReturnType(false); + } } } else if (*buf.position() == '{') /// skip whole object @@ -1353,19 +1457,34 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) if (*buf.position() == '"') { NullOutput sink; - readJSONStringInto(sink, buf); + if constexpr (throw_exception) + readJSONStringInto(sink, buf); + else if (!tryReadJSONStringInto(sink, buf)) + return ReturnType(false); } else - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + return ReturnType(false); + } // ':' skipWhitespaceIfAny(buf); if (buf.eof() || !(*buf.position() == ':')) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); + return ReturnType(false); + } ++buf.position(); skipWhitespaceIfAny(buf); - skipJSONField(buf, name_of_field); + if constexpr (throw_exception) + skipJSONFieldImpl(buf, name_of_field); + else if (!skipJSONFieldImpl(buf, name_of_field)) + return ReturnType(false); + skipWhitespaceIfAny(buf); // optional ',' @@ -1377,18 +1496,37 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) } if (buf.eof()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); + return ReturnType(false); + } ++buf.position(); } else { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}", - String(buf.position(), std::min(buf.available(), size_t(10))), - std::string(1, *buf.position()), - name_of_field.empty() ? "" : " for key " + name_of_field.toString()); + if constexpr (throw_exception) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}", + String(buf.position(), std::min(buf.available(), size_t(10))), + std::string(1, *buf.position()), + name_of_field.empty() ? 
"" : " for key " + name_of_field.toString()); + + return ReturnType(false); } + + return ReturnType(true); +} + +void skipJSONField(ReadBuffer & buf, StringRef name_of_field) +{ + skipJSONFieldImpl(buf, name_of_field); +} + +bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field) +{ + return skipJSONFieldImpl(buf, name_of_field); } @@ -1601,23 +1739,31 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim } // Use PeekableReadBuffer to copy field to string after parsing. -template -static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func) +template +static ReturnType readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func) { PeekableReadBuffer peekable_buf(buf); peekable_buf.setCheckpoint(); - parse_func(peekable_buf); + if constexpr (std::is_same_v) + parse_func(peekable_buf); + else if (!parse_func(peekable_buf)) + return ReturnType(false); peekable_buf.makeContinuousMemoryFromCheckpointToPos(); auto * end = peekable_buf.position(); peekable_buf.rollbackToCheckpoint(); s.append(peekable_buf.position(), end); peekable_buf.position() = end; + return ReturnType(true); } -template -static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf) +template +static ReturnType readQuotedStringFieldInto(Vector & s, ReadBuffer & buf) { - assertChar('\'', buf); + if constexpr (std::is_same_v) + assertChar('\'', buf); + else if (!checkChar('\'', buf)) + return ReturnType(false); + s.push_back('\''); while (!buf.eof()) { @@ -1645,16 +1791,23 @@ static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf) } if (buf.eof()) - return; + return ReturnType(false); ++buf.position(); s.push_back('\''); + return ReturnType(true); } -template -static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) +template +static ReturnType readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) { - assertChar(opening_bracket, buf); + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (throw_exception) + assertChar(opening_bracket, buf); + else if (!checkChar(opening_bracket, buf)) + return ReturnType(false); + s.push_back(opening_bracket); size_t balance = 1; @@ -1670,7 +1823,10 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) if (*buf.position() == '\'') { - readQuotedStringFieldInto(s, buf); + if constexpr (throw_exception) + readQuotedStringFieldInto(s, buf); + else if (!readQuotedStringFieldInto(s, buf)) + return ReturnType(false); } else if (*buf.position() == opening_bracket) { @@ -1685,13 +1841,20 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) ++buf.position(); } } + + if (balance) + return ReturnType(false); + + return ReturnType(true); } -template -void readQuotedFieldInto(Vector & s, ReadBuffer & buf) +template +ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf) { + static constexpr bool throw_exception = std::is_same_v; + if (buf.eof()) - return; + return ReturnType(false); /// Possible values in 'Quoted' field: /// - Strings: '...' @@ -1703,35 +1866,47 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf) /// - Number: integer, float, decimal. 
if (*buf.position() == '\'') - readQuotedStringFieldInto(s, buf); + return readQuotedStringFieldInto(s, buf); else if (*buf.position() == '[') - readQuotedFieldInBracketsInto<'[', ']'>(s, buf); + return readQuotedFieldInBracketsInto(s, buf); else if (*buf.position() == '(') - readQuotedFieldInBracketsInto<'(', ')'>(s, buf); + return readQuotedFieldInBracketsInto(s, buf); else if (*buf.position() == '{') - readQuotedFieldInBracketsInto<'{', '}'>(s, buf); + return readQuotedFieldInBracketsInto(s, buf); else if (checkCharCaseInsensitive('n', buf)) { /// NULL or NaN if (checkCharCaseInsensitive('u', buf)) { - assertStringCaseInsensitive("ll", buf); + if constexpr (throw_exception) + assertStringCaseInsensitive("ll", buf); + else if (!checkStringCaseInsensitive("ll", buf)) + return ReturnType(false); s.append("NULL"); } else { - assertStringCaseInsensitive("an", buf); + if constexpr (throw_exception) + assertStringCaseInsensitive("an", buf); + else if (!checkStringCaseInsensitive("an", buf)) + return ReturnType(false); s.append("NaN"); } } else if (checkCharCaseInsensitive('t', buf)) { - assertStringCaseInsensitive("rue", buf); + if constexpr (throw_exception) + assertStringCaseInsensitive("rue", buf); + else if (!checkStringCaseInsensitive("rue", buf)) + return ReturnType(false); s.append("true"); } else if (checkCharCaseInsensitive('f', buf)) { - assertStringCaseInsensitive("alse", buf); + if constexpr (throw_exception) + assertStringCaseInsensitive("alse", buf); + else if (!checkStringCaseInsensitive("alse", buf)) + return ReturnType(false); s.append("false"); } else @@ -1740,13 +1915,19 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf) auto parse_func = [](ReadBuffer & in) { Float64 tmp; - readFloatText(tmp, in); + if constexpr (throw_exception) + readFloatText(tmp, in); + else + return tryReadFloatText(tmp, in); }; - readParsedValueInto(s, buf, parse_func); + + return readParsedValueInto(s, buf, parse_func); } + + return ReturnType(true); } -template void readQuotedFieldInto(NullOutput & s, ReadBuffer & buf); +template void readQuotedFieldInto(NullOutput & s, ReadBuffer & buf); void readQuotedField(String & s, ReadBuffer & buf) { @@ -1754,11 +1935,24 @@ void readQuotedField(String & s, ReadBuffer & buf) readQuotedFieldInto(s, buf); } +bool tryReadQuotedField(String & s, ReadBuffer & buf) +{ + s.clear(); + return readQuotedFieldInto(s, buf); +} + void readJSONField(String & s, ReadBuffer & buf) { s.clear(); auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); }; - readParsedValueInto(s, buf, parse_func); + readParsedValueInto(s, buf, parse_func); +} + +bool tryReadJSONField(String & s, ReadBuffer & buf) +{ + s.clear(); + auto parse_func = [](ReadBuffer & in) { return trySkipJSONField(in, ""); }; + return readParsedValueInto(s, buf, parse_func); } void readTSVField(String & s, ReadBuffer & buf) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 85584d63ee87..49530f4787a2 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -51,6 +50,7 @@ namespace DB template struct Memory; +class PeekableReadBuffer; namespace ErrorCodes { @@ -258,26 +258,43 @@ inline void readBoolText(bool & x, ReadBuffer & buf) x = tmp != '0'; } -inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false) +template +inline ReturnType readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false) { + static constexpr bool throw_exception = std::is_same_v; + if 
(buf.eof()) [[unlikely]] - throwReadAfterEOF(); + { + if constexpr (throw_exception) + throwReadAfterEOF(); + else + return ReturnType(false); + } switch (*buf.position()) { case 't': - assertString("true", buf); + if constexpr (throw_exception) + assertString("true", buf); + else if (!checkString("true", buf)) + return ReturnType(false); x = true; break; case 'f': - assertString("false", buf); + if constexpr (throw_exception) + assertString("false", buf); + else if (!checkString("false", buf)) + return ReturnType(false); x = false; break; case 'T': { if (support_upper_case) { - assertString("TRUE", buf); + if constexpr (throw_exception) + assertString("TRUE", buf); + else if (!checkString("TRUE", buf)) + return ReturnType(false); x = true; break; } @@ -288,7 +305,10 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case { if (support_upper_case) { - assertString("FALSE", buf); + if constexpr (throw_exception) + assertString("FALSE", buf); + else if (!checkString("FALSE", buf)) + return ReturnType(false); x = false; break; } @@ -296,8 +316,15 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case [[fallthrough]]; } default: - throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value"); + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value"); + else + return ReturnType(false); + } } + + return ReturnType(true); } enum class ReadIntTextCheckOverflow @@ -469,7 +496,10 @@ void readIntText(T & x, ReadBuffer & buf) template bool tryReadIntText(T & x, ReadBuffer & buf) { - return readIntTextImpl(x, buf); + if constexpr (is_decimal) + return tryReadIntText(x.value, buf); + else + return readIntTextImpl(x, buf); } @@ -478,16 +508,18 @@ bool tryReadIntText(T & x, ReadBuffer & buf) * - for numbers starting with zero, parsed only zero; * - symbol '+' before number is not supported; */ -template -void readIntTextUnsafe(T & x, ReadBuffer & buf) +template +ReturnType readIntTextUnsafe(T & x, ReadBuffer & buf) { + static constexpr bool throw_exception = std::is_same_v; bool negative = false; make_unsigned_t res = 0; auto on_error = [] { - if (throw_on_error) + if constexpr (throw_exception) throwReadAfterEOF(); + return ReturnType(false); }; if (buf.eof()) [[unlikely]] @@ -505,7 +537,7 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf) { ++buf.position(); x = 0; - return; + return ReturnType(true); } while (!buf.eof()) @@ -524,12 +556,13 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf) /// See note about undefined behaviour above. x = is_signed_v && negative ? 
-res : res; + return ReturnType(true); } template -void tryReadIntTextUnsafe(T & x, ReadBuffer & buf) +bool tryReadIntTextUnsafe(T & x, ReadBuffer & buf) { - return readIntTextUnsafe(x, buf); + return readIntTextUnsafe(x, buf); } @@ -551,9 +584,15 @@ void readEscapedString(String & s, ReadBuffer & buf); void readQuotedString(String & s, ReadBuffer & buf); void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); +bool tryReadQuotedString(String & s, ReadBuffer & buf); +bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); + void readDoubleQuotedString(String & s, ReadBuffer & buf); void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); +bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf); +bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); + void readJSONString(String & s, ReadBuffer & buf); void readBackQuotedString(String & s, ReadBuffer & buf); @@ -616,7 +655,7 @@ void readBackQuotedStringInto(Vector & s, ReadBuffer & buf); template void readStringUntilEOFInto(Vector & s, ReadBuffer & buf); -template +template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. @@ -629,7 +668,7 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf) return readJSONStringInto(s, buf); } -template +template bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf); /// Reads chunk of data between {} in that way, @@ -638,8 +677,8 @@ bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf); template ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf); -template -void readJSONArrayInto(Vector & s, ReadBuffer & buf); +template +ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf); template void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf); @@ -963,6 +1002,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons { if (s[4] < '0' || s[4] > '9') { + if constexpr (!throw_exception) + { + if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) + || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) + return ReturnType(false); + } + UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); @@ -975,6 +1021,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons bool dt_long = (s[10] == ' ' || s[10] == 'T'); if (dt_long) { + if constexpr (!throw_exception) + { + if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15]) + || !isNumericASCII(s[17]) || !isNumericASCII(s[18])) + return ReturnType(false); + } + hour = (s[11] - '0') * 10 + (s[12] - '0'); minute = (s[14] - '0') * 10 + (s[15] - '0'); second = (s[17] - '0') * 10 + (s[18] - '0'); @@ -1312,6 +1365,11 @@ inline bool tryReadText(is_integer auto & x, ReadBuffer & buf) return tryReadIntText(x, buf); } +inline bool tryReadText(is_floating_point auto & x, ReadBuffer & buf) +{ + return tryReadFloatText(x, buf); +} + inline bool tryReadText(UUID & x, ReadBuffer & buf) { return tryReadUUIDText(x, buf); } inline bool tryReadText(IPv4 & x, ReadBuffer & buf) { return tryReadIPv4Text(x, buf); } inline bool tryReadText(IPv6 & x, ReadBuffer & buf) { return tryReadIPv6Text(x, buf); } @@ 
-1321,9 +1379,20 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); } +inline bool tryReadText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { return tryReadDateText(x, buf, time_zone); } inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } +inline bool tryReadText(LocalDate & x, ReadBuffer & buf) { return tryReadDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } +inline bool tryReadText(LocalDateTime & x, ReadBuffer & buf) +{ + time_t time; + if (!tryReadDateTimeText(time, buf)) + return false; + x = LocalDateTime(time, DateLUT::instance()); + return true; +} + inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); } inline void readText(IPv6 & x, ReadBuffer & buf) { readIPv6Text(x, buf); } @@ -1401,39 +1470,71 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf) } /// CSV for numbers: quotes are optional, no special escaping rules. -template -inline void readCSVSimple(T & x, ReadBuffer & buf) +template +inline ReturnType readCSVSimple(T & x, ReadBuffer & buf) { + static constexpr bool throw_exception = std::is_same_v; + if (buf.eof()) [[unlikely]] - throwReadAfterEOF(); + { + if constexpr (throw_exception) + throwReadAfterEOF(); + return ReturnType(false); + } char maybe_quote = *buf.position(); if (maybe_quote == '\'' || maybe_quote == '\"') ++buf.position(); - readText(x, buf); + if constexpr (throw_exception) + readText(x, buf); + else if (!tryReadText(x, buf)) + return ReturnType(false); if (maybe_quote == '\'' || maybe_quote == '\"') - assertChar(maybe_quote, buf); + { + if constexpr (throw_exception) + assertChar(maybe_quote, buf); + else if (!checkChar(maybe_quote, buf)) + return ReturnType(false); + } + + return ReturnType(true); } // standalone overload for dates: to avoid instantiating DateLUTs while parsing other types -template -inline void readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) +template +inline ReturnType readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { + static constexpr bool throw_exception = std::is_same_v; + if (buf.eof()) [[unlikely]] - throwReadAfterEOF(); + { + if constexpr (throw_exception) + throwReadAfterEOF(); + return ReturnType(false); + } char maybe_quote = *buf.position(); if (maybe_quote == '\'' || maybe_quote == '\"') ++buf.position(); - readText(x, buf, time_zone); + if constexpr (throw_exception) + readText(x, buf, time_zone); + else if (!tryReadText(x, buf, time_zone)) + return ReturnType(false); if (maybe_quote == '\'' || maybe_quote == '\"') - assertChar(maybe_quote, buf); + { + if constexpr (throw_exception) + assertChar(maybe_quote, buf); + else if (!checkChar(maybe_quote, buf)) + return ReturnType(false); + } + + return ReturnType(true); } template @@ -1443,18 +1544,52 @@ inline void readCSV(T & x, ReadBuffer & buf) readCSVSimple(x, buf); } +template +requires is_arithmetic_v +inline bool tryReadCSV(T & x, ReadBuffer & buf) +{ + return readCSVSimple(x, buf); +} + inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } +inline bool tryReadCSV(String & x, 
ReadBuffer & buf, const FormatSettings::CSV & settings) +{ + x.clear(); + readCSVStringInto(x, buf, settings); + return true; +} + inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(LocalDate & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(DayNum & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readCSVSimple(x, buf, time_zone); } +inline bool tryReadCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { return readCSVSimple(x, buf, time_zone); } + inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(LocalDateTime & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(UUID & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(IPv4 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(IPv6 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(IPv6 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(UInt128 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(Int128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(Int128 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(UInt256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(UInt256 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } + inline void readCSV(Int256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline bool tryReadCSV(Int256 & x, ReadBuffer & buf) { return readCSVSimple(x, buf); } template void readBinary(std::vector & x, ReadBuffer & buf) @@ -1536,6 +1671,7 @@ inline void skipWhitespaceIfAny(ReadBuffer & buf, bool one_line = false) /// Skips json value. void skipJSONField(ReadBuffer & buf, StringRef name_of_field); +bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field); /** Read serialized exception. 
@@ -1750,12 +1886,14 @@ struct PcgDeserializer } }; -template -void readQuotedFieldInto(Vector & s, ReadBuffer & buf); +template +ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf); void readQuotedField(String & s, ReadBuffer & buf); +bool tryReadQuotedField(String & s, ReadBuffer & buf); void readJSONField(String & s, ReadBuffer & buf); +bool tryReadJSONField(String & s, ReadBuffer & buf); void readTSVField(String & s, ReadBuffer & buf); diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b2c4a53bd9b2..bf5c426f8036 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -265,7 +265,7 @@ ReadWriteBufferFromHTTPBase::ReadWriteBufferFromHTTPBase( , file_info(file_info_) , http_skip_not_found_url(http_skip_not_found_url_) , settings {settings_} - , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) + , log(getLogger("ReadWriteBufferFromHTTP")) , proxy_config(proxy_config_) { if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 29c0804bb280..63ca3e0417cd 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -109,7 +109,7 @@ namespace detail bool http_skip_not_found_url; ReadSettings settings; - Poco::Logger * log; + LoggerPtr log; ProxyConfiguration proxy_config; diff --git a/src/IO/S3/AWSLogger.cpp b/src/IO/S3/AWSLogger.cpp index d6162823aeee..dcdba7753b27 100644 --- a/src/IO/S3/AWSLogger.cpp +++ b/src/IO/S3/AWSLogger.cpp @@ -41,7 +41,7 @@ AWSLogger::AWSLogger(bool enable_s3_requests_logging_) : enable_s3_requests_logging(enable_s3_requests_logging_) { for (auto [tag, name] : S3_LOGGER_TAG_NAMES) - tag_loggers[tag] = &Poco::Logger::get(name); + tag_loggers[tag] = getLogger(name); default_logger = tag_loggers[S3_LOGGER_TAG_NAMES[0][0]]; } diff --git a/src/IO/S3/AWSLogger.h b/src/IO/S3/AWSLogger.h index fdb6eed1f868..a4987f17c0dd 100644 --- a/src/IO/S3/AWSLogger.h +++ b/src/IO/S3/AWSLogger.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -29,9 +30,9 @@ class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface void Flush() final {} private: - Poco::Logger * default_logger; + LoggerPtr default_logger; bool enable_s3_requests_logging; - std::unordered_map tag_loggers; + std::unordered_map tag_loggers; }; } diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 64259ce5a76f..7f0ede727408 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -184,7 +184,7 @@ Client::Client( , client_settings(client_settings_) , max_redirects(max_redirects_) , sse_kms_config(std::move(sse_kms_config_)) - , log(&Poco::Logger::get("S3Client")) + , log(getLogger("S3Client")) { auto * endpoint_provider = dynamic_cast(accessEndpointProvider().get()); endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region); @@ -234,7 +234,7 @@ Client::Client( , provider_type(other.provider_type) , max_redirects(other.max_redirects) , sse_kms_config(other.sse_kms_config) - , log(&Poco::Logger::get("S3Client")) + , log(getLogger("S3Client")) { cache = std::make_shared(*other.cache); ClientCacheRegistry::instance().registerClient(cache); @@ -854,7 +854,7 @@ void ClientCacheRegistry::clearCacheForAll() } else { - LOG_INFO(&Poco::Logger::get("ClientCacheRegistry"), "Deleting leftover S3 client cache"); + LOG_INFO(getLogger("ClientCacheRegistry"), "Deleting leftover S3 client cache"); it = 
client_caches.erase(it); } } diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 677b739fd396..8da21bd2c2c7 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -281,7 +281,7 @@ class Client : private Aws::S3::S3Client const ServerSideEncryptionKMSConfig sse_kms_config; - Poco::Logger * log; + LoggerPtr log; }; class ClientFactory diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index b0b33244015f..e64f54b99ad3 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -76,7 +76,7 @@ constexpr int AVAILABILITY_ZONE_REQUEST_TIMEOUT_SECONDS = 3; AWSEC2MetadataClient::AWSEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration, const char * endpoint_) : Aws::Internal::AWSHttpResourceClient(client_configuration) , endpoint(endpoint_) - , logger(&Poco::Logger::get("AWSEC2InstanceProfileConfigLoader")) + , logger(getLogger("AWSEC2InstanceProfileConfigLoader")) { } @@ -200,7 +200,7 @@ Aws::String AWSEC2MetadataClient::getCurrentRegion() const static Aws::String getAWSMetadataEndpoint() { - auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader"); + auto logger = getLogger("AWSEC2InstanceProfileConfigLoader"); Aws::String ec2_metadata_service_endpoint = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT"); if (ec2_metadata_service_endpoint.empty()) { @@ -285,7 +285,7 @@ String getGCPAvailabilityZoneOrException() String getRunningAvailabilityZone() { - LOG_INFO(&Poco::Logger::get("Application"), "Trying to detect the availability zone."); + LOG_INFO(getLogger("Application"), "Trying to detect the availability zone."); try { return AWSEC2MetadataClient::getAvailabilityZoneOrException(); @@ -310,7 +310,7 @@ String getRunningAvailabilityZone() AWSEC2InstanceProfileConfigLoader::AWSEC2InstanceProfileConfigLoader(const std::shared_ptr & client_, bool use_secure_pull_) : client(client_) , use_secure_pull(use_secure_pull_) - , logger(&Poco::Logger::get("AWSEC2InstanceProfileConfigLoader")) + , logger(getLogger("AWSEC2InstanceProfileConfigLoader")) { } @@ -352,7 +352,7 @@ bool AWSEC2InstanceProfileConfigLoader::LoadInternal() AWSInstanceProfileCredentialsProvider::AWSInstanceProfileCredentialsProvider(const std::shared_ptr & config_loader) : ec2_metadata_config_loader(config_loader) , load_frequency_ms(Aws::Auth::REFRESH_THRESHOLD) - , logger(&Poco::Logger::get("AWSInstanceProfileCredentialsProvider")) + , logger(getLogger("AWSInstanceProfileCredentialsProvider")) { LOG_INFO(logger, "Creating Instance with injected EC2MetadataClient and refresh rate."); } @@ -396,7 +396,7 @@ void AWSInstanceProfileCredentialsProvider::refreshIfExpired() AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider::AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider( DB::S3::PocoHTTPClientConfiguration & aws_client_configuration, uint64_t expiration_window_seconds_) - : logger(&Poco::Logger::get("AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider")) + : logger(getLogger("AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider")) , expiration_window_seconds(expiration_window_seconds_) { // check environment variables @@ -529,7 +529,7 @@ SSOCredentialsProvider::SSOCredentialsProvider(DB::S3::PocoHTTPClientConfigurati : profile_to_use(Aws::Auth::GetConfigProfileName()) , aws_client_configuration(std::move(aws_client_configuration_)) , expiration_window_seconds(expiration_window_seconds_) - , logger(&Poco::Logger::get(SSO_CREDENTIALS_PROVIDER_LOG_TAG)) + , logger(getLogger(SSO_CREDENTIALS_PROVIDER_LOG_TAG)) { LOG_TRACE(logger, "Setting sso credentials 
provider to read config from {}", profile_to_use); } @@ -659,7 +659,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( const Aws::Auth::AWSCredentials & credentials, CredentialsConfiguration credentials_configuration) { - auto * logger = &Poco::Logger::get("S3CredentialsProviderChain"); + auto logger = getLogger("S3CredentialsProviderChain"); /// we don't provide any credentials to avoid signing if (credentials_configuration.no_sign_request) diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 5e83ea307989..34dc0c1d2bd0 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -70,7 +70,7 @@ class AWSEC2MetadataClient : public Aws::Internal::AWSHttpResourceClient const Aws::String endpoint; mutable std::recursive_mutex token_mutex; mutable Aws::String token; - Poco::Logger * logger; + LoggerPtr logger; }; std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); @@ -88,7 +88,7 @@ class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLo private: std::shared_ptr client; bool use_secure_pull; - Poco::Logger * logger; + LoggerPtr logger; }; class AWSInstanceProfileCredentialsProvider : public Aws::Auth::AWSCredentialsProvider @@ -107,7 +107,7 @@ class AWSInstanceProfileCredentialsProvider : public Aws::Auth::AWSCredentialsPr std::shared_ptr ec2_metadata_config_loader; Int64 load_frequency_ms; - Poco::Logger * logger; + LoggerPtr logger; }; class AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider : public Aws::Auth::AWSCredentialsProvider @@ -133,7 +133,7 @@ class AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider : public Aws::Auth::AWS Aws::String session_name; Aws::String token; bool initialized = false; - Poco::Logger * logger; + LoggerPtr logger; uint64_t expiration_window_seconds; }; @@ -163,7 +163,7 @@ class SSOCredentialsProvider : public Aws::Auth::AWSCredentialsProvider DB::S3::PocoHTTPClientConfiguration aws_client_configuration; uint64_t expiration_window_seconds; - Poco::Logger * logger; + LoggerPtr logger; void Reload() override; void refreshIfExpired(); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 946bd74dcb52..21acdfd69f26 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -345,7 +345,7 @@ void PocoHTTPClient::makeRequestInternalImpl( { using SessionPtr = std::conditional_t; - Poco::Logger * log = &Poco::Logger::get("AWSClient"); + LoggerPtr log = getLogger("AWSClient"); auto uri = request.GetUri().GetURIString(); auto method = getMethod(request); diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 830377622ef2..98024e74f8ec 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -61,7 +61,7 @@ namespace ThreadPoolCallbackRunner schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_, - const Poco::Logger * log_) + const LoggerPtr log_) : client_ptr(client_ptr_) , dest_bucket(dest_bucket_) , dest_key(dest_key_) @@ -87,7 +87,7 @@ namespace ThreadPoolCallbackRunner schedule; bool for_disk_s3; BlobStorageLogWriterPtr blob_storage_log; - const Poco::Logger * log; + const LoggerPtr log; /// Represents a task uploading a single part. /// Keep this struct small because there can be thousands of parts. 
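// Editor's aside, hypothetical sketch: the copyS3File.cpp hunks in this area change UploadHelper
// to accept a shared LoggerPtr in its constructor instead of a raw Poco::Logger *. Each concrete
// copy operation passes getLogger("copyDataToS3File") or getLogger("copyS3File"), so the shared
// upload code logs under the name of whichever operation is running. Demo names only:
#include <memory>
#include <string>
#include <utility>

struct DemoUploadLogger { std::string name; };
using DemoUploadLoggerPtr = std::shared_ptr<DemoUploadLogger>;

class DemoUploadHelper
{
public:
    explicit DemoUploadHelper(DemoUploadLoggerPtr log_) : log(std::move(log_)) {}
protected:
    const DemoUploadLoggerPtr log;   /// mirrors `const LoggerPtr log;` in the hunk above
};

class DemoCopyDataHelper : public DemoUploadHelper
{
public:
    DemoCopyDataHelper()
        : DemoUploadHelper(std::make_shared<DemoUploadLogger>(DemoUploadLogger{"copyDataToS3File"}))
    {
    }
};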
@@ -475,7 +475,7 @@ namespace ThreadPoolCallbackRunner schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, &Poco::Logger::get("copyDataToS3File")) + : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyDataToS3File")) , create_read_buffer(create_read_buffer_) , offset(offset_) , size(size_) @@ -658,7 +658,7 @@ namespace ThreadPoolCallbackRunner schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, &Poco::Logger::get("copyS3File")) + : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp index 2383182f7e77..7b50b2059359 100644 --- a/src/IO/WithFileName.cpp +++ b/src/IO/WithFileName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -16,10 +16,10 @@ static String getFileName(const T & entry) String getFileNameFromReadBuffer(const ReadBuffer & in) { - if (const auto * compressed = dynamic_cast(&in)) - return getFileName(compressed->getWrappedReadBuffer()); + if (const auto * wrapper = dynamic_cast(&in)) + return getFileNameFromReadBuffer(wrapper->getWrappedReadBuffer()); else if (const auto * parallel = dynamic_cast(&in)) - return getFileName(parallel->getReadBuffer()); + return getFileNameFromReadBuffer(parallel->getReadBuffer()); else if (const auto * peekable = dynamic_cast(&in)) return getFileNameFromReadBuffer(peekable->getSubBuffer()); else diff --git a/src/IO/WriteBuffer.cpp b/src/IO/WriteBuffer.cpp index 61fdd31e16a3..bcc7445486ea 100644 --- a/src/IO/WriteBuffer.cpp +++ b/src/IO/WriteBuffer.cpp @@ -17,7 +17,7 @@ WriteBuffer::~WriteBuffer() /// However it is suspicious to destroy instance without finalization at the green path if (!std::uncaught_exceptions() && std::current_exception() == nullptr) { - Poco::Logger * log = &Poco::Logger::get("WriteBuffer"); + LoggerPtr log = getLogger("WriteBuffer"); LOG_ERROR( log, "WriteBuffer is not finalized when destructor is called. 
" diff --git a/src/IO/WriteBufferFromEncryptedFile.h b/src/IO/WriteBufferFromEncryptedFile.h index c6edcf765337..2b59bb468d13 100644 --- a/src/IO/WriteBufferFromEncryptedFile.h +++ b/src/IO/WriteBufferFromEncryptedFile.h @@ -40,7 +40,7 @@ class WriteBufferFromEncryptedFile : public WriteBufferDecoratorsendRequest(request); } diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 191e522c59a2..230f39b074e4 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -91,7 +91,7 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase const WriteSettings write_settings; const std::shared_ptr client_ptr; const std::optional> object_metadata; - Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); + LoggerPtr log = getLogger("WriteBufferFromS3"); LogSeriesLimiterPtr limitedLog = std::make_shared(log, 1, 5); IBufferAllocationPolicyPtr buffer_allocation_policy; diff --git a/src/IO/readDecimalText.h b/src/IO/readDecimalText.h index 3417310a9904..8b4405ee2e93 100644 --- a/src/IO/readDecimalText.h +++ b/src/IO/readDecimalText.h @@ -224,4 +224,24 @@ inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint assertChar(maybe_quote, buf); } +template +inline bool tryReadCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale) +{ + if (buf.eof()) + return false; + + char maybe_quote = *buf.position(); + + if (maybe_quote == '\'' || maybe_quote == '\"') + ++buf.position(); + + if (!tryReadDecimalText(buf, x, precision, scale)) + return false; + + if ((maybe_quote == '\'' || maybe_quote == '\"') && !checkChar(maybe_quote, buf)) + return false; + + return true; +} + } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 31b7bdffb1f3..6512def92023 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -282,6 +282,13 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl( { size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); column = node.function->execute(arguments, node.result_type, num_rows, true); + if (column->getDataType() != node.result_type->getColumnType()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected return type from {}. Expected {}. 
Got {}", + node.function->getName(), + node.result_type->getColumnType(), + column->getDataType()); } else { @@ -2440,7 +2447,6 @@ bool ActionsDAG::isSortingPreserved( ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, const std::unordered_map & node_name_to_input_node_column, - const ContextPtr & context, bool single_output_condition_node) { if (filter_nodes.empty()) @@ -2542,10 +2548,15 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - auto index_hint_filter_dag = buildFilterActionsDAG(index_hint->getActions()->getOutputs(), - node_name_to_input_node_column, - context, - false /*single_output_condition_node*/); + ActionsDAGPtr index_hint_filter_dag; + const auto & index_hint_args = index_hint->getActions()->getOutputs(); + + if (index_hint_args.empty()) + index_hint_filter_dag = std::make_shared(); + else + index_hint_filter_dag = buildFilterActionsDAG(index_hint_args, + node_name_to_input_node_column, + false /*single_output_condition_node*/); auto index_hint_function_clone = std::make_shared(); index_hint_function_clone->setActions(std::move(index_hint_filter_dag)); @@ -2583,8 +2594,8 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( if (result_dag_outputs.size() > 1 && single_output_condition_node) { - auto function_builder = FunctionFactory::instance().get("and", context); - result_dag_outputs = { &result_dag->addFunction(function_builder, result_dag_outputs, {}) }; + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + result_dag_outputs = { &result_dag->addFunction(func_builder_and, result_dag_outputs, {}) }; } return result_dag; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index f18ae5d5c75a..45f6e5cc717b 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -389,8 +389,7 @@ class ActionsDAG */ static ActionsDAGPtr buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, - const std::unordered_map & node_name_to_input_node_column, - const ContextPtr & context, + const std::unordered_map & node_name_to_input_node_column = {}, bool single_output_condition_node = true); /// Check if `predicate` is a combination of AND functions. 
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 1789cc6c4b14..78e125146d46 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1414,10 +1414,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool set_key = right_in_operand->getTreeHash(/*ignore_aliases=*/ true); if (auto set = data.prepared_sets->findSubquery(set_key)) - { - set->markAsINSubquery(); return set; - } FutureSetFromSubqueryPtr external_table_set; @@ -1464,7 +1461,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool } return data.prepared_sets->addFromSubquery( - set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef(), /*in_subquery=*/true); + set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef()); } else { diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 4171818d3e6a..331cd991ea19 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -105,7 +105,7 @@ class HashTablesStatistics if (const auto hint = cache->get(params.key)) { LOG_TRACE( - &Poco::Logger::get("Aggregator"), + getLogger("Aggregator"), "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", params.key, hint->sum_of_sizes, @@ -129,7 +129,7 @@ class HashTablesStatistics || hint->median_size < median_size) { LOG_TRACE( - &Poco::Logger::get("Aggregator"), + getLogger("Aggregator"), "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", params.key, sum_of_sizes, @@ -229,7 +229,7 @@ void initDataVariantsWithSizeHint( /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. /// So let's just do nothing. LOG_TRACE( - &Poco::Logger::get("Aggregator"), + getLogger("Aggregator"), "No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, " "should be at least {}", stats_collecting_params.max_size_to_preallocate_for_aggregation, diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index f4aa78043ca4..109bd0dd939a 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1293,7 +1293,7 @@ class Aggregator final /// How many RAM were used to process the query before processing the first block. Int64 memory_usage_before_aggregation = 0; - Poco::Logger * log = &Poco::Logger::get("Aggregator"); + LoggerPtr log = getLogger("Aggregator"); /// For external aggregation. 
TemporaryDataOnDiskPtr tmp_data; diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 9034f5828696..5d851f6b47d0 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -32,8 +32,7 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() {"Preprocessed", static_cast(DataKind::Preprocessed)}, }); - return ColumnsDescription - { + return ColumnsDescription{ {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, @@ -53,6 +52,7 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() {"flush_time", std::make_shared()}, {"flush_time_microseconds", std::make_shared(6)}, {"flush_query_id", std::make_shared()}, + {"timeout_milliseconds", std::make_shared()}, }; } @@ -80,6 +80,7 @@ void AsynchronousInsertLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(flush_time); columns[i++]->insert(flush_time_microseconds); columns[i++]->insert(flush_query_id); + columns[i++]->insert(timeout_milliseconds); } } diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index d05375002ad8..70b56a273ad6 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -38,6 +38,7 @@ struct AsynchronousInsertLogElement time_t flush_time{}; Decimal64 flush_time_microseconds{}; String flush_query_id; + UInt64 timeout_milliseconds = 0; static std::string name() { return "AsynchronousInsertLog"; } static ColumnsDescription getColumnsDescription(); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 63ee62cdef49..e7f292d9b778 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -33,13 +33,14 @@ #include #include - namespace CurrentMetrics { extern const Metric PendingAsyncInsert; extern const Metric AsynchronousInsertThreads; extern const Metric AsynchronousInsertThreadsActive; extern const Metric AsynchronousInsertThreadsScheduled; + extern const Metric AsynchronousInsertQueueSize; + extern const Metric AsynchronousInsertQueueBytes; } namespace ProfileEvents @@ -60,6 +61,7 @@ namespace ErrorCodes extern const int UNKNOWN_FORMAT; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int INVALID_SETTING_VALUE; } static const NameSet settings_to_skip @@ -171,16 +173,41 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep } } +AsynchronousInsertQueue::QueueShardFlushTimeHistory::TimePoints +AsynchronousInsertQueue::QueueShardFlushTimeHistory::getRecentTimePoints() const +{ + std::shared_lock lock(mutex); + return time_points; +} + +void AsynchronousInsertQueue::QueueShardFlushTimeHistory::updateWithCurrentTime() +{ + std::unique_lock lock(mutex); + time_points.first = time_points.second; + time_points.second = std::chrono::steady_clock::now(); +} + AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_) : WithContext(context_) , pool_size(pool_size_) , flush_on_shutdown(flush_on_shutdown_) , queue_shards(pool_size) - , pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, CurrentMetrics::AsynchronousInsertThreadsScheduled, pool_size) + , flush_time_history_per_queue_shard(pool_size) + , pool( + CurrentMetrics::AsynchronousInsertThreads, + 
CurrentMetrics::AsynchronousInsertThreadsActive, + CurrentMetrics::AsynchronousInsertThreadsScheduled, + pool_size) { if (!pool_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "pool_size cannot be zero"); + const auto & settings = getContext()->getSettingsRef(); + + for (size_t i = 0; i < pool_size; ++i) + queue_shards[i].busy_timeout_ms + = std::min(Milliseconds(settings.async_insert_busy_timeout_min_ms), Milliseconds(settings.async_insert_busy_timeout_max_ms)); + for (size_t i = 0; i < pool_size; ++i) dump_by_first_update_threads.emplace_back([this, i] { processBatchDeadlines(i); }); } @@ -201,7 +228,7 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() if (flush_on_shutdown) { for (auto & [_, elem] : shard.queue) - scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext()); + scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i); } else { @@ -217,14 +244,14 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() LOG_TRACE(log, "Asynchronous insertion queue finished"); } -void AsynchronousInsertQueue::scheduleDataProcessingJob(const InsertQuery & key, InsertDataPtr data, ContextPtr global_context) +void AsynchronousInsertQueue::scheduleDataProcessingJob( + const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num) { /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. - pool.scheduleOrThrowOnError([key, global_context, my_data = std::make_shared(std::move(data))]() mutable - { - processData(key, std::move(*my_data), std::move(global_context)); - }); + pool.scheduleOrThrowOnError( + [this, key, global_context, shard_num, my_data = std::make_shared(std::move(data))]() mutable + { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }); } void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context) @@ -300,6 +327,7 @@ AsynchronousInsertQueue::PushResult AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr query_context) { const auto & settings = query_context->getSettingsRef(); + validateSettings(settings, log); auto & insert_query = query->as(); auto data_kind = chunk.getDataKind(); @@ -319,23 +347,22 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr auto shard_num = key.hash % pool_size; auto & shard = queue_shards[shard_num]; - + const auto flush_time_points = flush_time_history_per_queue_shard[shard_num].getRecentTimePoints(); { std::lock_guard lock(shard.mutex); auto [it, inserted] = shard.iterators.try_emplace(key.hash); + auto now = std::chrono::steady_clock::now(); + auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, shard_num, flush_time_points, now); if (inserted) - { - auto now = std::chrono::steady_clock::now(); - auto timeout = now + Milliseconds{key.settings.async_insert_busy_timeout_ms}; - it->second = shard.queue.emplace(timeout, Container{key, std::make_unique()}).first; - } + it->second = shard.queue.emplace(now + timeout_ms, Container{key, std::make_unique(timeout_ms)}).first; auto queue_it = it->second; auto & data = queue_it->second.data; size_t entry_data_size = entry->chunk.byteSize(); assert(data); + auto size_in_bytes = data->size_in_bytes; data->size_in_bytes += entry_data_size; data->entries.emplace_back(entry); insert_future = entry->getFuture(); @@ -346,23 +373,50 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr bool 
has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size; bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate; - /// Here we check whether we hit the limit on maximum data size in the buffer. - /// And use setting from query context. - /// It works, because queries with the same set of settings are already grouped together. - if (!flush_stopped && (has_enough_bytes || has_enough_queries)) + auto max_busy_timeout_exceeded = [&shard, &settings, &now, &flush_time_points]() -> bool + { + if (!settings.async_insert_use_adaptive_busy_timeout || !shard.last_insert_time || !flush_time_points.first) + return false; + + auto max_ms = Milliseconds(settings.async_insert_busy_timeout_max_ms); + return *shard.last_insert_time + max_ms < now && *flush_time_points.first + max_ms < *flush_time_points.second; + }; + + /// Here we check whether we have hit the limit on the maximum data size in the buffer or + /// if the elapsed time since the last insert exceeds the maximum busy wait timeout. + /// We also use the limit settings from the query context. + /// This works because queries with the same set of settings are already grouped together. + if (!flush_stopped && (has_enough_bytes || has_enough_queries || max_busy_timeout_exceeded())) { + data->timeout_ms = Milliseconds::zero(); data_to_process = std::move(data); shard.iterators.erase(it); shard.queue.erase(queue_it); } + shard.last_insert_time = now; + shard.busy_timeout_ms = timeout_ms; + CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert); ProfileEvents::increment(ProfileEvents::AsyncInsertQuery); ProfileEvents::increment(ProfileEvents::AsyncInsertBytes, entry_data_size); + + if (data_to_process) + { + if (!inserted) + CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueSize); + CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueBytes, size_in_bytes); + } + else + { + if (inserted) + CurrentMetrics::add(CurrentMetrics::AsynchronousInsertQueueSize); + CurrentMetrics::add(CurrentMetrics::AsynchronousInsertQueueBytes, entry_data_size); + } } if (data_to_process) - scheduleDataProcessingJob(key, std::move(data_to_process), getContext()); + scheduleDataProcessingJob(key, std::move(data_to_process), getContext(), shard_num); else shard.are_tasks_available.notify_one(); @@ -374,6 +428,98 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr }; } +AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeoutMs( + const Settings & settings, + const QueueShard & shard, + size_t shard_num, + const QueueShardFlushTimeHistory::TimePoints & flush_time_points, + std::chrono::steady_clock::time_point now) const +{ + if (!settings.async_insert_use_adaptive_busy_timeout) + return settings.async_insert_busy_timeout_max_ms; + + const auto max_ms = Milliseconds(settings.async_insert_busy_timeout_max_ms); + const auto min_ms = std::min(std::max(Milliseconds(settings.async_insert_busy_timeout_min_ms), Milliseconds(1)), max_ms); + + auto normalize = [&min_ms, &max_ms](const auto & t_ms) { return std::min(std::max(t_ms, min_ms), max_ms); }; + + if (!shard.last_insert_time || !flush_time_points.first) + return normalize(shard.busy_timeout_ms); + + const auto & last_insert_time = *shard.last_insert_time; + const auto & [t1, t2] = std::tie(*flush_time_points.first, *flush_time_points.second); + const double increase_rate = settings.async_insert_busy_timeout_increase_rate; + const double decrease_rate = 
settings.async_insert_busy_timeout_decrease_rate; + + const auto decreased_timeout_ms = std::min( + std::chrono::duration_cast(shard.busy_timeout_ms / (1.0 + decrease_rate)), shard.busy_timeout_ms - Milliseconds(1)); + + /// Increase the timeout for frequent inserts. + if (last_insert_time + min_ms > now) + { + auto timeout_ms = std::max( + std::chrono::duration_cast(shard.busy_timeout_ms * (1.0 + increase_rate)), + shard.busy_timeout_ms + Milliseconds(1)); + if (timeout_ms != shard.busy_timeout_ms) + LOG_TRACE( + log, + "Async timeout increased from {} to {} for queue shard {}.", + shard.busy_timeout_ms.count(), + timeout_ms.count(), + shard_num); + + return normalize(timeout_ms); + } + /// Decrease the timeout if inserts are not frequent, + /// that is, if the time since the last insert and the difference between the last two queue flushes were both + /// long enough (exceeding the adjusted timeout). + /// This ensures the timeout value converges to the minimum over time for non-frequent inserts. + else if (last_insert_time + decreased_timeout_ms < now && t1 + decreased_timeout_ms < t2) + { + auto timeout_ms = decreased_timeout_ms; + if (timeout_ms != shard.busy_timeout_ms) + LOG_TRACE( + log, + "Async timeout decreased from {} to {} for queue shard {}.", + shard.busy_timeout_ms.count(), + timeout_ms.count(), + shard_num); + + return normalize(timeout_ms); + } + + return normalize(shard.busy_timeout_ms); +} + +void AsynchronousInsertQueue::validateSettings(const Settings & settings, LoggerPtr log) +{ + const auto max_ms = std::chrono::milliseconds(settings.async_insert_busy_timeout_max_ms); + + if (max_ms == std::chrono::milliseconds::zero()) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_max_ms' can't be zero"); + + if (!settings.async_insert_use_adaptive_busy_timeout) + return; + + /// Adaptive timeout settings. + const auto min_ms = std::chrono::milliseconds(settings.async_insert_busy_timeout_min_ms); + + if (min_ms > max_ms) + if (log) + LOG_WARNING( + log, + "Setting 'async_insert_busy_timeout_min_ms'={} is greater than 'async_insert_busy_timeout_max_ms'={}. 
Ignoring " + "'async_insert_busy_timeout_min_ms'", + min_ms.count(), + max_ms.count()); + + if (settings.async_insert_busy_timeout_increase_rate <= 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_increase_rate' must be greater than zero"); + + if (settings.async_insert_busy_timeout_decrease_rate <= 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_decrease_rate' must be greater than zero"); +} + void AsynchronousInsertQueue::flushAll() { std::lock_guard flush_lock(flush_mutex); @@ -395,14 +541,15 @@ void AsynchronousInsertQueue::flushAll() size_t total_bytes = 0; size_t total_entries = 0; - for (auto & queue : queues_to_flush) + for (size_t i = 0; i < pool_size; ++i) { + auto & queue = queues_to_flush[i]; total_queries += queue.size(); for (auto & [_, entry] : queue) { total_bytes += entry.data->size_in_bytes; total_entries += entry.data->entries.size(); - scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext()); + scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext(), i); } } @@ -429,17 +576,21 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) { std::unique_lock lock(shard.mutex); - shard.are_tasks_available.wait_for(lock, - Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [&shard, this] - { - if (shutdown) - return true; + const auto rel_time + = std::min(shard.busy_timeout_ms, Milliseconds(getContext()->getSettingsRef().async_insert_poll_timeout_ms)); + shard.are_tasks_available.wait_for( + lock, + rel_time, + [&shard, this] + { + if (shutdown) + return true; - if (!shard.queue.empty() && shard.queue.begin()->first < std::chrono::steady_clock::now()) - return true; + if (!shard.queue.empty() && shard.queue.begin()->first < std::chrono::steady_clock::now()) + return true; - return false; - }); + return false; + }); if (shutdown) return; @@ -449,21 +600,30 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) const auto now = std::chrono::steady_clock::now(); + size_t size_in_bytes = 0; while (true) { if (shard.queue.empty() || shard.queue.begin()->first > now) break; auto it = shard.queue.begin(); + size_in_bytes += it->second.data->size_in_bytes; + shard.iterators.erase(it->second.key.hash); entries_to_flush.emplace_back(std::move(it->second)); shard.queue.erase(it); } + + if (!entries_to_flush.empty()) + { + CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueSize, entries_to_flush.size()); + CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueBytes, size_in_bytes); + } } for (auto & entry : entries_to_flush) - scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext()); + scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext(), shard_num); } } @@ -507,7 +667,8 @@ String serializeQuery(const IAST & query, size_t max_length) } // static -void AsynchronousInsertQueue::processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context) +void AsynchronousInsertQueue::processData( + InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history) try { if (!data) @@ -515,7 +676,7 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); - const auto * log = &Poco::Logger::get("AsynchronousInsertQueue"); + const auto log = getLogger("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); auto insert_context = 
Context::createCopy(global_context); @@ -613,9 +774,12 @@ try throw; } - auto add_entry_to_log = [&]( - const auto & entry, const auto & entry_query_for_logging, - const auto & exception, size_t num_rows, size_t num_bytes) + auto add_entry_to_log = [&](const auto & entry, + const auto & entry_query_for_logging, + const auto & exception, + size_t num_rows, + size_t num_bytes, + Milliseconds timeout_ms) { if (!async_insert_log) return; @@ -632,6 +796,7 @@ try elem.rows = num_rows; elem.exception = exception; elem.data_kind = entry->chunk.getDataKind(); + elem.timeout_milliseconds = timeout_ms.count(); /// If there was a parsing error, /// the entry won't be flushed anyway, @@ -666,9 +831,9 @@ try auto header = pipeline.getHeader(); if (key.data_kind == DataKind::Parsed) - chunk = processEntriesWithParsing(key, data->entries, header, insert_context, log, add_entry_to_log); + chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_log); else - chunk = processPreprocessedEntries(key, data->entries, header, insert_context, add_entry_to_log); + chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_log); ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows()); @@ -691,6 +856,8 @@ try LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str); + queue_shard_flush_time_history.updateWithCurrentTime(); + bool pulling_pipeline = false; logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); } @@ -729,10 +896,10 @@ catch (...) template Chunk AsynchronousInsertQueue::processEntriesWithParsing( const InsertQuery & key, - const std::list & entries, + const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, - const Poco::Logger * logger, + const LoggerPtr logger, LogFunc && add_to_async_insert_log) { size_t total_rows = 0; @@ -770,7 +937,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( auto chunk_info = std::make_shared(); auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length); - for (const auto & entry : entries) + for (const auto & entry : data->entries) { current_entry = entry; @@ -786,7 +953,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( chunk_info->offsets.push_back(total_rows); chunk_info->tokens.push_back(entry->async_dedup_token); - add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes); + add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms); current_exception.clear(); } @@ -798,7 +965,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( template Chunk AsynchronousInsertQueue::processPreprocessedEntries( const InsertQuery & key, - const std::list & entries, + const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, LogFunc && add_to_async_insert_log) @@ -821,7 +988,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( return it->second; }; - for (const auto & entry : entries) + for (const auto & entry : data->entries) { const auto * block = entry->chunk.asBlock(); if (!block) @@ -837,7 +1004,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( chunk_info->tokens.push_back(entry->async_dedup_token); const auto & query_for_logging = get_query_by_format(entry->format); - add_to_async_insert_log(entry, query_for_logging, "", block->rows(), 
block->bytes()); + add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms); } Chunk chunk(std::move(result_columns), total_rows); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 99394d0fb14d..c2c4755f192c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -10,6 +10,7 @@ #include #include +#include #include namespace DB @@ -53,6 +54,8 @@ class AsynchronousInsertQueue : public WithContext Preprocessed = 1, }; + static void validateSettings(const Settings & settings, LoggerPtr log); + /// Force flush the whole queue. void flushAll(); @@ -146,6 +149,9 @@ class AsynchronousInsertQueue : public WithContext std::atomic_bool finished = false; }; + InsertData() = default; + explicit InsertData(Milliseconds timeout_ms_) : timeout_ms(timeout_ms_) { } + ~InsertData() { auto it = entries.begin(); @@ -163,6 +169,7 @@ class AsynchronousInsertQueue : public WithContext std::list entries; size_t size_in_bytes = 0; + Milliseconds timeout_ms = Milliseconds::zero(); }; using InsertDataPtr = std::unique_ptr; @@ -180,6 +187,8 @@ class AsynchronousInsertQueue : public WithContext using QueueIterator = Queue::iterator; using QueueIteratorByKey = std::unordered_map; + using OptionalTimePoint = std::optional; + struct QueueShard { mutable std::mutex mutex; @@ -187,12 +196,30 @@ class AsynchronousInsertQueue : public WithContext Queue queue; QueueIteratorByKey iterators; + + OptionalTimePoint last_insert_time; + std::chrono::milliseconds busy_timeout_ms; + }; + + /// Times of the two most recent queue flushes. + /// Used to calculate adaptive timeout. + struct QueueShardFlushTimeHistory + { + public: + using TimePoints = std::pair; + TimePoints getRecentTimePoints() const; + void updateWithCurrentTime(); + + private: + mutable std::shared_mutex mutex; + TimePoints time_points; }; const size_t pool_size; const bool flush_on_shutdown; std::vector queue_shards; + std::vector flush_time_history_per_queue_shard; /// Logic and events behind queue are as follows: /// - async_insert_busy_timeout_ms: @@ -214,29 +241,38 @@ class AsynchronousInsertQueue : public WithContext /// Uses async_insert_busy_timeout_ms and processBatchDeadlines() std::vector dump_by_first_update_threads; - Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue"); + LoggerPtr log = getLogger("AsynchronousInsertQueue"); PushResult pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr query_context); + + Milliseconds getBusyWaitTimeoutMs( + const Settings & settings, + const QueueShard & shard, + size_t shard_num, + const QueueShardFlushTimeHistory::TimePoints & flush_time_points, + std::chrono::steady_clock::time_point now) const; + void preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context); void processBatchDeadlines(size_t shard_num); - void scheduleDataProcessingJob(const InsertQuery & key, InsertDataPtr data, ContextPtr global_context); + void scheduleDataProcessingJob(const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num); - static void processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context); + static void processData( + InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history); template static Chunk processEntriesWithParsing( const InsertQuery & key, - const std::list & entries, + const InsertDataPtr & data, const Block & header, 
const ContextPtr & insert_context, - const Poco::Logger * logger, + const LoggerPtr logger, LogFunc && add_to_async_insert_log); template static Chunk processPreprocessedEntries( const InsertQuery & key, - const std::list & entries, + const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, LogFunc && add_to_async_insert_log); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 51f725e1699b..d242544f787b 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -85,7 +85,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , boundary_alignment(settings.boundary_alignment) , load_metadata_threads(settings.load_metadata_threads) , write_cache_per_user_directory(settings.write_cache_per_user_id_directory) - , log(&Poco::Logger::get("FileCache(" + cache_name + ")")) + , log(getLogger("FileCache(" + cache_name + ")")) , metadata(settings.base_path, settings.background_download_queue_size_limit, settings.background_download_threads, write_cache_per_user_directory) { if (settings.cache_policy == "LRU") @@ -972,7 +972,7 @@ void FileCache::loadMetadataImpl() std::mutex set_exception_mutex; std::atomic stop_loading = false; - LOG_INFO(log, "Loading filesystem cache with {} threads", load_metadata_threads); + LOG_INFO(log, "Loading filesystem cache with {} threads from {}", load_metadata_threads, metadata.getBaseDirectory()); for (size_t i = 0; i < load_metadata_threads; ++i) { diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 64e03b739680..2de2f347999e 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -193,7 +193,7 @@ class FileCache : private boost::noncopyable size_t load_metadata_threads; const bool write_cache_per_user_directory; - Poco::Logger * log; + LoggerPtr log; std::exception_ptr init_exception; std::atomic is_initialized = false; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8e5d1fd757fc..8bd894659173 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -65,9 +65,9 @@ FileSegment::FileSegment( , queue_iterator(queue_iterator_) , cache(cache_) #ifdef ABORT_ON_LOGICAL_ERROR - , log(&Poco::Logger::get(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) + , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) #else - , log(&Poco::Logger::get("FileSegment")) + , log(getLogger("FileSegment")) #endif { /// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING. 
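Note on the asynchronous-insert hunks above: they replace the single fixed `async_insert_busy_timeout_ms` flush deadline with an adaptive one. `getBusyWaitTimeoutMs` multiplicatively grows the per-shard timeout while inserts keep arriving faster than the minimum window, and shrinks it again once both the time since the last insert and the gap between the two most recent flushes exceed the would-be reduced value, always clamping to [`async_insert_busy_timeout_min_ms`, `async_insert_busy_timeout_max_ms`]. The standalone sketch below only illustrates that clamped increase/decrease policy; the type `AdaptiveTimeout`, the method `onInsert` and the default numbers are hypothetical and are not part of the patch.

// Minimal sketch of a clamped multiplicative increase/decrease timeout,
// in the spirit of AsynchronousInsertQueue::getBusyWaitTimeoutMs above.
// All names and constants are illustrative assumptions, not ClickHouse code.
#include <algorithm>
#include <chrono>

using Milliseconds = std::chrono::milliseconds;
using TimePoint = std::chrono::steady_clock::time_point;

struct AdaptiveTimeout
{
    Milliseconds min_ms{50};     /// plays the role of async_insert_busy_timeout_min_ms
    Milliseconds max_ms{200};    /// plays the role of async_insert_busy_timeout_max_ms
    double increase_rate = 0.2;  /// async_insert_busy_timeout_increase_rate
    double decrease_rate = 0.2;  /// async_insert_busy_timeout_decrease_rate

    Milliseconds current{50};
    TimePoint last_insert{};

    Milliseconds clampToRange(Milliseconds t) const { return std::clamp(t, min_ms, max_ms); }

    /// Called on every insert; last_flush_gap is the interval between the
    /// two most recent flushes of this queue shard.
    Milliseconds onInsert(TimePoint now, Milliseconds last_flush_gap)
    {
        const auto since_last = std::chrono::duration_cast<Milliseconds>(now - last_insert);
        const auto decreased = std::min(
            std::chrono::duration_cast<Milliseconds>(current / (1.0 + decrease_rate)),
            current - Milliseconds(1));

        if (since_last < min_ms)
        {
            /// Frequent inserts: widen the window so more entries are batched per flush.
            current = clampToRange(std::max(
                std::chrono::duration_cast<Milliseconds>(current * (1.0 + increase_rate)),
                current + Milliseconds(1)));
        }
        else if (since_last > decreased && last_flush_gap > decreased)
        {
            /// Rare inserts: shrink the window so entries are not held back needlessly.
            current = clampToRange(decreased);
        }

        last_insert = now;
        return current;
    }
};

Because both adjustments are multiplicative and clamped, the timeout converges towards the maximum under a sustained insert stream and drifts back towards the minimum when the stream dries up, which is the behaviour the comments in the hunk above describe.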
@@ -339,30 +339,31 @@ void FileSegment::write(const char * from, size_t size, size_t offset) if (!size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); - const auto file_segment_path = getPath(); - { auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("write", lock); assertNotDetachedUnlocked(lock); + } + + const auto file_segment_path = getPath(); + { if (download_state != State::DOWNLOADING) throw Exception( ErrorCodes::LOGICAL_ERROR, "Expected DOWNLOADING state, got {}", stateToString(download_state)); - size_t first_non_downloaded_offset = getCurrentWriteOffset(); + const size_t first_non_downloaded_offset = getCurrentWriteOffset(); if (offset != first_non_downloaded_offset) throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to write {} bytes to offset: {}, but current write offset is {}", size, offset, first_non_downloaded_offset); - size_t current_downloaded_size = getDownloadedSize(); + const size_t current_downloaded_size = getDownloadedSize(); chassert(reserved_size >= current_downloaded_size); - size_t free_reserved_size = reserved_size - current_downloaded_size; + const size_t free_reserved_size = reserved_size - current_downloaded_size; if (free_reserved_size < size) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -371,20 +372,24 @@ void FileSegment::write(const char * from, size_t size, size_t offset) if (!is_unbound && current_downloaded_size == range().size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "File segment is already fully downloaded"); - if (!cache_writer) - { - if (current_downloaded_size > 0) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cache writer was finalized (downloaded size: {}, state: {})", - current_downloaded_size, stateToString(download_state)); - - cache_writer = std::make_unique(file_segment_path); - } + if (!cache_writer && current_downloaded_size > 0) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cache writer was finalized (downloaded size: {}, state: {})", + current_downloaded_size, stateToString(download_state)); } try { + if (!cache_writer) + cache_writer = std::make_unique(file_segment_path); + +#ifdef ABORT_ON_LOGICAL_ERROR + /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), + /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). 
+ std::lock_guard lock(write_mutex); +#endif + cache_writer->write(from, size); cache_writer->next(); @@ -393,21 +398,30 @@ void FileSegment::write(const char * from, size_t size, size_t offset) } catch (ErrnoException & e) { + const int code = e.getErrno(); + const bool is_no_space_left_error = code == /* No space left on device */28 || code == /* Quota exceeded */122; + auto lock = lockFileSegment(); + e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lock))); + setDownloadFailedUnlocked(lock); - int code = e.getErrno(); - if (code == /* No space left on device */28 || code == /* Quota exceeded */122) + if (downloaded_size == 0 && fs::exists(file_segment_path)) + { + fs::remove(file_segment_path); + } + else if (is_no_space_left_error) { const auto file_size = fs::file_size(file_segment_path); + chassert(downloaded_size <= file_size); chassert(reserved_size >= file_size); chassert(file_size <= range().size()); + if (downloaded_size != file_size) downloaded_size = file_size; } - setDownloadFailedUnlocked(lock); throw; } @@ -781,21 +795,42 @@ bool FileSegment::assertCorrectness() const return assertCorrectnessUnlocked(lockFileSegment()); } -bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const +bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) const { - auto check_iterator = [this](const Priority::IteratorPtr & it) + auto throw_logical = [&](const std::string & error) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "{}. File segment info: {}", error, getInfoForLogUnlocked(lock)); + }; + + auto check_iterator = [&](const Priority::IteratorPtr & it) { UNUSED(this); if (!it) return; const auto & entry = it->getEntry(); - UNUSED(entry); - chassert(entry->size == reserved_size); + if (download_state != State::DOWNLOADING && entry->size != reserved_size) + throw_logical(fmt::format("Expected entry.size == reserved_size ({} == {})", entry->size, reserved_size)); + chassert(entry->key == key()); chassert(entry->offset == offset()); }; + const auto file_path = getPath(); + if (segment_kind != FileSegmentKind::Temporary) + { + std::lock_guard lk(write_mutex); + if (downloaded_size == 0) + { + if (download_state != State::DOWNLOADING && fs::exists(file_path)) + throw_logical("Expected file " + file_path + " not to exist"); + } + else if (!fs::exists(file_path)) + { + throw_logical("Expected file " + file_path + " to exist"); + } + } + if (download_state == State::DOWNLOADED) { chassert(downloader_id.empty()); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index cb718bcdd2ed..ea97a6b01571 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -262,6 +262,7 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). /// downloaded_size should always be less or equal to reserved_size std::atomic downloaded_size = 0; std::atomic reserved_size = 0; + mutable std::mutex write_mutex; mutable FileSegmentGuard segment_guard; std::weak_ptr key_metadata; @@ -269,7 +270,7 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). FileCache * cache; std::condition_variable cv; - Poco::Logger * log; + LoggerPtr log; std::atomic hits_count = 0; /// cache hits. 
std::atomic ref_count = 0; /// Used for getting snapshot state diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 998b11723d81..a74a4b8b621a 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -71,7 +71,7 @@ class LRUFileCachePriority final : public IFileCachePriority friend class SLRUFileCachePriority; LRUQueue queue; - Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority"); + LoggerPtr log = getLogger("LRUFileCachePriority"); StatePtr state; void updateElementsCount(int64_t num); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index aafa94f04be3..727f2762ccaa 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -153,7 +153,7 @@ std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment) co return cache_metadata->getFileSegmentPath(key, file_segment.offset(), file_segment.getKind(), user); } -Poco::Logger * KeyMetadata::logger() const +LoggerPtr KeyMetadata::logger() const { return cache_metadata->log; } @@ -167,7 +167,7 @@ CacheMetadata::CacheMetadata( , cleanup_queue(std::make_shared()) , download_queue(std::make_shared(background_download_queue_size_limit_)) , write_cache_per_user_directory(write_cache_per_user_directory_) - , log(&Poco::Logger::get("CacheMetadata")) + , log(getLogger("CacheMetadata")) , download_threads_num(background_download_threads_) { } @@ -924,7 +924,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( { auto file_segment = it->second->file_segment; - LOG_DEBUG( + LOG_TEST( key_metadata->logger(), "Remove from cache. Key: {}, offset: {}, size: {}", getKey(), file_segment->offset(), file_segment->reserved_size); @@ -938,8 +938,18 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( try { const auto path = key_metadata->getFileSegmentPath(*file_segment); - bool exists = fs::exists(path); - if (exists) + if (file_segment->segment_kind == FileSegmentKind::Temporary) + { + /// FIXME: For temporary file segment the requirement is not as strong because + /// the implementation of "temporary data in cache" creates files in advance. 
+ if (fs::exists(path)) + fs::remove(path); + } + else if (file_segment->downloaded_size == 0) + { + chassert(!fs::exists(path)); + } + else if (fs::exists(path)) { fs::remove(path); @@ -952,7 +962,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( LOG_TEST(key_metadata->logger(), "Removed file segment at path: {}", path); } - else if (file_segment->downloaded_size && !can_be_broken) + else if (!can_be_broken) { #ifdef ABORT_ON_LOGICAL_ERROR throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 3003ad74e186..c02127cdef30 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -99,7 +99,7 @@ struct KeyMetadata : private std::map, std::atomic created_base_directory = false; LockedKeyPtr lockNoStateCheck(); - Poco::Logger * logger() const; + LoggerPtr logger() const; bool addToDownloadQueue(FileSegmentPtr file_segment); void addToCleanupQueue(); }; @@ -177,7 +177,7 @@ class CacheMetadata : private boost::noncopyable const DownloadQueuePtr download_queue; const bool write_cache_per_user_directory; - Poco::Logger * log; + LoggerPtr log; mutable std::shared_mutex key_prefix_directory_mutex; struct MetadataBucket : public std::unordered_map diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 2dd4887dd20a..c574f3085e30 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -156,7 +156,7 @@ class QueryCache Cache::MappedPtr query_result TSA_GUARDED_BY(mutex) = std::make_shared(); std::atomic skip_insert = false; bool was_finalized = false; - Poco::Logger * logger = &Poco::Logger::get("QueryCache"); + LoggerPtr logger = getLogger("QueryCache"); Writer(Cache & cache_, const Key & key_, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_, @@ -183,7 +183,7 @@ class QueryCache std::unique_ptr source_from_chunks; std::unique_ptr source_from_chunks_totals; std::unique_ptr source_from_chunks_extremes; - Poco::Logger * logger = &Poco::Logger::get("QueryCache"); + LoggerPtr logger = getLogger("QueryCache"); friend class QueryCache; /// for createReader() }; diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index e949979ae611..d97fa80a6c78 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -58,7 +58,7 @@ class SLRUFileCachePriority : public IFileCachePriority double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; - Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority"); + LoggerPtr log = getLogger("SLRUFileCachePriority"); void increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock); }; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index bf5b8712fb73..7cd4e2d6e8d8 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -58,6 +58,9 @@ void WriteBufferToFileSegment::nextImpl() reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); + if (std::filesystem::exists(file_segment->getPath())) + std::filesystem::remove(file_segment->getPath()); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: 
{})", bytes_to_write, file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", @@ -74,7 +77,7 @@ void WriteBufferToFileSegment::nextImpl() } catch (...) { - LOG_WARNING(&Poco::Logger::get("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); + LOG_WARNING(getLogger("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); throw; } diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index d0b00056cb4e..52b74597c4b4 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -116,7 +116,7 @@ ClusterDiscovery::ClusterDiscovery( const String & config_prefix) : context(Context::createCopy(context_)) , current_node_name(toString(ServerUUID::get())) - , log(&Poco::Logger::get("ClusterDiscovery")) + , log(getLogger("ClusterDiscovery")) { LOG_DEBUG(log, "Cluster discovery is enabled"); @@ -553,7 +553,7 @@ bool ClusterDiscovery::NodeInfo::parse(const String & data, NodeInfo & result) else { LOG_ERROR( - &Poco::Logger::get("ClusterDiscovery"), + getLogger("ClusterDiscovery"), "Unsupported version '{}' of data in zk node '{}'", ver, data.size() < 1024 ? data : "[data too long]"); } @@ -561,7 +561,7 @@ bool ClusterDiscovery::NodeInfo::parse(const String & data, NodeInfo & result) catch (Poco::Exception & e) { LOG_WARNING( - &Poco::Logger::get("ClusterDiscovery"), + getLogger("ClusterDiscovery"), "Can't parse '{}' from node: {}", data.size() < 1024 ? data : "[data too long]", e.displayText()); return false; diff --git a/src/Interpreters/ClusterDiscovery.h b/src/Interpreters/ClusterDiscovery.h index 8083fb6db413..756ed3d8d9be 100644 --- a/src/Interpreters/ClusterDiscovery.h +++ b/src/Interpreters/ClusterDiscovery.h @@ -152,7 +152,7 @@ class ClusterDiscovery bool is_initialized = false; ThreadFromGlobalPool main_thread; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index f8a070a6fde1..f0592735cafa 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -175,7 +175,7 @@ void SelectStreamFactory::createForShard( ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); if (shard_info.hasRemoteConnections()) { - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + LOG_WARNING(getLogger("ClusterProxy::SelectStreamFactory"), "There is no table {} on local replica of shard {}, will try remote replicas.", main_table.getNameForLogs(), shard_info.shard_num); emplace_remote_stream(); @@ -213,7 +213,7 @@ void SelectStreamFactory::createForShard( /// If we reached this point, local replica is stale. 
ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica); - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), "Local replica of shard {} is stale (delay: {}s.)", shard_info.shard_num, local_delay); + LOG_WARNING(getLogger("ClusterProxy::SelectStreamFactory"), "Local replica of shard {} is stale (delay: {}s.)", shard_info.shard_num, local_delay); if (!settings.fallback_to_stale_replicas_for_distributed_queries) { diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 5865e669e47b..35451e1d774c 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -42,7 +42,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, const Settings & settings, const StorageID & main_table, ASTPtr additional_filter_ast, - Poco::Logger * log, + LoggerPtr log, const DistributedSettings * distributed_settings) { Settings new_settings = settings; @@ -202,7 +202,7 @@ void executeQuery( const StorageID & main_table, const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, - Poco::Logger * log, + LoggerPtr log, const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, @@ -347,14 +347,14 @@ void executeQueryWithParallelReplicas( if (settings.use_hedged_requests.changed) { LOG_WARNING( - &Poco::Logger::get("executeQueryWithParallelReplicas"), + getLogger("executeQueryWithParallelReplicas"), "Setting 'use_hedged_requests' explicitly with enabled 'allow_experimental_parallel_reading_from_replicas' has no effect. " "Hedged connections are not used for parallel reading from replicas"); } else { LOG_INFO( - &Poco::Logger::get("executeQueryWithParallelReplicas"), + getLogger("executeQueryWithParallelReplicas"), "Disabling 'use_hedged_requests' in favor of 'allow_experimental_parallel_reading_from_replicas'. 
Hedged connections are " "not used for parallel reading from replicas"); } @@ -390,7 +390,7 @@ void executeQueryWithParallelReplicas( chassert(shard_count == not_optimized_cluster->getShardsAddresses().size()); - LOG_DEBUG(&Poco::Logger::get("executeQueryWithParallelReplicas"), "Parallel replicas query in shard scope: shard_num={} cluster={}", + LOG_DEBUG(getLogger("executeQueryWithParallelReplicas"), "Parallel replicas query in shard scope: shard_num={} cluster={}", shard_num, not_optimized_cluster->getName()); // get cluster for shard specified by shard_num @@ -417,7 +417,7 @@ void executeQueryWithParallelReplicas( getThrottler(new_context), std::move(scalars), std::move(external_tables), - &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), + getLogger("ReadFromParallelRemoteReplicasStep"), std::move(storage_limits)); query_plan.addStep(std::move(read_from_remote)); diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index a19ece0bbdcc..bbc3c6c9e49c 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -43,7 +43,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, const Settings & settings, const StorageID & main_table, ASTPtr additional_filter_ast = nullptr, - Poco::Logger * log = nullptr, + LoggerPtr log = nullptr, const DistributedSettings * distributed_settings = nullptr); using AdditionalShardFilterGenerator = std::function; @@ -57,7 +57,7 @@ void executeQuery( const StorageID & main_table, const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, - Poco::Logger * log, + LoggerPtr log, const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 38a033bee1df..0e5897e7306b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -197,7 +198,7 @@ namespace ErrorCodes */ struct ContextSharedPart : boost::noncopyable { - Poco::Logger * log = &Poco::Logger::get("Context"); + LoggerPtr log = getLogger("Context"); /// For access of most of shared objects. 
mutable ContextSharedMutex mutex; @@ -1010,7 +1011,7 @@ void Context::setFilesystemCacheUser(const String & user) shared->filesystem_cache_user = user; } -static void setupTmpPath(Poco::Logger * log, const std::string & path) +static void setupTmpPath(LoggerPtr log, const std::string & path) try { LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); @@ -4276,11 +4277,11 @@ void Context::setApplicationType(ApplicationType type) /// Lock isn't required, you should set it at start shared->application_type = type; - if (type == ApplicationType::SERVER) - { + if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER) shared->server_settings.loadSettingsFromConfig(Poco::Util::Application::instance().config()); + + if (type == ApplicationType::SERVER) shared->configureServerWideThrottling(); - } } void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & config) @@ -4291,7 +4292,7 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi shared->system_profile_name = config.getString("system_profile", shared->default_profile_name); setCurrentProfile(shared->system_profile_name); - applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks")); + applySettingsQuirks(settings, getLogger("SettingsQuirks")); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); @@ -4861,10 +4862,10 @@ AsynchronousInsertQueue * Context::getAsynchronousInsertQueue() const void Context::setAsynchronousInsertQueue(const std::shared_ptr & ptr) { - using namespace std::chrono; + AsynchronousInsertQueue::validateSettings(settings, getLogger("Context")); - if (std::chrono::milliseconds(settings.async_insert_busy_timeout_ms) == 0ms) - throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting async_insert_busy_timeout_ms can't be zero"); + if (std::chrono::milliseconds(settings.async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero()) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting async_insert_poll_timeout_ms can't be zero"); shared->async_insert_queue = ptr; } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 005450c2a2c8..42af164f4ad3 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -249,7 +249,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da ASTPtr on_expr = makeOnExpression(expr_it->second); if (rewritten = joined.rewriteCrossToInner(on_expr); rewritten) { - LOG_DEBUG(&Poco::Logger::get("CrossToInnerJoin"), "Rewritten '{}' to '{}'", query_before, queryToString(*joined.tableJoin())); + LOG_DEBUG(getLogger("CrossToInnerJoin"), "Rewritten '{}' to '{}'", query_before, queryToString(*joined.tableJoin())); } } diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index d418be51cc59..90eec421abf4 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -215,7 +215,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z } -bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional & config_host_name) +bool DDLTask::findCurrentHostID(ContextPtr global_context, LoggerPtr log, const ZooKeeperPtr & zookeeper, const std::optional & config_host_name) { bool host_in_hostlist = false; std::exception_ptr first_exception = nullptr; @@ -312,7 
+312,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c return host_in_hostlist; } -void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log) +void DDLTask::setClusterInfo(ContextPtr context, LoggerPtr log) { auto * query_on_cluster = dynamic_cast(query.get()); if (!query_on_cluster) diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index bc45b46bf0f2..5a8a5bfb1842 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -146,9 +146,9 @@ struct DDLTask : public DDLTaskBase { DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {} - bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional & config_host_name); + bool findCurrentHostID(ContextPtr global_context, LoggerPtr log, const ZooKeeperPtr & zookeeper, const std::optional & config_host_name); - void setClusterInfo(ContextPtr context, Poco::Logger * log); + void setClusterInfo(ContextPtr context, LoggerPtr log); String getShardID() const override; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index c0611dfaf7df..fd807d54eff6 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -79,7 +79,7 @@ DDLWorker::DDLWorker( const CurrentMetrics::Metric * max_entry_metric_, const CurrentMetrics::Metric * max_pushed_entry_metric_) : context(Context::createCopy(context_)) - , log(&Poco::Logger::get(logger_name)) + , log(getLogger(logger_name)) , pool_size(pool_size_) , max_entry_metric(max_entry_metric_) , max_pushed_entry_metric(max_pushed_entry_metric_) diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index adc9a491d815..9eb6606e73cd 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -151,7 +151,7 @@ class DDLWorker void runCleanupThread(); ContextMutablePtr context; - Poco::Logger * log; + LoggerPtr log; std::optional config_host_name; /// host_name from config diff --git a/src/Interpreters/DNSCacheUpdater.cpp b/src/Interpreters/DNSCacheUpdater.cpp index c6a38d85acbb..4769395acaab 100644 --- a/src/Interpreters/DNSCacheUpdater.cpp +++ b/src/Interpreters/DNSCacheUpdater.cpp @@ -24,7 +24,7 @@ void DNSCacheUpdater::run() /// Reload cluster config if IP of any host has been changed since last update. if (resolver.updateCache(max_consecutive_failures)) { - LOG_INFO(&Poco::Logger::get("DNSCacheUpdater"), "IPs of some hosts have been changed. Will reload cluster config."); + LOG_INFO(getLogger("DNSCacheUpdater"), "IPs of some hosts have been changed. 
Will reload cluster config."); try { getContext()->reloadClusterConfig(); @@ -45,7 +45,7 @@ void DNSCacheUpdater::run() void DNSCacheUpdater::start() { - LOG_INFO(&Poco::Logger::get("DNSCacheUpdater"), "Update period {} seconds", update_period_seconds); + LOG_INFO(getLogger("DNSCacheUpdater"), "Update period {} seconds", update_period_seconds); task_handle->activateAndSchedule(); } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 138ec588202f..87985d1d12b5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -827,7 +827,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , referential_dependencies{"ReferentialDeps"} , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} - , log(&Poco::Logger::get("DatabaseCatalog")) + , log(getLogger("DatabaseCatalog")) , first_async_drop_in_queue(tables_marked_dropped.end()) { } diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 19882b0b8281..beb73e3ef96a 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -318,7 +318,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext /// View dependencies between a source table and its view. TablesDependencyGraph view_dependencies TSA_GUARDED_BY(databases_mutex); - Poco::Logger * log; + LoggerPtr log; std::atomic_bool is_shutting_down = false; diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index 431f216436dc..3255b56b3be6 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -67,7 +67,7 @@ DirectKeyValueJoin::DirectKeyValueJoin(std::shared_ptr table_join_, : table_join(table_join_) , storage(storage_) , right_sample_block(right_sample_block_) - , log(&Poco::Logger::get("DirectKeyValueJoin")) + , log(getLogger("DirectKeyValueJoin")) { if (!table_join->oneDisjunct() || table_join->getOnlyClause().key_names_left.size() != 1 || diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index 5f6643148181..ef8d12a1b8f1 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -60,7 +60,7 @@ class DirectKeyValueJoin : public IJoin Block right_sample_block; Block right_sample_block_with_storage_column_names; Block sample_block_with_columns_to_add; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/src/Interpreters/EmbeddedDictionaries.cpp b/src/Interpreters/EmbeddedDictionaries.cpp index 6c0ccce66b57..1435d16cb073 100644 --- a/src/Interpreters/EmbeddedDictionaries.cpp +++ b/src/Interpreters/EmbeddedDictionaries.cpp @@ -125,7 +125,7 @@ EmbeddedDictionaries::EmbeddedDictionaries( ContextPtr context_, const bool throw_on_error) : WithContext(context_) - , log(&Poco::Logger::get("EmbeddedDictionaries")) + , log(getLogger("EmbeddedDictionaries")) , geo_dictionaries_loader(std::move(geo_dictionaries_loader_)) , reload_period(getContext()->getConfigRef().getInt("builtin_dictionaries_reload_interval", 3600)) { diff --git a/src/Interpreters/EmbeddedDictionaries.h b/src/Interpreters/EmbeddedDictionaries.h index e71098636fe7..b537146e92d7 100644 --- a/src/Interpreters/EmbeddedDictionaries.h +++ b/src/Interpreters/EmbeddedDictionaries.h @@ -24,7 +24,7 @@ class GeoDictionariesLoader; class EmbeddedDictionaries : WithContext { private: - Poco::Logger * log; + LoggerPtr log; MultiVersion regions_hierarchies; MultiVersion regions_names; diff --git a/src/Interpreters/ExpressionActions.cpp 
b/src/Interpreters/ExpressionActions.cpp index f1c577948eb1..1bd1e2c318ff 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -611,6 +611,13 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute); res_column.column = action.node->function->execute(arguments, res_column.type, num_rows, dry_run); + if (res_column.column->getDataType() != res_column.type->getColumnType()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected return type from {}. Expected {}. Got {}", + action.node->function->getName(), + res_column.type->getColumnType(), + res_column.column->getDataType()); } break; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 5c628436d609..fefbd67bfc17 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -120,7 +120,7 @@ bool allowEarlyConstantFolding(const ActionsDAG & actions, const Settings & sett return true; } -Poco::Logger * getLogger() { return &Poco::Logger::get("ExpressionAnalyzer"); } +LoggerPtr getLogger() { return ::getLogger("ExpressionAnalyzer"); } } diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 0eacb598fbe9..16275b23053f 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -38,10 +38,9 @@ static CHJIT & getJITInstance() return jit; } -static Poco::Logger * getLogger() +static LoggerPtr getLogger() { - static Poco::Logger & logger = Poco::Logger::get("ExpressionJIT"); - return &logger; + return ::getLogger("ExpressionJIT"); } class CompiledFunctionHolder : public CompiledExpressionCacheEntry diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 46171c95cb0f..74984de00643 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes /// Must not acquire Context lock in constructor to avoid possibility of deadlocks. 
ExternalDictionariesLoader::ExternalDictionariesLoader(ContextPtr global_context_) - : ExternalLoader("external dictionary", &Poco::Logger::get("ExternalDictionariesLoader")) + : ExternalLoader("external dictionary", getLogger("ExternalDictionariesLoader")) , WithContext(global_context_) { setConfigSettings({"dictionary", "name", "database", "uuid"}); diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 56d480d87354..36664cbd06fb 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -95,7 +95,7 @@ namespace class ExternalLoader::LoadablesConfigReader : private boost::noncopyable { public: - LoadablesConfigReader(const String & type_name_, Poco::Logger * log_) + LoadablesConfigReader(const String & type_name_, LoggerPtr log_) : type_name(type_name_), log(log_) { } @@ -377,7 +377,7 @@ class ExternalLoader::LoadablesConfigReader : private boost::noncopyable } const String type_name; - Poco::Logger * log; + LoggerPtr log; std::mutex mutex; ExternalLoaderConfigSettings settings; @@ -401,7 +401,7 @@ class ExternalLoader::LoadingDispatcher : private boost::noncopyable LoadingDispatcher( const CreateObjectFunction & create_object_function_, const String & type_name_, - Poco::Logger * log_) + LoggerPtr log_) : create_object(create_object_function_) , type_name(type_name_) , log(log_) @@ -1193,7 +1193,7 @@ class ExternalLoader::LoadingDispatcher : private boost::noncopyable const CreateObjectFunction create_object; const String type_name; - Poco::Logger * log; + LoggerPtr log; mutable std::mutex mutex; std::condition_variable event; @@ -1273,7 +1273,7 @@ class ExternalLoader::PeriodicUpdater : private boost::noncopyable }; -ExternalLoader::ExternalLoader(const String & type_name_, Poco::Logger * log_) +ExternalLoader::ExternalLoader(const String & type_name_, LoggerPtr log_) : config_files_reader(std::make_unique(type_name_, log_)) , loading_dispatcher(std::make_unique( [this](auto && a, auto && b, auto && c) { return createObject(a, b, c); }, diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index 49b4ea77e0d3..a5d83bdab50a 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace Poco { class Logger; } @@ -84,7 +85,7 @@ class ExternalLoader template static constexpr bool is_vector_load_result_type = std::is_same_v || std::is_same_v; - ExternalLoader(const String & type_name_, Poco::Logger * log); + ExternalLoader(const String & type_name_, LoggerPtr log); virtual ~ExternalLoader(); /// Adds a repository which will be used to read configurations from. 
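Apart from the behavioural changes, the bulk of the remaining hunks in this patch are a mechanical migration of logging members: raw `Poco::Logger *` fields initialised with `&Poco::Logger::get(name)` become shared-ownership `LoggerPtr` values obtained via `getLogger(name)`, and function signatures are updated accordingly. A minimal shim of that shape is sketched below purely to show what the new signatures expect; the real `LoggerPtr`/`getLogger` definitions live elsewhere in the ClickHouse tree and may differ (in particular, the no-op deleter here is an assumption made for illustration).

// Illustrative shim only: shows the shape of the LoggerPtr / getLogger API
// that the hunks in this patch migrate to; not the actual ClickHouse header.
#include <memory>
#include <string>
#include <Poco/Logger.h>

using LoggerPtr = std::shared_ptr<Poco::Logger>;

inline LoggerPtr getLogger(const std::string & name)
{
    /// Poco owns named loggers in its global registry, so the shared_ptr must
    /// not delete the object; an empty deleter leaves ownership with Poco.
    return LoggerPtr(&Poco::Logger::get(name), [](Poco::Logger *) {});
}

/// Usage matching the diffs, e.g.:
///     LoggerPtr log = getLogger("DatabaseCatalog");
///     LOG_DEBUG(log, "...");

One practical consequence visible in the signatures above is that a `LoggerPtr` behaves like any other shared_ptr: it can be passed by value, stored in members, or defaulted to nullptr for optional logging, as the updateSettingsForCluster declaration in this patch does.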
@@ -230,7 +231,7 @@ class ExternalLoader std::unique_ptr periodic_updater; const String type_name; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index 3fc9f8920ed7..7688d44f7a96 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -25,7 +25,7 @@ class FullSortingMergeJoin : public IJoin : table_join(table_join_) , right_sample_block(right_sample_block_) { - LOG_TRACE(&Poco::Logger::get("FullSortingMergeJoin"), "Will use full sorting merge join"); + LOG_TRACE(getLogger("FullSortingMergeJoin"), "Will use full sorting merge join"); } std::string getName() const override { return "FullSortingMergeJoin"; } diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp index 664bb9e93832..6b6dc3627717 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.cpp +++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp @@ -30,6 +30,7 @@ static const std::unordered_map quantile_fuse_name_mapping = {"quantileTDigestWeighted", "quantilesTDigestWeighted"}, {"quantileTiming", "quantilesTiming"}, {"quantileTimingWeighted", "quantilesTimingWeighted"}, + {"quantileGK", "quantilesGK"}, }; String GatherFunctionQuantileData::toFusedNameOrSelf(const String & func_name) diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 384b562c80c1..5f029395df90 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -32,6 +32,7 @@ namespace ErrorCodes { extern const int WRONG_GLOBAL_SUBQUERY; extern const int LOGICAL_ERROR; + extern const int SUPPORT_IS_DISABLED; } class GlobalSubqueriesMatcher @@ -200,23 +201,33 @@ class GlobalSubqueriesMatcher } private: - static bool shouldBeExecutedGlobally(const Data & data) + /// GLOBAL IN + static void visit(ASTFunction & func, ASTPtr &, Data & data) { const Settings & settings = data.getContext()->getSettingsRef(); - /// For parallel replicas we reinterpret JOIN as GLOBAL JOIN as a way to broadcast data + const bool prefer_global = settings.prefer_global_in_and_join; const bool enable_parallel_processing_of_joins = data.getContext()->canUseParallelReplicasOnInitiator(); - return settings.prefer_global_in_and_join || enable_parallel_processing_of_joins; - } - - /// GLOBAL IN - static void visit(ASTFunction & func, ASTPtr &, Data & data) - { - if ((shouldBeExecutedGlobally(data) + if (((prefer_global || enable_parallel_processing_of_joins) && (func.name == "in" || func.name == "notIn" || func.name == "nullIn" || func.name == "notNullIn")) || func.name == "globalIn" || func.name == "globalNotIn" || func.name == "globalNullIn" || func.name == "globalNotNullIn") { ASTPtr & ast = func.arguments->children[1]; + if (enable_parallel_processing_of_joins) + { + /// We don't enable parallel replicas for IN (subquery) + if (ast->as()) + { + if (settings.allow_experimental_parallel_reading_from_replicas == 1) + { + LOG_DEBUG(getLogger("GlobalSubqueriesMatcher"), "IN with subquery is not supported with parallel replicas"); + data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + return; + } + else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); + } + } /// Literal or function can use regular IN. 
/// NOTE: We don't support passing table functions to IN. @@ -241,9 +252,41 @@ class GlobalSubqueriesMatcher /// GLOBAL JOIN static void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &, Data & data) { + const Settings & settings = data.getContext()->getSettingsRef(); + const bool prefer_global = settings.prefer_global_in_and_join; + const bool enable_parallel_processing_of_joins = data.getContext()->canUseParallelReplicasOnInitiator(); + if (table_elem.table_join - && (table_elem.table_join->as().locality == JoinLocality::Global || shouldBeExecutedGlobally(data))) + && (table_elem.table_join->as().locality == JoinLocality::Global || prefer_global + || enable_parallel_processing_of_joins)) { + if (enable_parallel_processing_of_joins) + { + /// For parallel replicas we currently only support JOIN with subqueries + /// Note that tableA join tableB is previously converted into tableA JOIN (Select * FROM tableB) so that's ok + /// We don't support WITH cte as (subquery) Select table JOIN cte because we don't do conversion in AST + bool is_subquery = false; + if (const auto * ast_table_expr = table_elem.table_expression->as()) + { + is_subquery = ast_table_expr->subquery && ast_table_expr->subquery->as() != nullptr + && ast_table_expr->subquery->as()->cte_name.empty(); + } + else if (table_elem.table_expression->as()) + is_subquery = true; + + if (!is_subquery) + { + if (settings.allow_experimental_parallel_reading_from_replicas == 1) + { + LOG_DEBUG(getLogger("GlobalSubqueriesMatcher"), "JOIN with parallel replicas is only supported with subqueries"); + data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + return; + } + else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOIN with parallel replicas is only supported with subqueries"); + } + } + Names required_columns; /// Fill required columns for GLOBAL JOIN. diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 26d666a89133..5fb92a68a297 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -121,7 +121,7 @@ class GraceHashJoin::FileBucket : boost::noncopyable public: using BucketLock = std::unique_lock; - explicit FileBucket(size_t bucket_index_, TemporaryFileStream & left_file_, TemporaryFileStream & right_file_, Poco::Logger * log_) + explicit FileBucket(size_t bucket_index_, TemporaryFileStream & left_file_, TemporaryFileStream & right_file_, LoggerPtr log_) : idx{bucket_index_} , left_file{left_file_} , right_file{right_file_} @@ -223,7 +223,7 @@ class GraceHashJoin::FileBucket : boost::noncopyable std::atomic state; - Poco::Logger * log; + LoggerPtr log; }; namespace @@ -261,7 +261,7 @@ GraceHashJoin::GraceHashJoin( const Block & right_sample_block_, TemporaryDataOnDiskScopePtr tmp_data_, bool any_take_last_row_) - : log{&Poco::Logger::get("GraceHashJoin")} + : log{getLogger("GraceHashJoin")} , context{context_} , table_join{std::move(table_join_)} , left_sample_block{left_sample_block_} @@ -403,7 +403,7 @@ void GraceHashJoin::addBuckets(const size_t bucket_count) catch (...) 
{ LOG_ERROR( - &Poco::Logger::get("GraceHashJoin"), + getLogger("GraceHashJoin"), "Can't create bucket {} due to error: {}", current_size + i, getCurrentExceptionMessage(false)); diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index 2cadeee10b97..ff396683230d 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -120,7 +120,7 @@ class GraceHashJoin final : public IJoin /// Structure block to store in the HashJoin according to sample_block. Block prepareRightBlock(const Block & block); - Poco::Logger * log; + LoggerPtr log; ContextPtr context; std::shared_ptr table_join; Block left_sample_block; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 467cc4c25319..33dc178ca00c 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -245,7 +245,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , max_joined_block_rows(table_join->maxJoinedBlockRows()) , instance_log_id(!instance_id_.empty() ? "(" + instance_id_ + ") " : "") - , log(&Poco::Logger::get("HashJoin")) + , log(getLogger("HashJoin")) { LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 17f003adc4b2..29bb90700092 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -446,7 +446,7 @@ class HashJoin : public IJoin /// Several instances can be created, for example, in GraceHashJoin to handle different buckets String instance_log_id; - Poco::Logger * log; + LoggerPtr log; /// Should be set via setLock to protect hash table from modification from StorageJoin /// If set HashJoin instance is not available for modification (addBlockToJoin) diff --git a/src/Interpreters/InternalTextLogsQueue.cpp b/src/Interpreters/InternalTextLogsQueue.cpp index 3be58a11beba..ca8461937ac3 100644 --- a/src/Interpreters/InternalTextLogsQueue.cpp +++ b/src/Interpreters/InternalTextLogsQueue.cpp @@ -43,7 +43,7 @@ void InternalTextLogsQueue::pushBlock(Block && log_block) if (blocksHaveEqualStructure(sample_block, log_block)) (void)(emplace(log_block.mutateColumns())); else - LOG_WARNING(&Poco::Logger::get("InternalTextLogsQueue"), "Log block have different structure"); + LOG_WARNING(getLogger("InternalTextLogsQueue"), "Log block have different structure"); } std::string_view InternalTextLogsQueue::getPriorityName(int priority) diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 0cc4afd62f22..98a281bd5ade 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -149,7 +149,7 @@ class TableCheckTask : public ChunkInfo class TableCheckSource : public ISource { public: - TableCheckSource(Strings databases_, ContextPtr context_, Poco::Logger * log_) + TableCheckSource(Strings databases_, ContextPtr context_, LoggerPtr log_) : ISource(getSingleValueBlock(0)) , databases(databases_) , context(context_) @@ -157,7 +157,7 @@ class TableCheckSource : public ISource { } - TableCheckSource(std::shared_ptr table_check_task_, Poco::Logger * log_) + TableCheckSource(std::shared_ptr table_check_task_, LoggerPtr log_) : ISource(getSingleValueBlock(0)) , table_check_task(table_check_task_) , log(log_) @@ -260,14 +260,14 @@ 
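Most of the mechanical churn in these files is the switch from raw Poco::Logger * members to LoggerPtr handles obtained via getLogger(...). A rough illustration of why a shared-ownership handle is preferable to a raw pointer into a global registry follows; the Logger type and getLogger below are simplified stand-ins, not the real Poco or ClickHouse implementation.

#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Simplified logger: the real one carries levels, channels, formatting, etc.
struct Logger
{
    std::string name;
    void trace(const std::string & msg) const { std::cout << '[' << name << "] " << msg << '\n'; }
};

using LoggerPtr = std::shared_ptr<Logger>;

// Registry that hands out shared handles; a logger stays alive for as long as
// any holder keeps its LoggerPtr, so the handle cannot dangle the way a raw
// pointer into a registry that is torn down earlier can.
LoggerPtr getLogger(const std::string & name)
{
    static std::mutex mutex;
    static std::map<std::string, std::weak_ptr<Logger>> registry;

    std::lock_guard lock(mutex);
    if (auto existing = registry[name].lock())
        return existing;

    auto created = std::make_shared<Logger>(Logger{name});
    registry[name] = created;
    return created;
}

struct HashJoinLike
{
    LoggerPtr log = getLogger("HashJoin");   // member mirrors the pattern used throughout the diff
    void build() const { log->trace("building hash table"); }
};

int main()
{
    HashJoinLike join;
    join.build();
}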
class TableCheckSource : public ISource ContextPtr context; - Poco::Logger * log; + LoggerPtr log; }; /// Receives TableCheckTask and returns CheckResult converted to sinle-row chunk class TableCheckWorkerProcessor : public ISimpleTransform { public: - TableCheckWorkerProcessor(bool with_table_name_, Poco::Logger * log_) + TableCheckWorkerProcessor(bool with_table_name_, LoggerPtr log_) : ISimpleTransform(getSingleValueBlock(0), getHeaderForCheckResult(with_table_name_), true) , with_table_name(with_table_name_) , log(log_) @@ -308,7 +308,7 @@ class TableCheckWorkerProcessor : public ISimpleTransform /// If true, then output will contain columns with database and table names bool with_table_name; - Poco::Logger * log; + LoggerPtr log; }; /// Accumulates all results and returns single value diff --git a/src/Interpreters/InterpreterCheckQuery.h b/src/Interpreters/InterpreterCheckQuery.h index 5ffd1d4298f6..4bba3ed780ca 100644 --- a/src/Interpreters/InterpreterCheckQuery.h +++ b/src/Interpreters/InterpreterCheckQuery.h @@ -19,7 +19,7 @@ class InterpreterCheckQuery : public IInterpreter, WithContext private: ASTPtr query_ptr; - Poco::Logger * log = &Poco::Logger::get("InterpreterCheckQuery"); + LoggerPtr log = getLogger("InterpreterCheckQuery"); }; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d002cc6d9806..7133c9eef340 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -716,7 +716,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti setEngine(create); /// We have to check access rights again (in case engine was changed). - if (create.storage) + if (create.storage && create.storage->engine) { auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); if (source_access_type != AccessType::NONE) @@ -957,6 +957,20 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } } + if (!create.attach && !settings.allow_experimental_variant_type) + { + for (const auto & [name, type] : properties.columns.getAllPhysical()) + { + if (isVariant(type)) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' which type is '{}' " + "because experimental Variant type is not allowed. " + "Set setting allow_experimental_variant_type = 1 in order to allow it", + name, type->getName()); + } + } + } } namespace @@ -1214,7 +1228,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) } else if (create.attach && !create.attach_short_syntax && getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) { - auto * log = &Poco::Logger::get("InterpreterCreateQuery"); + auto log = getLogger("InterpreterCreateQuery"); LOG_WARNING(log, "ATTACH TABLE query with full table definition is not recommended: " "use either ATTACH TABLE {}; to attach existing table " "or CREATE TABLE {}
; to create new table " @@ -1421,8 +1435,14 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, interpreter.execute(); } else - throw Exception(storage_already_exists_error_code, - "{} {}.{} already exists", storage_name, backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(create.getTable())); + { + if (database->getTable(create.getTable(), getContext())->isDictionary()) + throw Exception(ErrorCodes::DICTIONARY_ALREADY_EXISTS, + "Dictionary {}.{} already exists", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(create.getTable())); + else + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, + "Table {}.{} already exists", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(create.getTable())); + } } else if (!create.attach) { @@ -1455,7 +1475,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// so the existing directory probably contains some leftovers from previous unsuccessful attempts to create the table fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng()); - LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Directory for {} data {} already exists. Will move it to {}", + LOG_WARNING(getLogger("InterpreterCreateQuery"), "Directory for {} data {} already exists. Will move it to {}", Poco::toLower(storage_name), String(data_path), trash_path); fs::create_directories(trash_path.parent_path()); renameNoReplace(full_data_path, trash_path); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index c8e05fcd5e31..734306cf58d0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -533,7 +533,7 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
- if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 5efffdaa194a..3431cd5e568a 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -161,7 +161,7 @@ class SyncKillQuerySource : public ISource if (curr_process.processed) continue; - LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (synchronously)", curr_process.query_id); + LOG_DEBUG(getLogger("KillQuery"), "Will kill query {} (synchronously)", curr_process.query_id); auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true); @@ -229,7 +229,7 @@ BlockIO InterpreterKillQueryQuery::execute() for (const auto & query_desc : queries_to_stop) { if (!query.test) - LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (asynchronously)", query_desc.query_id); + LOG_DEBUG(getLogger("KillQuery"), "Will kill query {} (asynchronously)", query_desc.query_id); auto code = (query.test) ? CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true); insertResultRow(query_desc.source_num, code, processes_block, header, res_columns); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c0e9aeaae1d4..d0cf9f1160c0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -381,7 +381,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( : IInterpreterUnionOrSelectQuery(options_.modify_inplace ? 
query_ptr_ : query_ptr_->clone(), context_, options_) , storage(storage_) , input_pipe(std::move(input_pipe_)) - , log(&Poco::Logger::get("InterpreterSelectQuery")) + , log(getLogger("InterpreterSelectQuery")) , metadata_snapshot(metadata_snapshot_) , prepared_sets(prepared_sets_) { @@ -864,38 +864,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() ASTSelectQuery & query = getSelectQuery(); /// While only_analyze we don't know anything about parts, so any decision about how many parallel replicas to use would be wrong - if (!storage || !context->canUseParallelReplicasOnInitiator()) - return false; - - /// check if IN operator with subquery is present in the query - /// if so, disable parallel replicas - if (query_analyzer->getPreparedSets()->hasSubqueries()) - { - bool in_subqueries = false; - const auto & sets = query_analyzer->getPreparedSets(); - const auto subqueries = sets->getSubqueries(); - for (const auto & subquery : subqueries) - { - if (subquery->isINSubquery()) - { - in_subqueries = true; - break; - } - } - - if (in_subqueries) - { - if (settings.allow_experimental_parallel_reading_from_replicas == 2) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); - - context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); - LOG_DEBUG(log, "Disabling parallel replicas to execute a query with IN with subquery"); - return true; - } - } - - if (options.only_analyze) + if (!storage || options.only_analyze || !context->canUseParallelReplicasOnInitiator()) return false; if (getTrivialCount(0).has_value()) @@ -1572,7 +1541,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle filter_nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name)); } - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes); if (!filter_actions_dag) return {}; diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index fbb53d71755c..c307e457649c 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -253,7 +253,7 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery /// Used when we read from prepared input, not table or subquery. std::optional input_pipe; - Poco::Logger * log; + LoggerPtr log; StorageMetadataPtr metadata_snapshot; StorageSnapshotPtr storage_snapshot; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 868ef170f7ce..4897101d80b2 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -74,60 +74,25 @@ ContextMutablePtr buildContext(const ContextPtr & context, const SelectQueryOpti void replaceStorageInQueryTree(QueryTreeNodePtr & query_tree, const ContextPtr & context, const StoragePtr & storage) { - auto query_to_replace_table_expression = query_tree; - QueryTreeNodePtr table_expression_to_replace; + auto nodes = extractAllTableReferences(query_tree); + IQueryTreeNode::ReplacementMap replacement_map; - while (!table_expression_to_replace) + for (auto & node : nodes) { - if (auto * union_node = query_to_replace_table_expression->as()) - query_to_replace_table_expression = union_node->getQueries().getNodes().at(0); - - auto & query_to_replace_table_expression_typed = query_to_replace_table_expression->as(); - auto left_table_expression = extractLeftTableExpression(query_to_replace_table_expression_typed.getJoinTree()); - auto left_table_expression_node_type = left_table_expression->getNodeType(); - - switch (left_table_expression_node_type) - { - case QueryTreeNodeType::QUERY: - case QueryTreeNodeType::UNION: - { - query_to_replace_table_expression = std::move(left_table_expression); - break; - } - case QueryTreeNodeType::TABLE: - case QueryTreeNodeType::TABLE_FUNCTION: - case QueryTreeNodeType::IDENTIFIER: - { - table_expression_to_replace = std::move(left_table_expression); - break; - } - default: - { - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Expected table, table function or identifier node to replace with storage. 
Actual {}", - left_table_expression->formatASTForErrorMessage()); - } - } - } - - /// Don't replace storage if table name differs - if (auto * table_node = table_expression_to_replace->as(); table_node && table_node->getStorageID().getFullNameNotQuoted() != storage->getStorageID().getFullNameNotQuoted()) - return; + auto & table_node = node->as(); - auto replacement_table_expression = std::make_shared(storage, context); - std::optional table_expression_modifiers; + /// Don't replace storage if table name differs + if (table_node.getStorageID().getFullNameNotQuoted() != storage->getStorageID().getFullNameNotQuoted()) + continue; - if (auto * table_node = table_expression_to_replace->as()) - table_expression_modifiers = table_node->getTableExpressionModifiers(); - else if (auto * table_function_node = table_expression_to_replace->as()) - table_expression_modifiers = table_function_node->getTableExpressionModifiers(); - else if (auto * identifier_node = table_expression_to_replace->as()) - table_expression_modifiers = identifier_node->getTableExpressionModifiers(); + auto replacement_table_expression = std::make_shared(storage, context); - if (table_expression_modifiers) - replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers); + if (auto table_expression_modifiers = table_node.getTableExpressionModifiers()) + replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers); - query_tree = query_tree->cloneAndReplace(table_expression_to_replace, std::move(replacement_table_expression)); + replacement_map.emplace(node.get(), std::move(replacement_table_expression)); + } + query_tree = query_tree->cloneAndReplace(replacement_map); } QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1411e7e017be..9a80553f1493 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -221,7 +221,7 @@ void InterpreterSystemQuery::startStopAction(StorageActionBlockType action_type, void InterpreterSystemQuery::startStopActionInDatabase(StorageActionBlockType action_type, bool start, const String & database_name, const DatabasePtr & database, - const ContextPtr & local_context, Poco::Logger * log) + const ContextPtr & local_context, LoggerPtr log) { auto manager = local_context->getActionLocksManager(); auto access = local_context->getAccess(); @@ -251,7 +251,7 @@ void InterpreterSystemQuery::startStopActionInDatabase(StorageActionBlockType ac InterpreterSystemQuery::InterpreterSystemQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) - : WithMutableContext(context_), query_ptr(query_ptr_->clone()), log(&Poco::Logger::get("InterpreterSystemQuery")) + : WithMutableContext(context_), query_ptr(query_ptr_->clone()), log(getLogger("InterpreterSystemQuery")) { } @@ -379,27 +379,28 @@ BlockIO InterpreterSystemQuery::execute() case Type::DROP_FILESYSTEM_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_FILESYSTEM_CACHE); + const auto user_id = FileCache::getCommonUser().user_id; if (query.filesystem_cache_name.empty()) { auto caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache_data] : caches) - cache_data->cache->removeAllReleasable(FileCache::getCommonUser().user_id); + cache_data->cache->removeAllReleasable(user_id); } else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name)->cache; if (query.key_to_drop.empty()) { - 
cache->removeAllReleasable(FileCache::getCommonUser().user_id); + cache->removeAllReleasable(user_id); } else { auto key = FileCacheKey::fromKeyString(query.key_to_drop); if (query.offset_to_drop.has_value()) - cache->removeFileSegment(key, query.offset_to_drop.value(), FileCache::getCommonUser().user_id); + cache->removeFileSegment(key, query.offset_to_drop.value(), user_id); else - cache->removeKey(key, FileCache::getCommonUser().user_id); + cache->removeKey(key, user_id); } } break; diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 89de7402b4d3..1419c430aca2 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -43,11 +43,11 @@ class InterpreterSystemQuery : public IInterpreter, WithMutableContext static void startStopActionInDatabase(StorageActionBlockType action_type, bool start, const String & database_name, const DatabasePtr & database, - const ContextPtr & local_context, Poco::Logger * log); + const ContextPtr & local_context, LoggerPtr log); private: ASTPtr query_ptr; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; StorageID table_id = StorageID::createEmpty(); /// Will be set up if query contains table name VolumePtr volume_ptr; diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index 094b58789a8d..c344732a2620 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -35,7 +35,7 @@ InterserverCredentials::CurrentCredentials InterserverCredentials::parseCredenti const Poco::Util::AbstractConfiguration & config, const std::string & root_tag) { - auto * log = &Poco::Logger::get("InterserverCredentials"); + auto log = getLogger("InterserverCredentials"); CurrentCredentials store; store.emplace_back(current_user_, current_password_); if (config.getBool(root_tag + ".allow_empty", false)) diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index c104af770f06..9be8bf178a19 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -335,12 +335,12 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se auto dictionary = dictionary_helper.getDictionary(dictionary_name); if (!dictionary) { - LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name); + LOG_TRACE(getLogger("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name); return nullptr; } if (dictionary->getSpecialKeyType() == DictionarySpecialKeyType::Range) { - LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name); + LOG_TRACE(getLogger("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name); return nullptr; } diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 4a80e1a3c56e..901c82029eec 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -492,7 +492,7 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right , max_joined_block_rows(table_join->maxJoinedBlockRows()) , max_rows_in_right_block(table_join->maxRowsInRightBlock()) , max_files_to_merge(table_join->maxFilesToMerge()) - , log(&Poco::Logger::get("MergeJoin")) + , log(getLogger("MergeJoin")) { switch (table_join->strictness()) { diff --git a/src/Interpreters/MergeJoin.h 
b/src/Interpreters/MergeJoin.h index 98fae1d419fc..4486c134d518 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -117,7 +117,7 @@ class MergeJoin : public IJoin Names lowcard_right_keys; - Poco::Logger * log; + LoggerPtr log; void changeLeftColumns(Block & block, MutableColumns && columns) const; void addRightColumns(Block & block, MutableColumns && columns); diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index cc3868250242..4e71bd568518 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -1,13 +1,17 @@ #pragma once #include +#include #include +#include #include #include -#include #include +#include +#include #include #include +#include #include #include #include @@ -33,6 +37,8 @@ class MonotonicityCheckMatcher ASTIdentifier * identifier = nullptr; DataTypePtr arg_data_type = {}; + Range range = Range::createWholeUniverse(); + void reject() { monotonicity.is_monotonic = false; } bool isRejected() const { return !monotonicity.is_monotonic; } @@ -97,13 +103,30 @@ class MonotonicityCheckMatcher if (data.isRejected()) return; - /// TODO: monotonicity for functions of several arguments - if (!ast_function.arguments || ast_function.arguments->children.size() != 1) + /// Monotonicity check only works for functions that contain at most two arguments and one of them must be a constant. + if (!ast_function.arguments) { data.reject(); return; } + auto arguments_size = ast_function.arguments->children.size(); + + if (arguments_size == 0 || arguments_size > 2) + { + data.reject(); + return; + } + else if (arguments_size == 2) + { + /// If the function has two arguments, then one of them must be a constant. + if (!ast_function.arguments->children[0]->as() && !ast_function.arguments->children[1]->as()) + { + data.reject(); + return; + } + } + if (!data.canOptimize(ast_function)) { data.reject(); @@ -124,14 +147,33 @@ class MonotonicityCheckMatcher return; } - ColumnsWithTypeAndName args; - args.emplace_back(data.arg_data_type, "tmp"); - auto function_base = function->build(args); + auto function_arguments = getFunctionArguments(ast_function, data); + + auto function_base = function->build(function_arguments); if (function_base && function_base->hasInformationAboutMonotonicity()) { bool is_positive = data.monotonicity.is_positive; - data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field()); + data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, data.range.left, data.range.right); + + auto & key_range = data.range; + + /// If we apply function to open interval, we can get empty intervals in result. + /// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001'). + /// To avoid this we make range left and right included. + /// Any function that treats NULL specially is not monotonic. + /// Thus we can safely use isNull() as an -Inf/+Inf indicator here. 
+ if (!key_range.left.isNull()) + { + key_range.left = applyFunction(function_base, data.arg_data_type, key_range.left); + key_range.left_included = true; + } + + if (!key_range.right.isNull()) + { + key_range.right = applyFunction(function_base, data.arg_data_type, key_range.right); + key_range.right_included = true; + } if (!is_positive) data.monotonicity.is_positive = !data.monotonicity.is_positive; @@ -143,13 +185,53 @@ class MonotonicityCheckMatcher static bool needChildVisit(const ASTPtr & parent, const ASTPtr &) { - /// Currently we check monotonicity only for single-argument functions. - /// Although, multi-argument functions with all but one constant arguments can also be monotonic. + /// Multi-argument functions with all but one constant arguments can be monotonic. if (const auto * func = typeid_cast(parent.get())) - return func->arguments->children.size() < 2; + return func->arguments->children.size() <= 2; return true; } + + static ColumnWithTypeAndName extractLiteralColumnAndTypeFromAstLiteral(const ASTLiteral * literal) + { + ColumnWithTypeAndName result; + + result.type = applyVisitor(FieldToDataType(), literal->value); + result.column = result.type->createColumnConst(0, literal->value); + + return result; + } + + static ColumnsWithTypeAndName getFunctionArguments(const ASTFunction & ast_function, const Data & data) + { + ColumnsWithTypeAndName args; + + auto arguments_size = ast_function.arguments->children.size(); + + chassert(arguments_size == 1 || arguments_size == 2); + + if (arguments_size == 2) + { + if (ast_function.arguments->children[0]->as()) + { + const auto * literal = ast_function.arguments->children[0]->as(); + args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal)); + args.emplace_back(data.arg_data_type, "tmp"); + } + else + { + const auto * literal = ast_function.arguments->children[1]->as(); + args.emplace_back(data.arg_data_type, "tmp"); + args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal)); + } + } + else + { + args.emplace_back(data.arg_data_type, "tmp"); + } + + return args; + } }; using MonotonicityCheckVisitor = ConstInDepthNodeVisitor; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 86cd2d84fa37..502b961ced8c 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -342,6 +342,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } +bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const +{ + return part && part->hasBrokenProjection(name); +} + bool MutationsInterpreter::Source::isCompactPart() const { return part && part->getType() == MergeTreeDataPartType::Compact; @@ -807,7 +812,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasProjection(projection.name)) + if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -994,6 +999,13 @@ void MutationsInterpreter::prepare(bool dry_run) if (!source.hasProjection(projection.name)) continue; + /// Always rebuild broken projections. 
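The MonotonicityCheckMatcher above now accepts two-argument functions where one argument is a constant, and it maps the key range through the function while closing both interval ends, because a monotonic but non-injective function such as toYYYYMM can collapse an open interval to a single point. A small self-contained sketch of that endpoint mapping, using plain integers and std::optional in place of ClickHouse Fields and FunctionBase:

#include <functional>
#include <iostream>
#include <optional>

// A range over integers; nullopt stands in for -inf / +inf, mirroring how the
// visitor treats NULL endpoints.
struct Range
{
    std::optional<long> left;
    std::optional<long> right;
    bool left_included = false;
    bool right_included = false;
};

// Apply a monotonic function to both finite endpoints. The result is made
// closed on both sides: mapping days through "month of" turns
// ('2020-01-03', '2020-01-20') into ['202001', '202001'], which would be an
// empty interval if it stayed open.
Range mapThroughMonotonicFunction(Range range, const std::function<long(long)> & func)
{
    if (range.left)
    {
        range.left = func(*range.left);
        range.left_included = true;
    }
    if (range.right)
    {
        range.right = func(*range.right);
        range.right_included = true;
    }
    return range;
}

int main()
{
    // Days since epoch mapped to a month index: monotonic, not injective.
    auto to_month = [](long day) { return day / 30; };

    Range key_range{3, 20, false, false};
    Range mapped = mapThroughMonotonicFunction(key_range, to_month);

    std::cout << '[' << *mapped.left << ", " << *mapped.right << "]\n";   // [0, 0], non-empty only because both ends are closed
}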
+ if (source.hasBrokenProjection(projection.name)) + { + materialized_projections.insert(projection.name); + continue; + } + if (need_rebuild_projections) { materialized_projections.insert(projection.name); @@ -1274,7 +1286,7 @@ void MutationsInterpreter::Source::read( for (size_t i = 0; i < num_filters; ++i) nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); - filter = ActionsDAG::buildFilterActionsDAG(nodes, {}, context_); + filter = ActionsDAG::buildFilterActionsDAG(nodes); } VirtualColumns virtual_columns(std::move(required_columns), part); @@ -1284,7 +1296,7 @@ void MutationsInterpreter::Source::read( plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, - &Poco::Logger::get("MutationsInterpreter")); + getLogger("MutationsInterpreter")); virtual_columns.addVirtuals(plan); } diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index eda94190185d..4c35ec34b584 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,6 +126,7 @@ class MutationsInterpreter bool materializeTTLRecalculateOnly() const; bool hasSecondaryIndex(const String & name) const; bool hasProjection(const String & name) const; + bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; void read( diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 9819b8e3ec4e..a7f20a067854 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -271,7 +271,7 @@ bool PartLog::addNewParts( } catch (...) { - tryLogCurrentException(part_log ? part_log->log : &Poco::Logger::get("PartLog"), __PRETTY_FUNCTION__); + tryLogCurrentException(part_log ? part_log->log : getLogger("PartLog"), __PRETTY_FUNCTION__); return false; } diff --git a/src/Interpreters/PasteJoin.h b/src/Interpreters/PasteJoin.h index df7bb2f280c1..f87a70215517 100644 --- a/src/Interpreters/PasteJoin.h +++ b/src/Interpreters/PasteJoin.h @@ -24,7 +24,7 @@ class PasteJoin : public IJoin : table_join(table_join_) , right_sample_block(right_sample_block_) { - LOG_TRACE(&Poco::Logger::get("PasteJoin"), "Will use paste join"); + LOG_TRACE(getLogger("PasteJoin"), "Will use paste join"); } std::string getName() const override { return "PasteJoin"; } diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index cc3db726f011..76f75cde1dc7 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -98,12 +98,8 @@ FutureSetFromSubquery::FutureSetFromSubquery( std::unique_ptr source_, StoragePtr external_table_, std::shared_ptr external_table_set_, - const Settings & settings, - bool in_subquery_) - : external_table(std::move(external_table_)) - , external_table_set(std::move(external_table_set_)) - , source(std::move(source_)) - , in_subquery(in_subquery_) + const Settings & settings) + : external_table(std::move(external_table_)), external_table_set(std::move(external_table_set_)), source(std::move(source_)) { set_and_key = std::make_shared(); set_and_key->key = std::move(key); @@ -281,16 +277,10 @@ FutureSetFromSubqueryPtr PreparedSets::addFromSubquery( std::unique_ptr source, StoragePtr external_table, FutureSetFromSubqueryPtr external_table_set, - const Settings & settings, - bool in_subquery) + const Settings & settings) { auto from_subquery = std::make_shared( - toString(key, {}), - std::move(source), - std::move(external_table), - std::move(external_table_set), - settings, - in_subquery); + 
toString(key, {}), std::move(source), std::move(external_table), std::move(external_table_set), settings); auto [it, inserted] = sets_from_subqueries.emplace(key, from_subquery); @@ -340,15 +330,6 @@ std::shared_ptr PreparedSets::findSubquery(const Hash & k return it->second; } -void PreparedSets::markAsINSubquery(const Hash & key) -{ - auto it = sets_from_subqueries.find(key); - if (it == sets_from_subqueries.end()) - return; - - it->second->markAsINSubquery(); -} - std::shared_ptr PreparedSets::findStorage(const Hash & key) const { auto it = sets_from_storage.find(key); diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 7178cff73b91..3419d3b6839c 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -101,8 +101,7 @@ class FutureSetFromSubquery final : public FutureSet std::unique_ptr source_, StoragePtr external_table_, std::shared_ptr external_table_set_, - const Settings & settings, - bool in_subquery_); + const Settings & settings); FutureSetFromSubquery( String key, @@ -118,8 +117,6 @@ class FutureSetFromSubquery final : public FutureSet QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); } void setQueryPlan(std::unique_ptr source_); - void markAsINSubquery() { in_subquery = true; } - bool isINSubquery() const { return in_subquery; } private: SetAndKeyPtr set_and_key; @@ -128,11 +125,6 @@ class FutureSetFromSubquery final : public FutureSet std::unique_ptr source; QueryTreeNodePtr query_tree; - bool in_subquery = false; // subquery used in IN operator - // the flag can be removed after enabling new analyzer and removing interpreter - // or after enabling support IN operator with subqueries in parallel replicas - // Note: it's necessary with interpreter since prepared sets used also for GLOBAL JOINs, - // with new analyzer it's not a case }; using FutureSetFromSubqueryPtr = std::shared_ptr; @@ -160,8 +152,7 @@ class PreparedSets std::unique_ptr source, StoragePtr external_table, FutureSetFromSubqueryPtr external_table_set, - const Settings & settings, - bool in_subquery = false); + const Settings & settings); FutureSetFromSubqueryPtr addFromSubquery( const Hash & key, @@ -171,7 +162,6 @@ class PreparedSets FutureSetFromTuplePtr findTuple(const Hash & key, const DataTypes & types) const; FutureSetFromStoragePtr findStorage(const Hash & key) const; FutureSetFromSubqueryPtr findSubquery(const Hash & key) const; - void markAsINSubquery(const Hash & key); using Subqueries = std::vector; Subqueries getSubqueries() const; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 2b84b7655b37..5b3b87114ae8 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -86,7 +86,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (!is_unlimited_query && max_size && processes.size() >= max_size) { if (queue_max_wait_ms) - LOG_WARNING(&Poco::Logger::get("ProcessList"), "Too many simultaneous queries, will wait {} ms.", queue_max_wait_ms); + LOG_WARNING(getLogger("ProcessList"), "Too many simultaneous queries, will wait {} ms.", queue_max_wait_ms); if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), [&]{ return processes.size() < max_size; })) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries. 
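The ProcessList::insert context shown here waits up to queue_max_wait_ms for a free slot before rejecting the query with TOO_MANY_SIMULTANEOUS_QUERIES. A compact standalone sketch of that pattern (bounded wait on a condition variable with a predicate, then reject), using generic names rather than the real ProcessList types:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <stdexcept>

class AdmissionGate
{
public:
    AdmissionGate(size_t max_running_, std::chrono::milliseconds max_wait_)
        : max_running(max_running_), max_wait(max_wait_) {}

    // Blocks for at most max_wait until a slot frees up; throws if none does.
    void enter()
    {
        std::unique_lock lock(mutex);
        if (!have_space.wait_for(lock, max_wait, [&] { return running < max_running; }))
            throw std::runtime_error("Too many simultaneous queries");
        ++running;
    }

    void leave()
    {
        {
            std::lock_guard lock(mutex);
            --running;
        }
        have_space.notify_one();   // wake one waiter for the slot that was freed
    }

private:
    std::mutex mutex;
    std::condition_variable have_space;
    size_t running = 0;
    const size_t max_running;
    const std::chrono::milliseconds max_wait;
};

int main()
{
    AdmissionGate gate(/*max_running=*/1, std::chrono::milliseconds(50));
    gate.enter();                       // first query takes the only slot
    try
    {
        gate.enter();                   // second query times out while waiting
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
    gate.leave();
}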
Maximum: {}", max_size); } @@ -295,7 +295,7 @@ ProcessListEntry::~ProcessListEntry() auto user_process_list_it = parent.user_to_queries.find(user); if (user_process_list_it == parent.user_to_queries.end()) { - LOG_ERROR(&Poco::Logger::get("ProcessList"), "Logical error: cannot find user in ProcessList"); + LOG_ERROR(getLogger("ProcessList"), "Logical error: cannot find user in ProcessList"); std::terminate(); } @@ -323,7 +323,7 @@ ProcessListEntry::~ProcessListEntry() if (!found) { - LOG_ERROR(&Poco::Logger::get("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); + LOG_ERROR(getLogger("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); std::terminate(); } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 506fa13b7ba2..0717abd47821 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -122,6 +122,21 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) ast = transformToSubcolumn(name_in_storage, subcolumn_name); ast->setAlias(alias); } + else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) + { + const auto * literal = arguments[1]->as(); + if (!literal) + return; + + String subcolumn_name; + auto value_type = literal->value.getType(); + if (value_type != Field::Types::String) + return; + + subcolumn_name = literal->value.get(); + ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast->setAlias(alias); + } else { auto it = binary_function_to_subcolumn.find(function.name); diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index d2f9fe8b325e..df97a09f6863 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -265,7 +265,7 @@ class NamedSessionsStorage ThreadFromGlobalPool thread; bool quit = false; - Poco::Logger * log = &Poco::Logger::get("NamedSessionsStorage"); + LoggerPtr log = getLogger("NamedSessionsStorage"); }; @@ -282,7 +282,7 @@ void Session::shutdownNamedSessions() Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure, const std::string & certificate) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), - log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) + log(getLogger(String{magic_enum::enum_name(interface_)} + "-Session")) { prepared_client_info.emplace(); prepared_client_info->interface = interface_; @@ -349,10 +349,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So try { - auto auth_result = global_context->getAccessControl().authenticate(credentials_, address.host()); + auto auth_result = global_context->getAccessControl().authenticate(credentials_, address.host(), getClientInfo().getLastForwardedFor()); user_id = auth_result.user_id; settings_from_auth_server = auth_result.settings; - LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 75e1414b8cba..cde000d89fa8 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -123,7 +123,7 @@ class Session /// to set when creating a session context SettingsChanges settings_from_auth_server; - Poco::Logger * log = nullptr; + LoggerPtr log = 
nullptr; }; } diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 7136b090c42f..5a65d40d89f2 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -33,7 +33,7 @@ class Set /// store all set elements in explicit form. /// This is needed for subsequent use for index. Set(const SizeLimits & limits_, size_t max_elements_to_fill_, bool transform_null_in_) - : log(&Poco::Logger::get("Set")), + : log(getLogger("Set")), limits(limits_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_), cast_cache(std::make_unique()) {} @@ -114,7 +114,7 @@ class Set /// Types for set_elements. DataTypes set_elements_types; - Poco::Logger * log; + LoggerPtr log; /// Limitations on the maximum size of the set SizeLimits limits; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 954368db3127..2fb782befa16 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -125,13 +125,13 @@ std::shared_ptr createSystemLog( { if (!config.has(config_prefix)) { - LOG_DEBUG(&Poco::Logger::get("SystemLog"), + LOG_DEBUG(getLogger("SystemLog"), "Not creating {}.{} since corresponding section '{}' is missing from config", default_database_name, default_table_name, config_prefix); return {}; } - LOG_DEBUG(&Poco::Logger::get("SystemLog"), + LOG_DEBUG(getLogger("SystemLog"), "Creating {}.{} from {}", default_database_name, default_table_name, config_prefix); SystemLogSettings log_settings; @@ -143,7 +143,7 @@ std::shared_ptr createSystemLog( { /// System tables must be loaded before other tables, but loading order is undefined for all databases except `system` LOG_ERROR( - &Poco::Logger::get("SystemLog"), + getLogger("SystemLog"), "Custom database name for a system table specified in config." " Table `{}` will be created in `system` database instead of `{}`", log_settings.queue_settings.table, @@ -395,7 +395,7 @@ SystemLog::SystemLog( std::shared_ptr> queue_) : Base(settings_.queue_settings, queue_) , WithContext(context_) - , log(&Poco::Logger::get("SystemLog (" + settings_.queue_settings.database + "." + settings_.queue_settings.table + ")")) + , log(getLogger("SystemLog (" + settings_.queue_settings.database + "." + settings_.queue_settings.table + ")")) , table_id(settings_.queue_settings.database, settings_.queue_settings.table) , storage_def(settings_.engine) , create_query(serializeAST(*getCreateTableQuery())) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 8c357e43be98..c296b91e24a7 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -131,7 +131,7 @@ class SystemLog : public SystemLogBase, private boost::noncopyable, void stopFlushThread() override; protected: - Poco::Logger * log; + LoggerPtr log; using ISystemLog::is_shutdown; using ISystemLog::saving_thread; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index efe3fd7f7403..e9fa224df111 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -683,7 +683,7 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig if (!left_type_map.empty() || !right_type_map.empty()) { LOG_TRACE( - &Poco::Logger::get("TableJoin"), + getLogger("TableJoin"), "Infer supertype for joined columns. 
Left: [{}], Right: [{}]", formatTypeMap(left_type_map, left_types), formatTypeMap(right_type_map, right_types)); @@ -876,7 +876,7 @@ static void addJoinConditionWithAnd(ASTPtr & current_cond, const ASTPtr & new_co void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left) { auto & cond_ast = is_left ? clauses.back().on_filter_condition_left : clauses.back().on_filter_condition_right; - LOG_TRACE(&Poco::Logger::get("TableJoin"), "Adding join condition for {} table: {} -> {}", + LOG_TRACE(getLogger("TableJoin"), "Adding join condition for {} table: {} -> {}", (is_left ? "left" : "right"), ast ? queryToString(ast) : "NULL", cond_ast ? queryToString(cond_ast) : "NULL"); addJoinConditionWithAnd(cond_ast, ast); } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 7bf5e86eb415..a48e7d8e040a 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -222,7 +223,7 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); + LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); } explicit InputReader(const String & path, size_t size = 0) @@ -230,7 +231,7 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading from {}", path); + LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); } Block read() @@ -249,7 +250,7 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const , file(std::move(file_)) , out_writer(std::make_unique(std::make_unique(file->getAbsolutePath()), header)) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getAbsolutePath()); + LOG_TEST(getLogger("TemporaryFileStream"), "Writing to temporary file {}", file->getAbsolutePath()); } TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_) @@ -261,7 +262,7 @@ TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream can be created only from single segment"); auto out_buf = std::make_unique(&segment_holder->front()); - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", out_buf->getFileName()); + LOG_TEST(getLogger("TemporaryFileStream"), "Writing to temporary file {}", out_buf->getFileName()); out_writer = std::make_unique(std::move(out_buf), header); } diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 0c35cd63d13b..e57d9130369a 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -7,7 +7,6 @@ #include #include #include -#include namespace CurrentMetrics @@ -27,6 +26,8 @@ using TemporaryDataOnDiskPtr = std::unique_ptr; class TemporaryFileStream; using TemporaryFileStreamPtr = std::unique_ptr; +class FileCache; + /* * Used to account amount of temporary data written to disk. * If limit is set, throws exception if limit is exceeded. 
diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 30fbe26d0385..1fe11be60906 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -65,7 +65,7 @@ TraceCollector::~TraceCollector() if (thread.joinable()) thread.join(); else - LOG_ERROR(&Poco::Logger::get("TraceCollector"), "TraceCollector thread is malformed and cannot be joined"); + LOG_ERROR(getLogger("TraceCollector"), "TraceCollector thread is malformed and cannot be joined"); } diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index a86f6110a840..96c69536c9a5 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int UNKNOWN_STATUS_OF_TRANSACTION; } -static void tryWriteEventToSystemLog(Poco::Logger * log, ContextPtr context, +static void tryWriteEventToSystemLog(LoggerPtr log, ContextPtr context, TransactionsInfoLogElement::Type type, const TransactionID & tid, CSN csn = Tx::UnknownCSN) try { @@ -44,7 +44,7 @@ catch (...) TransactionLog::TransactionLog() : global_context(Context::getGlobalContextInstance()) - , log(&Poco::Logger::get("TransactionLog")) + , log(getLogger("TransactionLog")) , zookeeper_path(global_context->getConfigRef().getString("transaction_log.zookeeper_path", "/clickhouse/txn")) , zookeeper_path_log(zookeeper_path + "/log") , fault_probability_before_commit(global_context->getConfigRef().getDouble("transaction_log.fault_probability_before_commit", 0)) diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index 6e8777d85198..58847553dfda 100644 --- a/src/Interpreters/TransactionLog.h +++ b/src/Interpreters/TransactionLog.h @@ -154,7 +154,7 @@ class TransactionLog final : public SingletonHelper CSN getCSNImpl(const TIDHash & tid_hash, const std::atomic * failback_with_strict_load_csn = nullptr) const; const ContextPtr global_context; - Poco::Logger * const log; + LoggerPtr const log; /// The newest snapshot available for reading std::atomic latest_snapshot; diff --git a/src/Interpreters/TransactionVersionMetadata.cpp b/src/Interpreters/TransactionVersionMetadata.cpp index 01735a798b91..7bedca5d5c75 100644 --- a/src/Interpreters/TransactionVersionMetadata.cpp +++ b/src/Interpreters/TransactionVersionMetadata.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes VersionMetadata::VersionMetadata() { /// It would be better to make it static, but static loggers do not work for some reason (initialization order?) 
- log = &Poco::Logger::get("VersionMetadata"); + log = getLogger("VersionMetadata"); } /// It can be used for introspection purposes only diff --git a/src/Interpreters/TransactionVersionMetadata.h b/src/Interpreters/TransactionVersionMetadata.h index 18ac445cc29d..4309975d195b 100644 --- a/src/Interpreters/TransactionVersionMetadata.h +++ b/src/Interpreters/TransactionVersionMetadata.h @@ -72,7 +72,7 @@ struct VersionMetadata String toString(bool one_line = true) const; - Poco::Logger * log; + LoggerPtr log; VersionMetadata(); }; diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index e893be814ca9..4a413439671e 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -92,7 +92,7 @@ void TransactionsInfoLogElement::appendToBlock(MutableColumns & columns) const } -void tryWriteEventToSystemLog(Poco::Logger * log, +void tryWriteEventToSystemLog(LoggerPtr log, TransactionsInfoLogElement::Type type, const TransactionID & tid, const TransactionInfoContext & context) try diff --git a/src/Interpreters/TransactionsInfoLog.h b/src/Interpreters/TransactionsInfoLog.h index 0a607704e741..009d1b67474a 100644 --- a/src/Interpreters/TransactionsInfoLog.h +++ b/src/Interpreters/TransactionsInfoLog.h @@ -54,7 +54,7 @@ class TransactionsInfoLog : public SystemLog }; -void tryWriteEventToSystemLog(Poco::Logger * log, TransactionsInfoLogElement::Type type, +void tryWriteEventToSystemLog(LoggerPtr log, TransactionsInfoLogElement::Type type, const TransactionID & tid, const TransactionInfoContext & context); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ed3ff2f1e6e..ecd021328e78 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -642,13 +642,13 @@ bool tryJoinOnConst(TableJoin & analyzed_join, const ASTPtr & on_expression, Con if (eval_const_res.value()) { /// JOIN ON 1 == 1 - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); + LOG_DEBUG(getLogger("TreeRewriter"), "Join on constant executed as cross join"); analyzed_join.resetToCross(); } else { /// JOIN ON 1 != 1 - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); + LOG_DEBUG(getLogger("TreeRewriter"), "Join on constant executed as empty join"); analyzed_join.resetKeys(); } return true; diff --git a/src/Interpreters/applyFunction.cpp b/src/Interpreters/applyFunction.cpp new file mode 100644 index 000000000000..a53f14f0381f --- /dev/null +++ b/src/Interpreters/applyFunction.cpp @@ -0,0 +1,43 @@ +#include + +#include +#include + +namespace DB +{ + +static Field applyFunctionForField(const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value) +{ + ColumnsWithTypeAndName columns{ + {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, + }; + + auto col = func->execute(columns, func->getResultType(), 1); + return (*col)[0]; +} + +FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) +{ + /// Fallback for fields without block reference. 
+ if (field.isExplicit()) + return applyFunctionForField(func, current_type, field); + + String result_name = "_" + func->getName() + "_" + toString(field.column_idx); + const auto & columns = field.columns; + size_t result_idx = columns->size(); + + for (size_t i = 0; i < result_idx; ++i) + if ((*columns)[i].name == result_name) + result_idx = i; + + if (result_idx == columns->size()) + { + ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; + field.columns->emplace_back(ColumnWithTypeAndName{nullptr, func->getResultType(), result_name}); + (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); + } + + return {field.columns, field.row_idx, result_idx}; +} + +} diff --git a/src/Interpreters/applyFunction.h b/src/Interpreters/applyFunction.h new file mode 100644 index 000000000000..9b8ae43a53ca --- /dev/null +++ b/src/Interpreters/applyFunction.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ +struct FieldRef; + +class IFunctionBase; +class IDataType; + +using DataTypePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; + +FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field); +} diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 6b6054fdae3e..6122ec6180a3 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -221,7 +221,7 @@ class DDLQueryStatusSource final : public ISource String node_path; ContextPtr context; Stopwatch watch; - Poco::Logger * log; + LoggerPtr log; NameSet waiting_hosts; /// hosts from task host list NameSet finished_hosts; /// finished hosts from host list @@ -309,7 +309,7 @@ DDLQueryStatusSource::DDLQueryStatusSource( , node_path(zk_node_path) , context(context_) , watch(CLOCK_MONOTONIC_COARSE) - , log(&Poco::Logger::get("DDLQueryStatusSource")) + , log(getLogger("DDLQueryStatusSource")) { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE @@ -382,7 +382,7 @@ Chunk DDLQueryStatusSource::generateChunkWithUnfinishedHosts() const return Chunk(std::move(columns), unfinished_hosts.size()); } -static NameSet getOfflineHosts(const String & node_path, const NameSet & hosts_to_wait, const ZooKeeperPtr & zookeeper, Poco::Logger * log) +static NameSet getOfflineHosts(const String & node_path, const NameSet & hosts_to_wait, const ZooKeeperPtr & zookeeper, LoggerPtr log) { fs::path replicas_path; if (node_path.ends_with('/')) @@ -470,7 +470,7 @@ Chunk DDLQueryStatusSource::generate() { auto retries_ctl = ZooKeeperRetriesControl( - "executeDDLQueryOnCluster", &Poco::Logger::get("DDLQueryStatusSource"), getRetriesInfo(), context->getProcessListElement()); + "executeDDLQueryOnCluster", getLogger("DDLQueryStatusSource"), getRetriesInfo(), context->getProcessListElement()); retries_ctl.retryLoop([&]() { auto zookeeper = context->getZooKeeper(); @@ -540,7 +540,7 @@ Chunk DDLQueryStatusSource::generate() auto retries_ctl = ZooKeeperRetriesControl( "executeDDLQueryOnCluster", - &Poco::Logger::get("DDLQueryStatusSource"), + getLogger("DDLQueryStatusSource"), getRetriesInfo(), context->getProcessListElement()); retries_ctl.retryLoop([&]() diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 7daf9babf9f4..d33655538757 
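The new applyFunction helper added above evaluates a function over the column referenced by a FieldRef and memoizes the result column inside the shared block under a derived name, so repeated endpoint lookups over the same column reuse one computation. A simplified standalone sketch of that memoization, using std::vector<int> columns and std::function instead of ClickHouse's IColumn and FunctionBase:

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// A "block" of named columns plus a reference to one cell in it, loosely
// mirroring FieldRef = (columns, row_idx, column_idx).
struct Column
{
    std::string name;
    std::vector<int> data;
};
using Block = std::vector<Column>;

struct CellRef
{
    std::shared_ptr<Block> columns;
    size_t row_idx = 0;
    size_t column_idx = 0;
};

// Apply `func` to the referenced column, caching the result column in the same
// block under a name derived from the function and source column index, so a
// second call for another row of that column reuses the computed values.
CellRef applyFunctionCached(const std::string & func_name, const std::function<int(int)> & func, const CellRef & cell)
{
    const std::string result_name = "_" + func_name + "_" + std::to_string(cell.column_idx);
    Block & block = *cell.columns;

    size_t result_idx = block.size();
    for (size_t i = 0; i < block.size(); ++i)
        if (block[i].name == result_name)
            result_idx = i;

    if (result_idx == block.size())   // not computed yet for this block
    {
        Column result{result_name, {}};
        for (int value : block[cell.column_idx].data)
            result.data.push_back(func(value));
        block.push_back(std::move(result));
    }

    return {cell.columns, cell.row_idx, result_idx};
}

int main()
{
    auto block = std::make_shared<Block>(Block{{"key", {1, 2, 3}}});

    auto doubled_row0 = applyFunctionCached("double", [](int x) { return 2 * x; }, {block, 0, 0});
    auto doubled_row2 = applyFunctionCached("double", [](int x) { return 2 * x; }, {block, 2, 0});   // cache hit

    const Block & b = *block;
    std::cout << b[doubled_row0.column_idx].data[doubled_row0.row_idx] << ' '
              << b[doubled_row2.column_idx].data[doubled_row2.row_idx] << '\n';   // 2 6
    std::cout << "columns in block: " << b.size() << '\n';                        // 2: key plus one cached result
}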
100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace zkutil diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 533d58aaa8f2..1787f627c2e5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -102,6 +102,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int QUERY_WAS_CANCELLED; extern const int INCORRECT_DATA; + extern const int SUPPORT_IS_DISABLED; } @@ -119,7 +120,7 @@ static void logQuery(const String & query, ContextPtr context, bool internal, Qu { if (internal) { - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {} (stage: {})", toOneLineQuery(query), QueryProcessingStage::toString(stage)); + LOG_DEBUG(getLogger("executeQuery"), "(internal) {} (stage: {})", toOneLineQuery(query), QueryProcessingStage::toString(stage)); } else { @@ -142,7 +143,7 @@ static void logQuery(const String & query, ContextPtr context, bool internal, Qu if (auto txn = context->getCurrentTransaction()) transaction_info = fmt::format(" (TID: {}, TIDH: {})", txn->tid, txn->tid.getHash()); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {} (stage: {})", + LOG_DEBUG(getLogger("executeQuery"), "(from {}{}{}){}{} {} (stage: {})", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), @@ -153,7 +154,7 @@ static void logQuery(const String & query, ContextPtr context, bool internal, Qu if (client_info.client_trace_context.trace_id != UUID()) { - LOG_TRACE(&Poco::Logger::get("executeQuery"), + LOG_TRACE(getLogger("executeQuery"), "OpenTelemetry traceparent '{}'", client_info.client_trace_context.composeTraceparentHeader()); } @@ -207,9 +208,9 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er elem.stack_trace); if (log_error) - LOG_ERROR(&Poco::Logger::get("executeQuery"), message); + LOG_ERROR(getLogger("executeQuery"), message); else - LOG_INFO(&Poco::Logger::get("executeQuery"), message); + LOG_INFO(getLogger("executeQuery"), message); } static void @@ -396,7 +397,7 @@ void logQueryFinish( double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; LOG_DEBUG( - &Poco::Logger::get("executeQuery"), + getLogger("executeQuery"), "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", elem.read_rows, ReadableSize(elem.read_bytes), @@ -660,7 +661,7 @@ static std::tuple executeQueryImpl( /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? nullptr : std::make_shared("query"); if (query_span && query_span->trace_id != UUID{}) - LOG_TRACE(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); + LOG_TRACE(getLogger("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); auto query_start_time = std::chrono::system_clock::now(); @@ -709,10 +710,7 @@ static std::tuple executeQueryImpl( { if (settings.dialect == Dialect::kusto && !internal) { - ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); - - /// TODO: parser should fail early when max_query_size limit is reached. 
- ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Kusto dialect is disabled until these two bugs will be fixed: https://github.com/ClickHouse/ClickHouse/issues/59037 and https://github.com/ClickHouse/ClickHouse/issues/59036"); } else if (settings.dialect == Dialect::prql && !internal) { @@ -925,7 +923,7 @@ static std::tuple executeQueryImpl( bool async_insert = false; auto * queue = context->getAsynchronousInsertQueue(); - auto * logger = &Poco::Logger::get("executeQuery"); + auto logger = getLogger("executeQuery"); if (insert_query && async_insert_enabled) { @@ -1131,7 +1129,7 @@ static std::tuple executeQueryImpl( const size_t num_query_runs = query_cache->recordQueryRun(key); if (num_query_runs <= settings.query_cache_min_query_runs) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), + LOG_TRACE(getLogger("QueryCache"), "Skipped insert because the query ran {} times but the minimum required number of query runs to cache the query result is {}", num_query_runs, settings.query_cache_min_query_runs); } @@ -1387,7 +1385,7 @@ void executeQuery( catch (const DB::Exception & e) { /// Ignore this exception and report the original one - LOG_WARNING(&Poco::Logger::get("executeQuery"), getExceptionMessageAndPattern(e, true)); + LOG_WARNING(getLogger("executeQuery"), getExceptionMessageAndPattern(e, true)); } } }; diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index c7a1cab8bac1..fd8f5b154c43 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -237,17 +237,36 @@ static std::unordered_map collectOffsetsColumns( { auto & offsets_column = offsets_columns[stream_name]; if (!offsets_column) + { offsets_column = current_offsets_column; - - #ifndef NDEBUG - const auto & offsets_data = assert_cast(*offsets_column).getData(); - const auto & current_offsets_data = assert_cast(*current_offsets_column).getData(); - - if (offsets_data != current_offsets_data) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Found non-equal columns with offsets (sizes: {} and {}) for stream {}", - offsets_data.size(), current_offsets_data.size(), stream_name); - #endif + } + else + { + /// If we are inside Variant element, it may happen that + /// offsets are different, because when we read Variant + /// element as a subcolumn, we expand this column according + /// to the discriminators, so, offsets column can be changed. + /// In this case we should select the original offsets column + /// of this stream, which is the smallest one. + bool inside_variant_element = false; + for (const auto & elem : subpath) + inside_variant_element |= elem.type == ISerialization::Substream::VariantElement; + + if (offsets_column->size() != current_offsets_column->size() && inside_variant_element) + offsets_column = offsets_column->size() < current_offsets_column->size() ? 
offsets_column : current_offsets_column; +#ifndef NDEBUG + else + { + const auto & offsets_data = assert_cast(*offsets_column).getData(); + const auto & current_offsets_data = assert_cast(*current_offsets_column).getData(); + + if (offsets_data != current_offsets_data) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Found non-equal columns with offsets (sizes: {} and {}) for stream {}", + offsets_data.size(), current_offsets_data.size(), stream_name); + } +#endif + } } }, available_column->type, res_columns[i]); } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index a2d2c56c710f..0b7a6dc92b07 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -156,7 +156,7 @@ static void checkIncompleteOrdinaryToAtomicConversion(ContextPtr context, const LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_database_name, bool async_load_databases) { - Poco::Logger * log = &Poco::Logger::get("loadMetadata"); + LoggerPtr log = getLogger("loadMetadata"); String path = context->getPath() + "metadata"; @@ -290,7 +290,7 @@ static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & dat } } -static void convertOrdinaryDatabaseToAtomic(Poco::Logger * log, ContextMutablePtr context, const DatabasePtr & database, +static void convertOrdinaryDatabaseToAtomic(LoggerPtr log, ContextMutablePtr context, const DatabasePtr & database, const String & name, const String tmp_name) { /// It's kind of C++ script that creates temporary database with Atomic engine, @@ -369,7 +369,7 @@ static void convertOrdinaryDatabaseToAtomic(Poco::Logger * log, ContextMutablePt /// Can be called only during server startup when there are no queries from users. static void maybeConvertOrdinaryDatabaseToAtomic(ContextMutablePtr context, const String & database_name, LoadTaskPtrs * startup_tasks = nullptr) { - Poco::Logger * log = &Poco::Logger::get("loadMetadata"); + LoggerPtr log = getLogger("loadMetadata"); auto database = DatabaseCatalog::instance().getDatabase(database_name); if (!database) @@ -482,7 +482,7 @@ void convertDatabasesEnginesIfNeed(const LoadTaskPtrs & load_metadata, ContextMu if (!fs::exists(convert_flag_path)) return; - LOG_INFO(&Poco::Logger::get("loadMetadata"), "Found convert_ordinary_to_atomic file in flags directory, " + LOG_INFO(getLogger("loadMetadata"), "Found convert_ordinary_to_atomic file in flags directory, " "will try to convert all Ordinary databases to Atomic"); // Wait for all table to be loaded and started @@ -492,7 +492,7 @@ void convertDatabasesEnginesIfNeed(const LoadTaskPtrs & load_metadata, ContextMu if (name != DatabaseCatalog::SYSTEM_DATABASE) maybeConvertOrdinaryDatabaseToAtomic(context, name); - LOG_INFO(&Poco::Logger::get("loadMetadata"), "Conversion finished, removing convert_ordinary_to_atomic flag"); + LOG_INFO(getLogger("loadMetadata"), "Conversion finished, removing convert_ordinary_to_atomic flag"); fs::remove(convert_flag_path); } diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 87f76f7f824a..551a883d093b 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -60,6 +60,17 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); } } + + if (!settings.allow_experimental_variant_type) + { + if (isVariant(type)) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + 
"Cannot create column with type '{}' because experimental Variant type is not allowed. " + "Set setting allow_experimental_variant_type = 1 in order to allow it", type->getName()); + } + } } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index ef1bbe5498eb..1fbbfa4b12f7 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -18,12 +18,14 @@ struct DataTypeValidationSettings : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) , allow_experimental_object_type(settings.allow_experimental_object_type) , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) + , allow_experimental_variant_type(settings.allow_experimental_variant_type) { } bool allow_suspicious_low_cardinality_types = true; bool allow_experimental_object_type = true; bool allow_suspicious_fixed_string_types = true; + bool allow_experimental_variant_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index f8df03ed8306..44167fe72424 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -52,7 +52,7 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) { - LOG_DEBUG(&Poco::Logger::get("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); + LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); } diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 258853e8162a..1d17585cc96c 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -159,7 +159,6 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log if (config.getBool("logger.use_syslog", false)) { - //const std::string & cmd_name = commandName(); auto syslog_level = Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string)); if (syslog_level > max_log_level) { @@ -228,22 +227,24 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->open(); logger.close(); - logger.setChannel(split); - // Global logging level (it can be overridden for specific loggers). + logger.setChannel(split); logger.setLevel(max_log_level); - // Set level to all already created loggers - std::vector names; - //logger_root = Logger::root(); - logger.root().names(names); - for (const auto & name : names) - logger.root().get(name).setLevel(max_log_level); - - // Attach to the root logger. + // Global logging level and channel (it can be overridden for specific loggers). 
logger.root().setLevel(max_log_level); logger.root().setChannel(logger.getChannel()); + // Set level and channel to all already created loggers + std::vector names; + logger.names(names); + + for (const auto & name : names) + { + logger.get(name).setLevel(max_log_level); + logger.get(name).setChannel(split); + } + // Explicitly specified log levels for specific loggers. { Poco::Util::AbstractConfiguration::Keys loggers_level; diff --git a/src/Parsers/ASTBackupQuery.cpp b/src/Parsers/ASTBackupQuery.cpp index 2c26e7236872..bdb78eaf9714 100644 --- a/src/Parsers/ASTBackupQuery.cpp +++ b/src/Parsers/ASTBackupQuery.cpp @@ -261,23 +261,24 @@ ASTPtr ASTBackupQuery::clone() const if (settings) res->settings = settings->clone(); + cloneOutputOptions(*res); + return res; } -void ASTBackupQuery::formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const +void ASTBackupQuery::formatQueryImpl(const FormatSettings & fs, FormatState &, FormatStateStacked) const { - format.ostr << (format.hilite ? hilite_keyword : "") << ((kind == Kind::BACKUP) ? "BACKUP " : "RESTORE ") - << (format.hilite ? hilite_none : ""); + fs.ostr << (fs.hilite ? hilite_keyword : "") << ((kind == Kind::BACKUP) ? "BACKUP " : "RESTORE ") << (fs.hilite ? hilite_none : ""); - formatElements(elements, format); - formatOnCluster(format); + formatElements(elements, fs); + formatOnCluster(fs); - format.ostr << (format.hilite ? hilite_keyword : "") << ((kind == Kind::BACKUP) ? " TO " : " FROM ") << (format.hilite ? hilite_none : ""); - backup_name->format(format); + fs.ostr << (fs.hilite ? hilite_keyword : "") << ((kind == Kind::BACKUP) ? " TO " : " FROM ") << (fs.hilite ? hilite_none : ""); + backup_name->format(fs); if (settings || base_backup_name) - formatSettings(settings, base_backup_name, cluster_host_ids, format); + formatSettings(settings, base_backup_name, cluster_host_ids, fs); } ASTPtr ASTBackupQuery::getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams & params) const diff --git a/src/Parsers/ASTBackupQuery.h b/src/Parsers/ASTBackupQuery.h index 0201c2b14f98..a56cdebc7b33 100644 --- a/src/Parsers/ASTBackupQuery.h +++ b/src/Parsers/ASTBackupQuery.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include @@ -40,7 +40,7 @@ class ASTFunction; * For the BACKUP command this clause allows to set the name which an object will have inside the backup. * And for the RESTORE command this clause allows to set the name which an object will have after RESTORE has finished. 
*/ -class ASTBackupQuery : public IAST, public ASTQueryWithOnCluster +class ASTBackupQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster { public: enum Kind @@ -91,7 +91,7 @@ class ASTBackupQuery : public IAST, public ASTQueryWithOnCluster String getID(char) const override; ASTPtr clone() const override; - void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; + void formatQueryImpl(const FormatSettings & fs, FormatState &, FormatStateStacked) const override; ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override; QueryKind getQueryKind() const override; diff --git a/src/Parsers/DumpASTNode.h b/src/Parsers/DumpASTNode.h index 60fcece55904..5efc0e018f47 100644 --- a/src/Parsers/DumpASTNode.h +++ b/src/Parsers/DumpASTNode.h @@ -165,7 +165,7 @@ class DebugASTLog : log(nullptr) { if constexpr (_enable) - log = &Poco::Logger::get("AST"); + log = getLogger("AST"); } ~DebugASTLog() @@ -177,7 +177,7 @@ class DebugASTLog WriteBuffer * stream() { return (_enable ? &buf : nullptr); } private: - Poco::Logger * log; + LoggerPtr log; WriteBufferFromOwnString buf; }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index eeb76e3bb9ea..42400a0f13b1 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -250,7 +250,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E ReadBufferFromMemory in(pos->begin, pos->size()); String s; - if (!tryReadQuotedStringInto(s, in)) + if (!tryReadQuotedString(s, in)) { expected.add(pos, "string literal"); return false; diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 3e2a6facac6c..b75f17dca721 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -116,6 +116,18 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!type_name_suffix.empty()) type_name = type_name_upper + " " + type_name_suffix; + /// skip trailing comma in types, e.g. 
Tuple(Int, String,) + if (pos->type == TokenType::Comma) + { + Expected test_expected; + auto test_pos = pos; + ++test_pos; + if (ParserToken(TokenType::ClosingRoundBracket).ignore(test_pos, test_expected)) + { // the end of the type definition was reached and there was a trailing comma + ++pos; + } + } + auto function_node = std::make_shared(); function_node->name = type_name; function_node->no_empty_args = true; @@ -133,6 +145,9 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!args_parser.parse(pos, expr_list_args, expected)) return false; + if (pos->type == TokenType::Comma) + // ignore trailing comma inside Nested structures like Tuple(Int, Tuple(Int, String),) + ++pos; if (pos->type != TokenType::ClosingRoundBracket) return false; ++pos; diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 7ed69940bedb..22ddc25019f9 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -61,7 +60,6 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserExternalDDLQuery external_ddl_p; ParserTransactionControl transaction_control_p; ParserDeleteQuery delete_p; - ParserBackupQuery backup_p; bool res = query_with_output_p.parse(pos, node, expected) || insert_p.parse(pos, node, expected) @@ -86,8 +84,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) || transaction_control_p.parse(pos, node, expected) - || delete_p.parse(pos, node, expected) - || backup_p.parse(pos, node, expected); + || delete_p.parse(pos, node, expected); return res; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index f03df6cacfe0..7a627ae5f6ae 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -65,6 +66,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowGrantsQuery show_grants_p; ParserShowPrivilegesQuery show_privileges_p; ParserExplainQuery explain_p(end, allow_settings_after_format_in_insert); + ParserBackupQuery backup_p; ASTPtr query; @@ -94,7 +96,8 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_access_p.parse(pos, query, expected) || show_access_entities_p.parse(pos, query, expected) || show_grants_p.parse(pos, query, expected) - || show_privileges_p.parse(pos, query, expected); + || show_privileges_p.parse(pos, query, expected) + || backup_p.parse(pos, query, expected); if (!parsed) return false; diff --git a/src/Parsers/queryToString.cpp b/src/Parsers/queryToString.cpp index 9721aa1f1289..4a1903393f6f 100644 --- a/src/Parsers/queryToString.cpp +++ b/src/Parsers/queryToString.cpp @@ -3,6 +3,11 @@ namespace DB { + String queryToStringNullable(const ASTPtr & query) + { + return query ? 
queryToString(query) : ""; + } + String queryToString(const ASTPtr & query) { return queryToString(*query); diff --git a/src/Parsers/queryToString.h b/src/Parsers/queryToString.h index 873de218293e..3acd560b1e28 100644 --- a/src/Parsers/queryToString.h +++ b/src/Parsers/queryToString.h @@ -6,4 +6,5 @@ namespace DB { String queryToString(const ASTPtr & query); String queryToString(const IAST & query); + String queryToStringNullable(const ASTPtr & query); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index f2def5713257..efccadcbe1a1 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -215,7 +215,7 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne if (!read_from_dummy) continue; - auto filter_actions = ActionsDAG::buildFilterActionsDAG(read_from_dummy->getFilterNodes().nodes, {}, query_context); + auto filter_actions = ActionsDAG::buildFilterActionsDAG(read_from_dummy->getFilterNodes().nodes); auto & table_expression_data = dummy_storage_to_table_expression_data.at(&read_from_dummy->getStorage()); table_expression_data->setFilterActions(std::move(filter_actions)); } @@ -1196,7 +1196,7 @@ void Planner::buildQueryPlanIfNeeded() if (query_plan.isInitialized()) return; - LOG_TRACE(&Poco::Logger::get("Planner"), "Query {} to stage {}{}", + LOG_TRACE(getLogger("Planner"), "Query {} to stage {}{}", query_tree->formatConvertedASTForErrorMessage(), QueryProcessingStage::toString(select_query_options.to_stage), select_query_options.only_analyze ? " only analyze" : ""); @@ -1355,7 +1355,7 @@ void Planner::buildPlanForQueryNode() auto & mutable_context = planner_context->getMutableQueryContext(); mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - LOG_DEBUG(&Poco::Logger::get("Planner"), "Disabling parallel replicas to execute a query with IN with subquery"); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas to execute a query with IN with subquery"); } } @@ -1382,7 +1382,7 @@ void Planner::buildPlanForQueryNode() else { LOG_DEBUG( - &Poco::Logger::get("Planner"), + getLogger("Planner"), "FINAL modifier is not supported with parallel replicas. Query will be executed without using them."); auto & mutable_context = planner_context->getMutableQueryContext(); mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); @@ -1401,7 +1401,7 @@ void Planner::buildPlanForQueryNode() else { LOG_DEBUG( - &Poco::Logger::get("Planner"), + getLogger("Planner"), "JOINs are not supported with parallel replicas. 
Query will be executed without using them."); auto & mutable_context = planner_context->getMutableQueryContext(); @@ -1422,7 +1422,7 @@ void Planner::buildPlanForQueryNode() query_plan = std::move(join_tree_query_plan.query_plan); used_row_policies = std::move(join_tree_query_plan.used_row_policies); - LOG_TRACE(&Poco::Logger::get("Planner"), "Query {} from stage {} to stage {}{}", + LOG_TRACE(getLogger("Planner"), "Query {} from stage {} to stage {}{}", query_tree->formatConvertedASTForErrorMessage(), QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(select_query_options.to_stage), diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 552f25d70358..ab25f6d24234 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -276,7 +276,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); query_context->setSetting("max_parallel_replicas", UInt64{0}); - LOG_TRACE(&Poco::Logger::get("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); + LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -478,7 +478,7 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, "(setting 'max_parallel_replcias'), but the table does not have custom_key defined for it " " or it's invalid (setting 'parallel_replicas_custom_key')"); - LOG_TRACE(&Poco::Logger::get("Planner"), "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value); + LOG_TRACE(getLogger("Planner"), "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value); auto parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( settings.parallel_replicas_count, @@ -725,7 +725,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; LOG_TRACE( - &Poco::Logger::get("Planner"), + getLogger("Planner"), "Estimated {} rows to read. 
It is enough work for {} parallel replicas", rows_to_read, number_of_replicas_to_use); @@ -735,12 +735,12 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); - LOG_DEBUG(&Poco::Logger::get("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) { planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(&Poco::Logger::get("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); } } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 9b249d21a247..94ee249106a5 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -388,8 +388,8 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName ActionsDAGPtr left_join_actions = std::make_shared(left_table_expression_columns); ActionsDAGPtr right_join_actions = std::make_shared(right_table_expression_columns); - // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions cols {} ", left_join_actions->dumpDAG()); - // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions cols {} ", right_join_actions->dumpDAG()); + // LOG_TRACE(getLogger("Planner"), "buildJoinClausesAndActions cols {} ", left_join_actions->dumpDAG()); + // LOG_TRACE(getLogger("Planner"), "buildJoinClausesAndActions cols {} ", right_join_actions->dumpDAG()); /** In ActionsDAG if input node has constant representation additional constant column is added. * That way we cannot simply check that node has INPUT type during resolution of expression join table side. @@ -411,8 +411,8 @@ JoinClausesAndActions buildJoinClausesAndActions(//const ColumnsWithTypeAndName * ON (t1.id = t2.id) AND 1 != 1 AND (t1.value >= t1.value); */ auto join_expression = join_node.getJoinExpression(); - // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->formatConvertedASTForErrorMessage()); - // LOG_TRACE(&Poco::Logger::get("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->dumpTree()); + // LOG_TRACE(getLogger("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->formatConvertedASTForErrorMessage()); + // LOG_TRACE(getLogger("Planner"), "buildJoinClausesAndActions expr {} ", join_expression->dumpTree()); auto * constant_join_expression = join_expression->as(); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index dee12dad2829..862a460f0ed2 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -83,7 +83,7 @@ class PipelineExecutor std::atomic_bool cancelled = false; std::atomic_bool cancelled_reading = false; - Poco::Logger * log = &Poco::Logger::get("PipelineExecutor"); + LoggerPtr log = getLogger("PipelineExecutor"); /// Now it's used to check if query was killed. 
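The PlannerJoinTree.cpp hunk above sizes parallel replicas from the estimated row count: rows_to_read divided by parallel_replicas_min_number_of_rows_per_replica, disabling parallel replicas when there is not enough work and reducing max_parallel_replicas otherwise. A minimal sketch of that decision follows, with simplified stand-in names; it is an illustration of the idea, not the planner code itself.

#include <cstdint>
#include <iostream>

struct ParallelReplicaSettings
{
    uint64_t max_parallel_replicas = 4;
    uint64_t parallel_replicas_min_number_of_rows_per_replica = 1000000;
};

// Returns 0 when parallel replicas should be disabled entirely.
uint64_t chooseParallelReplicas(uint64_t rows_to_read, const ParallelReplicaSettings & settings)
{
    uint64_t replicas = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;

    if (replicas <= 1)
        return 0;                               // not enough rows: run without parallel replicas
    if (replicas < settings.max_parallel_replicas)
        return replicas;                        // reduce the replica count to the useful amount
    return settings.max_parallel_replicas;      // plenty of work: keep the configured maximum
}

int main()
{
    ParallelReplicaSettings settings;
    std::cout << chooseParallelReplicas(500000, settings) << '\n';    // 0 -> disabled
    std::cout << chooseParallelReplicas(2500000, settings) << '\n';   // 2 -> reduced
    std::cout << chooseParallelReplicas(50000000, settings) << '\n';  // 4 -> configured max
}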
QueryStatusPtr process_list_element; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 5f27fa78c55d..8c317a34a9d5 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -230,7 +230,7 @@ Chunk IRowInputFormat::read() { if (num_errors && (params.allow_errors_num > 0 || params.allow_errors_ratio > 0)) { - Poco::Logger * log = &Poco::Logger::get("IRowInputFormat"); + LoggerPtr log = getLogger("IRowInputFormat"); LOG_DEBUG(log, "Skipped {} rows with errors while reading the input stream", num_errors); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 26c632b83dc5..79b7ca17a5af 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -91,7 +91,7 @@ void IIRowSchemaReader::setContext(ContextPtr & context) } else { - LOG_WARNING(&Poco::Logger::get("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error); + LOG_WARNING(getLogger("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error); } } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 5722c6600717..8dc8fa516dc0 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -984,7 +984,7 @@ class AvroConfluentRowInputFormat::SchemaRegistry try { Poco::URI url(base_url, base_url.getPath() + "/schemas/ids/" + std::to_string(id)); - LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Fetching schema id = {} from url {}", id, url.toString()); + LOG_TRACE((getLogger("AvroConfluentRowInputFormat")), "Fetching schema id = {} from url {}", id, url.toString()); /// One second for connect/send/receive. Just in case. auto timeouts = ConnectionTimeouts() @@ -1029,7 +1029,7 @@ class AvroConfluentRowInputFormat::SchemaRegistry markSessionForReuse(session); auto schema = json_body->getValue("schema"); - LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); + LOG_TRACE((getLogger("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); return avro::compileJsonSchemaFromString(schema); } catch (const Exception &) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 2c0a25243574..dd7d6c6b0242 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -392,7 +392,7 @@ bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, con if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(column, istr, format_settings, serialization); } /// Read the column normally. 
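The CSVRowInputFormat.cpp change above swaps deserializeTextCSVImpl for the more explicitly named deserializeNullAsDefaultOrNestedTextCSV, used when the text field is NULL but the destination column is not Nullable. A rough standalone illustration of that null-as-default fallback follows; UInt64Column and deserializeNullAsDefaultCSV are simplified stand-ins, not the real serialization classes.

#include <iostream>
#include <string>
#include <vector>

// Minimal stand-in for a non-Nullable numeric column.
struct UInt64Column
{
    std::vector<uint64_t> data;
    void insertDefault() { data.push_back(0); }
    void insert(uint64_t x) { data.push_back(x); }
};

// Returns true if a real (non-NULL) value was inserted, false if the default was used.
bool deserializeNullAsDefaultCSV(UInt64Column & column, const std::string & field)
{
    if (field == "\\N")          // CSV representation of NULL
    {
        column.insertDefault();  // non-Nullable destination: fall back to the type's default value
        return false;
    }
    column.insert(std::stoull(field));
    return true;
}

int main()
{
    UInt64Column col;
    deserializeNullAsDefaultCSV(col, "42");
    deserializeNullAsDefaultCSV(col, "\\N");
    for (auto v : col.data)
        std::cout << v << '\n';  // prints 42 then 0
}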
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index c4b3c8feb8cd..fe4d4e3be080 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp index 43ef25210325..b655e892d3b4 100644 --- a/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/DWARFBlockInputFormat.cpp @@ -198,7 +198,7 @@ void DWARFBlockInputFormat::initializeIfNeeded() if (elf.has_value()) return; - LOG_DEBUG(&Poco::Logger::get("DWARF"), "Opening ELF"); + LOG_DEBUG(getLogger("DWARF"), "Opening ELF"); initELF(); if (is_stopped) return; @@ -209,7 +209,7 @@ void DWARFBlockInputFormat::initializeIfNeeded() auto abbrev_section = elf->findSectionByName(".debug_abbrev"); if (!abbrev_section.has_value()) throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "No .debug_abbrev section"); - LOG_DEBUG(&Poco::Logger::get("DWARF"), ".debug_abbrev is {:.3f} MiB, .debug_info is {:.3f} MiB", abbrev_section->size() * 1. / (1 << 20), info_section->size() * 1. / (1 << 20)); + LOG_DEBUG(getLogger("DWARF"), ".debug_abbrev is {:.3f} MiB, .debug_info is {:.3f} MiB", abbrev_section->size() * 1. / (1 << 20), info_section->size() * 1. / (1 << 20)); /// (The StringRef points into Elf's mmap of the whole file, or into file_contents.) extractor.emplace(llvm::StringRef(info_section->begin(), info_section->size()), /*IsLittleEndian*/ true, /*AddressSize*/ 8); @@ -237,7 +237,7 @@ void DWARFBlockInputFormat::initializeIfNeeded() for (std::unique_ptr & unit : dwarf_context->info_section_units()) units_queue.emplace_back(unit.get()); - LOG_DEBUG(&Poco::Logger::get("DWARF"), "{} units, reading in {} threads", units_queue.size(), num_threads); + LOG_DEBUG(getLogger("DWARF"), "{} units, reading in {} threads", units_queue.size(), num_threads); pool.emplace(CurrentMetrics::DWARFReaderThreads, CurrentMetrics::DWARFReaderThreadsActive, CurrentMetrics::DWARFReaderThreadsScheduled, num_threads); for (size_t i = 0; i < num_threads; ++i) @@ -782,7 +782,7 @@ void DWARFBlockInputFormat::parseFilenameTable(UnitState & unit, uint64_t offset auto error = prologue.parse(*debug_line_extractor, &offset, /*RecoverableErrorHandler*/ [&](auto e) { if (++seen_debug_line_warnings < 10) - LOG_INFO(&Poco::Logger::get("DWARF"), "Parsing error: {}", llvm::toString(std::move(e))); + LOG_INFO(getLogger("DWARF"), "Parsing error: {}", llvm::toString(std::move(e))); }, *dwarf_context, unit.dwarf_unit); if (error) diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp index f78ce530ecb8..23faa0577158 100644 --- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 7e8b4accf4d7..9c7f095e6613 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -409,7 +409,7 @@ bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx) const auto & type = types[column_idx]; const auto & serialization = serializations[column_idx]; if (format_settings.null_as_default && 
!isNullableOrLowCardinalityNullable(type)) - return SerializationNullable::deserializeTextQuotedImpl(column, *in, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(column, *in, format_settings, serialization); serialization->deserializeTextQuoted(column, *in, format_settings); return true; diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index c2f08479730e..50a736463594 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -84,7 +84,7 @@ class ParallelFormattingOutputFormat : public IOutputFormat , pool(CurrentMetrics::ParallelFormattingOutputFormatThreads, CurrentMetrics::ParallelFormattingOutputFormatThreadsActive, CurrentMetrics::ParallelFormattingOutputFormatThreadsScheduled, params.max_threads_for_parallel_formatting) { - LOG_TEST(&Poco::Logger::get("ParallelFormattingOutputFormat"), "Parallel formatting is being used"); + LOG_TEST(getLogger("ParallelFormattingOutputFormat"), "Parallel formatting is being used"); NullWriteBuffer buf; save_totals_and_extremes_in_statistics = internal_formatter_creator(buf)->areTotalsAndExtremesUsedInFinalize(); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index ff97afa83487..c4736ceea3af 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -111,7 +111,7 @@ class ParallelParsingInputFormat : public IInputFormat // bump into reader thread on wraparound. processing_units.resize(params.max_threads + 2); - LOG_TRACE(&Poco::Logger::get("ParallelParsingInputFormat"), "Parallel parsing is used"); + LOG_TRACE(getLogger("ParallelParsingInputFormat"), "Parallel parsing is used"); } ~ParallelParsingInputFormat() override diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 432e944a2460..29bc0012dc0a 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -147,7 +147,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - read_columns[index] = SerializationNullable::deserializeTextEscapedImpl(*columns[index], *in, format_settings, serialization); + read_columns[index] = SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(*columns[index], *in, format_settings, serialization); else serialization->deserializeTextEscaped(*columns[index], *in, format_settings); } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 6f6dae334e55..85b1797dab87 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -168,7 +168,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t if (is_raw) { if (as_nullable) - return SerializationNullable::deserializeTextRawImpl(column, *buf, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextRaw(column, *buf, format_settings, 
serialization); serialization->deserializeTextRaw(column, *buf, format_settings); return true; @@ -176,7 +176,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t if (as_nullable) - return SerializationNullable::deserializeTextEscapedImpl(column, *buf, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(column, *buf, format_settings, serialization); serialization->deserializeTextEscaped(column, *buf, format_settings); return true; diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 00a270e96111..32abd532a520 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index aa193ffd36a0..8659dcd23185 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -293,7 +293,7 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) const auto & type = types[column_idx]; const auto & serialization = serializations[column_idx]; if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization); + read = SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(column, *buf, format_settings, serialization); else serialization->deserializeTextQuoted(column, *buf, format_settings); } @@ -492,7 +492,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx &found_in_cache, delimiter); - LOG_TEST(&Poco::Logger::get("ValuesBlockInputFormat"), "Will use an expression template to parse column {}: {}", + LOG_TEST(getLogger("ValuesBlockInputFormat"), "Will use an expression template to parse column {}: {}", column_idx, structure->dumpTemplate()); templates[column_idx].emplace(structure); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 478ce41f9249..2ad6a825c8fb 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 14325223602e..8948cee217ca 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -28,7 +28,7 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( bool only_positive_sign_, size_t max_block_size_rows_, size_t max_block_size_bytes_, - Poco::Logger * log_, + LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h index 28bb87cb394f..be1a3a3bf33d 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h +++ 
b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h @@ -34,7 +34,7 @@ class CollapsingSortedAlgorithm final : public IMergingAlgorithmWithSharedChunks bool only_positive_sign_, /// For select final. Skip rows with sum(sign) < 0. size_t max_block_size_rows_, size_t max_block_size_bytes_, - Poco::Logger * log_, + LoggerPtr log_, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false); @@ -64,7 +64,7 @@ class CollapsingSortedAlgorithm final : public IMergingAlgorithmWithSharedChunks PODArray current_row_sources; /// Sources of rows with the current primary key size_t count_incorrect_data = 0; /// To prevent too many error messages from writing to the log. - Poco::Logger * log; + LoggerPtr log; void reportIncorrectData(); void insertRow(RowRef & row); diff --git a/src/Processors/Merges/Algorithms/RowRef.h b/src/Processors/Merges/Algorithms/RowRef.h index 81969cd19880..ee64224d44df 100644 --- a/src/Processors/Merges/Algorithms/RowRef.h +++ b/src/Processors/Merges/Algorithms/RowRef.h @@ -86,7 +86,7 @@ class SharedChunkAllocator { if (free_chunks.size() != chunks.size()) { - LOG_ERROR(&Poco::Logger::get("SharedChunkAllocator"), "SharedChunkAllocator was destroyed before RowRef was released. StackTrace: {}", StackTrace().toString()); + LOG_ERROR(getLogger("SharedChunkAllocator"), "SharedChunkAllocator was destroyed before RowRef was released. StackTrace: {}", StackTrace().toString()); return; } @@ -103,7 +103,7 @@ class SharedChunkAllocator /// This may happen if allocator was removed before chunks. /// Log message and exit, because we don't want to throw exception in destructor. - LOG_ERROR(&Poco::Logger::get("SharedChunkAllocator"), "SharedChunkAllocator was destroyed before RowRef was released. StackTrace: {}", StackTrace().toString()); + LOG_ERROR(getLogger("SharedChunkAllocator"), "SharedChunkAllocator was destroyed before RowRef was released. 
StackTrace: {}", StackTrace().toString()); return; } diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index b0cb6bc6d624..4479ac82f66d 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -29,7 +29,7 @@ class CollapsingSortedTransform final : public IMergingTransform 0) { - LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", split_filter_column_name, kind); + LOG_DEBUG(getLogger("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", split_filter_column_name, kind); } return updated_steps; }; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 5c5171d4296d..7902b36f80ec 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -164,7 +164,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), storage_snapshot, context), queried_columns, storage.supportedPrewhereColumns(), - &Poco::Logger::get("QueryPlanOptimizePrewhere")}; + getLogger("QueryPlanOptimizePrewhere")}; auto optimize_result = where_optimizer.optimize(filter_step->getExpression(), filter_step->getFilterColumnName(), diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index c3e651154ae7..534716cc60e9 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -123,7 +123,7 @@ std::optional matchAggregateFunctions( if (it == projection_aggregate_functions.end()) { // LOG_TRACE( - // &Poco::Logger::get("optimizeUseProjections"), + // getLogger("optimizeUseProjections"), // "Cannot match agg func {} by name {}", // aggregate.column_name, aggregate.function->getName()); @@ -151,7 +151,7 @@ std::optional matchAggregateFunctions( /// not match. 
if (!candidate.function->getStateType()->equals(*aggregate.function->getStateType())) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Cannot match agg func {} vs {} by state {} vs {}", + // LOG_TRACE(getLogger("optimizeUseProjections"), "Cannot match agg func {} vs {} by state {} vs {}", // aggregate.column_name, candidate.column_name, // candidate.function->getStateType()->getName(), aggregate.function->getStateType()->getName()); continue; @@ -194,7 +194,7 @@ std::optional matchAggregateFunctions( if (mt == matches.end()) { // LOG_TRACE( - // &Poco::Logger::get("optimizeUseProjections"), + // getLogger("optimizeUseProjections"), // "Cannot match agg func {} vs {} : can't match arg {} vs {} : no node in map", // aggregate.column_name, candidate.column_name, query_name, proj_name); @@ -205,7 +205,7 @@ std::optional matchAggregateFunctions( if (node_match.node != proj_node || node_match.monotonicity) { // LOG_TRACE( - // &Poco::Logger::get("optimizeUseProjections"), + // getLogger("optimizeUseProjections"), // "Cannot match agg func {} vs {} : can't match arg {} vs {} : no match or monotonicity", // aggregate.column_name, candidate.column_name, query_name, proj_name); @@ -285,7 +285,7 @@ ActionsDAGPtr analyzeAggregateProjection( // for (const auto & [node, match] : matches) // { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Match {} {} -> {} {} (with monotonicity : {})", + // LOG_TRACE(getLogger("optimizeUseProjections"), "Match {} {} -> {} {} (with monotonicity : {})", // static_cast(node), node->result_name, // static_cast(match.node), (match.node ? match.node->result_name : ""), match.monotonicity != std::nullopt); // } @@ -379,7 +379,7 @@ ActionsDAGPtr analyzeAggregateProjection( /// Not a match and there is no matched child. 
if (frame.node->type == ActionsDAG::ActionType::INPUT) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Cannot find match for {}", frame.node->result_name); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Cannot find match for {}", frame.node->result_name); return {}; } @@ -389,7 +389,7 @@ ActionsDAGPtr analyzeAggregateProjection( } } - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Folding actions by projection"); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Folding actions by projection"); auto proj_dag = query.dag->foldActionsByProjection(new_inputs, query_key_nodes); appendAggregateFunctions(*proj_dag, aggregates, *matched_aggregates); @@ -453,7 +453,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (!can_use_minmax_projection && agg_projections.empty()) return candidates; - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Has agg projection"); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Has agg projection"); QueryDAG dag; if (!dag.build(*node.children.front())) @@ -461,22 +461,22 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( auto query_index = buildDAGIndex(*dag.dag); - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Query DAG: {}", dag.dag->dumpDAG()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Query DAG: {}", dag.dag->dumpDAG()); candidates.has_filter = dag.filter_node; if (can_use_minmax_projection) { const auto * projection = &*(metadata->minmax_count_projection); - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Try projection {}", projection->name); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Try projection {}", projection->name); auto info = getAggregatingProjectionInfo(*projection, context, metadata, key_virtual_columns); - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection DAG {}", info.before_aggregation->dumpDAG()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection DAG {}", info.before_aggregation->dumpDAG()); if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block {}", sample_block.dumpStructure()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection sample block {}", sample_block.dumpStructure()); auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock( metadata, candidate.dag->getRequiredColumnsNames(), @@ -485,7 +485,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( max_added_blocks.get(), context); - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); // minmax_count_projection cannot be used when there is no data to process, because // it will produce incorrect result during constant aggregation. 
@@ -518,12 +518,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( candidates.real.reserve(agg_projections.size()); for (const auto * projection : agg_projections) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Try projection {}", projection->name); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Try projection {}", projection->name); auto info = getAggregatingProjectionInfo(*projection, context, metadata, key_virtual_columns); - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection DAG {}", info.before_aggregation->dumpDAG()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection DAG {}", info.before_aggregation->dumpDAG()); if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; candidate.projection = projection; candidates.real.emplace_back(std::move(candidate)); @@ -650,7 +650,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & /// Add reading from projection step. if (candidates.minmax_projection) { - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Minmax proj block {}", + // LOG_TRACE(getLogger("optimizeUseProjections"), "Minmax proj block {}", // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); @@ -712,7 +712,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & }); } - // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection reading header {}", + // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection reading header {}", // projection_reading->getOutputStream().header.dumpStructure()); projection_reading->setStepDescription(best_candidate->projection->name); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 1ac759df1d18..a183f50dee59 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -223,7 +223,7 @@ bool analyzeProjectionCandidate( { const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); auto it = created_projections.find(candidate.projection->name); - if (it != created_projections.end()) + if (it != created_projections.end() && !it->second->is_broken) { projection_parts.push_back(it->second); } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index a6029d673e32..232d3118612b 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -39,14 +39,14 @@ namespace else ss << value; - LOG_DEBUG(&Poco::Logger::get("redundantDistinct"), "{}{}{}", key, separator, ss.str()); + LOG_DEBUG(getLogger("redundantDistinct"), "{}{}{}", key, separator, ss.str()); } } void logActionsDAG(const String & prefix, const ActionsDAGPtr & actions) { if constexpr (debug_logging_enabled) - LOG_DEBUG(&Poco::Logger::get("redundantDistinct"), "{} :\n{}", prefix, actions->dumpDAG()); + LOG_DEBUG(getLogger("redundantDistinct"), 
"{} :\n{}", prefix, actions->dumpDAG()); } using DistinctColumns = std::set; diff --git a/src/Processors/QueryPlan/QueryPlanVisitor.h b/src/Processors/QueryPlan/QueryPlanVisitor.h index 0f2652166491..aed1a2b22497 100644 --- a/src/Processors/QueryPlan/QueryPlanVisitor.h +++ b/src/Processors/QueryPlan/QueryPlanVisitor.h @@ -99,7 +99,7 @@ class QueryPlanVisitor { const IQueryPlanStep * current_step = node->step.get(); LOG_DEBUG( - &Poco::Logger::get("QueryPlanVisitor"), + getLogger("QueryPlanVisitor"), "{}: {}: {}", prefix, getStepId(current_step), diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index d02e387afc3d..5ed56f59fc14 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -247,7 +247,7 @@ ReadFromMergeTree::ReadFromMergeTree( size_t num_streams_, bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, - Poco::Logger * log_, + LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( @@ -274,7 +274,7 @@ ReadFromMergeTree::ReadFromMergeTree( , requested_num_streams(num_streams_) , sample_factor_column_queried(sample_factor_column_queried_) , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) - , log(log_) + , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) , is_parallel_reading_from_replicas(enable_parallel_reading) { @@ -1323,7 +1323,7 @@ static ActionsDAGPtr buildFilterDAG( } } - return ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_node_column, context); + return ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_node_column); } static void buildIndexes( @@ -1476,7 +1476,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log, + LoggerPtr log, std::optional & indexes) { auto updated_query_info_with_filter_dag = query_info; @@ -1508,7 +1508,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log, + LoggerPtr log, std::optional & indexes) { AnalysisResult result; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index aed2a270ca18..fdeaff57279f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -120,7 +120,7 @@ class ReadFromMergeTree final : public SourceStepWithFilter size_t num_streams_, bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, - Poco::Logger * log_, + LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading); @@ -168,7 +168,7 @@ class ReadFromMergeTree final : public SourceStepWithFilter const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log, + LoggerPtr log, std::optional & indexes); AnalysisResultPtr selectRangesToRead( @@ -217,7 +217,7 @@ class ReadFromMergeTree final : public SourceStepWithFilter const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log, + LoggerPtr log, std::optional & indexes); int getSortDirection() const @@ -259,7 +259,7 @@ class ReadFromMergeTree final : public 
SourceStepWithFilter /// Pre-computed value, needed to trigger sets creating for PK mutable std::optional indexes; - Poco::Logger * log; + LoggerPtr log; UInt64 selected_parts = 0; UInt64 selected_rows = 0; UInt64 selected_marks = 0; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 4bbba4cfa304..4dd799039655 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -102,7 +102,7 @@ ReadFromRemote::ReadFromRemote( ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, - Poco::Logger * log_, + LoggerPtr log_, UInt32 shard_count_, std::shared_ptr storage_limits_, const String & cluster_name_) @@ -172,7 +172,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream catch (const Exception & ex) { if (ex.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED) - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + LOG_WARNING(getLogger("ClusterProxy::SelectStreamFactory"), "Connections to remote replicas of local shard {} failed, will use stale local replica", my_shard.shard_info.shard_num); else throw; @@ -361,7 +361,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, - Poco::Logger * log_, + LoggerPtr log_, std::shared_ptr storage_limits_) : ISourceStep(DataStream{.header = std::move(header_)}) , cluster(cluster_) @@ -402,7 +402,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder size_t all_replicas_count = current_settings.max_parallel_replicas; if (all_replicas_count > cluster->getShardsInfo().size()) { - LOG_INFO(&Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), + LOG_INFO(getLogger("ReadFromParallelRemoteReplicasStep"), "The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). 
"\ "Will use the latter number to execute the query.", current_settings.max_parallel_replicas, cluster->getShardsInfo().size()); all_replicas_count = cluster->getShardsInfo().size(); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 82ef45d6bbf4..f853a12910b6 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -35,7 +35,7 @@ class ReadFromRemote final : public ISourceStep ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, - Poco::Logger * log_, + LoggerPtr log_, UInt32 shard_count_, std::shared_ptr storage_limits_, const String & cluster_name_); @@ -57,7 +57,7 @@ class ReadFromRemote final : public ISourceStep Scalars scalars; Tables external_tables; std::shared_ptr storage_limits; - Poco::Logger * log; + LoggerPtr log; UInt32 shard_count; const String cluster_name; std::optional priority_func_factory; @@ -80,7 +80,7 @@ class ReadFromParallelRemoteReplicasStep : public ISourceStep ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, - Poco::Logger * log_, + LoggerPtr log_, std::shared_ptr storage_limits_); String getName() const override { return "ReadFromRemoteParallelReplicas"; } @@ -103,7 +103,7 @@ class ReadFromParallelRemoteReplicasStep : public ISourceStep Scalars scalars; Tables external_tables; std::shared_ptr storage_limits; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 5173b18c6bfa..d8b3f4fbb8e6 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -507,7 +507,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() ActionsDAGPtr ReadFromSystemNumbersStep::buildFilterDAG() { std::unordered_map node_name_to_input_node_column; - return ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, node_name_to_input_node_column, context); + return ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, node_name_to_input_node_column); } void ReadFromSystemNumbersStep::checkLimits(size_t rows) diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h index 82d46eb74a41..3538adf1d75b 100644 --- a/src/Processors/SourceWithKeyCondition.h +++ b/src/Processors/SourceWithKeyCondition.h @@ -22,7 +22,7 @@ class SourceWithKeyCondition : public ISource for (const auto & column : keys.getColumnsWithTypeAndName()) node_name_to_input_column.insert({column.name, column}); - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column); key_condition = std::make_shared( filter_actions_dag, context, diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 81225d1cdf25..be691fd5b2e1 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -58,7 +58,7 @@ MySQLSource::MySQLSource( const Block & sample_block, const StreamSettings & settings_) : ISource(sample_block.cloneEmpty()) - , log(&Poco::Logger::get("MySQLSource")) + , log(getLogger("MySQLSource")) , connection{std::make_unique(entry, query_str)} , settings{std::make_unique(settings_)} { @@ -69,7 +69,7 @@ MySQLSource::MySQLSource( /// For descendant MySQLWithFailoverSource MySQLSource::MySQLSource(const Block &sample_block_, const StreamSettings & settings_) : 
ISource(sample_block_.cloneEmpty()) - , log(&Poco::Logger::get("MySQLSource")) + , log(getLogger("MySQLSource")) , settings(std::make_unique(settings_)) { description.init(sample_block_); diff --git a/src/Processors/Sources/MySQLSource.h b/src/Processors/Sources/MySQLSource.h index c4d447886c04..fc26ffa3645e 100644 --- a/src/Processors/Sources/MySQLSource.h +++ b/src/Processors/Sources/MySQLSource.h @@ -50,7 +50,7 @@ class MySQLSource : public ISource mysqlxx::UseQueryResult result; }; - Poco::Logger * log; + LoggerPtr log; std::unique_ptr connection; const std::unique_ptr settings; diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 1f23292c6b3f..55eaf67eb3b1 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -158,7 +158,7 @@ class TimeoutReadBufferFromFileDescriptor : public BufferWithOwnMemory( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, - num_inputs, row_sources_buf_, block_preferred_size_) - , log(&Poco::Logger::get("ColumnGathererStream")) + num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_) + , log(getLogger("ColumnGathererStream")) { if (header.columns() != 1) throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Header should have 1 column, but contains {}", diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index b5bbbff9aca1..821d04db0df1 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -5,7 +5,6 @@ #include #include - namespace Poco { class Logger; } @@ -57,7 +56,11 @@ using MergedRowSources = PODArray; class ColumnGathererStream final : public IMergingAlgorithm { public: - ColumnGathererStream(size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_ = DEFAULT_BLOCK_SIZE); + ColumnGathererStream( + size_t num_inputs, + ReadBuffer & row_sources_buf_, + size_t block_preferred_size_rows_, + size_t block_preferred_size_bytes_); const char * getName() const override { return "ColumnGathererStream"; } void initialize(Inputs inputs) override; @@ -92,13 +95,12 @@ class ColumnGathererStream final : public IMergingAlgorithm std::vector sources; ReadBuffer & row_sources_buf; - const size_t block_preferred_size; + const size_t block_preferred_size_rows; + const size_t block_preferred_size_bytes; Source * source_to_fully_copy = nullptr; ssize_t next_required_source = -1; - size_t cur_block_preferred_size = 0; - UInt64 merged_rows = 0; UInt64 merged_bytes = 0; }; @@ -110,7 +112,8 @@ class ColumnGathererTransform final : public IMergingTransform(row_sources_end - row_source_pos), block_preferred_size); - column_res.reserve(cur_block_preferred_size); + /// Actually reserve works only for fixed size columns. + /// So it's safe to ignore preferred size in bytes and call reserve for number of rows. 
+ size_t size_to_reserve = std::min(static_cast(row_sources_end - row_source_pos), block_preferred_size_rows); + column_res.reserve(size_to_reserve); } - size_t cur_size = column_res.size(); next_required_source = -1; - while (row_source_pos < row_sources_end && cur_size < cur_block_preferred_size) + while (row_source_pos < row_sources_end + && column_res.size() < block_preferred_size_rows + && column_res.allocatedBytes() < block_preferred_size_bytes) { RowSourcePart row_source = *row_source_pos; size_t source_num = row_source.getSourceNum(); @@ -159,6 +165,7 @@ void ColumnGathererStream::gather(Column & column_res) /// Consecutive optimization. TODO: precompute lengths size_t len = 1; size_t max_len = std::min(static_cast(row_sources_end - row_source_pos), source.size - source.pos); // interval should be in the same block + while (len < max_len && row_source_pos->data == row_source.data) { ++len; @@ -181,8 +188,6 @@ void ColumnGathererStream::gather(Column & column_res) column_res.insertFrom(*source.column, source.pos); else column_res.insertRangeFrom(*source.column, source.pos, len); - - cur_size += len; } source.pos += len; diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h index d214a310a8c9..0f5dab06fc91 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h @@ -67,7 +67,7 @@ class CreatingSetsOnTheFlyTransform : public ISimpleTransform /// Set to fill SetWithStatePtr set; - Poco::Logger * log = &Poco::Logger::get("CreatingSetsOnTheFlyTransform"); + LoggerPtr log = getLogger("CreatingSetsOnTheFlyTransform"); }; /* @@ -108,7 +108,7 @@ class FilterBySetOnTheFlyTransform : public ISimpleTransform size_t result_rows = 0; } stat; - Poco::Logger * log = &Poco::Logger::get("FilterBySetOnTheFlyTransform"); + LoggerPtr log = getLogger("FilterBySetOnTheFlyTransform"); }; } diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index d1ec7dcbca74..74dcd829b4da 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -63,7 +63,7 @@ class CreatingSetsTransform : public IAccumulatingTransform size_t bytes_to_transfer = 0; using Logger = Poco::Logger; - Poco::Logger * log = &Poco::Logger::get("CreatingSetsTransform"); + LoggerPtr log = getLogger("CreatingSetsTransform"); bool is_initialized = false; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index b725c3e1a5fb..aaa98e968031 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -28,7 +28,7 @@ void logDebug(String key, const T & value, const char * separator = " : ") else ss << value; - LOG_DEBUG(&Poco::Logger::get("FillingTransform"), "{}{}{}", key, separator, ss.str()); + LOG_DEBUG(getLogger("FillingTransform"), "{}{}{}", key, separator, ss.str()); } } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 4e7868ea1c2d..0c0a86ce270c 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -14,12 +14,12 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { - LOG_DEBUG(&Poco::Logger::get("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); 
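[Editor's note] The ColumnGathererStream change above replaces the single block_preferred_size with separate row and byte budgets, so gathering stops as soon as either limit is hit. Below is a minimal, standard-library-only sketch of that double-bounded loop; ColumnSketch, gatherBounded and allocatedBytes are illustrative stand-ins for the real IColumn/Source machinery, not ClickHouse types.

    #include <cstddef>
    #include <string>
    #include <vector>

    // Simplified stand-in for a result column: tracks rows and an approximate byte footprint.
    struct ColumnSketch
    {
        std::vector<std::string> rows;

        size_t allocatedBytes() const
        {
            size_t bytes = 0;
            for (const auto & r : rows)
                bytes += r.capacity();
            return bytes;
        }
    };

    // Gather from a flat list of source rows until either the preferred row count
    // or the preferred byte budget is exhausted, mirroring the loop condition in the hunk.
    size_t gatherBounded(
        const std::vector<std::string> & source,
        size_t start,
        size_t block_preferred_size_rows,
        size_t block_preferred_size_bytes,
        ColumnSketch & column_res)
    {
        size_t pos = start;
        while (pos < source.size()
               && column_res.rows.size() < block_preferred_size_rows
               && column_res.allocatedBytes() < block_preferred_size_bytes)
        {
            column_res.rows.push_back(source[pos]);
            ++pos;
        }
        return pos; // the caller resumes from here for the next output block
    }

The byte limit matters for wide variable-size rows (long strings, arrays), where a pure row count could reserve far more memory than intended.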
+ LOG_DEBUG(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->checkTypesOfKeys(header); join->initialize(header); ExtraBlockPtr tmp; join->joinBlock(header, tmp); - LOG_DEBUG(&Poco::Logger::get("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); + LOG_DEBUG(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 15c88244cbd3..2d313d4ea5c3 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -273,7 +273,7 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( size_t max_block_size_) : table_join(table_join_) , max_block_size(max_block_size_) - , log(&Poco::Logger::get("MergeJoinAlgorithm")) + , log(getLogger("MergeJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); @@ -860,7 +860,7 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) - , log(&Poco::Logger::get("MergeJoinTransform")) + , log(getLogger("MergeJoinTransform")) { LOG_TRACE(log, "Use MergeJoinTransform"); } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index eb45169a2b07..793de00db406 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -269,7 +269,7 @@ class MergeJoinAlgorithm final : public IMergingAlgorithm Statistic stat; - Poco::Logger * log; + LoggerPtr log; }; class MergeJoinTransform final : public IMergingTransform @@ -289,7 +289,7 @@ class MergeJoinTransform final : public IMergingTransform protected: void onFinish() override; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index de77711d1294..64d84ea4b007 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -30,7 +30,7 @@ namespace DB class BufferingToFileTransform : public IAccumulatingTransform { public: - BufferingToFileTransform(const Block & header, TemporaryFileStream & tmp_stream_, Poco::Logger * log_) + BufferingToFileTransform(const Block & header, TemporaryFileStream & tmp_stream_, LoggerPtr log_) : IAccumulatingTransform(header, header) , tmp_stream(tmp_stream_) , log(log_) @@ -73,7 +73,7 @@ class BufferingToFileTransform : public IAccumulatingTransform private: TemporaryFileStream & tmp_stream; - Poco::Logger * log; + LoggerPtr log; }; MergeSortingTransform::MergeSortingTransform( diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index e8c180b69032..4478d5a07e83 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -50,7 +50,7 @@ class MergeSortingTransform : public SortingTransform size_t sum_rows_in_blocks = 0; size_t sum_bytes_in_blocks = 0; - Poco::Logger * log = &Poco::Logger::get("MergeSortingTransform"); + LoggerPtr log = getLogger("MergeSortingTransform"); /// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore. 
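[Editor's note] Most hunks in this part of the patch follow one mechanical pattern: members and parameters declared as Poco::Logger * become LoggerPtr, and &Poco::Logger::get("Name") becomes getLogger("Name"), so loggers are held with shared ownership instead of raw pointers. A hedged before/after sketch of how a class picks up the change; the class name and log channel are illustrative, and the include path is assumed from the CertificateReloader.h hunk later in the patch.

    #include <utility>
    #include <Common/Logger.h>   // declares LoggerPtr and getLogger() (path assumed)

    class ExampleTransform
    {
    public:
        ExampleTransform()
            : log(getLogger("ExampleTransform"))   // was: &Poco::Logger::get("ExampleTransform")
        {
        }

        // Passing the logger on is now a shared-ownership copy/move instead of a raw pointer,
        // matching the BufferingToFileTransform constructor change above.
        explicit ExampleTransform(LoggerPtr log_)
            : log(std::move(log_))                 // was: Poco::Logger * log_
        {
        }

    private:
        LoggerPtr log;                             // was: Poco::Logger * log;
    };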
bool remerge_is_useful = true; diff --git a/src/Processors/Transforms/MergingAggregatedTransform.h b/src/Processors/Transforms/MergingAggregatedTransform.h index 73e0d8cd0132..ade76b2f3048 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.h +++ b/src/Processors/Transforms/MergingAggregatedTransform.h @@ -21,7 +21,7 @@ class MergingAggregatedTransform : public IAccumulatingTransform private: AggregatingTransformParamsPtr params; - Poco::Logger * log = &Poco::Logger::get("MergingAggregatedTransform"); + LoggerPtr log = getLogger("MergingAggregatedTransform"); size_t max_threads; AggregatedDataVariants data_variants; diff --git a/src/Processors/Transforms/PasteJoinTransform.cpp b/src/Processors/Transforms/PasteJoinTransform.cpp index ff3e2fb85e55..d2fa7eed256d 100644 --- a/src/Processors/Transforms/PasteJoinTransform.cpp +++ b/src/Processors/Transforms/PasteJoinTransform.cpp @@ -33,7 +33,7 @@ PasteJoinAlgorithm::PasteJoinAlgorithm( size_t max_block_size_) : table_join(table_join_) , max_block_size(max_block_size_) - , log(&Poco::Logger::get("PasteJoinAlgorithm")) + , log(getLogger("PasteJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "PasteJoinAlgorithm requires exactly two inputs"); @@ -117,7 +117,7 @@ PasteJoinTransform::PasteJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) - , log(&Poco::Logger::get("PasteJoinTransform")) + , log(getLogger("PasteJoinTransform")) { LOG_TRACE(log, "Use PasteJoinTransform"); } diff --git a/src/Processors/Transforms/PasteJoinTransform.h b/src/Processors/Transforms/PasteJoinTransform.h index 7ecf70e18dc2..04cb5486cd5d 100644 --- a/src/Processors/Transforms/PasteJoinTransform.h +++ b/src/Processors/Transforms/PasteJoinTransform.h @@ -61,7 +61,7 @@ class PasteJoinAlgorithm final : public IMergingAlgorithm Statistic stat; - Poco::Logger * log; + LoggerPtr log; UInt64 last_used_row[2] = {0, 0}; }; @@ -82,7 +82,7 @@ class PasteJoinTransform final : public IMergingTransform protected: void onFinish() override; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Processors/Transforms/TTLCalcTransform.cpp b/src/Processors/Transforms/TTLCalcTransform.cpp index 31fb61239ef8..2b4ed96d4e38 100644 --- a/src/Processors/Transforms/TTLCalcTransform.cpp +++ b/src/Processors/Transforms/TTLCalcTransform.cpp @@ -13,7 +13,7 @@ TTLCalcTransform::TTLCalcTransform( bool force_) : IAccumulatingTransform(header_, header_) , data_part(data_part_) - , log(&Poco::Logger::get(storage_.getLogName() + " (TTLCalcTransform)")) + , log(getLogger(storage_.getLogName() + " (TTLCalcTransform)")) { auto old_ttl_infos = data_part->ttl_infos; diff --git a/src/Processors/Transforms/TTLCalcTransform.h b/src/Processors/Transforms/TTLCalcTransform.h index 495879400dce..baa31c01c526 100644 --- a/src/Processors/Transforms/TTLCalcTransform.h +++ b/src/Processors/Transforms/TTLCalcTransform.h @@ -38,7 +38,7 @@ class TTLCalcTransform : public IAccumulatingTransform /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index 7cde86098c7a..db9326f9acf0 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -25,7 +25,7 @@ TTLTransform::TTLTransform( bool force_) : IAccumulatingTransform(header_, header_) , 
data_part(data_part_) - , log(&Poco::Logger::get(storage_.getLogName() + " (TTLTransform)")) + , log(getLogger(storage_.getLogName() + " (TTLTransform)")) { auto old_ttl_infos = data_part->ttl_infos; diff --git a/src/Processors/Transforms/TTLTransform.h b/src/Processors/Transforms/TTLTransform.h index 3f0dffd19989..3606db7f4c2c 100644 --- a/src/Processors/Transforms/TTLTransform.h +++ b/src/Processors/Transforms/TTLTransform.h @@ -42,7 +42,7 @@ class TTLTransform : public IAccumulatingTransform /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 71d652e74d07..8ddc3ab0c61f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -244,7 +244,33 @@ Chain buildPushingToViewsChain( // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) + { insert_context->setSetting("insert_deduplicate", Field{false}); + } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current table id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (table_id.hasUUID()) + insert_deduplication_token += "_" + toString(table_id.uuid); + else + insert_deduplication_token += "_" + table_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -267,7 +293,7 @@ Chain buildPushingToViewsChain( if (view == nullptr) { LOG_WARNING( - &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; } @@ -310,7 +336,7 @@ Chain buildPushingToViewsChain( // In case the materialized view is dropped/detached at this point, we register a warning and ignore it assert(materialized_view->is_dropped || materialized_view->is_detached); LOG_WARNING( - &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; } @@ -341,7 +367,7 @@ Chain buildPushingToViewsChain( /// It may happen if materialize view query was changed and it doesn't depend on this source table anymore. 
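[Editor's note] The buildPushingToViewsChain hunk above extends the user-supplied insert_deduplication_token at each hop with the current table's UUID (or full name when there is no UUID), so blocks that reach the same downstream table through different materialized-view chains carry different tokens and are not deduplicated against each other. A minimal sketch of just that token rewrite; the function and parameter names are illustrative, not the patch's API.

    #include <optional>
    #include <string>

    // Extend the deduplication token with an identifier of the table currently being written,
    // preferring the UUID when the table has one, as in the hunk above.
    std::string updatedDeduplicationToken(
        const std::string & insert_deduplication_token,
        const std::optional<std::string> & table_uuid,
        const std::string & table_full_name)
    {
        std::string token = insert_deduplication_token;
        if (table_uuid)
            token += "_" + *table_uuid;
        else
            token += "_" + table_full_name;
        return token;
    }

In the diagram from the hunk, the blocks arriving at ds_2_1 via mv_2_1 and via mv_2_2 accumulate different suffixes along the way, which is exactly what keeps them from colliding on deduplication.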
/// See setting `allow_experimental_alter_materialized_view_structure` LOG_DEBUG( - &Poco::Logger::get("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", + getLogger("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", select_query.select_table_id.getFullTableName(), view_id.getFullTableName(), table_id); continue; } @@ -835,14 +861,14 @@ void FinalizingViewsTransform::work() /// Exception will be ignored, it is saved here for the system.query_views_log if (materialized_views_ignore_errors) - tryLogException(view.exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); + tryLogException(view.exception, getLogger("PushingToViews"), "Cannot push to the storage, ignoring the error"); } else { view.runtime_stats->setStatus(QueryViewsLogElement::ViewStatus::QUERY_FINISH); LOG_TRACE( - &Poco::Logger::get("PushingToViews"), + getLogger("PushingToViews"), "Pushing ({}) from {} to {} took {} ms.", views_data->max_threads <= 1 ? "sequentially" : ("parallel " + std::to_string(views_data->max_threads)), views_data->source_storage_id.getNameForLogs(), diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 5a8ccc2592b2..444f1258f3e4 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -186,7 +186,7 @@ class RemoteQueryExecutor void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); } - void setLogger(Poco::Logger * logger) { log = logger; } + void setLogger(LoggerPtr logger) { log = logger; } const Block & getHeader() const { return header; } @@ -283,7 +283,7 @@ class RemoteQueryExecutor PoolMode pool_mode = PoolMode::GET_MANY; StorageID main_table = StorageID::createEmpty(); - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; GetPriorityForLoadBalancing::Func priority_func; diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 8795d4807de1..c974f450c9a3 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -105,7 +105,7 @@ CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std } -bool CertificateReloader::File::changeIfModified(std::string new_path, Poco::Logger * logger) +bool CertificateReloader::File::changeIfModified(std::string new_path, LoggerPtr logger) { std::error_code ec; std::filesystem::file_time_type new_modification_time = std::filesystem::last_write_time(new_path, ec); diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 9f04179b8d6c..028914e682f1 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -51,7 +52,7 @@ class CertificateReloader private: CertificateReloader() = default; - Poco::Logger * log = &Poco::Logger::get("CertificateReloader"); + LoggerPtr log = getLogger("CertificateReloader"); struct File { @@ -61,7 +62,7 @@ class CertificateReloader std::string path; std::filesystem::file_time_type modification_time; - bool changeIfModified(std::string new_path, Poco::Logger * logger); + bool changeIfModified(std::string new_path, LoggerPtr logger); }; File cert_file{"certificate"}; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 6bb6ba139adf..f31a8d6feb50 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -76,7 +76,7 @@ namespace static std::once_flag 
once_flag; std::call_once(once_flag, [&config] { - static Poco::Logger * logger = &Poco::Logger::get("grpc"); + static LoggerPtr logger = getLogger("grpc"); gpr_set_log_function([](gpr_log_func_args* args) { if (args->severity == GPR_LOG_SEVERITY_DEBUG) @@ -614,7 +614,7 @@ namespace class Call { public: - Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, Poco::Logger * log_); + Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, LoggerPtr log_); ~Call(); void start(const std::function & on_finish_call_callback); @@ -656,7 +656,7 @@ namespace const CallType call_type; std::unique_ptr responder; IServer & iserver; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::optional session; ContextMutablePtr query_context; @@ -718,7 +718,7 @@ namespace }; // NOLINTEND(clang-analyzer-optin.performance.Padding) - Call::Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, Poco::Logger * log_) + Call::Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, LoggerPtr log_) : call_type(call_type_), responder(std::move(responder_)), iserver(iserver_), log(log_) { } @@ -1843,7 +1843,7 @@ class GRPCServer::Runner GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_) : iserver(iserver_) , address_to_listen(address_to_listen_) - , log(&Poco::Logger::get("GRPCServer")) + , log(getLogger("GRPCServer")) , runner(std::make_unique(*this)) {} diff --git a/src/Server/GRPCServer.h b/src/Server/GRPCServer.h index 359a2506e953..a9c8161298fc 100644 --- a/src/Server/GRPCServer.h +++ b/src/Server/GRPCServer.h @@ -5,6 +5,7 @@ #if USE_GRPC #include #include +#include #include "clickhouse_grpc.grpc.pb.h" namespace Poco { class Logger; } @@ -46,7 +47,7 @@ class GRPCServer IServer & iserver; const Poco::Net::SocketAddress address_to_listen; - Poco::Logger * log; + LoggerPtr log; GRPCService grpc_service; std::unique_ptr grpc_server; std::unique_ptr queue; diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index 4a6e85ba0fba..9db02eac2206 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -65,7 +65,7 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse { stream = std::move(in); if (!startsWith(getContentType(), "multipart/form-data")) - LOG_WARNING(LogFrequencyLimiter(&Poco::Logger::get("HTTPServerRequest"), 10), "Got an HTTP request with no content length " + LOG_WARNING(LogFrequencyLimiter(getLogger("HTTPServerRequest"), 10), "Got an HTTP request with no content length " "and no chunked/multipart encoding, it may be impossible to distinguish graceful EOF from abnormal connection loss"); } else diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index bdc8e7d59c9c..72e7c5552f85 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -137,7 +137,7 @@ bool tryAddHttpOptionHeadersFromConfig(HTTPServerResponse & response, const Poco { /// If there is empty header name, it will not be processed and message about it will be in logs if (config.getString("http_options_response." + config_key + ".name", "").empty()) - LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed."); + LOG_WARNING(getLogger("processOptionsRequest"), "Empty header was found in config. It will not be processed."); else response.add(config.getString("http_options_response." 
+ config_key + ".name", ""), config.getString("http_options_response." + config_key + ".value", "")); @@ -328,7 +328,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output) HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_) : server(server_) - , log(&Poco::Logger::get(name)) + , log(getLogger(name)) , default_settings(server.context()->getSettingsRef()) , content_type_override(content_type_override_) { diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 0b623fe5f656..fa2d0dae1993 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -100,7 +100,7 @@ class HTTPHandler : public HTTPRequestHandler }; IServer & server; - Poco::Logger * log; + LoggerPtr log; /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. String server_display_name; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 5481bcd5083c..48c2ab21468a 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -7,7 +7,7 @@ namespace DB { HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & name_) - : log(&Poco::Logger::get(name_)), name(name_) + : log(getLogger(name_)), name(name_) { } diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index 07b278d831c3..1075b7d1d60d 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -21,7 +21,7 @@ class HTTPRequestHandlerFactoryMain : public HTTPRequestHandlerFactory std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; std::string name; HTTPPathHints hints; diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index 66042ad3d1df..226a06f5a457 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -26,7 +26,7 @@ class InterserverIOHTTPHandler : public HTTPRequestHandler public: explicit InterserverIOHTTPHandler(IServer & server_) : server(server_) - , log(&Poco::Logger::get("InterserverIOHTTPHandler")) + , log(getLogger("InterserverIOHTTPHandler")) { } @@ -39,7 +39,7 @@ class InterserverIOHTTPHandler : public HTTPRequestHandler }; IServer & server; - Poco::Logger * log; + LoggerPtr log; CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection}; diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index de6edd199d74..c973be040c8e 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -63,7 +63,7 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP } catch (...) 
{ - LOG_ERROR((&Poco::Logger::get("KeeperReadinessHandler")), "Cannot send exception to client"); + LOG_ERROR((getLogger("KeeperReadinessHandler")), "Cannot send exception to client"); } } } diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 76b84f0ce6e1..6709cd298e5b 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -220,7 +220,7 @@ KeeperTCPHandler::KeeperTCPHandler( Poco::Timespan send_timeout_, const Poco::Net::StreamSocket & socket_) : Poco::Net::TCPServerConnection(socket_) - , log(&Poco::Logger::get("KeeperTCPHandler")) + , log(getLogger("KeeperTCPHandler")) , keeper_dispatcher(keeper_dispatcher_) , operation_timeout( 0, diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index adb1baa084f9..c1c522eee89d 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -63,7 +63,7 @@ class KeeperTCPHandler : public Poco::Net::TCPServerConnection ~KeeperTCPHandler() override; private: - Poco::Logger * log; + LoggerPtr log; std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan min_session_timeout; diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index 36f284442c6c..239bf8b55247 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -17,7 +17,7 @@ class KeeperTCPHandlerFactory : public TCPServerConnectionFactory private: ConfigGetter config_getter; std::shared_ptr keeper_dispatcher; - Poco::Logger * log; + LoggerPtr log; Poco::Timespan receive_timeout; Poco::Timespan send_timeout; @@ -37,7 +37,7 @@ class KeeperTCPHandlerFactory : public TCPServerConnectionFactory bool secure) : config_getter(config_getter_) , keeper_dispatcher(keeper_dispatcher_) - , log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory")) + , log(getLogger(std::string{"KeeperTCP"} + (secure ? 
"S" : "") + "HandlerFactory")) , receive_timeout(/* seconds = */ receive_timeout_seconds, /* microseconds = */ 0) , send_timeout(/* seconds = */ send_timeout_seconds, /* microseconds = */ 0) { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 969eb24d126c..72fe3b7cea9f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -57,16 +57,109 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } - static const size_t PACKET_HEADER_SIZE = 4; static const size_t SSL_REQUEST_PAYLOAD_SIZE = 32; -static String showWarningsReplacementQuery(const String & query); -static String showCountWarningsReplacementQuery(const String & query); -static String selectEmptyReplacementQuery(const String & query); -static String showTableStatusReplacementQuery(const String & query); -static String killConnectionIdReplacementQuery(const String & query); -static String selectLimitReplacementQuery(const String & query); +static bool checkShouldReplaceQuery(const String & query, const String & prefix) +{ + return query.length() >= prefix.length() + && std::equal(prefix.begin(), prefix.end(), query.begin(), [](char a, char b) { return std::tolower(a) == std::tolower(b); }); +} + +static bool isFederatedServerSetupSetCommand(const String & query) +{ + re2::RE2::Options regexp_options; + regexp_options.set_case_sensitive(false); + static const re2::RE2 expr( + "(^(SET NAMES(.*)))" + "|(^(SET character_set_results(.*)))" + "|(^(SET FOREIGN_KEY_CHECKS(.*)))" + "|(^(SET AUTOCOMMIT(.*)))" + "|(^(SET sql_mode(.*)))" + "|(^(SET @@(.*)))" + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", regexp_options); + assert(expr.ok()); + return re2::RE2::FullMatch(query, expr); +} + +/// Always return an empty set with appropriate column definitions for SHOW WARNINGS queries +/// See also: https://dev.mysql.com/doc/refman/8.0/en/show-warnings.html +static String showWarningsReplacementQuery([[maybe_unused]] const String & query) +{ + return "SELECT '' AS Level, 0::UInt32 AS Code, '' AS Message WHERE false"; +} + +static String showCountWarningsReplacementQuery([[maybe_unused]] const String & query) +{ + return "SELECT 0::UInt64 AS `@@session.warning_count`"; +} + +/// Replace "[query(such as SHOW VARIABLES...)]" into "". +static String selectEmptyReplacementQuery(const String & query) +{ + std::ignore = query; + return "select ''"; +} + +/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". 
+static String showTableStatusReplacementQuery(const String & query) +{ + const String prefix = "SHOW TABLE STATUS LIKE "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + return ( + "SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name LIKE " + + suffix); + } + return query; +} + +static std::optional setSettingReplacementQuery(const String & query, const String & mysql_setting, const String & clickhouse_setting) +{ + const String prefix = "SET " + mysql_setting; + // if (query.length() >= prefix.length() && boost::iequals(std::string_view(prefix), std::string_view(query.data(), 3))) + if (checkShouldReplaceQuery(query, prefix)) + return "SET " + clickhouse_setting + String(query.data() + prefix.length()); + return std::nullopt; +} + +/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id LIKE 'mysql:[connection_id]:xxx'". +static String killConnectionIdReplacementQuery(const String & query) +{ + const String prefix = "KILL QUERY "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + static const re2::RE2 expr("^[0-9]"); + if (re2::RE2::FullMatch(suffix, expr)) + { + String replacement = fmt::format("KILL QUERY WHERE query_id LIKE 'mysql:{}:%'", suffix); + return replacement; + } + } + return query; +} MySQLHandler::MySQLHandler( IServer & server_, @@ -78,7 +171,7 @@ MySQLHandler::MySQLHandler( : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) - , log(&Poco::Logger::get("MySQLHandler")) + , log(getLogger("MySQLHandler")) , connection_id(connection_id_) , auth_plugin(new MySQLProtocol::Authentication::Native41()) , read_event(read_event_) @@ -88,12 +181,14 @@ MySQLHandler::MySQLHandler( if (ssl_enabled) server_capabilities |= CLIENT_SSL; - replacements.emplace("SHOW WARNINGS", showWarningsReplacementQuery); - replacements.emplace("SHOW COUNT(*) WARNINGS", showCountWarningsReplacementQuery); - replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); - replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); - replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); - replacements.emplace("SET SQL_SELECT_LIMIT", selectLimitReplacementQuery); + queries_replacements.emplace("SHOW WARNINGS", showWarningsReplacementQuery); + queries_replacements.emplace("SHOW COUNT(*) WARNINGS", showCountWarningsReplacementQuery); + queries_replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); + queries_replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); + queries_replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); + settings_replacements.emplace("SQL_SELECT_LIMIT", "limit"); + settings_replacements.emplace("NET_WRITE_TIMEOUT", "send_timeout"); + settings_replacements.emplace("NET_READ_TIMEOUT", "receive_timeout"); } void MySQLHandler::run() @@ -324,8 +419,6 @@ void MySQLHandler::comPing() packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, 0, 0, 0), true); } -static 
bool isFederatedServerSetupSetCommand(const String & query); - void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) { String query = String(payload.position(), payload.buffer().end()); @@ -342,17 +435,29 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) bool should_replace = false; bool with_output = false; - for (auto const & x : replacements) + // Queries replacements + for (auto const & [query_to_replace, replacement_fn] : queries_replacements) { - if (0 == strncasecmp(x.first.c_str(), query.c_str(), x.first.size())) + if (checkShouldReplaceQuery(query, query_to_replace)) { should_replace = true; - replacement_query = x.second(query); + replacement_query = replacement_fn(query); break; } } - ReadBufferFromString replacement(replacement_query); + // Settings replacements + if (!should_replace) + for (auto const & [mysql_setting, clickhouse_setting] : settings_replacements) + { + const auto replacement_query_opt = setSettingReplacementQuery(query, mysql_setting, clickhouse_setting); + if (replacement_query_opt.has_value()) + { + should_replace = true; + replacement_query = replacement_query_opt.value(); + break; + } + } auto query_context = session->makeQueryContext(); query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); @@ -385,7 +490,14 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) } }; - executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + if (should_replace) + { + ReadBufferFromString replacement(replacement_query); + executeQuery(replacement, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + } + else + executeQuery(payload, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + if (!with_output) packet_endpoint->sendPacket(OKPacket(0x00, client_capabilities, affected_rows, 0, 0), true); @@ -531,99 +643,4 @@ void MySQLHandlerSSL::finishHandshakeSSL( } #endif - -static bool isFederatedServerSetupSetCommand(const String & query) -{ - re2::RE2::Options regexp_options; - regexp_options.set_case_sensitive(false); - static const re2::RE2 expr( - "(^(SET NAMES(.*)))" - "|(^(SET character_set_results(.*)))" - "|(^(SET FOREIGN_KEY_CHECKS(.*)))" - "|(^(SET AUTOCOMMIT(.*)))" - "|(^(SET sql_mode(.*)))" - "|(^(SET @@(.*)))" - "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", regexp_options); - assert(expr.ok()); - return re2::RE2::FullMatch(query, expr); -} - -/// Always return an empty set with appropriate column definitions for SHOW WARNINGS queries -/// See also: https://dev.mysql.com/doc/refman/8.0/en/show-warnings.html -static String showWarningsReplacementQuery([[maybe_unused]] const String & query) -{ - return "SELECT '' AS Level, 0::UInt32 AS Code, '' AS Message WHERE false"; -} - -static String showCountWarningsReplacementQuery([[maybe_unused]] const String & query) -{ - return "SELECT 0::UInt64 AS `@@session.warning_count`"; -} - -/// Replace "[query(such as SHOW VARIABLES...)]" into "". -static String selectEmptyReplacementQuery(const String & query) -{ - std::ignore = query; - return "select ''"; -} - -/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". 
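[Editor's note] The comQuery change above splits the old single replacements map into two stages: whole-query replacements (queries_replacements) tried first, then MySQL-setting-to-ClickHouse-setting rewrites (settings_replacements), so e.g. "SET NET_WRITE_TIMEOUT=300" becomes "SET send_timeout=300". A condensed, self-contained model of that dispatch; the function, map names and prefix check are simplified stand-ins for the real handler state.

    #include <algorithm>
    #include <cctype>
    #include <functional>
    #include <string>
    #include <unordered_map>

    using QueryReplacementFn = std::function<std::string(const std::string &)>;

    std::string rewriteQuery(
        const std::string & query,
        const std::unordered_map<std::string, QueryReplacementFn> & queries_replacements,
        const std::unordered_map<std::string, std::string> & settings_replacements)
    {
        auto has_prefix = [](const std::string & q, const std::string & prefix)
        {
            return q.size() >= prefix.size()
                && std::equal(prefix.begin(), prefix.end(), q.begin(),
                              [](unsigned char a, unsigned char b) { return std::tolower(a) == std::tolower(b); });
        };

        // Stage 1: whole-query replacements (SHOW WARNINGS, KILL QUERY, ...).
        for (const auto & [query_to_replace, replacement_fn] : queries_replacements)
            if (has_prefix(query, query_to_replace))
                return replacement_fn(query);

        // Stage 2: "SET <mysql_setting> ..." -> "SET <clickhouse_setting> ...".
        for (const auto & [mysql_setting, clickhouse_setting] : settings_replacements)
        {
            const std::string prefix = "SET " + mysql_setting;
            if (has_prefix(query, prefix))
                return "SET " + clickhouse_setting + query.substr(prefix.size());
        }

        return query; // no replacement, execute the query as-is
    }

    // rewriteQuery("SET NET_WRITE_TIMEOUT=300", {}, {{"NET_WRITE_TIMEOUT", "send_timeout"}})
    //   -> "SET send_timeout=300"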
-static String showTableStatusReplacementQuery(const String & query) -{ - const String prefix = "SHOW TABLE STATUS LIKE "; - if (query.size() > prefix.size()) - { - String suffix = query.data() + prefix.length(); - return ( - "SELECT" - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name LIKE " - + suffix); - } - return query; -} - -static String selectLimitReplacementQuery(const String & query) -{ - const String prefix = "SET SQL_SELECT_LIMIT"; - if (query.starts_with(prefix)) - return "SET limit" + std::string(query.data() + prefix.length()); - return query; -} - -/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id LIKE 'mysql:[connection_id]:xxx'". -static String killConnectionIdReplacementQuery(const String & query) -{ - const String prefix = "KILL QUERY "; - if (query.size() > prefix.size()) - { - String suffix = query.data() + prefix.length(); - static const re2::RE2 expr("^[0-9]"); - if (re2::RE2::FullMatch(suffix, expr)) - { - String replacement = fmt::format("KILL QUERY WHERE query_id LIKE 'mysql:{}:%'", suffix); - return replacement; - } - } - return query; -} - } diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 36d63ebca849..2deb2b8f4358 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -81,7 +81,7 @@ class MySQLHandler : public Poco::Net::TCPServerConnection IServer & server; TCPServer & tcp_server; - Poco::Logger * log; + LoggerPtr log; uint32_t connection_id = 0; uint32_t server_capabilities = 0; @@ -92,9 +92,13 @@ class MySQLHandler : public Poco::Net::TCPServerConnection MySQLProtocol::PacketEndpointPtr packet_endpoint; std::unique_ptr session; - using ReplacementFn = std::function; - using Replacements = std::unordered_map; - Replacements replacements; + using QueryReplacementFn = std::function; + using QueriesReplacements = std::unordered_map; + QueriesReplacements queries_replacements; + + /// MySQL setting name --> ClickHouse setting name + using SettingsReplacements = std::unordered_map; + SettingsReplacements settings_replacements; std::mutex prepared_statements_mutex; UInt32 current_prepared_statement_id TSA_GUARDED_BY(prepared_statements_mutex) = 0; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index 79234c647aa6..1dd43e6dab2a 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) - , log(&Poco::Logger::get("MySQLHandlerFactory")) + , log(getLogger("MySQLHandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 307ee3b2f0de..4108269d7380 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -20,7 +20,7 @@ class MySQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * 
log; + LoggerPtr log; #if USE_SSL struct RSADeleter diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index 57b91a0ad044..1c23d8964152 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -40,7 +40,7 @@ class PostgreSQLHandler : public Poco::Net::TCPServerConnection void run() final; private: - Poco::Logger * log = &Poco::Logger::get("PostgreSQLHandler"); + LoggerPtr log = getLogger("PostgreSQLHandler"); IServer & server; TCPServer & tcp_server; diff --git a/src/Server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp index 096bbbdcda9a..29eb75679760 100644 --- a/src/Server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -7,7 +7,7 @@ namespace DB PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) - , log(&Poco::Logger::get("PostgreSQLHandlerFactory")) + , log(getLogger("PostgreSQLHandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index e5f762fca6d7..43674306ff66 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -14,7 +14,7 @@ class PostgreSQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; ProfileEvents::Event read_event; ProfileEvents::Event write_event; diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h index b50c2acbc550..a044b9a966b1 100644 --- a/src/Server/ProxyV1Handler.h +++ b/src/Server/ProxyV1Handler.h @@ -13,7 +13,7 @@ class ProxyV1Handler : public Poco::Net::TCPServerConnection using StreamSocket = Poco::Net::StreamSocket; public: explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket), log(&Poco::Logger::get("ProxyV1Handler")), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} + : Poco::Net::TCPServerConnection(socket), log(getLogger("ProxyV1Handler")), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} void run() override; @@ -21,7 +21,7 @@ class ProxyV1Handler : public Poco::Net::TCPServerConnection bool readWord(int max_len, std::string & word, bool & eol); private: - Poco::Logger * log; + LoggerPtr log; IServer & server; std::string conf_name; TCPProtocolStackData & stack_data; diff --git a/src/Server/ProxyV1HandlerFactory.h b/src/Server/ProxyV1HandlerFactory.h index 028596d745d0..0398c8c1ccff 100644 --- a/src/Server/ProxyV1HandlerFactory.h +++ b/src/Server/ProxyV1HandlerFactory.h @@ -16,7 +16,7 @@ class ProxyV1HandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -28,7 +28,7 @@ class ProxyV1HandlerFactory : public TCPServerConnectionFactory public: explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) + : server(server_), log(getLogger("ProxyV1HandlerFactory")), conf_name(conf_name_) { } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 07f3b67b6a7e..91c6bd722d32 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ 
b/src/Server/ReplicasStatusHandler.cpp @@ -118,7 +118,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } catch (...) { - LOG_ERROR((&Poco::Logger::get("ReplicasStatusHandler")), "Cannot send exception to client"); + LOG_ERROR((getLogger("ReplicasStatusHandler")), "Cannot send exception to client"); } } } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fa7206eeaac9..ec6b374518d7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -189,7 +189,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , server(server_) , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) - , log(&Poco::Logger::get("TCPHandler")) + , log(getLogger("TCPHandler")) , read_event(read_event_) , write_event(write_event_) , server_display_name(std::move(server_display_name_)) @@ -200,7 +200,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) - , log(&Poco::Logger::get("TCPHandler")) + , log(getLogger("TCPHandler")) , forwarded_for(stack_data.forwarded_for) , certificate(stack_data.certificate) , read_event(read_event_) diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 4eb84ee5eee5..26cecf466629 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -160,7 +160,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection IServer & server; TCPServer & tcp_server; bool parse_proxy_protocol = false; - Poco::Logger * log; + LoggerPtr log; String forwarded_for; String certificate; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 3eb032f4250b..d65c9898b23b 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -18,7 +18,7 @@ class TCPHandlerFactory : public TCPServerConnectionFactory private: IServer & server; bool parse_proxy_protocol = false; - Poco::Logger * log; + LoggerPtr log; std::string server_display_name; ProfileEvents::Event read_event; @@ -38,7 +38,7 @@ class TCPHandlerFactory : public TCPServerConnectionFactory */ TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()) : server(server_), parse_proxy_protocol(parse_proxy_protocol_) - , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) + , log(getLogger(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 7373e6e1c4ea..b76bb8d72fdf 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -23,7 +23,7 @@ class TCPProtocolStackFactory : public TCPServerConnectionFactory { private: IServer & server [[maybe_unused]]; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; std::vector stack; AllowedClientHosts allowed_client_hosts; @@ -38,7 +38,7 @@ class TCPProtocolStackFactory : public TCPServerConnectionFactory public: template explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... 
factory) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) + : server(server_), log(getLogger("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) { const auto & config = server.config(); /// Fill list of allowed hosts. diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 9e3002d29719..19602c7d25e9 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -19,7 +19,7 @@ class TLSHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -31,7 +31,7 @@ class TLSHandlerFactory : public TCPServerConnectionFactory public: explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) + : server(server_), log(getLogger("TLSHandlerFactory")), conf_name(conf_name_) { } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index ac7a3bfccf3d..e45d2a55acb1 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -17,6 +17,7 @@ INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); +INCBIN(resource_lz_string_js, SOURCE_DIR "/programs/server/js/lz-string.js"); INCBIN(resource_binary_html, SOURCE_DIR "/programs/server/binary.html"); @@ -59,6 +60,9 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)"; RE2::Replace(&html, uplot_url, "/js/uplot.js"); + static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; + RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html); } else if (request.getURI().starts_with("/binary")) @@ -71,6 +75,11 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } + else if (request.getURI() == "/js/lz-string.js") + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize); + } else { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index 937801c47671..a5dea2f63db8 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -91,7 +91,7 @@ class ExternalDataSourceCache : private boost::noncopyable std::mutex mutex; std::unique_ptr lru_caches; - Poco::Logger * log = &Poco::Logger::get("ExternalDataSourceCache"); + LoggerPtr log = getLogger("ExternalDataSourceCache"); String calculateLocalPath(IRemoteFileMetadataPtr meta) const; diff --git a/src/Storages/Cache/RemoteCacheController.cpp 
b/src/Storages/Cache/RemoteCacheController.cpp index b72f5336ea47..403d0c8e43b9 100644 --- a/src/Storages/Cache/RemoteCacheController.cpp +++ b/src/Storages/Cache/RemoteCacheController.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes std::shared_ptr RemoteCacheController::recover(const std::filesystem::path & local_path_) { - auto * log = &Poco::Logger::get("RemoteCacheController"); + auto log = getLogger("RemoteCacheController"); if (!std::filesystem::exists(local_path_ / "data.bin")) { diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index fafe363bbd48..782a6b895198 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -116,7 +116,7 @@ class RemoteCacheController //std::shared_ptr remote_read_buffer; std::unique_ptr data_file_writer; - Poco::Logger * log = &Poco::Logger::get("RemoteCacheController"); + LoggerPtr log = getLogger("RemoteCacheController"); }; using RemoteCacheControllerPtr = std::shared_ptr; diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index b8bffb267e57..3584f137225d 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -314,7 +314,7 @@ struct DeltaLakeMetadataParser::Impl return version; } - Poco::Logger * log = &Poco::Logger::get("DeltaLakeMetadataParser"); + LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index 78d69c839891..699dfe8fda02 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -50,7 +50,7 @@ struct HudiMetadataParser::Impl */ Strings processMetadataFiles(const Configuration & configuration) { - auto * log = &Poco::Logger::get("HudiMetadataParser"); + auto log = getLogger("HudiMetadataParser"); const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format)); diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 77a22cd00fc6..db3f835494f8 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -22,15 +22,15 @@ class IStorageDataLake : public Storage using Configuration = typename Storage::Configuration; template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_), context_, std::forward(args)...) + explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) + : Storage(getConfigurationForDataRead(configuration_, context_, {}, attach), context_, std::forward(args)...) 
, base_configuration(configuration_) - , log(&Poco::Logger::get(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, Args && ...args) + static StoragePtr create(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) { - return std::make_shared>(configuration_, context_, std::forward(args)...); + return std::make_shared>(configuration_, context_, attach, std::forward(args)...); } String getName() const override { return name; } @@ -64,24 +64,34 @@ class IStorageDataLake : public Storage private: static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}) + const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}, bool attach = false) { auto configuration{base_configuration}; configuration.update(local_context); configuration.static_configuration = true; - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - &Poco::Logger::get("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; + try + { + if (keys.empty()) + configuration.keys = getDataFiles(configuration, local_context); + else + configuration.keys = keys; + + LOG_TRACE( + getLogger("DataLake"), + "New configuration path: {}, keys: {}", + configuration.getPath(), fmt::join(configuration.keys, ", ")); + + configuration.connect(local_context); + return configuration; + } + catch (...) 
+ { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + return configuration; + } } static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) @@ -102,7 +112,7 @@ class IStorageDataLake : public Storage Configuration base_configuration; std::mutex configuration_update_mutex; - Poco::Logger * log; + LoggerPtr log; }; @@ -115,7 +125,7 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) if (configuration.format == "auto") configuration.format = "Parquet"; - return DataLake::create(configuration, args.getContext(), args.table_id, args.columns, args.constraints, + return DataLake::create(configuration, args.getContext(), args.attach, args.table_id, args.columns, args.constraints, args.comment, getFormatSettings(args.getContext())); } diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index e0c7e26a2e12..df1536f53fc9 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -58,7 +58,7 @@ IcebergMetadata::IcebergMetadata( , manifest_list_file(std::move(manifest_list_file_)) , current_schema_id(current_schema_id_) , schema(std::move(schema_)) - , log(&Poco::Logger::get("IcebergMetadata")) + , log(getLogger("IcebergMetadata")) { } @@ -375,7 +375,7 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); - LOG_DEBUG(&Poco::Logger::get("IcebergMetadata"), "Parse metadata {}", metadata_file_path); + LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -596,10 +596,11 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); const auto pos = data_path.find(configuration.url.key); - const auto file_path = data_path.substr(pos); if (pos == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + const auto file_path = data_path.substr(pos); + if (ManifestEntryStatus(status) == ManifestEntryStatus::DELETED) { LOG_TEST(log, "Processing delete file for path: {}", file_path); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index d42ad84f472e..3e6a2ec34157 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -84,7 +84,7 @@ class IcebergMetadata : WithContext Int32 current_schema_id; NamesAndTypesList schema; Strings data_files; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 20ac77976cb0..8a1a2cdbd8f1 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -8,6 +8,7 @@ namespace DB StoragePtr StorageIceberg::create( const DB::StorageIceberg::Configuration & base_configuration, DB::ContextPtr context_, + bool attach, const DB::StorageID & table_id_, const DB::ColumnsDescription & 
columns_, const DB::ConstraintsDescription & constraints_, @@ -16,10 +17,30 @@ StoragePtr StorageIceberg::create( { auto configuration{base_configuration}; configuration.update(context_); - auto metadata = parseIcebergMetadata(configuration, context_); - auto schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - return std::make_shared(std::move(metadata), configuration, context_, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment, format_settings_); + std::unique_ptr metadata; + NamesAndTypesList schema_from_metadata; + try + { + metadata = parseIcebergMetadata(configuration, context_); + schema_from_metadata = metadata->getTableSchema(); + configuration.keys = metadata->getDataFiles(); + } + catch (...) + { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + return std::make_shared( + std::move(metadata), + configuration, + context_, + table_id_, + columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + constraints_, + comment, + format_settings_); } StorageIceberg::StorageIceberg( @@ -52,12 +73,11 @@ void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) { const bool updated = base_configuration.update(local_context); auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - /// Check if nothing was changed. - if (updated && new_metadata->getVersion() == current_metadata->getVersion()) - return; - if (new_metadata->getVersion() != current_metadata->getVersion()) + if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) current_metadata = std::move(new_metadata); + else if (!updated) + return; auto updated_configuration{base_configuration}; /// If metadata wasn't changed, we won't list data files again. 
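The data-lake hunks above (IStorageDataLake::getConfigurationForDataRead and StorageIceberg::create) share one pattern: errors raised while reading lake metadata are swallowed only when the table is being attached, so a broken or unreachable lake no longer prevents the server from registering the table, while CREATE still surfaces the error. A minimal, self-contained sketch of that control flow; Configuration, fetchDataFiles and prepareConfiguration are illustrative names, not ClickHouse APIs:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for the real data-file listing: assume it may throw
// when the remote lake is unreachable or its metadata is corrupted.
std::vector<std::string> fetchDataFiles(const std::string & path)
{
    throw std::runtime_error("cannot list data files under " + path);
}

struct Configuration
{
    std::string path;
    std::vector<std::string> keys;
};

// CREATE still fails loudly; ATTACH (e.g. during server startup) logs the
// error and degrades to an empty key list so the table can be registered.
Configuration prepareConfiguration(Configuration configuration, bool attach)
{
    try
    {
        configuration.keys = fetchDataFiles(configuration.path);
        return configuration;
    }
    catch (const std::exception & e)
    {
        if (!attach)
            throw;
        std::cerr << "ignoring metadata error on attach: " << e.what() << '\n';
        return configuration;
    }
}

int main()
{
    auto cfg = prepareConfiguration(Configuration{"s3://bucket/table", {}}, /*attach=*/ true);
    std::cout << "attached with " << cfg.keys.size() << " data files\n";
}
```

This mirrors the patch returning the partially built configuration instead of throwing, at the cost that an attached table may come up with no data files until its configuration is next updated.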
diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index a18865b5a549..4e63da5508a9 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -30,6 +30,7 @@ class StorageIceberg : public StorageS3 static StoragePtr create(const Configuration & base_configuration, ContextPtr context_, + bool attach, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index ac472c190e44..d66e21550a33 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -77,7 +77,7 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( is_finished = !outcome.GetResult().GetIsTruncated(); } - LOG_TRACE(&Poco::Logger::get("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); return res; } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 26fa489a63df..4e01cb2c6cff 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -60,7 +60,7 @@ namespace { template -ConnectionPoolPtrs createPoolsForAddresses(const Cluster::Addresses & addresses, PoolFactory && factory, Poco::Logger * log) +ConnectionPoolPtrs createPoolsForAddresses(const Cluster::Addresses & addresses, PoolFactory && factory, LoggerPtr log) { ConnectionPoolPtrs pools; @@ -121,7 +121,7 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( , default_sleep_time(storage.getDistributedSettingsRef().background_insert_sleep_time_ms.totalMilliseconds()) , sleep_time(default_sleep_time) , max_sleep_time(storage.getDistributedSettingsRef().background_insert_max_sleep_time_ms.totalMilliseconds()) - , log(&Poco::Logger::get(getLoggerName())) + , log(getLogger(getLoggerName())) , monitor_blocker(monitor_blocker_) , metric_pending_bytes(CurrentMetrics::DistributedBytesToInsert, 0) , metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 8bbd99c786a6..f7d7553851a8 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -145,7 +145,7 @@ class DistributedAsyncInsertDirectoryQueue const std::chrono::milliseconds max_sleep_time; std::chrono::time_point last_decrease_time {std::chrono::system_clock::now()}; std::mutex mutex; - Poco::Logger * log; + LoggerPtr log; ActionBlocker & monitor_blocker; BackgroundSchedulePoolTaskHolder task_handle; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index a8ed89e66f12..cfcee4dc8a26 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; } -DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFile & in, Poco::Logger * log) +DistributedAsyncInsertHeader 
DistributedAsyncInsertHeader::read(ReadBufferFromFile & in, LoggerPtr log) { DistributedAsyncInsertHeader distributed_header; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h index a7330fa5ef1b..fb4b46964637 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -38,7 +38,7 @@ struct DistributedAsyncInsertHeader std::string block_header_string; Block block_header; - static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, Poco::Logger * log); + static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, LoggerPtr log); OpenTelemetry::TracingContextHolderPtr createTracingContextHolder(const char * function, std::shared_ptr open_telemetry_span_log) const; }; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp index 98073ba1e089..a9bdef31711d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp @@ -72,7 +72,7 @@ void writeRemoteConvert( RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, - Poco::Logger * log) + LoggerPtr log) { if (!remote.getHeader()) { diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.h b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h index 9543450418ca..202d9ff6fff8 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHelpers.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h @@ -1,9 +1,7 @@ #pragma once -namespace Poco -{ -class Logger; -} +#include + namespace DB { @@ -30,6 +28,6 @@ void writeRemoteConvert( RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, - Poco::Logger * log); + LoggerPtr log); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp index 7992636ac112..33e53da2857f 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp @@ -10,7 +10,7 @@ namespace DB struct DistributedAsyncInsertSource::Data { - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; ReadBufferFromFile in; CompressedReadBuffer decompressing_in; @@ -19,7 +19,7 @@ struct DistributedAsyncInsertSource::Data Block first_block; explicit Data(const String & file_name) - : log(&Poco::Logger::get("DistributedAsyncInsertSource")) + : log(getLogger("DistributedAsyncInsertSource")) , in(file_name) , decompressing_in(in) , block_in(decompressing_in, DistributedAsyncInsertHeader::read(in, log).revision) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 650539ef1e92..1efa98d0c13b 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes extern const int ABORTED; } -static Block adoptBlock(const Block & header, const Block & block, Poco::Logger * log) +static Block adoptBlock(const Block & header, const Block & block, LoggerPtr log) { if (blocksHaveEqualStructure(header, block)) return block; @@ -84,7 +84,7 @@ static Block adoptBlock(const Block & header, const Block & block, Poco::Logger } -static void writeBlockConvert(PushingPipelineExecutor & executor, const Block & block, size_t repeats, Poco::Logger * log) +static void writeBlockConvert(PushingPipelineExecutor & 
executor, const Block & block, size_t repeats, LoggerPtr log) { Block adopted_block = adoptBlock(executor.getHeader(), block, log); for (size_t i = 0; i < repeats; ++i) @@ -126,7 +126,7 @@ DistributedSink::DistributedSink( , insert_timeout(insert_timeout_) , main_table(main_table_) , columns_to_send(columns_to_send_.begin(), columns_to_send_.end()) - , log(&Poco::Logger::get("DistributedSink")) + , log(getLogger("DistributedSink")) { const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 1bb4419e1a56..654c1db354f3 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -152,7 +152,7 @@ class DistributedSink : public SinkToStorage std::atomic finished_jobs_count{0}; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/FileLog/FileLogConsumer.cpp b/src/Storages/FileLog/FileLogConsumer.cpp index bfe081c7bad9..1bd3026ab8c6 100644 --- a/src/Storages/FileLog/FileLogConsumer.cpp +++ b/src/Storages/FileLog/FileLogConsumer.cpp @@ -22,7 +22,7 @@ FileLogConsumer::FileLogConsumer( ContextPtr context_, size_t stream_number_, size_t max_streams_number_) - : log(&Poco::Logger::get("FileLogConsumer " + toString(stream_number_))) + : log(getLogger("FileLogConsumer " + toString(stream_number_))) , storage(storage_) , batch_size(max_batch_size) , poll_timeout(poll_timeout_) diff --git a/src/Storages/FileLog/FileLogConsumer.h b/src/Storages/FileLog/FileLogConsumer.h index b19f3a9350ba..e44bfeb18064 100644 --- a/src/Storages/FileLog/FileLogConsumer.h +++ b/src/Storages/FileLog/FileLogConsumer.h @@ -42,7 +42,7 @@ class FileLogConsumer BufferStatus buffer_status = BufferStatus::INIT; - Poco::Logger * log; + LoggerPtr log; StorageFileLog & storage; diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp index 9d488616e851..844b31fd7c91 100644 --- a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp +++ b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp @@ -6,7 +6,7 @@ namespace DB FileLogDirectoryWatcher::FileLogDirectoryWatcher(const std::string & path_, StorageFileLog & storage_, ContextPtr context_) : path(path_) , storage(storage_) - , log(&Poco::Logger::get("FileLogDirectoryWatcher(" + path + ")")) + , log(getLogger("FileLogDirectoryWatcher(" + path + ")")) , dw(std::make_unique(*this, path, context_)) { } diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.h b/src/Storages/FileLog/FileLogDirectoryWatcher.h index 9b7afcf8e129..1cf3697c7c07 100644 --- a/src/Storages/FileLog/FileLogDirectoryWatcher.h +++ b/src/Storages/FileLog/FileLogDirectoryWatcher.h @@ -65,7 +65,7 @@ class FileLogDirectoryWatcher /// accessed in thread created by dw. 
Events events; - Poco::Logger * log; + LoggerPtr log; std::mutex mutex; diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index ef776a3d3137..9c7648ef658b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -139,7 +139,7 @@ StorageFileLog::StorageFileLog( , path(path_) , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) - , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) + , log(getLogger("StorageFileLog (" + table_id_.table_name + ")")) , disk(getContext()->getStoragePolicy("default")->getDisks().at(0)) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) { diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 33442d8b33b5..cc5815a1cef4 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -149,7 +149,7 @@ class StorageFileLog final : public IStorage, WithContext FileInfos file_infos; const String format_name; - Poco::Logger * log; + LoggerPtr log; DiskPtr disk; diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index b9642ec79078..a5a5a07c9a12 100644 --- a/src/Storages/Freeze.cpp +++ b/src/Storages/Freeze.cpp @@ -76,7 +76,7 @@ bool FreezeMetaData::load(DiskPtr data_disk, const String & path) readIntText(version, buffer); if (version < 1 || version > 2) { - LOG_ERROR(&Poco::Logger::get("FreezeMetaData"), "Unknown frozen metadata version: {}", version); + LOG_ERROR(getLogger("FreezeMetaData"), "Unknown frozen metadata version: {}", version); return false; } DB::assertChar('\n', buffer); diff --git a/src/Storages/Freeze.h b/src/Storages/Freeze.h index a64be7465dd2..5775653aaeaa 100644 --- a/src/Storages/Freeze.h +++ b/src/Storages/Freeze.h @@ -38,7 +38,7 @@ class Unfreezer private: ContextPtr local_context; zkutil::ZooKeeperPtr zookeeper; - Poco::Logger * log = &Poco::Logger::get("Unfreezer"); + LoggerPtr log = getLogger("Unfreezer"); static constexpr std::string_view backup_directory_prefix = "shadow"; static bool removeFreezedPart(DiskPtr disk, const String & path, const String & part_name, ContextPtr local_context, zkutil::ZooKeeperPtr zookeeper); }; diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index 553473fcc9eb..65df2c020ba1 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -44,7 +44,7 @@ AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( , prefetch_buffer(settings_.remote_fs_buffer_size) , read_until_position(impl->getFileSize()) , use_prefetch(settings_.remote_fs_prefetch) - , log(&Poco::Logger::get("AsynchronousReadBufferFromHDFS")) + , log(getLogger("AsynchronousReadBufferFromHDFS")) { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index 9c01bd6e434e..1d3e8b8e3e98 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -62,7 +62,7 @@ class AsynchronousReadBufferFromHDFS : public ReadBufferFromFileBase std::optional read_until_position; bool use_prefetch; - Poco::Logger * log; + LoggerPtr log; /// Metrics to profile prefetch Stopwatch interval_watch; diff --git a/src/Storages/HDFS/HDFSCommon.cpp 
b/src/Storages/HDFS/HDFSCommon.cpp index 12b32b740deb..f9a55a1285ad 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -55,7 +55,7 @@ void HDFSBuilderWrapper::loadFromConfig( need_kinit = true; hadoop_kerberos_keytab = config.getString(key_path); #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop_kerberos_keytab parameter is ignored because ClickHouse was built without support of krb5 library."); + LOG_WARNING(getLogger("HDFSClient"), "hadoop_kerberos_keytab parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 continue; } @@ -66,7 +66,7 @@ void HDFSBuilderWrapper::loadFromConfig( hadoop_kerberos_principal = config.getString(key_path); hdfsBuilderSetPrincipal(hdfs_builder, hadoop_kerberos_principal.c_str()); #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop_kerberos_principal parameter is ignored because ClickHouse was built without support of krb5 library."); + LOG_WARNING(getLogger("HDFSClient"), "hadoop_kerberos_principal parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 continue; } @@ -81,7 +81,7 @@ void HDFSBuilderWrapper::loadFromConfig( hadoop_security_kerberos_ticket_cache_path = config.getString(key_path); // standard param - pass further #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop.security.kerberos.ticket.cache.path parameter is ignored because ClickHouse was built without support of krb5 library."); + LOG_WARNING(getLogger("HDFSClient"), "hadoop.security.kerberos.ticket.cache.path parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 } @@ -95,7 +95,7 @@ void HDFSBuilderWrapper::loadFromConfig( #if USE_KRB5 void HDFSBuilderWrapper::runKinit() { - LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Running KerberosInit"); + LOG_DEBUG(getLogger("HDFSClient"), "Running KerberosInit"); try { kerberosInit(hadoop_kerberos_keytab,hadoop_kerberos_principal,hadoop_security_kerberos_ticket_cache_path); @@ -104,7 +104,7 @@ void HDFSBuilderWrapper::runKinit() { throw Exception(ErrorCodes::KERBEROS_ERROR, "KerberosInit failure: {}", getExceptionMessage(e, false)); } - LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Finished KerberosInit"); + LOG_DEBUG(getLogger("HDFSClient"), "Finished KerberosInit"); } #endif // USE_KRB5 diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 1e26f1be72c1..ab21c4946e43 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -877,7 +877,7 @@ class ReadFromHDFS : public SourceStepWithFilter void ReadFromHDFS::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e05..7170763c959f 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -105,7 +105,7 @@ class StorageHDFS final : public IStorage, WithContext bool is_path_with_globs; NamesAndTypesList virtual_columns; - Poco::Logger * log = &Poco::Logger::get("StorageHDFS"); + LoggerPtr log = getLogger("StorageHDFS"); }; class PullingPipelineExecutor; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp 
b/src/Storages/HDFS/StorageHDFSCluster.cpp index 2e8129b9845e..fad294361026 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -45,7 +45,7 @@ StorageHDFSCluster::StorageHDFSCluster( const ConstraintsDescription & constraints_, const String & compression_method_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index 609adcf65c94..b58302f262ec 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -25,7 +25,7 @@ static const int hive_metastore_client_recv_timeout_ms = 10000; static const int hive_metastore_client_send_timeout_ms = 10000; ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) - : PoolBase(max_hive_metastore_client_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")), builder(builder_) + : PoolBase(max_hive_metastore_client_connections, getLogger("ThriftHiveMetastoreClientPool")), builder(builder_) { } diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index e2c19fb1684e..0f9d3364ffd4 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -115,7 +115,7 @@ class HiveMetastoreClient const bool empty_partition_keys; const HiveFilesCachePtr hive_files_cache; - Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + LoggerPtr log = getLogger("HiveMetastoreClient"); }; @@ -138,7 +138,7 @@ class HiveMetastoreClient CacheBase table_metadata_cache; ThriftHiveMetastoreClientPool client_pool; - Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + LoggerPtr log = getLogger("HiveMetastoreClient"); }; using HiveMetastoreClientPtr = std::shared_ptr; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index a9347ac4d995..6766ecd6b4f4 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -411,7 +411,7 @@ class StorageHiveSource : public ISource, WithContext bool generate_chunk_from_metadata{false}; UInt64 current_file_remained_rows = 0; - Poco::Logger * log = &Poco::Logger::get("StorageHive"); + LoggerPtr log = getLogger("StorageHive"); }; @@ -780,7 +780,7 @@ class ReadFromHive : public SourceStepWithFilter HDFSFSPtr fs_, HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata_, Block sample_block_, - Poco::Logger * log_, + LoggerPtr log_, ContextPtr context_, size_t max_block_size_, size_t num_streams_) @@ -805,7 +805,7 @@ class ReadFromHive : public SourceStepWithFilter HDFSFSPtr fs; HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata; Block sample_block; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; size_t max_block_size; @@ -818,7 +818,7 @@ class ReadFromHive : public SourceStepWithFilter void ReadFromHive::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); createFiles(filter_actions_dag); } diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 
b0ec96604ccc..07440097f7a2 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -149,7 +149,7 @@ class StorageHive final : public IStorage, WithContext std::shared_ptr storage_settings; - Poco::Logger * log = &Poco::Logger::get("StorageHive"); + LoggerPtr log = getLogger("StorageHive"); }; } diff --git a/src/Storages/IMessageProducer.cpp b/src/Storages/IMessageProducer.cpp index cf3146960417..20c47f6f0b4a 100644 --- a/src/Storages/IMessageProducer.cpp +++ b/src/Storages/IMessageProducer.cpp @@ -4,7 +4,7 @@ namespace DB { -IMessageProducer::IMessageProducer(Poco::Logger * log_) : log(log_) +IMessageProducer::IMessageProducer(LoggerPtr log_) : log(log_) { } @@ -12,7 +12,16 @@ void AsynchronousMessageProducer::start(const ContextPtr & context) { LOG_TEST(log, "Executing startup"); - initialize(); + try + { + initialize(); + } + catch (...) + { + finished = true; + throw; + } + producing_task = context->getSchedulePool().createTask(getProducingTaskName(), [this] { LOG_TEST(log, "Starting producing task loop"); diff --git a/src/Storages/IMessageProducer.h b/src/Storages/IMessageProducer.h index 12580d5f94a3..c769c3251916 100644 --- a/src/Storages/IMessageProducer.h +++ b/src/Storages/IMessageProducer.h @@ -16,7 +16,7 @@ namespace DB class IMessageProducer { public: - explicit IMessageProducer(Poco::Logger * log_); + explicit IMessageProducer(LoggerPtr log_); /// Do some preparations. virtual void start(const ContextPtr & context) = 0; @@ -30,14 +30,14 @@ class IMessageProducer virtual ~IMessageProducer() = default; protected: - Poco::Logger * log; + LoggerPtr log; }; /// Implements interface for concurrent message producing. class AsynchronousMessageProducer : public IMessageProducer { public: - explicit AsynchronousMessageProducer(Poco::Logger * log_) : IMessageProducer(log_) {} + explicit AsynchronousMessageProducer(LoggerPtr log_) : IMessageProducer(log_) {} /// Create and schedule task in BackgroundSchedulePool that will produce messages. 
void start(const ContextPtr & context) override; diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 6f42d8f855ca..812b213cf33d 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -32,7 +32,7 @@ namespace DB IStorageCluster::IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, + LoggerPtr log_, bool structure_argument_was_provided_) : IStorage(table_id_) , log(log_) @@ -54,7 +54,7 @@ class ReadFromCluster : public SourceStepWithFilter ASTPtr query_to_send_, QueryProcessingStage::Enum processed_stage_, ClusterPtr cluster_, - Poco::Logger * log_, + LoggerPtr log_, ContextPtr context_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) , storage(std::move(storage_)) @@ -71,7 +71,7 @@ class ReadFromCluster : public SourceStepWithFilter ASTPtr query_to_send; QueryProcessingStage::Enum processed_stage; ClusterPtr cluster; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; std::optional extension; @@ -82,7 +82,7 @@ class ReadFromCluster : public SourceStepWithFilter void ReadFromCluster::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103db..8d93e94be9ad 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -19,7 +19,7 @@ class IStorageCluster : public IStorage IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, + LoggerPtr log_, bool structure_argument_was_provided_); void read( @@ -46,7 +46,7 @@ class IStorageCluster : public IStorage virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0; private: - Poco::Logger * log; + LoggerPtr log; String cluster_name; bool structure_argument_was_provided; }; diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 3031fc6bf9df..5175c93041b9 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -236,7 +236,7 @@ std::pair getFilterKeys( if (filter_nodes.nodes.empty()) return {{}, true}; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const auto * predicate = filter_actions_dag->getOutputs().at(0); FieldVectorPtr res = std::make_shared(); diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 40f2897322de..47167e19a38b 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -47,7 +47,7 @@ const auto DRAIN_TIMEOUT_MS = 5000ms; KafkaConsumer::KafkaConsumer( - Poco::Logger * log_, + LoggerPtr log_, size_t max_batch_size, size_t poll_timeout_, bool intermediate_commit_, diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index c4dfc56312fe..9cc78d428566 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -62,7 +62,7 @@ class KafkaConsumer }; KafkaConsumer( - Poco::Logger * log_, + LoggerPtr log_, size_t max_batch_size, size_t poll_timeout_, bool intermediate_commit_, @@ -150,7 +150,7 @@ class 
KafkaConsumer std::string rdkafka_stat; ConsumerPtr consumer; - Poco::Logger * log; + LoggerPtr log; const size_t batch_size = 1; const size_t poll_timeout = 0; size_t offsets_stored = 0; diff --git a/src/Storages/Kafka/KafkaProducer.cpp b/src/Storages/Kafka/KafkaProducer.cpp index edbfc76ef939..77676fb010b4 100644 --- a/src/Storages/Kafka/KafkaProducer.cpp +++ b/src/Storages/Kafka/KafkaProducer.cpp @@ -18,7 +18,7 @@ namespace DB KafkaProducer::KafkaProducer( ProducerPtr producer_, const std::string & topic_, std::chrono::milliseconds poll_timeout, std::atomic & shutdown_called_, const Block & header) - : IMessageProducer(&Poco::Logger::get("KafkaProducer")) + : IMessageProducer(getLogger("KafkaProducer")) , producer(producer_) , topic(topic_) , timeout(poll_timeout) diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index 1fbd7e2d705f..dc62c13f6332 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -33,7 +33,7 @@ KafkaSource::KafkaSource( const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, const Names & columns, - Poco::Logger * log_, + LoggerPtr log_, size_t max_block_size_, bool commit_in_suffix_) : ISource(storage_snapshot_->getSampleBlockForColumns(columns)) diff --git a/src/Storages/Kafka/KafkaSource.h b/src/Storages/Kafka/KafkaSource.h index 485a8e55b6a9..a1b94b15a19a 100644 --- a/src/Storages/Kafka/KafkaSource.h +++ b/src/Storages/Kafka/KafkaSource.h @@ -22,7 +22,7 @@ class KafkaSource : public ISource const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, const Names & columns, - Poco::Logger * log_, + LoggerPtr log_, size_t max_block_size_, bool commit_in_suffix = false); ~KafkaSource() override; @@ -41,7 +41,7 @@ class KafkaSource : public ISource StorageSnapshotPtr storage_snapshot; ContextPtr context; Names column_names; - Poco::Logger * log; + LoggerPtr log; UInt64 max_block_size; KafkaConsumerPtr consumer; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 522a381700d0..aa347fc719de 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -327,7 +327,7 @@ StorageKafka::StorageKafka( , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) - , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) + , log(getLogger("StorageKafka (" + table_id_.table_name + ")")) , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index d370d6018f77..f9a1e3ff6f3a 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -101,7 +101,7 @@ class StorageKafka final : public IStorage, WithContext const size_t max_rows_per_message; const String schema_name; const size_t num_consumers; /// total number of consumers - Poco::Logger * log; + LoggerPtr log; const bool intermediate_commit; const SettingsChanges settings_adjustments; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 3c116321083e..f81225bbee32 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -209,7 
+209,7 @@ StorageLiveView::StorageLiveView( live_view_context = Context::createCopy(getContext()); live_view_context->makeQueryContext(); - log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")"); + log = getLogger("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")"); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index e0566d586ee1..6b8780cb81b0 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -184,7 +184,7 @@ using MilliSeconds = std::chrono::milliseconds; ContextMutablePtr live_view_context; - Poco::Logger * log; + LoggerPtr log; bool is_periodically_refreshed = false; Seconds periodic_live_view_refresh; diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc26301e3b9e..daf7bd657841 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -27,7 +27,7 @@ namespace ErrorCodes RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) - : log(&Poco::Logger::get("RefreshTask")) + : log(getLogger("RefreshTask")) , refresh_schedule(strategy) {} diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 8a062f6f3591..78599f4f4b41 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -62,7 +62,7 @@ class RefreshTask : public std::enable_shared_from_this void setFakeTime(std::optional t); private: - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::weak_ptr view_to_refresh; /// Protects interrupt_execution and running_executor. 
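Most of the mechanical churn in this patch follows one rule: members, parameters and locals typed `Poco::Logger *` become `LoggerPtr`, and `&Poco::Logger::get(name)` call sites become `getLogger(name)`. A minimal sketch of the shape of that API, assuming `LoggerPtr` is a shared-ownership handle; the `getLogger` below is a simplified stand-in, not the real ClickHouse implementation:

```cpp
#include <Poco/Logger.h>

#include <memory>
#include <string>

// Simplified stand-ins that only illustrate the ownership difference
// versus a raw Poco::Logger *.
using LoggerPtr = std::shared_ptr<Poco::Logger>;

inline LoggerPtr getLogger(const std::string & name)
{
    // Poco::Logger::get() returns a reference to a logger owned by Poco's
    // registry; a no-op deleter leaves ownership with the registry while the
    // shared handle keeps call sites uniform.
    return LoggerPtr(&Poco::Logger::get(name), [](Poco::Logger *) {});
}

class DirectoryQueue
{
public:
    explicit DirectoryQueue(const std::string & table)
        : log(getLogger("DirectoryQueue (" + table + ")"))  // was: &Poco::Logger::get(...)
    {
    }

    void flush() { log->information("flushing pending files"); }

private:
    LoggerPtr log;  // was: Poco::Logger * log;
};

int main()
{
    DirectoryQueue queue("db.table");
    queue.flush();
}
```

The change is purely about ownership and lifetime of the handle; log call sites such as LOG_TRACE(log, ...) keep the same form, which is why most hunks here touch only declarations and constructors.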
diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index cc3bc8fc2a8e..9d64592ed64f 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -60,7 +60,7 @@ AsyncBlockIDsCache::AsyncBlockIDsCache(TStorage & storage_) , update_wait(storage.getSettings()->async_block_ids_cache_update_wait_ms) , path(storage.getZooKeeperPath() + "/async_blocks") , log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ update(); }); } diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h index 38c38da0033d..bea012f1d329 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.h +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h @@ -43,7 +43,7 @@ class AsyncBlockIDsCache BackgroundSchedulePool::TaskHolder task; const String log_name; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index f4b92ff8c577..000d36752cb0 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -57,7 +57,7 @@ std::string DataPartStorageOnDiskBase::getRelativePath() const return fs::path(root_path) / part_dir / ""; } -std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const +std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(LoggerPtr log, const String & prefix, bool detached, bool broken) const { assert(!broken || detached); String res; @@ -335,7 +335,9 @@ void DataPartStorageOnDiskBase::backup( const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const { fs::path part_path_on_disk = fs::path{root_path} / part_dir; fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; @@ -377,7 +379,7 @@ void DataPartStorageOnDiskBase::backup( bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks; - for (const auto & filepath : files_to_backup) + auto backup_file = [&](const String & filepath) { auto filepath_on_disk = part_path_on_disk / filepath; auto filepath_in_backup = part_path_in_backup / filepath; @@ -385,8 +387,10 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); - continue; + return; } + else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk)) + return; if (make_temporary_hard_links) { @@ -411,6 +415,31 @@ void DataPartStorageOnDiskBase::backup( backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner); backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry)); + }; + + auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup"); + + for (const auto & filepath : files_to_backup) + { + if (is_projection_part && allow_backup_broken_projection) + { + try + { + backup_file(filepath); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::FILE_DOESNT_EXIST) + throw; + + 
LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir); + continue; + } + } + else + { + backup_file(filepath); + } } } @@ -471,7 +500,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const DiskPtr & dst_disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook) const { String path_to_clone = fs::path(to) / dir_path / ""; @@ -505,7 +534,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( void DataPartStorageOnDiskBase::rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) { @@ -564,7 +593,7 @@ void DataPartStorageOnDiskBase::remove( const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) + LoggerPtr log) { /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition /// when we try to remove two parts with the same name, but different relative paths, @@ -722,7 +751,7 @@ void DataPartStorageOnDiskBase::clearDirectory( const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, bool is_temp, - Poco::Logger * log) + LoggerPtr log) { auto disk = volume->getDisk(); auto [can_remove_shared_data, names_not_to_remove] = can_remove_description; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 339acce59537..75bf3d6f93cb 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -25,7 +25,7 @@ class DataPartStorageOnDiskBase : public IDataPartStorage UInt64 calculateTotalSizeOnDisk() const override; /// Returns path to place detached part in or nullopt if we don't need to detach part (if it already exists and has the same content) - std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override; + std::optional getRelativePathForPrefix(LoggerPtr log, const String & prefix, bool detached, bool broken) const override; /// Returns true if detached part already exists and has the same content (compares checksums.txt and the list of files) bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional & original_checksums_content, @@ -58,7 +58,9 @@ class DataPartStorageOnDiskBase : public IDataPartStorage const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const override; + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const override; MutableDataPartStoragePtr freeze( const std::string & to, @@ -74,14 +76,14 @@ class DataPartStorageOnDiskBase : public IDataPartStorage const DiskPtr & dst_disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook ) const override; void rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) override; @@ -90,7 +92,7 @@ class DataPartStorageOnDiskBase : public IDataPartStorage const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) override; + LoggerPtr log) override; void 
changeRootPath(const std::string & from_root, const std::string & to_root) override; void createDirectories() override; @@ -130,7 +132,7 @@ class DataPartStorageOnDiskBase : public IDataPartStorage const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, bool is_temp, - Poco::Logger * log); + LoggerPtr log); /// For names of expected data part files returns the actual names /// of files in filesystem to which data of these files is written. diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index a59f2a356e88..ce70fbe18e50 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -99,7 +99,7 @@ struct ReplicatedFetchReadCallback Service::Service(StorageReplicatedMergeTree & data_) : data(data_) - , log(&Poco::Logger::get(data.getStorageID().getNameForLogs() + " (Replicated PartsService)")) + , log(getLogger(data.getStorageID().getNameForLogs() + " (Replicated PartsService)")) {} std::string Service::getId(const std::string & node_id) const @@ -415,7 +415,7 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) Fetcher::Fetcher(StorageReplicatedMergeTree & data_) : data(data_) - , log(&Poco::Logger::get(data.getStorageID().getNameForLogs() + " (Fetcher)")) + , log(getLogger(data.getStorageID().getNameForLogs() + " (Fetcher)")) {} std::pair Fetcher::fetchSelectedPart( diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 07939a660a87..8c15dc3cfdb4 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -55,7 +55,7 @@ class Service final : public InterserverIOEndpoint /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, /// so Service will never access dangling reference to storage StorageReplicatedMergeTree & data; - Poco::Logger * log; + LoggerPtr log; }; /** Client for getting the parts from the table *MergeTree. 
@@ -137,7 +137,7 @@ class Fetcher final : private boost::noncopyable ThrottlerPtr throttler); StorageReplicatedMergeTree & data; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index 5741e11aa224..1ffb51774302 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -64,7 +64,7 @@ std::optional createEphemeralLockInZooKeeper( { const String & failed_op_path = ops[failed_idx]->getPath(); LOG_DEBUG( - &Poco::Logger::get("createEphemeralLockInZooKeeper"), + getLogger("createEphemeralLockInZooKeeper"), "Deduplication path already exists: deduplication_path={}", failed_op_path); return EphemeralLockInZooKeeper{"", nullptr, "", failed_op_path}; @@ -73,7 +73,7 @@ std::optional createEphemeralLockInZooKeeper( else if (responses[0]->error == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG( - &Poco::Logger::get("createEphemeralLockInZooKeeper"), + getLogger("createEphemeralLockInZooKeeper"), "Deduplication path already exists: deduplication_path={}", deduplication_path); return {}; @@ -119,7 +119,7 @@ EphemeralLockInZooKeeper::~EphemeralLockInZooKeeper() { if (Coordination::isHardwareError(e.code)) LOG_DEBUG( - &Poco::Logger::get("EphemeralLockInZooKeeper"), + getLogger("EphemeralLockInZooKeeper"), "ZooKeeper communication error during unlock: code={} message='{}'", e.code, e.message()); @@ -130,7 +130,7 @@ EphemeralLockInZooKeeper::~EphemeralLockInZooKeeper() /// But it's possible that the multi op request can be executed on server side, and client will not get response due to network issue. /// In such case, assumeUnlocked() will not be called, so we'll get ZNONODE error here since the noded is already deleted LOG_DEBUG( - &Poco::Logger::get("EphemeralLockInZooKeeper"), + getLogger("EphemeralLockInZooKeeper"), "ZooKeeper node was already deleted: code={} message={}", e.code, e.message()); @@ -168,7 +168,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( Coordination::Error rc = zookeeper->tryMulti(lock_ops, lock_responses); if (rc == Coordination::Error::ZBADVERSION) { - LOG_TRACE(&Poco::Logger::get("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry."); + LOG_TRACE(getLogger("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry."); continue; } else if (rc != Coordination::Error::ZOK) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index afbe91a8a6d7..d06d9791a538 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -151,12 +151,12 @@ class IDataPartStorage : public boost::noncopyable const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) = 0; + LoggerPtr log) = 0; /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. /// TODO: remove it. virtual std::optional getRelativePathForPrefix( - Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0; + LoggerPtr log, const String & prefix, bool detached, bool broken) const = 0; /// Reset part directory, used for in-memory parts. /// TODO: remove it. 
@@ -223,7 +223,9 @@ class IDataPartStorage : public boost::noncopyable const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, - TemporaryFilesOnDisks * temp_dirs) const = 0; + TemporaryFilesOnDisks * temp_dirs, + bool is_projection_part, + bool allow_backup_broken_projection) const = 0; /// Creates hardlinks into 'to/dir_path' for every file in data part. /// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed. @@ -263,7 +265,7 @@ class IDataPartStorage : public boost::noncopyable const DiskPtr & disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook ) const = 0; @@ -314,7 +316,7 @@ class IDataPartStorage : public boost::noncopyable virtual void rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87f23b0da2ad..8f7be6e863da 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -81,6 +81,7 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); + hyperrectangle.clear(); hyperrectangle.reserve(minmax_idx_size); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -104,6 +105,39 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par initialized = true; } +Block IMergeTreeDataPart::MinMaxIndex::getBlock(const MergeTreeData & data) const +{ + if (!initialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to get block from uninitialized MinMax index."); + + Block block; + + const auto metadata_snapshot = data.getInMemoryMetadataPtr(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); + + const auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + const auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + const auto minmax_idx_size = minmax_column_types.size(); + + for (size_t i = 0; i < minmax_idx_size; ++i) + { + const auto & data_type = minmax_column_types[i]; + const auto & column_name = minmax_column_names[i]; + + const auto column = data_type->createColumn(); + + const auto min_val = hyperrectangle.at(i).left; + const auto max_val = hyperrectangle.at(i).right; + + column->insert(min_val); + column->insert(max_val); + + block.insert(ColumnWithTypeAndName(column->getPtr(), data_type, column_name)); + } + + return block; +} + IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store( const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const { @@ -185,8 +219,7 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other) if (!initialized) { - hyperrectangle = other.hyperrectangle; - initialized = true; + *this = other; } else { @@ -673,13 +706,14 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
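The IMergeTreeDataPart hunks that follow change how projection failures are handled: loadProjections gains a has_broken_projection out-parameter, catches non-retryable load errors, and records them via setBrokenReason instead of failing the whole data part, while the parent part skips its consistency check when a broken projection was found (the backup code earlier in the patch similarly tolerates missing projection files when allow_backup_broken_projection is set). A minimal sketch of that error-isolation shape; the Projection type and its load() body here are illustrative only, not ClickHouse code:

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

// Simplified model of a projection part: it either loads or records why it is
// broken, mirroring the is_broken/exception fields added by the patch.
struct Projection
{
    std::string name;
    bool is_broken = false;
    std::string exception;

    void load()
    {
        if (name == "bad_projection")
            throw std::runtime_error("checksum mismatch in " + name);
    }

    void setBrokenReason(const std::string & message)
    {
        if (is_broken)
            return;
        is_broken = true;
        exception = message;
    }
};

// Load every projection; instead of failing the whole data part, mark the
// offending projection broken and report it through the out-parameter, the
// same shape as loadProjections(..., bool & has_broken_projection, ...).
void loadProjections(std::map<std::string, Projection> & projections, bool & has_broken_projection)
{
    for (auto & [name, projection] : projections)
    {
        try
        {
            projection.load();
        }
        catch (const std::exception & e)
        {
            projection.setBrokenReason(e.what());
            has_broken_projection = true;
            std::cerr << "Cannot load projection " << name << ", will consider it broken: " << e.what() << '\n';
        }
    }
}

int main()
{
    std::map<std::string, Projection> projections{{"ok_projection", {"ok_projection"}}, {"bad_projection", {"bad_projection"}}};
    bool has_broken_projection = false;
    loadProjections(projections, has_broken_projection);
    // The parent part skips its consistency check when any projection is broken.
    std::cout << "has broken projection: " << std::boolalpha << has_broken_projection << '\n';
}
```

The checkDataPart call in loadChecksums gains a matching throw_on_broken_projection flag in the hunks below, so the same policy applies when checksums are recomputed from disk.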
loadPartitionAndMinMaxIndex(); + bool has_broken_projections = false; if (!parent_part) { loadTTLInfos(); - loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */); + loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */); } - if (check_consistency) + if (check_consistency && !has_broken_projections) checkConsistency(require_columns_checksums); loadDefaultCompressionCodec(); @@ -741,7 +775,7 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -758,10 +792,34 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch else { auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + + try + { + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + } + catch (...) + { + if (isRetryableException(std::current_exception())) + throw; + + auto message = getCurrentExceptionMessage(true); + LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"), + "Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message); + + has_broken_projection = true; + part->setBrokenReason(message, getCurrentExceptionCode()); + } + addProjectionPart(projection.name, std::move(part)); } } + else if (checksums.has(path)) + { + auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); + part->setBrokenReason("Projection directory " + path + " does not exist while loading projections", ErrorCodes::NO_FILE_IN_DATA_PART); + addProjectionPart(projection.name, std::move(part)); + has_broken_projection = true; + } } } @@ -1156,7 +1214,8 @@ void IMergeTreeDataPart::loadChecksums(bool require) /// Check the data while we are at it. LOG_WARNING(storage.log, "Checksums for part {} not found. 
Will calculate them from data on disk.", name); - checksums = checkDataPart(shared_from_this(), false); + bool noop; + checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false); writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); @@ -1663,7 +1722,7 @@ try metadata_manager->deleteAll(true); metadata_manager->assertAllDeleted(true); - getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); + getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log.load(), remove_new_dir_if_exists, fsync_dir); metadata_manager->updateAll(true); auto new_projection_root_path = to.string(); @@ -1758,7 +1817,7 @@ void IMergeTreeDataPart::remove() } bool is_temporary_part = is_temp || state == MergeTreeDataPartState::Temporary; - getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temporary_part, storage.log); + getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temporary_part, storage.log.load()); } std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const @@ -1775,7 +1834,7 @@ std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String if (detached && parent_part) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection"); - return getDataPartStorage().getRelativePathForPrefix(storage.log, prefix, detached, broken); + return getDataPartStorage().getRelativePathForPrefix(storage.log.load(), prefix, detached, broken); } std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix, bool broken) const @@ -1841,7 +1900,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk( throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; - return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, storage.log, cancellation_hook); + return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, storage.log.load(), cancellation_hook); } UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const @@ -2163,6 +2222,32 @@ std::optional IMergeTreeDataPart::getStreamNameForColumn( return getStreamNameOrHash(stream_name, extension, storage_); } +void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const +{ + auto it = projection_parts.find(projection_name); + if (it == projection_parts.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name); + it->second->setBrokenReason(message, code); +} + +bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const +{ + auto it = projection_parts.find(projection_name); + if (it == projection_parts.end()) + return false; + return it->second->is_broken; +} + +void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const +{ + std::lock_guard lock(broken_reason_mutex); + if (is_broken) + return; + is_broken = true; + exception = message; + exception_code = code; +} + bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); diff 
--git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 640a1f1d0a34..b118aa78ce97 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -261,6 +261,12 @@ class IMergeTreeDataPart : public std::enable_shared_from_this is_frozen {false}; + /// If it is a projection part, it can be broken sometimes. + mutable std::atomic is_broken {false}; + mutable std::string exception; + mutable int exception_code = 0; + mutable std::mutex broken_reason_mutex; + /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper mutable bool is_unexpected_local_part = false; @@ -336,6 +342,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this>; @@ -423,9 +430,16 @@ class IMergeTreeDataPart : public std::enable_shared_from_this && projection_part); + void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const; + bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } - void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false); + bool hasBrokenProjection(const String & projection_name) const; + + /// Return true, if all projections were loaded successfully and none was marked as broken. + void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); + + void setBrokenReason(const String & message, int code) const; /// Return set of metadata file names without checksums. For example, /// columns.txt or checksums.txt itself. @@ -579,7 +593,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this> projection_parts; + mutable std::map> projection_parts; mutable PartMetadataManagerPtr metadata_manager; diff --git a/src/Storages/MergeTree/InsertBlockInfo.cpp b/src/Storages/MergeTree/InsertBlockInfo.cpp index ac900f8cf097..2de3ae8996a7 100644 --- a/src/Storages/MergeTree/InsertBlockInfo.cpp +++ b/src/Storages/MergeTree/InsertBlockInfo.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes } AsyncInsertBlockInfo::AsyncInsertBlockInfo( - Poco::Logger * log_, + LoggerPtr log_, std::vector && block_id_, BlockWithPartition && block_, std::optional && unmerged_block_with_partition_) diff --git a/src/Storages/MergeTree/InsertBlockInfo.h b/src/Storages/MergeTree/InsertBlockInfo.h index 3882373c0fa9..7d7ec0c9f292 100644 --- a/src/Storages/MergeTree/InsertBlockInfo.h +++ b/src/Storages/MergeTree/InsertBlockInfo.h @@ -8,7 +8,7 @@ namespace DB struct SyncInsertBlockInfo { SyncInsertBlockInfo( - Poco::Logger * /*log_*/, + LoggerPtr /*log_*/, std::string && block_id_, BlockWithPartition && /*block_*/, std::optional && /*unmerged_block_with_partition_*/) @@ -25,7 +25,7 @@ struct SyncInsertBlockInfo struct AsyncInsertBlockInfo { - Poco::Logger * log; + LoggerPtr log; std::vector block_id; BlockWithPartition block_with_partition; /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets @@ -34,7 +34,7 @@ struct AsyncInsertBlockInfo std::unordered_map> block_id_to_offset_idx; AsyncInsertBlockInfo( - Poco::Logger * log_, + LoggerPtr log_, std::vector && block_id_, BlockWithPartition && block_, std::optional && unmerged_block_with_partition_); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d5922ae1bc2d..e5bcb11091f5 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1,36 +1,37 @@ -#include -#include -#include +#include +#include #include #include #include #include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include #include #include #include -#include -#include +#include +#include +#include #include +#include +#include +#include #include #include @@ -836,21 +837,6 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants return node.tryGetConstant(out_value, out_type); } - -static Field applyFunctionForField( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName columns - { - { arg_type->createColumnConst(1, arg_value), arg_type, "x" }, - }; - - auto col = func->execute(columns, func->getResultType(), 1); - return (*col)[0]; -} - /// The case when arguments may have types different than in the primary key. static std::pair applyFunctionForFieldOfUnknownType( const FunctionBasePtr & func, @@ -890,33 +876,6 @@ static std::pair applyBinaryFunctionForFieldOfUnknownType( return {std::move(result), std::move(return_type)}; } - -static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) -{ - /// Fallback for fields without block reference. - if (field.isExplicit()) - return applyFunctionForField(func, current_type, field); - - String result_name = "_" + func->getName() + "_" + toString(field.column_idx); - const auto & columns = field.columns; - size_t result_idx = columns->size(); - - for (size_t i = 0; i < result_idx; ++i) - { - if ((*columns)[i].name == result_name) - result_idx = i; - } - - if (result_idx == columns->size()) - { - ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; - field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name}); - (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); - } - - return {field.columns, field.row_idx, result_idx}; -} - /** When table's key has expression with these functions from a column, * and when a column in a query is compared with a constant, such as: * CREATE TABLE (x String) ORDER BY toDate(x) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 2e48892563be..3bd486fd54a9 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -19,7 +19,7 @@ namespace zkutil * For now, every replica can become leader if there is no leader among replicas with old version. 
*/ -void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String path) +void checkNoOldLeaders(LoggerPtr log, ZooKeeper & zookeeper, const String path) { /// Previous versions (before 21.12) used to create ephemeral sequential node path/leader_election- /// Replica with the lexicographically smallest node name becomes leader (before 20.6) or enables multi-leader mode (since 20.6) diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 23037b1ee7ab..ae6e398026d7 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -28,7 +28,7 @@ MergeFromLogEntryTask::MergeFromLogEntryTask( StorageReplicatedMergeTree & storage_, IExecutableTask::TaskResultCallback & task_result_callback_) : ReplicatedMergeMutateTaskBase( - &Poco::Logger::get( + getLogger( storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MergeFromLogEntryTask)"), storage_, selected_entry_, diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 4b5b7ca8018a..58418ef35f2e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -588,7 +588,15 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const auto pipe = Pipe::unitePipes(std::move(pipes)); ctx->rows_sources_read_buf->seek(0, 0); - auto transform = std::make_unique(pipe.getHeader(), pipe.numOutputPorts(), *ctx->rows_sources_read_buf); + + const auto data_settings = global_ctx->data->getSettings(); + auto transform = std::make_unique( + pipe.getHeader(), + pipe.numOutputPorts(), + *ctx->rows_sources_read_buf, + data_settings->merge_max_block_size, + data_settings->merge_max_block_size_bytes); + pipe.addTransform(std::move(transform)); ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); @@ -720,8 +728,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c MergeTreeData::DataPartsVector projection_parts; for (const auto & part : global_ctx->future_part->parts) { - auto it = part->getProjectionParts().find(projection.name); - if (it != part->getProjectionParts().end()) + auto actual_projection_parts = part->getProjectionParts(); + auto it = actual_projection_parts.find(projection.name); + if (it != actual_projection_parts.end() && !it->second->is_broken) projection_parts.push_back(it->second); } if (projection_parts.size() < global_ctx->future_part->parts.size()) diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index b2a5796737d8..6f5336baaad3 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -228,7 +228,7 @@ class MergeTask size_t sum_compressed_bytes_upper_bound{0}; bool blocks_are_granules_size{false}; - Poco::Logger * log{&Poco::Logger::get("MergeTask::PrepareStage")}; + LoggerPtr log{getLogger("MergeTask::PrepareStage")}; /// Dependencies for next stages std::list::const_iterator it_name_and_type; @@ -354,7 +354,7 @@ class MergeTask MergeTasks tasks_for_projections; MergeTasks::iterator projections_iterator; - Poco::Logger * log{&Poco::Logger::get("MergeTask::MergeProjectionsStage")}; + LoggerPtr log{getLogger("MergeTask::MergeProjectionsStage")}; }; using MergeProjectionsRuntimeContextPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index a3f8e02f5eb1..8cb0badc19bf 100644 --- 
a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -144,7 +144,7 @@ bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) return true; } -void printExceptionWithRespectToAbort(Poco::Logger * log, const String & query_id) +void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id) { std::exception_ptr ex = std::current_exception(); diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index 63f75ffc8d93..0ed032935894 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -307,7 +307,7 @@ class MergeTreeBackgroundExecutor final : boost::noncopyable std::condition_variable has_tasks TSA_GUARDED_BY(mutex); bool shutdown TSA_GUARDED_BY(mutex) = false; std::unique_ptr pool; - Poco::Logger * log = &Poco::Logger::get("MergeTreeBackgroundExecutor"); + LoggerPtr log = getLogger("MergeTreeBackgroundExecutor"); }; extern template class MergeTreeBackgroundExecutor; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 61332a4ff384..3ca746a71973 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,21 +8,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -35,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -43,47 +27,58 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include #include +#include #include -#include -#include #include +#include +#include #include #include #include #include #include -#include #include #include -#include #include #include #include #include #include #include +#include #include #include +#include #include #include -#include #include -#include +#include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -93,9 +88,7 @@ #include #include -#include #include -#include #include #include #include @@ -197,6 +190,50 @@ namespace ErrorCodes extern const int LIMIT_EXCEEDED; } +static size_t getPartitionAstFieldsCount(const ASTPartition & partition_ast, ASTPtr partition_value_ast) +{ + if (partition_ast.fields_count.has_value()) + return *partition_ast.fields_count; + + if (partition_value_ast->as()) + return 1; + + const auto * tuple_ast = partition_value_ast->as(); + + if (!tuple_ast) + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); + } + + if (tuple_ast->name != "tuple") + { + if (!isFunctionCast(tuple_ast)) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + + if (tuple_ast->arguments->as()->children.empty()) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + + auto first_arg = tuple_ast->arguments->as()->children.at(0); + if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") + { + const auto * arguments_ast = tuple_ast->arguments->as(); + return arguments_ast ? 
arguments_ast->children.size() : 0; + } + else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) + { + return inner_literal_tuple->value.getType() == Field::Types::Tuple ? inner_literal_tuple->value.safeGet().size() : 1; + } + + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + } + else + { + const auto * arguments_ast = tuple_ast->arguments->as(); + return arguments_ast ? arguments_ast->children.size() : 0; + } +} + static void checkSuspiciousIndices(const ASTFunction * index_function) { std::unordered_set unique_index_expression_hashes; @@ -300,7 +337,11 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (disk->isBroken()) continue; - if (!disk->isReadOnly()) + /// Write once disk is almost the same as read-only for MergeTree, + /// since it does not support move, that is required for any + /// operation over MergeTree, so avoid writing format_version.txt + /// into it as well, to avoid leaving it after DROP. + if (!disk->isReadOnly() && !disk->isWriteOnce()) { auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, getContext()->getWriteSettings()); writeIntText(format_version.toUnderType(), *buf); @@ -354,8 +395,7 @@ MergeTreeData::MergeTreeData( , merging_params(merging_params_) , require_part_metadata(require_part_metadata_) , broken_part_callback(broken_part_callback_) - , log_name(std::make_shared(table_id_.getNameForLogs())) - , log(&Poco::Logger::get(*log_name)) + , log(table_id_.getNameForLogs()) , storage_settings(std::move(storage_settings_)) , pinned_part_uuids(std::make_shared()) , data_parts_by_info(data_parts_indexes.get()) @@ -1222,7 +1262,7 @@ MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes) } static std::optional calculatePartSizeSafe( - const MergeTreeData::DataPartPtr & part, Poco::Logger * log) + const MergeTreeData::DataPartPtr & part, const LoggerPtr & log) { try { @@ -1296,7 +1336,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( res.is_broken = true; tryLogCurrentException(log, fmt::format("while loading part {} on path {}", part_name, part_path)); - res.size_of_part = calculatePartSizeSafe(res.part, log); + res.size_of_part = calculatePartSizeSafe(res.part, log.load()); auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; LOG_ERROR(log, @@ -1327,7 +1367,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( if (part_disk_ptr->exists(marker_path)) { /// NOTE: getBytesOnDisk() cannot be used here, since it may be zero if checksums.txt does not exist. - res.size_of_part = calculatePartSizeSafe(res.part, log); + res.size_of_part = calculatePartSizeSafe(res.part, log.load()); res.is_broken = true; auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; @@ -2114,7 +2154,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, siz { /// Actually we don't rely on temporary_directories_lifetime when removing old temporaries directories, /// it's just an extra level of protection just in case we have a bug. 
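The getPartitionAstFieldsCount helper extracted above only decides how many fields a PARTITION BY value carries: one for a plain literal, otherwise the arity of the tuple. A toy model of that decision, using a std::variant in place of the real AST classes (Literal and Tuple here are hypothetical stand-ins, not ClickHouse types, and the cast/nested-tuple corner cases are deliberately left out):

    #include <cstddef>
    #include <iostream>
    #include <variant>
    #include <vector>

    struct Literal { };                           // stand-in for a single partition literal
    using Tuple = std::vector<Literal>;           // stand-in for tuple(...) in the partition value
    using PartitionValue = std::variant<Literal, Tuple>;

    size_t getPartitionFieldsCount(const PartitionValue & value)
    {
        if (std::holds_alternative<Literal>(value))
            return 1;                             // PARTITION BY toYYYYMM(d): a single scalar value
        return std::get<Tuple>(value).size();     // PARTITION BY (a, b): one field per tuple element
    }

    int main()
    {
        std::cout << getPartitionFieldsCount(Literal{}) << '\n';                  // 1
        std::cout << getPartitionFieldsCount(Tuple{Literal{}, Literal{}}) << '\n'; // 2
    }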
- LOG_INFO(LogFrequencyLimiter(log, 10), "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); + LOG_INFO(LogFrequencyLimiter(log.load(), 10), "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); continue; } else if (!disk->exists(it->path())) @@ -2734,12 +2774,20 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ void MergeTreeData::renameInMemory(const StorageID & new_table_id) { IStorage::renameInMemory(new_table_id); - std::atomic_store(&log_name, std::make_shared(new_table_id.getNameForLogs())); - log = &Poco::Logger::get(*log_name); + log.store(new_table_id.getNameForLogs()); } void MergeTreeData::dropAllData() { + /// In case there is read-only/write-once disk we cannot allow to call dropAllData(), but dropping tables is allowed. + /// + /// Note, that one may think that drop on write-once disk should be + /// supported, since it is pretty trivial to implement + /// MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(), + /// however removing part requires moveDirectory() as well. + if (isStaticStorage()) + return; + LOG_TRACE(log, "dropAllData: waiting for locks."); auto settings_ptr = getSettings(); @@ -4854,7 +4902,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D } void MergeTreeData::checkAlterPartitionIsPossible( - const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const + const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr) const { for (const auto & command : commands) { @@ -4882,7 +4930,15 @@ void MergeTreeData::checkAlterPartitionIsPossible( throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently"); } else - getPartitionIDFromQuery(command.partition, local_context); + { + // The below `getPartitionIDFromQuery` call will not work for attach / replace because it assumes the partition expressions + // are the same and deliberately uses this storage. Later on, `MergeTreeData::replaceFrom` is called, and it makes the right + // call to `getPartitionIDFromQuery` using source storage. + // Note: `PartitionCommand::REPLACE_PARTITION` is used both for `REPLACE PARTITION` and `ATTACH PARTITION FROM` queries. + // But not for `ATTACH PARTITION` queries. 
+ if (command.type != PartitionCommand::REPLACE_PARTITION) + getPartitionIDFromQuery(command.partition, getContext()); + } } } } @@ -5286,7 +5342,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( if (hold_table_lock && !table_lock) table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - if (backup_settings.check_parts) + if (backup_settings.check_projection_parts) part->checkConsistencyWithProjections(/* require_part_metadata= */ true); BackupEntries backup_entries_from_part; @@ -5298,7 +5354,8 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs); + &temp_dirs, + false, false); auto projection_parts = part->getProjectionParts(); for (const auto & [projection_name, projection_part] : projection_parts) @@ -5311,7 +5368,9 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( read_settings, make_temporary_hard_links, backup_entries_from_part, - &temp_dirs); + &temp_dirs, + projection_part->is_broken, + backup_settings.allow_backup_broken_projections); } if (hold_storage_and_part_ptrs) @@ -5616,69 +5675,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version); return partition_ast.id->as()->value.safeGet(); } - size_t partition_ast_fields_count = 0; ASTPtr partition_value_ast = partition_ast.value->clone(); - if (!partition_ast.fields_count.has_value()) - { - if (partition_value_ast->as()) - { - partition_ast_fields_count = 1; - } - else if (const auto * tuple_ast = partition_value_ast->as()) - { - if (tuple_ast->name != "tuple") - { - if (isFunctionCast(tuple_ast)) - { - if (tuple_ast->arguments->as()->children.empty()) - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - auto first_arg = tuple_ast->arguments->as()->children.at(0); - if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") - { - const auto * arguments_ast = tuple_ast->arguments->as(); - if (arguments_ast) - partition_ast_fields_count = arguments_ast->children.size(); - else - partition_ast_fields_count = 0; - } - else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) - { - if (inner_literal_tuple->value.getType() == Field::Types::Tuple) - partition_ast_fields_count = inner_literal_tuple->value.safeGet().size(); - else - partition_ast_fields_count = 1; - } - else - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - } - else - throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - else - { - const auto * arguments_ast = tuple_ast->arguments->as(); - if (arguments_ast) - partition_ast_fields_count = arguments_ast->children.size(); - else - partition_ast_fields_count = 0; - } - } - else - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); - } - } - else - { - partition_ast_fields_count = *partition_ast.fields_count; - } + auto partition_ast_fields_count = getPartitionAstFieldsCount(partition_ast, partition_value_ast); if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -6249,13 +6247,13 @@ ReservationPtr 
MergeTreeData::tryReserveSpacePreferringTTLRules( log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); else if (move_ttl_entry->destination_type == DataDestinationType::DISK && !move_ttl_entry->if_exists) LOG_WARNING( log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } else if (is_insert && !perform_ttl_move_on_insert) { @@ -6264,7 +6262,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( "TTL move on insert to {} {} for table {} is disabled", (move_ttl_entry->destination_type == DataDestinationType::VOLUME ? "volume" : "disk"), move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } else { @@ -6280,13 +6278,13 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but there is not enough space", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); else if (move_ttl_entry->destination_type == DataDestinationType::DISK) LOG_WARNING( log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but there is not enough space", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } } } @@ -7014,23 +7012,35 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); - auto query_to_string = [] (const ASTPtr & ast) - { - return ast ? 
queryToString(ast) : ""; - }; - - if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) + if (queryToStringNullable(my_snapshot->getSortingKeyAST()) != queryToStringNullable(src_snapshot->getSortingKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); - if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key"); - if (format_version != src_data->format_version) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); - if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) + if (queryToStringNullable(my_snapshot->getPrimaryKeyAST()) != queryToStringNullable(src_snapshot->getPrimaryKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); + const auto is_a_subset_of = [](const auto & lhs, const auto & rhs) + { + if (lhs.size() > rhs.size()) + return false; + + const auto rhs_set = NameSet(rhs.begin(), rhs.end()); + for (const auto & lhs_element : lhs) + if (!rhs_set.contains(lhs_element)) + return false; + + return true; + }; + + if (!is_a_subset_of(my_snapshot->getColumnsRequiredForPartitionKey(), src_snapshot->getColumnsRequiredForPartitionKey())) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Destination table partition expression columns must be a subset of source table partition expression columns"); + } + const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) { if (my_descriptions.size() != src_descriptions.size()) @@ -7071,128 +7081,56 @@ std::pair MergeTreeData::cloneAn const ReadSettings & read_settings, const WriteSettings & write_settings) { - /// Check that the storage policy contains the disk where the src_part is located. - bool does_storage_policy_allow_same_disk = false; - for (const DiskPtr & disk : getStoragePolicy()->getDisks()) - { - if (disk->getName() == src_part->getDataPartStorage().getDiskName()) - { - does_storage_policy_allow_same_disk = true; - break; - } - } - if (!does_storage_policy_allow_same_disk) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Could not clone and load part {} because disk does not belong to storage policy", - quoteString(src_part->getDataPartStorage().getFullPath())); - - String dst_part_name = src_part->getNewName(dst_part_info); - String tmp_dst_part_name = tmp_part_prefix + dst_part_name; - auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); - - /// Why it is needed if we only hardlink files? 
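The is_a_subset_of lambda introduced in checkStructureAndGetMergeTreeData above only verifies that every destination partition-key column also appears in the source table's partition key. The same check in isolation, assuming plain column-name vectors rather than the storage snapshots used in the real code:

    #include <iostream>
    #include <string>
    #include <unordered_set>
    #include <vector>

    // Returns true when every name in lhs is also present in rhs.
    bool isSubsetOf(const std::vector<std::string> & lhs, const std::vector<std::string> & rhs)
    {
        if (lhs.size() > rhs.size())
            return false;

        const std::unordered_set<std::string> rhs_set(rhs.begin(), rhs.end());
        for (const auto & name : lhs)
            if (rhs_set.count(name) == 0)
                return false;
        return true;
    }

    int main()
    {
        std::cout << isSubsetOf({"date"}, {"date", "region"}) << '\n';   // 1: attach would be allowed
        std::cout << isSubsetOf({"region"}, {"date"}) << '\n';           // 0: would be rejected
    }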
- auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); - auto src_part_storage = src_part->getDataPartStoragePtr(); - - scope_guard src_flushed_tmp_dir_lock; - MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; - - /// If source part is in memory, flush it to disk and clone it already in on-disk format - /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock - /// Construct src_flushed_tmp_part in order to delete part with its directory at destructor - if (auto src_part_in_memory = asInMemoryPart(src_part)) - { - auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - - auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); - src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); - - auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - - src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage) - .withPartInfo(src_part->info) - .withPartFormatFromDisk() - .build(); - - src_flushed_tmp_part->is_temp = true; - src_part_storage = flushed_part_storage; - } - - String with_copy; - if (params.copy_instead_of_hardlink) - with_copy = " (copying data)"; - - auto dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); - - if (params.metadata_version_to_write.has_value()) - { - chassert(!params.keep_metadata_version); - auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings()); - writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); - out_metadata->finalize(); - if (getSettings()->fsync_after_insert) - out_metadata->sync(); - } - - LOG_DEBUG(log, "Clone{} part {} to {}{}", - src_flushed_tmp_part ? 
" flushed" : "", - src_part_storage->getFullPath(), - std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), - with_copy); - - auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage) - .withPartFormatFromDisk() - .build(); + return MergeTreeDataPartCloner::clone( + this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); +} - if (!params.copy_instead_of_hardlink && params.hardlinked_files) - { - params.hardlinked_files->source_part_name = src_part->name; - params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); +std::pair MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + const MergeTreeData::DataPartPtr & src_part, + const MergeTreePartition & new_partition, + const String & partition_id, + const IMergeTreeDataPart::MinMaxIndex & min_max_index, + const String & tmp_part_prefix, + const StorageMetadataPtr & my_metadata_snapshot, + const IDataPartStorage::ClonePartParams & clone_params, + ContextPtr local_context, + Int64 min_block, + Int64 max_block +) +{ + MergeTreePartInfo dst_part_info(partition_id, min_block, max_block, src_part->info.level); + + return MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( + this, + src_part, + my_metadata_snapshot, + dst_part_info, + tmp_part_prefix, + local_context->getReadSettings(), + local_context->getWriteSettings(), + new_partition, + min_max_index, + false, + clone_params); +} + +std::pair MergeTreeData::createPartitionAndMinMaxIndexFromSourcePart( + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context) +{ + const auto & src_data = src_part->storage; - for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) - { - if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - params.hardlinked_files->hardlinks_from_source_part.insert(it->name()); - } - } + auto metadata_manager = std::make_shared(src_part.get()); + IMergeTreeDataPart::MinMaxIndex min_max_index; - auto projections = src_part->getProjectionParts(); - for (const auto & [name, projection_part] : projections) - { - const auto & projection_storage = projection_part->getDataPartStorage(); - for (auto it = projection_storage.iterate(); it->isValid(); it->next()) - { - auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); - if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); - } - } - } - } + min_max_index.load(src_data, metadata_manager); - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = params.txn ? 
params.txn->tid : Tx::PrehistoricTID; - dst_data_part->version.setCreationTID(tid, nullptr); - dst_data_part->storeVersionMetadata(); + MergeTreePartition new_partition; - dst_data_part->is_temp = true; + new_partition.create(metadata_snapshot, min_max_index.getBlock(src_data), 0u, local_context); - dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); - dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime(); - return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); + return {new_partition, min_max_index}; } String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const @@ -7803,21 +7741,39 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) { - if (left->getProjectionParts().size() != right->getProjectionParts().size()) + auto remove_broken_parts_from_consideration = [](auto & parts) + { + std::set broken_projection_parts; + for (const auto & [name, part] : parts) + { + if (part->is_broken) + broken_projection_parts.emplace(name); + } + for (const auto & name : broken_projection_parts) + parts.erase(name); + }; + + auto left_projection_parts = left->getProjectionParts(); + auto right_projection_parts = right->getProjectionParts(); + + remove_broken_parts_from_consideration(left_projection_parts); + remove_broken_parts_from_consideration(right_projection_parts); + + if (left_projection_parts.size() != right_projection_parts.size()) { out_reason = fmt::format( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", - left->getProjectionParts().size(), + left_projection_parts.size(), left->name, - right->getProjectionParts().size(), + right_projection_parts.size(), right->name ); return false; } - for (const auto & [name, _] : left->getProjectionParts()) + for (const auto & [name, _] : left_projection_parts) { - if (!right->hasProjection(name)) + if (!right_projection_parts.contains(name)) { out_reason = fmt::format( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name @@ -7989,7 +7945,7 @@ bool MergeTreeData::insertQueryIdOrThrowNoLock(const String & query_id, size_t m throw Exception( ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for table {}. Maximum is: {}", - *std::atomic_load(&log_name), + log.loadName(), max_queries); query_id_set.insert(query_id); return true; @@ -8181,7 +8137,7 @@ ReservationPtr MergeTreeData::balancedReservation( } // Record submerging big parts in the tagger to clean them up. - tagger_ptr->emplace(*this, part_name, std::move(covered_parts), log); + tagger_ptr->emplace(*this, part_name, std::move(covered_parts), log.load()); } } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index f0dbaf0e307a..dfdc22baa8f1 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -231,6 +232,7 @@ class MergeTreeData : public IStorage, public WithMutableContext } }; + using DataParts = std::set; using MutableDataParts = std::set; using DataPartsVector = std::vector; @@ -461,14 +463,19 @@ class MergeTreeData : public IStorage, public WithMutableContext /// Load the set of data parts from disk. Call once - immediately after the object is created. 
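partsContainSameProjections above now drops broken projection parts from both sides before comparing the remaining sets. A self-contained sketch of that filter-then-compare step, with a hypothetical ProjectionPart struct that carries nothing but the is_broken flag:

    #include <iostream>
    #include <iterator>
    #include <map>
    #include <string>

    struct ProjectionPart { bool is_broken = false; };
    using ProjectionParts = std::map<std::string, ProjectionPart>;

    // Takes copies on purpose: broken entries are erased locally, the originals stay intact.
    bool containSameProjections(ProjectionParts left, ProjectionParts right)
    {
        auto drop_broken = [](ProjectionParts & parts)
        {
            for (auto it = parts.begin(); it != parts.end(); )
                it = it->second.is_broken ? parts.erase(it) : std::next(it);
        };
        drop_broken(left);
        drop_broken(right);

        if (left.size() != right.size())
            return false;
        for (const auto & entry : left)
            if (right.count(entry.first) == 0)
                return false;
        return true;
    }

    int main()
    {
        ProjectionParts a{{"p1", {false}}, {"p2", {/*is_broken*/ true}}};   // p2 is broken and ignored
        ProjectionParts b{{"p1", {false}}};
        std::cout << containSameProjections(a, b) << '\n';                  // 1
    }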
void loadDataParts(bool skip_sanity_checks, std::optional> expected_parts); - String getLogName() const { return *std::atomic_load(&log_name); } + String getLogName() const { return log.loadName(); } Int64 getMaxBlockNumber() const; struct ProjectionPartsVector { - DataPartsVector projection_parts; DataPartsVector data_parts; + + DataPartsVector projection_parts; + DataPartStateVector projection_parts_states; + + DataPartsVector broken_projection_parts; + DataPartStateVector broken_projection_parts_states; }; /// Returns a copy of the list so that the caller shouldn't worry about locks. @@ -483,7 +490,7 @@ class MergeTreeData : public IStorage, public WithMutableContext const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; /// Same as above but only returns projection parts ProjectionPartsVector getProjectionPartsVectorForInternalUsage( - const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const; + const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const; /// Returns absolutely all parts (and snapshot of their states) @@ -848,6 +855,23 @@ class MergeTreeData : public IStorage, public WithMutableContext const ReadSettings & read_settings, const WriteSettings & write_settings); + std::pair cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + const MergeTreeData::DataPartPtr & src_part, + const MergeTreePartition & new_partition, + const String & partition_id, + const IMergeTreeDataPart::MinMaxIndex & min_max_index, + const String & tmp_part_prefix, + const StorageMetadataPtr & my_metadata_snapshot, + const IDataPartStorage::ClonePartParams & clone_params, + ContextPtr local_context, + Int64 min_block, + Int64 max_block); + + static std::pair createPartitionAndMinMaxIndexFromSourcePart( + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context); + virtual std::vector getMutationsStatus() const = 0; /// Returns true if table can create new parts with adaptive granularity @@ -1114,10 +1138,7 @@ class MergeTreeData : public IStorage, public WithMutableContext /// Engine-specific methods BrokenPartCallback broken_part_callback; - /// log_name will change during table RENAME. Use atomic_shared_ptr to allow concurrent RW. - /// NOTE clang-14 doesn't have atomic_shared_ptr yet. Use std::atomic* operations for now. - std::shared_ptr log_name; - std::atomic log; + AtomicLogger log; /// Storage settings. /// Use get and set to receive readonly versions. 
@@ -1601,10 +1622,10 @@ struct CurrentlySubmergingEmergingTagger MergeTreeData & storage; String emerging_part_name; MergeTreeData::DataPartsVector submerging_parts; - Poco::Logger * log; + LoggerPtr log; CurrentlySubmergingEmergingTagger( - MergeTreeData & storage_, const String & name_, MergeTreeData::DataPartsVector && parts_, Poco::Logger * log_) + MergeTreeData & storage_, const String & name_, MergeTreeData::DataPartsVector && parts_, LoggerPtr log_) : storage(storage_), emerging_part_name(name_), submerging_parts(std::move(parts_)), log(log_) { } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 8c03aef6f99f..58fddde7b545 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -66,7 +66,7 @@ static const double DISK_USAGE_COEFFICIENT_TO_SELECT = 2; static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_) - : data(data_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) + : data(data_), log(getLogger(data.getLogName() + " (MergerMutator)")) { } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 6eab0ee0c371..f3a3f51b6c3a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -213,7 +213,7 @@ public : private: MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; /// When the last time you wrote to the log that the disk space was running out (not to write about this too often). time_t disk_space_warning_time = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 837b940e3542..d4980a67a43d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -54,6 +54,8 @@ struct MergeTreeDataPartChecksums bool has(const String & file_name) const { return files.find(file_name) != files.end(); } + bool remove(const String & file_name) { return files.erase(file_name); } + bool empty() const { return files.empty(); } /// Checks that the set of columns and their checksums are the same. If not, throws an exception. diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp new file mode 100644 index 000000000000..04019d2c6650 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp @@ -0,0 +1,319 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace DistinctPartitionExpression +{ +std::unique_ptr updatePartitionFile( + const MergeTreeData & merge_tree_data, + const MergeTreePartition & partition, + const MergeTreeData::MutableDataPartPtr & dst_part, + IDataPartStorage & storage) +{ + storage.removeFile("partition.dat"); + // Leverage already implemented MergeTreePartition::store to create & store partition.dat. + // Checksum is re-calculated later. 
+ return partition.store(merge_tree_data, storage, dst_part->checksums); +} + +IMergeTreeDataPart::MinMaxIndex::WrittenFiles updateMinMaxFiles( + const MergeTreeData & merge_tree_data, + const MergeTreeData::MutableDataPartPtr & dst_part, + IDataPartStorage & storage, + const StorageMetadataPtr & metadata_snapshot) +{ + for (const auto & column_name : MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->partition_key)) + { + auto file = "minmax_" + escapeForFileName(column_name) + ".idx"; + storage.removeFile(file); + } + + return dst_part->minmax_idx->store(merge_tree_data, storage, dst_part->checksums); +} + +void finalizeNewFiles(const std::vector> & files, bool sync_new_files) +{ + for (const auto & file : files) + { + file->finalize(); + if (sync_new_files) + file->sync(); + } +} + +void updateNewPartFiles( + const MergeTreeData & merge_tree_data, + const MergeTreeData::MutableDataPartPtr & dst_part, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + const StorageMetadataPtr & src_metadata_snapshot, + bool sync_new_files) +{ + auto & storage = dst_part->getDataPartStorage(); + + *dst_part->minmax_idx = new_min_max_index; + + auto partition_file = updatePartitionFile(merge_tree_data, new_partition, dst_part, storage); + + auto min_max_files = updateMinMaxFiles(merge_tree_data, dst_part, storage, src_metadata_snapshot); + + IMergeTreeDataPart::MinMaxIndex::WrittenFiles written_files; + + if (partition_file) + written_files.emplace_back(std::move(partition_file)); + + written_files.insert(written_files.end(), std::make_move_iterator(min_max_files.begin()), std::make_move_iterator(min_max_files.end())); + + finalizeNewFiles(written_files, sync_new_files); + + // MergeTreeDataPartCloner::finalize_part calls IMergeTreeDataPart::loadColumnsChecksumsIndexes, which will re-create + // the checksum file if it doesn't exist. Relying on that is cumbersome, but this refactoring is simply a code extraction + // with small improvements. It can be further improved in the future. 
+ storage.removeFile("checksums.txt"); +} +} + +namespace +{ +bool doesStoragePolicyAllowSameDisk(MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part) +{ + for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks()) + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) + return true; + return false; +} + +DataPartStoragePtr flushPartStorageToDiskIfInMemory( + MergeTreeData * merge_tree_data, + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const String & tmp_part_prefix, + const String & tmp_dst_part_name, + scope_guard & src_flushed_tmp_dir_lock, + MergeTreeData::MutableDataPartPtr src_flushed_tmp_part) +{ + if (auto src_part_in_memory = asInMemoryPart(src_part)) + { + auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); + auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); + + src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); + + auto flushed_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); + + src_flushed_tmp_part = MergeTreeDataPartBuilder(*merge_tree_data, src_part->name, flushed_part_storage) + .withPartInfo(src_part->info) + .withPartFormatFromDisk() + .build(); + + src_flushed_tmp_part->is_temp = true; + + return flushed_part_storage; + } + + return src_part->getDataPartStoragePtr(); +} + +std::shared_ptr hardlinkAllFiles( + MergeTreeData * merge_tree_data, + const DB::ReadSettings & read_settings, + const DB::WriteSettings & write_settings, + const DataPartStoragePtr & storage, + const String & path, + const DB::IDataPartStorage::ClonePartParams & params) +{ + return storage->freeze( + merge_tree_data->getRelativeDataPath(), + path, + read_settings, + write_settings, + /*save_metadata_callback=*/{}, + params); +} + +std::pair cloneSourcePart( + MergeTreeData * merge_tree_data, + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const DB::IDataPartStorage::ClonePartParams & params) +{ + const auto dst_part_name = src_part->getNewName(dst_part_info); + + const auto tmp_dst_part_name = tmp_part_prefix + dst_part_name; + + auto temporary_directory_lock = merge_tree_data->getTemporaryPartDirectoryHolder(tmp_dst_part_name); + + src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); + + scope_guard src_flushed_tmp_dir_lock; + MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; + + auto src_part_storage = flushPartStorageToDiskIfInMemory( + merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part); + + auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params); + + if (params.metadata_version_to_write.has_value()) + { + chassert(!params.keep_metadata_version); + auto out_metadata = dst_part_storage->writeFile( + IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, merge_tree_data->getContext()->getWriteSettings()); + writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); + out_metadata->finalize(); + if (merge_tree_data->getSettings()->fsync_after_insert) + out_metadata->sync(); + } + + LOG_DEBUG( + &Poco::Logger::get("MergeTreeDataPartCloner"), + "Clone {} part {} to 
{}{}", + src_flushed_tmp_part ? "flushed" : "", + src_part_storage->getFullPath(), + std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), + false); + + + auto part = MergeTreeDataPartBuilder(*merge_tree_data, dst_part_name, dst_part_storage).withPartFormatFromDisk().build(); + + return std::make_pair(part, std::move(temporary_directory_lock)); +} + +void handleHardLinkedParameterFiles(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) +{ + const auto & hardlinked_files = params.hardlinked_files; + + hardlinked_files->source_part_name = src_part->name; + hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); + + for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) + { + if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + hardlinked_files->hardlinks_from_source_part.insert(it->name()); + } + } +} + +void handleProjections(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) +{ + auto projections = src_part->getProjectionParts(); + for (const auto & [name, projection_part] : projections) + { + const auto & projection_storage = projection_part->getDataPartStorage(); + for (auto it = projection_storage.iterate(); it->isValid(); it->next()) + { + auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); + if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); + } + } + } +} + +MergeTreeData::MutableDataPartPtr finalizePart( + const MergeTreeData::MutableDataPartPtr & dst_part, const DB::IDataPartStorage::ClonePartParams & params, bool require_part_metadata) +{ + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = params.txn ? 
params.txn->tid : Tx::PrehistoricTID; + dst_part->version.setCreationTID(tid, nullptr); + dst_part->storeVersionMetadata(); + + dst_part->is_temp = true; + + dst_part->loadColumnsChecksumsIndexes(require_part_metadata, true); + + dst_part->modification_time = dst_part->getDataPartStorage().getLastModified().epochTime(); + + return dst_part; +} + +std::pair cloneAndHandleHardlinksAndProjections( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const IDataPartStorage::ClonePartParams & params) +{ + chassert(!merge_tree_data->isStaticStorage()); + if (!doesStoragePolicyAllowSameDisk(merge_tree_data, src_part)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); + + auto [destination_part, temporary_directory_lock] = cloneSourcePart( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + if (!params.copy_instead_of_hardlink && params.hardlinked_files) + { + handleHardLinkedParameterFiles(src_part, params); + handleProjections(src_part, params); + } + + return std::make_pair(destination_part, std::move(temporary_directory_lock)); +} +} + +std::pair MergeTreeDataPartCloner::clone( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + bool require_part_metadata, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings) +{ + auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); +} + +std::pair MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + bool sync_new_files, + const IDataPartStorage::ClonePartParams & params) +{ + auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + DistinctPartitionExpression::updateNewPartFiles( + *merge_tree_data, destination_part, new_partition, new_min_max_index, src_part->storage.getInMemoryMetadataPtr(), sync_new_files); + + return std::make_pair(finalizePart(destination_part, params, false), std::move(temporary_directory_lock)); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.h b/src/Storages/MergeTree/MergeTreeDataPartCloner.h new file mode 100644 index 000000000000..53585f20b7f0 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartCloner.h @@ -0,0 +1,43 @@ +#pragma once + +namespace DB 
+{ + +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr; +struct MergeTreePartition; +class IMergeTreeDataPart; + +class MergeTreeDataPartCloner +{ +public: + using DataPart = IMergeTreeDataPart; + using MutableDataPartPtr = std::shared_ptr; + using DataPartPtr = std::shared_ptr; + + static std::pair clone( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + bool require_part_metadata, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings); + + static std::pair cloneWithDistinctPartitionExpression( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + bool sync_new_files, + const IDataPartStorage::ClonePartParams & params); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 66f593bbf331..a76d370d057f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -74,7 +74,7 @@ namespace ErrorCodes MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_) - : data(data_), log(&Poco::Logger::get(data.getLogName() + " (SelectExecutor)")) + : data(data_), log(getLogger(data.getLogName() + " (SelectExecutor)")) { } @@ -83,7 +83,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings, - Poco::Logger * log) + LoggerPtr log) { size_t rows_count = 0; @@ -167,7 +167,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool sample_factor_column_queried, - Poco::Logger * log) + LoggerPtr log) { const Settings & settings = context->getSettingsRef(); /// Sampling. 
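The cloner declared above funnels both entry points through one helper: cloneAndHandleHardlinksAndProjections() produces the temporary clone, clone() only finalizes it, and cloneWithDistinctPartitionExpression() rewrites the partition files before finalizing. A minimal control-flow sketch with toy stand-in types; ClonedPart and these free functions are illustrative only, not ClickHouse's real API.

#include <iostream>
#include <string>
#include <utility>

// Toy stand-in for a cloned data part; the real code returns a
// MergeTreeData::MutableDataPartPtr together with a temporary-directory lock.
struct ClonedPart
{
    std::string name;
    bool hardlinks_handled = false;
    bool partition_rewritten = false;
    bool finalized = false;
};

ClonedPart cloneAndHandleHardlinksAndProjections(const std::string & src_name)
{
    ClonedPart part{src_name + "_clone"};
    part.hardlinks_handled = true;          // hardlinked files / projections bookkeeping
    return part;
}

ClonedPart finalizePart(ClonedPart part)
{
    part.finalized = true;                  // load columns, checksums, set creation TID, ...
    return part;
}

// clone(): shared pipeline, then finalize.
ClonedPart clone(const std::string & src_name)
{
    return finalizePart(cloneAndHandleHardlinksAndProjections(src_name));
}

// cloneWithDistinctPartitionExpression(): same pipeline, plus a partition
// rewrite step (updateNewPartFiles in the real code) before finalization.
ClonedPart cloneWithDistinctPartitionExpression(const std::string & src_name)
{
    auto part = cloneAndHandleHardlinksAndProjections(src_name);
    part.partition_rewritten = true;
    return finalizePart(std::move(part));
}

int main()
{
    auto a = clone("all_1_1_0");
    auto b = cloneWithDistinctPartitionExpression("all_1_1_0");
    std::cout << a.name << " rewritten=" << a.partition_rewritten << '\n';
    std::cout << b.name << " rewritten=" << b.partition_rewritten << '\n';
}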
@@ -503,7 +503,7 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( const MergeTreeData & data, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, - Poco::Logger * log, + LoggerPtr log, ReadFromMergeTree::IndexStats & index_stats) { chassert(alter_conversions.empty() || parts.size() == alter_conversions.size()); @@ -590,7 +590,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, - Poco::Logger * log, + LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, bool use_skip_indexes) @@ -1082,7 +1082,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( const KeyCondition & key_condition, const std::optional & part_offset_condition, const Settings & settings, - Poco::Logger * log) + LoggerPtr log) { MarkRanges res; @@ -1322,7 +1322,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log) + LoggerPtr log) { if (!index_helper->getDeserializedFormat(part->getDataPartStorage(), index_helper->getFileName())) { @@ -1440,7 +1440,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log) + LoggerPtr log) { for (const auto & index_helper : indices) { @@ -1596,7 +1596,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, - Poco::Logger * log) + LoggerPtr log) { /// process_parts prepare parts that have to be read for the query, /// returns false if duplicated parts' UUID have been met diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index ba1f20054f08..17975354187e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -71,11 +71,11 @@ class MergeTreeDataSelectExecutor const KeyCondition & key_condition, const std::optional & part_offset_condition, const Settings & settings, - Poco::Logger * log); + LoggerPtr log); private: const MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. 
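A large share of this diff is the mechanical switch from raw Poco::Logger * members to the LoggerPtr returned by getLogger(). A self-contained sketch of the pattern, assuming LoggerPtr is a shared_ptr alias (as its usage here suggests); Logger below is a stand-in, not Poco's class.

#include <iostream>
#include <memory>
#include <string>

// Stand-in for Poco::Logger; the real class is much richer.
struct Logger
{
    explicit Logger(std::string name_) : name(std::move(name_)) {}
    void trace(const std::string & msg) const { std::cout << name << ": " << msg << '\n'; }
    std::string name;
};

// Assumption: LoggerPtr is a shared-ownership alias and getLogger() is its factory.
using LoggerPtr = std::shared_ptr<Logger>;

LoggerPtr getLogger(const std::string & name)
{
    return std::make_shared<Logger>(name);
}

struct SelectExecutorLike
{
    explicit SelectExecutorLike(const std::string & table_name)
        : log(getLogger(table_name + " (SelectExecutor)"))   // was: &Poco::Logger::get(...)
    {
    }

    LoggerPtr log;   // was: Poco::Logger * log;
};

int main()
{
    SelectExecutorLike executor("db.table");
    executor.log->trace("shared ownership keeps the logger alive for every holder");
}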
static size_t getApproximateTotalRowsToRead( @@ -83,7 +83,7 @@ class MergeTreeDataSelectExecutor const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings, - Poco::Logger * log); + LoggerPtr log); static MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index_helper, @@ -94,7 +94,7 @@ class MergeTreeDataSelectExecutor const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log); + LoggerPtr log); static MarkRanges filterMarksUsingMergedIndex( MergeTreeIndices indices, @@ -105,7 +105,7 @@ class MergeTreeDataSelectExecutor const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log); + LoggerPtr log); struct PartFilterCounters { @@ -141,7 +141,7 @@ class MergeTreeDataSelectExecutor const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, - Poco::Logger * log); + LoggerPtr log); public: /// For given number rows and bytes, get the number of marks to read. @@ -184,7 +184,7 @@ class MergeTreeDataSelectExecutor const MergeTreeData & data, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, - Poco::Logger * log, + LoggerPtr log, ReadFromMergeTree::IndexStats & index_stats); /// Filter parts using primary key and secondary indexes. @@ -199,7 +199,7 @@ class MergeTreeDataSelectExecutor const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, - Poco::Logger * log, + LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, bool use_skip_indexes); @@ -216,7 +216,7 @@ class MergeTreeDataSelectExecutor const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool sample_factor_column_queried, - Poco::Logger * log); + LoggerPtr log); /// Check query limits: max_partitions_to_read, max_concurrent_queries. /// Also, return QueryIdHolder. If not null, we should keep it until query finishes. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 3c0b2d2b42e9..ce3015c5dcb7 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -115,7 +115,7 @@ void buildScatterSelector( if (max_parts && partitions_count >= max_parts && !throw_on_limit) { const auto & client_info = context->getClientInfo(); - Poco::Logger * log = &Poco::Logger::get("MergeTreeDataWriter"); + LoggerPtr log = getLogger("MergeTreeDataWriter"); LOG_WARNING(log, "INSERT query from initial_user {} (query ID: {}) inserted a block " "that created parts in {} partitions. 
This is being logged " @@ -335,7 +335,7 @@ Block MergeTreeDataWriter::mergeBlock( case MergeTreeData::MergingParams::Collapsing: return std::make_shared( block, 1, sort_description, merging_params.sign_column, - false, block_size + 1, /*block_size_bytes=*/0, &Poco::Logger::get("MergeTreeDataWriter")); + false, block_size + 1, /*block_size_bytes=*/0, getLogger("MergeTreeDataWriter")); case MergeTreeData::MergingParams::Summing: return std::make_shared( block, 1, sort_description, merging_params.columns_to_sum, @@ -618,7 +618,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( bool is_temp, IMergeTreeDataPart * parent_part, const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection) { @@ -729,7 +729,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part) @@ -748,7 +748,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( /// projection part merges. MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 2fb6b1f22d43..8fb8b82dbe6a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -45,8 +45,9 @@ class MergeTreeDataWriter public: explicit MergeTreeDataWriter(MergeTreeData & data_) : data(data_) - , log(&Poco::Logger::get(data.getLogName() + " (Writer)")) - {} + , log(getLogger(data.getLogName() + " (Writer)")) + { + } /** Split the block to blocks, each of them must be written as separate part. * (split rows by partition) @@ -91,7 +92,7 @@ class MergeTreeDataWriter /// For insertion. static TemporaryPart writeProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part); @@ -99,7 +100,7 @@ class MergeTreeDataWriter /// For mutation: MATERIALIZE PROJECTION. 
static TemporaryPart writeTempProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part, @@ -126,12 +127,12 @@ class MergeTreeDataWriter bool is_temp, IMergeTreeDataPart * parent_part, const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection); MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index ddeaf69136a3..76ef3be25b3a 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -467,6 +467,45 @@ void MergeTreePartition::create(const StorageMetadataPtr & metadata_snapshot, Bl } } +void MergeTreePartition::createAndValidateMinMaxPartitionIds( + const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context) +{ + if (!metadata_snapshot->hasPartitionKey()) + return; + + auto partition_key_names_and_types = executePartitionByExpression(metadata_snapshot, block_with_min_max_partition_ids, context); + value.resize(partition_key_names_and_types.size()); + + /// Executing partition_by expression adds new columns to passed block according to partition functions. + /// The block is passed by reference and is used afterwards. `moduloLegacy` needs to be substituted back + /// with just `modulo`, because it was a temporary substitution. + static constexpr std::string_view modulo_legacy_function_name = "moduloLegacy"; + + size_t i = 0; + for (const auto & element : partition_key_names_and_types) + { + auto & partition_column = block_with_min_max_partition_ids.getByName(element.name); + + if (element.name.starts_with(modulo_legacy_function_name)) + partition_column.name.replace(0, modulo_legacy_function_name.size(), "modulo"); + + Field extracted_min_partition_id_field; + Field extracted_max_partition_id_field; + + partition_column.column->get(0, extracted_min_partition_id_field); + partition_column.column->get(1, extracted_max_partition_id_field); + + if (extracted_min_partition_id_field != extracted_max_partition_id_field) + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, + "Can not create the partition. A partition can not contain values that have different partition ids"); + } + + partition_column.column->get(0u, value[i++]); + } +} + NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context) { auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec5..fd7ae02cde4d 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -1,11 +1,12 @@ #pragma once -#include +#include #include #include #include #include -#include +#include +#include namespace DB { @@ -51,6 +52,11 @@ struct MergeTreePartition void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context); + /// Copy of MergeTreePartition::create, but also validates if min max partition keys are equal. If they are different, + /// it means the partition can't be created because the data doesn't belong to the same partition. 
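createAndValidateMinMaxPartitionIds() above evaluates the partition expression over a two-row block (the global min and max rows) and requires every partition column to yield the same value in both rows; otherwise the data would span more than one partition. A simplified, self-contained sketch of that invariant; Value and the per-column (min, max) pairs are hypothetical stand-ins for DB::Field and the real block.

#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>

// Stand-in for a partition key value; the real code uses DB::Field.
using Value = std::variant<long, std::string>;

// Input: one (min, max) pair per partition key column, already computed by
// evaluating the partition expression over the global min/max index rows.
std::vector<Value> validateMinMaxPartitionIds(const std::vector<std::pair<Value, Value>> & min_max_per_column)
{
    std::vector<Value> partition_value;
    partition_value.reserve(min_max_per_column.size());

    for (const auto & [min_value, max_value] : min_max_per_column)
    {
        if (min_value != max_value)
            throw std::invalid_argument(
                "Cannot create the partition: min and max rows map to different partition ids");

        partition_value.push_back(min_value);   // both rows agree, take either one
    }

    return partition_value;
}

int main()
{
    // toYYYYMM-style key where both extremes land in the same month -> accepted.
    auto ok = validateMinMaxPartitionIds({{Value{202401L}, Value{202401L}}});
    std::cout << "columns accepted: " << ok.size() << '\n';

    try
    {
        validateMinMaxPartitionIds({{Value{202401L}, Value{202402L}}});
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << "rejected: " << e.what() << '\n';
    }
}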
+ void createAndValidateMinMaxPartitionIds( + const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context); + static void appendFiles(const MergeTreeData & storage, Strings & files); /// Adjust partition key and execute its expression on block. Return sample block according to used expression. diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp new file mode 100644 index 000000000000..21bcdb84a960 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace +{ +bool isDestinationPartitionExpressionMonotonicallyIncreasing( + const std::vector & hyperrectangle, const MergeTreeData & destination_storage) +{ + auto destination_table_metadata = destination_storage.getInMemoryMetadataPtr(); + + auto key_description = destination_table_metadata->getPartitionKey(); + auto definition_ast = key_description.definition_ast->clone(); + + auto table_identifier = std::make_shared(destination_storage.getStorageID().getTableName()); + auto table_with_columns + = TableWithColumnNamesAndTypes{DatabaseAndTableWithAlias(table_identifier), destination_table_metadata->getColumns().getOrdinary()}; + + auto expression_list = extractKeyExpressionList(definition_ast); + + MonotonicityCheckVisitor::Data data{{table_with_columns}, destination_storage.getContext(), /*group_by_function_hashes*/ {}}; + + for (auto i = 0u; i < expression_list->children.size(); i++) + { + data.range = hyperrectangle[i]; + + MonotonicityCheckVisitor(data).visit(expression_list->children[i]); + + if (!data.monotonicity.is_monotonic || !data.monotonicity.is_positive) + return false; + } + + return true; +} + +bool isExpressionDirectSubsetOf(const ASTPtr source, const ASTPtr destination) +{ + auto source_expression_list = extractKeyExpressionList(source); + auto destination_expression_list = extractKeyExpressionList(destination); + + std::unordered_set source_columns; + + for (auto i = 0u; i < source_expression_list->children.size(); ++i) + source_columns.insert(source_expression_list->children[i]->getColumnName()); + + for (auto i = 0u; i < destination_expression_list->children.size(); ++i) + if (!source_columns.contains(destination_expression_list->children[i]->getColumnName())) + return false; + + return true; +} +} + +void MergeTreePartitionCompatibilityVerifier::verify( + const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts) +{ + const auto source_metadata = source_storage.getInMemoryMetadataPtr(); + const auto destination_metadata = destination_storage.getInMemoryMetadataPtr(); + + const auto source_partition_key_ast = source_metadata->getPartitionKeyAST(); + const auto destination_partition_key_ast = destination_metadata->getPartitionKeyAST(); + + // If destination partition expression columns are a subset of source partition expression columns, + // there is no need to check for monotonicity. 
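A sketch of the subset shortcut mentioned in the comment above (the actual check follows right after this sketch): if every column referenced by the destination partition key also appears in the source partition key, the monotonicity analysis can be skipped entirely. The real isExpressionDirectSubsetOf() walks the key expression ASTs; here the expressions are reduced to plain column-name lists.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical simplification: a partition expression reduced to the list of
// column names it references.
bool isDirectColumnSubset(const std::vector<std::string> & source_columns,
                          const std::vector<std::string> & destination_columns)
{
    std::unordered_set<std::string> source_set(source_columns.begin(), source_columns.end());

    for (const auto & column : destination_columns)
        if (!source_set.contains(column))
            return false;

    return true;
}

int main()
{
    // PARTITION BY (a, b) -> PARTITION BY a : subset, no monotonicity check needed.
    std::cout << isDirectColumnSubset({"a", "b"}, {"a"}) << '\n';   // 1
    // PARTITION BY a -> PARTITION BY date : not a subset.
    std::cout << isDirectColumnSubset({"a"}, {"date"}) << '\n';     // 0
}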
+    if (isExpressionDirectSubsetOf(source_partition_key_ast, destination_partition_key_ast))
+        return;
+
+    const auto src_global_min_max_indexes = MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(source_parts, destination_storage);
+
+    assert(!src_global_min_max_indexes.hyperrectangle.empty());
+
+    if (!isDestinationPartitionExpressionMonotonicallyIncreasing(src_global_min_max_indexes.hyperrectangle, destination_storage))
+        throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Destination table partition expression is not monotonically increasing");
+
+    MergeTreePartition().createAndValidateMinMaxPartitionIds(
+        destination_storage.getInMemoryMetadataPtr(),
+        src_global_min_max_indexes.getBlock(destination_storage),
+        destination_storage.getContext());
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h
new file mode 100644
index 000000000000..1682add3ebde
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+/*
+ * Verifies that source and destination partitions are compatible.
+ * To be compatible, one of the following criteria must be met:
+ * 1. Destination partition expression columns are a subset of source partition columns; or
+ * 2. Destination partition expression is monotonic on the source global min_max idx Range AND the computed partition id for
+ * the source global min_max idx range is the same.
+ *
+ * If not, an exception is thrown.
+ * */
+
+class MergeTreePartitionCompatibilityVerifier
+{
+public:
+    using DataPart = IMergeTreeDataPart;
+    using DataPartPtr = std::shared_ptr;
+    using DataPartsVector = std::vector;
+
+    static void
+    verify(const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts);
+};
+
+}
diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp
new file mode 100644
index 000000000000..0871efadf0ca
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp
@@ -0,0 +1,25 @@
+#include
+
+namespace DB
+{
+
+IMergeTreeDataPart::MinMaxIndex
+MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(const DataPartsVector & parts, const MergeTreeData & storage)
+{
+    IMergeTreeDataPart::MinMaxIndex global_min_max_indexes;
+
+    for (const auto & part : parts)
+    {
+        auto metadata_manager = std::make_shared(part.get());
+
+        auto local_min_max_index = MergeTreeData::DataPart::MinMaxIndex();
+
+        local_min_max_index.load(storage, metadata_manager);
+
+        global_min_max_indexes.merge(local_min_max_index);
+    }
+
+    return global_min_max_indexes;
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h
new file mode 100644
index 000000000000..4f2711772469
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+/*
+ * Calculates global min max indexes for a given set of parts on the given storage.
+ * */ +class MergeTreePartitionGlobalMinMaxIdxCalculator +{ + using DataPart = IMergeTreeDataPart; + using DataPartPtr = std::shared_ptr; + using DataPartsVector = std::vector; + +public: + static IMergeTreeDataPart::MinMaxIndex calculate(const DataPartsVector & parts, const MergeTreeData & storage); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index b9109e51309c..43d8ebdd6d34 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -48,7 +48,7 @@ class MergeTreePartsMover explicit MergeTreePartsMover(MergeTreeData * data_) : data(data_) - , log(&Poco::Logger::get("MergeTreePartsMover")) + , log(getLogger("MergeTreePartsMover")) { } @@ -81,7 +81,7 @@ class MergeTreePartsMover private: MergeTreeData * data; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 3f9632637b66..47c2fe07bb47 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -128,7 +128,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( context_) , WithContext(context_) , prefetch_threadpool(getContext()->getPrefetchThreadpool()) - , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) + , log(getLogger("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) { /// Tasks creation might also create a lost of readers - check they do not /// do any time consuming operations in ctor. 
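The MergeTreePartitionGlobalMinMaxIdxCalculator shown just above simply loads each part's local MinMaxIndex and folds it into a global one via merge(). A toy single-column version of that fold; the real index tracks one range per minmax column, not a single pair of integers.

#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>

// Stand-in for IMergeTreeDataPart::MinMaxIndex over a single integer column.
struct MinMaxIndex
{
    long min = std::numeric_limits<long>::max();
    long max = std::numeric_limits<long>::min();

    void merge(const MinMaxIndex & other)
    {
        min = std::min(min, other.min);
        max = std::max(max, other.max);
    }
};

MinMaxIndex calculateGlobalMinMax(const std::vector<MinMaxIndex> & per_part_indexes)
{
    MinMaxIndex global;
    for (const auto & local : per_part_indexes)   // one loaded index per data part
        global.merge(local);
    return global;
}

int main()
{
    auto global = calculateGlobalMinMax({{10, 20}, {5, 7}, {15, 42}});
    std::cout << global.min << ".." << global.max << '\n';   // 5..42
}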
diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 9925d4e2fa4f..378034c5eae5 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -122,7 +122,7 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo TasksPerThread per_thread_tasks; std::priority_queue prefetch_queue; /// the smallest on top bool started_prefetches = false; - Poco::Logger * log; + LoggerPtr log; /// A struct which allows to track max number of tasks which were in the /// threadpool simultaneously (similar to CurrentMetrics, but the result diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 04d421389634..79ed18f4d1f4 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -231,7 +231,7 @@ class MergeTreeRangeReader using RangesInfo = std::vector; - explicit ReadResult(Poco::Logger * log_) : log(log_) {} + explicit ReadResult(LoggerPtr log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); @@ -298,7 +298,7 @@ class MergeTreeRangeReader size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - Poco::Logger * log; + LoggerPtr log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); @@ -325,7 +325,7 @@ class MergeTreeRangeReader bool is_initialized = false; Names non_const_virtual_column_names; - Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); + LoggerPtr log = getLogger("MergeTreeRangeReader"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 3a1af947cae1..e45ccad912f1 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -108,7 +108,7 @@ class MergeTreeReadPool : public MergeTreeReadPoolBase std::vector threads_tasks; std::set remaining_thread_tasks; - Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPool"); + LoggerPtr log = getLogger("MergeTreeReadPool"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 7579a892b67c..6a548dffe374 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -34,7 +34,7 @@ class MergeTreeReadPoolParallelReplicas : public MergeTreeReadPoolBase const CoordinationMode coordination_mode; RangesInDataPartsDescription buffered_ranges; bool no_more_tasks_available{false}; - Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas"); + LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index dcfed700fac2..41c7531b6a66 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -184,7 +184,11 @@ MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read(const BlockSizeParam Block block; if (read_result.num_rows != 0) + { + for (const auto & column : read_result.columns) + column->assumeMutableRef().shrinkToFit(); block = sample_block.cloneWithColumns(read_result.columns); + } BlockAndProgress res = { .block = 
std::move(block), diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index cf1a6313b514..b06ae788e91d 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -114,7 +114,7 @@ class MergeTreeSelectProcessor : private boost::noncopyable /// Should we add part level to produced chunk. Part level is useful for next steps if query has FINAL bool add_part_level = false; - Poco::Logger * log = &Poco::Logger::get("MergeTreeSelectProcessor"); + LoggerPtr log = getLogger("MergeTreeSelectProcessor"); std::atomic is_cancelled{false}; }; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 82e9f8fd2db8..d0fbc3160246 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -68,7 +68,7 @@ class MergeTreeSequentialSource : public ISource /// Should read using direct IO bool read_with_direct_io; - Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource"); + LoggerPtr log = getLogger("MergeTreeSequentialSource"); std::optional mark_ranges; @@ -140,6 +140,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( if (storage.supportsSubcolumns()) options.withSubcolumns(); + columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } else @@ -156,6 +157,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( read_settings.local_fs_method = LocalFSReadMethod::pread; if (read_with_direct_io) read_settings.direct_io_threshold = 1; + /// Configure throttling switch (type) { @@ -224,7 +226,10 @@ try for (size_t i = 0; i < num_columns; ++i) { if (header.has(it->name)) + { + columns[i]->assumeMutableRef().shrinkToFit(); res_columns.emplace_back(std::move(columns[i])); + } ++it; } @@ -318,7 +323,7 @@ class ReadFromPart final : public ISourceStep bool apply_deleted_mask_, ActionsDAGPtr filter_, ContextPtr context_, - Poco::Logger * log_) + LoggerPtr log_) : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) , type(type_) , storage(storage_) @@ -381,7 +386,7 @@ class ReadFromPart final : public ISourceStep bool apply_deleted_mask; ActionsDAGPtr filter; ContextPtr context; - Poco::Logger * log; + LoggerPtr log; }; void createReadFromPartStep( @@ -394,7 +399,7 @@ void createReadFromPartStep( bool apply_deleted_mask, ActionsDAGPtr filter, ContextPtr context, - Poco::Logger * log) + LoggerPtr log) { auto reading = std::make_unique(type, storage, storage_snapshot, std::move(data_part), diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index 41def48aab6a..a5e36a7726ff 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -41,6 +41,6 @@ void createReadFromPartStep( bool apply_deleted_mask, ActionsDAGPtr filter, ContextPtr context, - Poco::Logger * log); + LoggerPtr log); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 153930b400d3..b42da22239eb 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -65,7 +65,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte if (ast && isDiskFunction(ast)) { auto disk_name = getOrCreateDiskFromDiskAST(ast, context, is_attach); - 
LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name); + LOG_TRACE(getLogger("MergeTreeSettings"), "Created custom disk {}", disk_name); value = disk_name; } } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 0cac051bb2cf..4aecf85ac2a0 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -56,7 +56,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, - Poco::Logger * log_) + LoggerPtr log_) : estimator(estimator_) , table_columns{collections::map( metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} @@ -132,8 +132,8 @@ std::optional MergeTreeWhe if (!optimize_result) return {}; - auto filter_actions = reconstructDAG(optimize_result->where_conditions, context); - auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions, context); + auto filter_actions = reconstructDAG(optimize_result->where_conditions); + auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions); FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) }; return result; @@ -343,7 +343,7 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions) return function; } -ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions, const ContextPtr & context) +ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions) { if (conditions.empty()) return {}; @@ -354,7 +354,7 @@ ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditi for (const auto & condition : conditions) filter_nodes.push_back(condition.node.getDAGNode()); - return ActionsDAG::buildFilterActionsDAG(filter_nodes, {} /*node_name_to_input_node_column*/, context); + return ActionsDAG::buildFilterActionsDAG(filter_nodes); } std::optional MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 0ef7ac9efff3..b56219e3c590 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -41,7 +41,7 @@ class MergeTreeWhereOptimizer : private boost::noncopyable const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, - Poco::Logger * log_); + LoggerPtr log_); void optimize(SelectQueryInfo & select_query_info, const ContextPtr & context) const; @@ -123,7 +123,7 @@ class MergeTreeWhereOptimizer : private boost::noncopyable static ASTPtr reconstructAST(const Conditions & conditions); /// Reconstruct DAG from conditions - static ActionsDAGPtr reconstructDAG(const Conditions & conditions, const ContextPtr & context); + static ActionsDAGPtr reconstructDAG(const Conditions & conditions); void optimizeArbitrary(ASTSelectQuery & select) const; @@ -156,7 +156,7 @@ class MergeTreeWhereOptimizer : private boost::noncopyable const std::optional supported_columns; const NameSet sorting_key_names; const NameToIndexMap primary_key_names_positions; - Poco::Logger * log; + LoggerPtr log; std::unordered_map column_sizes; UInt64 total_size_of_queried_columns = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp 
b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index a8b3df483ed5..2236c1a93805 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -36,7 +36,7 @@ MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( , name(name_) , path(storage.getRelativeDataPath() + name_) , pool(storage.getContext()->getSchedulePool()) - , log(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)")) + , log(getLogger(storage.getLogName() + " (WriteAheadLog)")) { init(); sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 5fb9dd907a17..9550fa6ecee5 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -99,7 +99,7 @@ class MergeTreeWriteAheadLog mutable std::mutex write_mutex; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 8b34c221eec6..1d10a1433efd 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -155,7 +155,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( for (const auto & name : checksums_to_remove) checksums.files.erase(name); - LOG_TRACE(&Poco::Logger::get("MergedBlockOutputStream"), "filled checksums {}", new_part->getNameWithState()); + LOG_TRACE(getLogger("MergedBlockOutputStream"), "filled checksums {}", new_part->getNameWithState()); for (const auto & [projection_name, projection_part] : new_part->getProjectionParts()) checksums.addFile( diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index 42d8307e948e..68c7f4642148 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -23,7 +23,7 @@ class MutateFromLogEntryTask : public ReplicatedMergeMutateTaskBase StorageReplicatedMergeTree & storage_, Callback && task_result_callback_) : ReplicatedMergeMutateTaskBase( - &Poco::Logger::get(storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MutateFromLogEntryTask)"), + getLogger(storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MutateFromLogEntryTask)"), storage_, selected_entry_, task_result_callback_) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index bf8e879e3d07..0b19aebe36d6 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -111,7 +111,7 @@ bool MutatePlainMergeTreeTask::executeStep() if (merge_mutate_entry->txn) merge_mutate_entry->txn->onException(); PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - LOG_ERROR(&Poco::Logger::get("MutatePlainMergeTreeTask"), exception_message); + LOG_ERROR(getLogger("MutatePlainMergeTreeTask"), exception_message); storage.updateMutationEntriesErrors(future_part, false, exception_message.text); write_part_log(ExecutionStatus::fromCurrentException("", true)); tryLogCurrentException(__PRETTY_FUNCTION__); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index e4070aa82626..8eb23dd41fa5 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ 
b/src/Storages/MergeTree/MutateTask.cpp @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include @@ -61,7 +63,7 @@ static void splitAndModifyMutationCommands( const MutationCommands & commands, MutationCommands & for_interpreter, MutationCommands & for_file_renames, - Poco::Logger * log) + LoggerPtr log) { auto part_columns = part->getColumnsDescription(); @@ -308,6 +310,15 @@ getColumnsForNewDataPart( } } + if (!storage_columns_set.contains(BlockNumberColumn::name)) + { + if (source_part->tryGetSerialization(BlockNumberColumn::name) != nullptr) + { + storage_columns.push_back({BlockNumberColumn::name, BlockNumberColumn::type}); + storage_columns_set.insert(BlockNumberColumn::name); + } + } + SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { @@ -540,7 +551,9 @@ static std::set getProjectionsToRecalculate( { bool need_recalculate = materialized_projections.contains(projection.name) - || (!is_full_part_storage && source_part->hasProjection(projection.name)); + || (!is_full_part_storage + && source_part->hasProjection(projection.name) + && !source_part->hasBrokenProjection(projection.name)); if (need_recalculate) projections_to_recalc.insert(&projection); @@ -674,15 +687,25 @@ static NameToNameVector collectFilesForRenames( { if (command.type == MutationCommand::Type::DROP_INDEX) { - if (source_part->checksums.has(INDEX_FILE_PREFIX + command.column_name + ".idx2")) + static const std::array suffixes = {".idx2", ".idx"}; + static const std::array gin_suffixes = {".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"}; /// .gin_* is inverted index + + for (const auto & suffix : suffixes) { - add_rename(INDEX_FILE_PREFIX + command.column_name + ".idx2", ""); - add_rename(INDEX_FILE_PREFIX + command.column_name + mrk_extension, ""); + const String filename = INDEX_FILE_PREFIX + command.column_name + suffix; + const String filename_mrk = INDEX_FILE_PREFIX + command.column_name + mrk_extension; + + if (source_part->checksums.has(filename)) + { + add_rename(filename, ""); + add_rename(filename_mrk, ""); + } } - else if (source_part->checksums.has(INDEX_FILE_PREFIX + command.column_name + ".idx")) + for (const auto & gin_suffix : gin_suffixes) { - add_rename(INDEX_FILE_PREFIX + command.column_name + ".idx", ""); - add_rename(INDEX_FILE_PREFIX + command.column_name + mrk_extension, ""); + const String filename = INDEX_FILE_PREFIX + command.column_name + gin_suffix; + if (source_part->checksums.has(filename)) + add_rename(filename, ""); } } else if (command.type == MutationCommand::Type::DROP_PROJECTION) @@ -874,7 +897,8 @@ void finalizeMutatedPart( new_data_part->modification_time = time(nullptr); /// Load rest projections which are hardlinked - new_data_part->loadProjections(false, false, true /* if_not_loaded */); + bool noop; + new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. 
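The reworked DROP_INDEX branch of collectFilesForRenames() above loops over the possible index file suffixes (.idx2, .idx) plus the inverted-index .gin_* files and schedules every file that actually exists in the part's checksums for removal. A self-contained sketch; the skp_idx_ prefix and the convention that a rename to an empty target means "delete" are assumptions about the surrounding code, and the checksums list is modeled as a plain set of file names.

#include <array>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, std::string>> collectDropIndexRenames(
    const std::set<std::string> & existing_files, const std::string & index_name, const std::string & mrk_extension)
{
    static const std::array<std::string, 2> suffixes = {".idx2", ".idx"};
    static const std::array<std::string, 4> gin_suffixes = {".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"};

    const std::string prefix = "skp_idx_" + index_name;   // assumed INDEX_FILE_PREFIX + name
    std::vector<std::pair<std::string, std::string>> renames;

    for (const auto & suffix : suffixes)
    {
        if (existing_files.contains(prefix + suffix))
        {
            renames.emplace_back(prefix + suffix, "");           // "" target = remove the file
            renames.emplace_back(prefix + mrk_extension, "");
        }
    }

    for (const auto & gin_suffix : gin_suffixes)
        if (existing_files.contains(prefix + gin_suffix))
            renames.emplace_back(prefix + gin_suffix, "");

    return renames;
}

int main()
{
    std::set<std::string> files = {"skp_idx_my_index.idx2", "skp_idx_my_index.mrk2"};
    for (const auto & [from, to] : collectDropIndexRenames(files, "my_index", ".mrk2"))
        std::cout << from << " -> " << (to.empty() ? "<removed>" : to) << '\n';
}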
@@ -896,7 +920,7 @@ struct MutationContext TableLockHolder * holder; MergeListEntry * mutate_entry; - Poco::Logger * log{&Poco::Logger::get("MutateTask")}; + LoggerPtr log{getLogger("MutateTask")}; FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; @@ -975,7 +999,7 @@ class MergeProjectionPartsTask : public IExecutableTask , projection(projection_) , block_num(block_num_) , ctx(ctx_) - , log(&Poco::Logger::get("MergeProjectionPartsTask")) + , log(getLogger("MergeProjectionPartsTask")) { LOG_DEBUG(log, "Selected {} projection_parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name); level_parts[current_level] = std::move(parts); @@ -1079,7 +1103,7 @@ class MergeProjectionPartsTask : public IExecutableTask size_t & block_num; MutationContextPtr ctx; - Poco::Logger * log; + LoggerPtr log; std::map level_parts; size_t current_level = 0; @@ -1451,7 +1475,9 @@ class MutateAllPartColumnsTask : public IExecutableTask bool need_recalculate = ctx->materialized_projections.contains(projection.name) - || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); + || (!is_full_part_storage + && ctx->source_part->hasProjection(projection.name) + && !ctx->source_part->hasBrokenProjection(projection.name)); if (need_recalculate) { @@ -1575,8 +1601,9 @@ class MutateAllPartColumnsTask : public IExecutableTask void finalize() { + bool noop; ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx); - ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */); + ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); ctx->mutating_executor.reset(); ctx->mutating_pipeline.reset(); @@ -1921,7 +1948,7 @@ static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const if (!part_column) return false; - /// For ALTER MODIFY COLUMN from 'Type' to 'Nullable(Type)' we can skip mutatation and + /// For ALTER MODIFY COLUMN from 'Type' to 'Nullable(Type)' we can skip mutation and /// apply only metadata conversion. But it doesn't work for custom serialization. const auto * to_nullable = typeid_cast(command.data_type.get()); if (!to_nullable) @@ -1937,6 +1964,20 @@ static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const return true; } +static bool canSkipConversionToVariant(const MergeTreeDataPartPtr & part, const MutationCommand & command) +{ + if (command.type != MutationCommand::READ_COLUMN) + return false; + + auto part_column = part->tryGetColumn(command.column_name); + if (!part_column) + return false; + + /// For ALTER MODIFY COLUMN with Variant extension (like 'Variant(T1, T2)' to 'Variant(T1, T2, T3, ...)') + /// we can skip mutation and apply only metadata conversion. 
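canSkipConversionToVariant(), whose body follows, lets an ALTER MODIFY COLUMN from Variant(T1, T2) to Variant(T1, T2, T3, ...) become a metadata-only change. The core of such a check is that the new type's alternatives contain all of the old ones; the sketch below shows only that set-inclusion part with types reduced to name lists, whereas the real isVariantExtension works on IDataType instances and is stricter.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in: a Variant type reduced to the names of its alternatives.
bool isVariantExtensionByNames(const std::vector<std::string> & old_variants,
                               const std::vector<std::string> & new_variants)
{
    std::unordered_set<std::string> new_set(new_variants.begin(), new_variants.end());

    for (const auto & variant : old_variants)
        if (!new_set.contains(variant))
            return false;   // an alternative was dropped, data must be rewritten

    return true;            // pure extension: metadata-only conversion is enough
}

int main()
{
    std::cout << isVariantExtensionByNames({"UInt64", "String"}, {"UInt64", "String", "Array(UInt64)"}) << '\n'; // 1
    std::cout << isVariantExtensionByNames({"UInt64", "String"}, {"UInt64"}) << '\n';                            // 0
}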
+ return isVariantExtension(part_column->type, command.data_type); +} + static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, const MutationCommand & command, const ContextPtr & context) { if (command.partition) @@ -1952,6 +1993,9 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con if (canSkipConversionToNullable(part, command)) return true; + if (canSkipConversionToVariant(part, command)) + return true; + return false; } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 980b128ec75a..abc51bde3fb8 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -56,6 +56,32 @@ takeFromRange(const MarkRange & range, size_t min_number_of_marks, size_t & curr current_marks_amount += range_we_take.getNumberOfMarks(); return range_we_take.getNumberOfMarks(); } + +void sortResponseRanges(RangesInDataPartsDescription & result) +{ + std::ranges::sort(result, [](const auto & lhs, const auto & rhs) { return lhs.info < rhs.info; }); + + RangesInDataPartsDescription new_result; + + /// Aggregate ranges for each part within a single entry + for (auto & ranges_in_part : result) + { + if (new_result.empty() || new_result.back().info != ranges_in_part.info) + new_result.push_back(RangesInDataPartDescription{.info = ranges_in_part.info}); + + new_result.back().ranges.insert( + new_result.back().ranges.end(), + std::make_move_iterator(ranges_in_part.ranges.begin()), + std::make_move_iterator(ranges_in_part.ranges.end())); + ranges_in_part.ranges.clear(); + } + + /// Sort ranges for each part + for (auto & ranges_in_part : new_result) + std::sort(ranges_in_part.ranges.begin(), ranges_in_part.ranges.end()); + + result = std::move(new_result); +} } namespace ProfileEvents @@ -219,7 +245,7 @@ class DefaultCoordinator : public ParallelReplicasReadingCoordinator::ImplInterf }; std::vector replica_status; - Poco::Logger * log = &Poco::Logger::get("DefaultCoordinator"); + LoggerPtr log = getLogger("DefaultCoordinator"); /// Workflow of a segment: /// 0. 
`all_parts_to_read` contains all the parts and thus all the segments initially present there (virtually) @@ -775,6 +801,8 @@ ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest reque } } + sortResponseRanges(response.description); + LOG_DEBUG( log, "Going to respond to replica {} with {}; mine_marks={}, stolen_by_hash={}, stolen_rest={}", @@ -807,7 +835,7 @@ class InOrderCoordinator : public ParallelReplicasReadingCoordinator::ImplInterf Parts all_parts_to_read; size_t total_rows_to_read = 0; - Poco::Logger * log = &Poco::Logger::get(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator")); + LoggerPtr log = getLogger(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator")); }; template diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 76b8080f64cd..78fcfabb7044 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -20,7 +20,7 @@ PartMovesBetweenShardsOrchestrator::PartMovesBetweenShardsOrchestrator(StorageRe : storage(storage_) , zookeeper_path(storage.zookeeper_path) , logger_name(storage.getStorageID().getFullTableName() + " (PartMovesBetweenShardsOrchestrator)") - , log(&Poco::Logger::get(logger_name)) + , log(getLogger(logger_name)) , entries_znode_path(zookeeper_path + "/part_moves_shard") { /// Schedule pool is not designed for long-running tasks. TODO replace with a separate thread? diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index af21022953c5..abe259c77ab5 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -176,7 +176,7 @@ class PartMovesBetweenShardsOrchestrator String zookeeper_path; String logger_name; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::atomic need_stop{false}; BackgroundSchedulePool::TaskHolder task; diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 668576f90211..eb51d600da3b 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -59,7 +59,7 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) const { WriteBufferFromOwnString buf; part.partition.serializeText(part.storage, buf, FormatSettings{}); - LOG_TRACE(&Poco::Logger::get("PartitionPruner"), "Partition {} gets pruned", buf.str()); + LOG_TRACE(getLogger("PartitionPruner"), "Partition {} gets pruned", buf.str()); } } diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index b0755ccd3cae..d750c02d3e11 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -229,6 +229,12 @@ class RPNBuilder rpn_elements.emplace_back(std::move(element)); } + if (arguments_size == 0 && function_node.getFunctionName() == "indexHint") + { + element.function = RPNElement::ALWAYS_TRUE; + rpn_elements.emplace_back(std::move(element)); + } + return; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 18fcacecc9e3..2b1fcec62a83 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -17,7 +17,7 @@ class ReplicatedMergeMutateTaskBase : public IExecutableTask { public: 
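sortResponseRanges(), added above and called from handleRequest(), orders the response by part, collapses duplicate entries for the same part into one, and then sorts the ranges inside each part. A self-contained sketch over a simplified description type; the real code keys on MergeTreePartInfo rather than a part-name string.

#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using MarkRange = std::pair<size_t, size_t>;   // [begin, end) marks

struct RangesInPart
{
    std::string part;
    std::vector<MarkRange> ranges;
};

void sortResponseRanges(std::vector<RangesInPart> & result)
{
    std::sort(result.begin(), result.end(), [](const auto & lhs, const auto & rhs) { return lhs.part < rhs.part; });

    // Aggregate ranges for each part within a single entry.
    std::vector<RangesInPart> merged;
    for (auto & entry : result)
    {
        if (merged.empty() || merged.back().part != entry.part)
            merged.push_back({entry.part, {}});

        auto & dst = merged.back().ranges;
        dst.insert(dst.end(), std::make_move_iterator(entry.ranges.begin()), std::make_move_iterator(entry.ranges.end()));
    }

    // Sort ranges for each part.
    for (auto & entry : merged)
        std::sort(entry.ranges.begin(), entry.ranges.end());

    result = std::move(merged);
}

int main()
{
    std::vector<RangesInPart> response = {
        {"all_2_2_0", {{8, 16}}},
        {"all_1_1_0", {{4, 8}}},
        {"all_2_2_0", {{0, 8}}},
    };
    sortResponseRanges(response);
    for (const auto & entry : response)
        for (const auto & [begin, end] : entry.ranges)
            std::cout << entry.part << ": [" << begin << ", " << end << ")\n";
}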
ReplicatedMergeMutateTaskBase( - Poco::Logger * log_, + LoggerPtr log_, StorageReplicatedMergeTree & storage_, ReplicatedMergeTreeQueue::SelectedEntryPtr & selected_entry_, IExecutableTask::TaskResultCallback & task_result_callback_) @@ -66,7 +66,7 @@ class ReplicatedMergeMutateTaskBase : public IExecutableTask ReplicatedMergeTreeQueue::SelectedEntryPtr selected_entry; ReplicatedMergeTreeLogEntry & entry; MergeList::EntryPtr merge_mutate_entry{nullptr}; - Poco::Logger * log; + LoggerPtr log; /// ProfileEvents for current part will be stored here ProfileEvents::Counters profile_counters; ContextMutablePtr task_context; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index a544ac908a4c..336d19692d43 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeAttachThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this] { run(); }); const auto storage_settings = storage.getSettings(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h index 222b30b519b1..250a5ed34d1c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h @@ -34,7 +34,7 @@ class ReplicatedMergeTreeAttachThread BackgroundSchedulePool::TaskHolder task; std::string log_name; - Poco::Logger * log; + LoggerPtr log; std::atomic first_try_done{false}; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 8daee661c752..67942491ae25 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeCleanupThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) , sleep_ms(storage.getSettings()->cleanup_delay_period * 1000) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index ae9aabdb4e7f..5beaef569955 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -40,7 +40,7 @@ class ReplicatedMergeTreeCleanupThread private: StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; BackgroundSchedulePool::TaskHolder task; pcg64 rng{randomSeed()}; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index b1875464725b..bc0b4f73a310 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -28,7 +28,7 @@ static const auto 
PART_CHECK_ERROR_SLEEP_MS = 5 * 1000; ReplicatedMergeTreePartCheckThread::ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreePartCheckThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this] { run(); }); task->schedule(); @@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t if (parts_set.contains(name)) return; - LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds); + LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds); parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds)); parts_set.insert(name); task->schedule(); @@ -274,7 +274,7 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection) { ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); @@ -341,6 +341,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St /// before the ReplicatedMergeTreePartHeader was introduced. String part_path = storage.replica_path + "/parts/" + part_name; String part_znode = zookeeper->get(part_path); + bool is_broken_projection = false; try { @@ -362,8 +363,10 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St checkDataPart( part, - true, - [this] { return need_stop.load(); }); + /* require_checksums */true, + is_broken_projection, + [this] { return need_stop.load(); }, + throw_on_broken_projection); if (need_stop) { @@ -382,14 +385,27 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St if (isRetryableException(std::current_exception())) throw; - tryLogCurrentException(log, __PRETTY_FUNCTION__); + PreformattedMessage message; + if (is_broken_projection) + { + WriteBufferFromOwnString wb; + message = PreformattedMessage::create( + "Part {} has a broken projections. It will be ignored. Broken projections info: {}", + part_name, getCurrentExceptionMessage(false)); + LOG_DEBUG(log, message); + result.action = ReplicatedCheckResult::DoNothing; + } + else + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); - auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); - LOG_ERROR(log, message); + message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); + result.action = ReplicatedCheckResult::TryFetchMissing; + } /// Part is broken, let's try to find it and fetch. 
result.status = {part_name, false, message}; - result.action = ReplicatedCheckResult::TryFetchMissing; return result; } @@ -419,12 +435,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St } -CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after) +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after, bool throw_on_broken_projection) { LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - ReplicatedCheckResult result = checkPartImpl(part_name); + ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection); switch (result.action) { case ReplicatedCheckResult::None: UNREACHABLE(); @@ -577,7 +593,7 @@ void ReplicatedMergeTreePartCheckThread::run() } std::optional recheck_after; - checkPartAndFix(selected->name, &recheck_after); + checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false); if (need_stop) return; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 68dc6ca3d1de..9091f6985462 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,9 +65,9 @@ class ReplicatedMergeTreePartCheckThread size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr, bool throw_on_broken_projection = true); - ReplicatedCheckResult checkPartImpl(const String & part_name); + ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection); std::unique_lock pausePartsCheck(); @@ -87,7 +87,7 @@ class ReplicatedMergeTreePartCheckThread StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; using StringSet = std::set; struct PartToCheck diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index a3afa8cd88a9..8d921bdcb1c5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -36,7 +36,7 @@ ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & zookeeper_path = storage.zookeeper_path; replica_path = storage.replica_path; logger_name = storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeQueue)"; - log = &Poco::Logger::get(logger_name); + log = getLogger(logger_name); } @@ -2149,7 +2149,7 @@ LocalMergePredicate::LocalMergePredicate(ReplicatedMergeTreeQueue & queue_) template CommittingBlocks BaseMergePredicate::getCommittingBlocks( - zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_) + zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_) { CommittingBlocks committing_blocks; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 92201b11d37d..84106565dff0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -71,7 +71,7 @@ class ReplicatedMergeTreeQueue String zookeeper_path; String replica_path; String logger_name; - Poco::Logger * log = nullptr; + 
LoggerPtr log = nullptr; /// Protects the queue, future_parts and other queue state variables. mutable std::mutex state_mutex; @@ -519,7 +519,7 @@ class BaseMergePredicate /// This predicate is checked for the first part of each range. bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; - CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_); + CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_); protected: /// A list of partitions that can be used in the merge predicate diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 579592b0b3e8..b79418da7916 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -33,7 +33,7 @@ static String generateActiveNodeIdentifier() ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeRestartingThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) , active_node_identifier(generateActiveNodeIdentifier()) { const auto storage_settings = storage.getSettings(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 02103272a1f5..01071d80e8bf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -42,7 +42,7 @@ class ReplicatedMergeTreeRestartingThread private: StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; std::atomic need_stop {false}; /// The random data we wrote into `/replicas/me/is_active`. 
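Note on the logger changes in the files above and below: this is a mechanical migration of logger members from raw Poco::Logger * to the shared-ownership LoggerPtr obtained through getLogger(). The sketch below is illustrative only and is not part of the patch; it assumes LoggerPtr and getLogger() are the shared-pointer alias and factory from ClickHouse's Common/Logger.h, and the class name is hypothetical.

    #include <Common/Logger.h>
    #include <string>

    // Minimal sketch of the before/after pattern applied throughout this diff.
    class ExampleStorageComponent
    {
    public:
        explicit ExampleStorageComponent(const std::string & table_name)
            : log_name(table_name + " (ExampleStorageComponent)")
            , log(getLogger(log_name))   // was: &Poco::Logger::get(log_name)
        {
        }

    private:
        std::string log_name;
        LoggerPtr log;                   // was: Poco::Logger * log;
    };

Where a C API still needs a raw pointer (the NATS disconnected/reconnected callbacks further down), the patch passes log.get() instead of the smart pointer itself.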
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 73ad595ec20e..1fb2393948a6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -58,7 +58,7 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk ProfileEvents::Counters part_counters; Partition() = default; - Partition(Poco::Logger * log_, + Partition(LoggerPtr log_, MergeTreeDataWriter::TemporaryPart && temp_part_, UInt64 elapsed_ns_, BlockIDsType && block_id_, @@ -92,7 +92,7 @@ std::vector testSelfDeduplicate(std::vector data, std::vector::DelayedChunk::Partition part( - &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); + getLogger("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); part.filterSelfDuplicate(); @@ -138,7 +138,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , is_attach(is_attach_) , quorum_parallel(quorum_parallel_) , deduplicate(deduplicate_) - , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) + , log(getLogger(storage.getLogName() + " (Replicated OutputStream)")) , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 4811d93775b6..29f3183be646 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -128,7 +128,7 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; StorageSnapshotPtr storage_snapshot; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 8cf5b6a88945..0b545beb1163 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -43,6 +43,7 @@ namespace ErrorCodes extern const int NO_FILE_IN_DATA_PART; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; + extern const int BROKEN_PROJECTION; } @@ -117,7 +118,9 @@ static IMergeTreeDataPart::Checksums checkDataPart( const NameSet & files_without_checksums, const ReadSettings & read_settings, bool require_checksums, - std::function is_cancelled) + std::function is_cancelled, + bool & is_broken_projection, + bool throw_on_broken_projection) { /** Responsibility: * - read list of columns from columns.txt; @@ -126,6 +129,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( */ CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks}; + Poco::Logger * log = &Poco::Logger::get("checkDataPart"); NamesAndTypesList columns_txt; @@ -275,17 +279,55 @@ static IMergeTreeDataPart::Checksums checkDataPart( } } + std::string broken_projections_message; for (const auto & [name, projection] : data_part->getProjectionParts()) { if (is_cancelled()) return {}; auto projection_file = name + ".proj"; - auto projection_checksums = checkDataPart( - projection, *data_part_storage.getProjection(projection_file), - projection->getColumns(), projection->getType(), - projection->getFileNamesWithoutChecksums(), - read_settings, 
require_checksums, is_cancelled); + if (!throw_on_broken_projection && projection->is_broken) + { + projections_on_disk.erase(projection_file); + checksums_txt.remove(projection_file); + } + + IMergeTreeDataPart::Checksums projection_checksums; + try + { + bool noop; + projection_checksums = checkDataPart( + projection, *data_part_storage.getProjection(projection_file), + projection->getColumns(), projection->getType(), + projection->getFileNamesWithoutChecksums(), + read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false); + } + catch (...) + { + if (isRetryableException(std::current_exception())) + throw; + + if (!projection->is_broken) + { + LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file); + projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode()); + } + + is_broken_projection = true; + if (throw_on_broken_projection) + { + if (!broken_projections_message.empty()) + broken_projections_message += "\n"; + + broken_projections_message += fmt::format( + "Part {} has a broken projection {} (error: {})", + data_part->name, name, getCurrentExceptionMessage(false)); + continue; + } + + projections_on_disk.erase(projection_file); + checksums_txt.remove(projection_file); + } checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum( projection_checksums.getTotalSizeOnDisk(), @@ -294,6 +336,11 @@ static IMergeTreeDataPart::Checksums checkDataPart( projections_on_disk.erase(projection_file); } + if (throw_on_broken_projection && !broken_projections_message.empty()) + { + throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); + } + if (require_checksums && !projections_on_disk.empty()) { throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, @@ -321,7 +368,9 @@ IMergeTreeDataPart::Checksums checkDataPartInMemory(const DataPartInMemoryPtr & IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - std::function is_cancelled) + bool & is_broken_projection, + std::function is_cancelled, + bool throw_on_broken_projection) { if (auto part_in_memory = asInMemoryPart(data_part)) return checkDataPartInMemory(part_in_memory); @@ -338,7 +387,7 @@ IMergeTreeDataPart::Checksums checkDataPart( throw; LOG_DEBUG( - &Poco::Logger::get("checkDataPart"), + getLogger("checkDataPart"), "Will drop cache for data part {} and will check it once again", data_part->name); auto & cache = *FileCacheFactory::instance().getByName(*cache_name)->cache; @@ -363,7 +412,9 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled); + is_cancelled, + is_broken_projection, + throw_on_broken_projection); }; try @@ -377,7 +428,9 @@ IMergeTreeDataPart::Checksums checkDataPart( data_part->getFileNamesWithoutChecksums(), read_settings, require_checksums, - is_cancelled); + is_cancelled, + is_broken_projection, + throw_on_broken_projection); } catch (...) 
{ diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h index d0e48b6f80ab..a01978f4efec 100644 --- a/src/Storages/MergeTree/checkDataPart.h +++ b/src/Storages/MergeTree/checkDataPart.h @@ -10,7 +10,9 @@ namespace DB IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - std::function is_cancelled = []{ return false; }); + bool & is_broken_projection, + std::function is_cancelled = []{ return false; }, + bool throw_on_broken_projection = false); bool isNotEnoughMemoryErrorCode(int code); bool isRetryableException(const std::exception_ptr exception_ptr); diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 1aa19c9ccde5..4fb81d690707 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -20,7 +20,7 @@ MessageQueueSink::MessageQueueSink( void MessageQueueSink::onStart() { LOG_TEST( - &Poco::Logger::get("MessageQueueSink"), + getLogger("MessageQueueSink"), "Executing startup for MessageQueueSink"); initialize(); diff --git a/src/Storages/NATS/NATSConnection.cpp b/src/Storages/NATS/NATSConnection.cpp index 70b3599aa090..d7ad0cf8219e 100644 --- a/src/Storages/NATS/NATSConnection.cpp +++ b/src/Storages/NATS/NATSConnection.cpp @@ -13,7 +13,7 @@ static const auto RETRIES_MAX = 20; static const auto CONNECTED_TO_BUFFER_SIZE = 256; -NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_) +NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configuration_, LoggerPtr log_) : configuration(configuration_) , log(log_) , event_handler(loop.getLoop(), log) @@ -115,8 +115,8 @@ void NATSConnectionManager::connectImpl() } natsOptions_SetMaxReconnect(options, configuration.max_reconnect); natsOptions_SetReconnectWait(options, configuration.reconnect_wait); - natsOptions_SetDisconnectedCB(options, disconnectedCallback, log); - natsOptions_SetReconnectedCB(options, reconnectedCallback, log); + natsOptions_SetDisconnectedCB(options, disconnectedCallback, log.get()); + natsOptions_SetReconnectedCB(options, reconnectedCallback, log.get()); natsStatus status; { auto lock = event_handler.setThreadLocalLoop(); diff --git a/src/Storages/NATS/NATSConnection.h b/src/Storages/NATS/NATSConnection.h index b49070473b28..c350f395a927 100644 --- a/src/Storages/NATS/NATSConnection.h +++ b/src/Storages/NATS/NATSConnection.h @@ -24,7 +24,7 @@ struct NATSConfiguration class NATSConnectionManager { public: - NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_); + NATSConnectionManager(const NATSConfiguration & configuration_, LoggerPtr log_); ~NATSConnectionManager(); bool isConnected(); @@ -54,7 +54,7 @@ class NATSConnectionManager static void reconnectedCallback(natsConnection * nc, void * log); NATSConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; UVLoop loop; NATSHandler event_handler; diff --git a/src/Storages/NATS/NATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp index c7b40973b72e..136cb13ddfac 100644 --- a/src/Storages/NATS/NATSConsumer.cpp +++ b/src/Storages/NATS/NATSConsumer.cpp @@ -21,7 +21,7 @@ NATSConsumer::NATSConsumer( StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_, const std::atomic & stopped_) : connection(connection_) diff --git a/src/Storages/NATS/NATSConsumer.h b/src/Storages/NATS/NATSConsumer.h index a5470433303d..e8d3a849c2a0 
100644 --- a/src/Storages/NATS/NATSConsumer.h +++ b/src/Storages/NATS/NATSConsumer.h @@ -24,7 +24,7 @@ class NATSConsumer StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_, const std::atomic & stopped_); @@ -58,7 +58,7 @@ class NATSConsumer StorageNATS & storage; std::vector subscriptions; std::vector subjects; - Poco::Logger * log; + LoggerPtr log; const std::atomic & stopped; bool subscribed = false; diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp index 7006e5633a92..03f1fc1a4955 100644 --- a/src/Storages/NATS/NATSHandler.cpp +++ b/src/Storages/NATS/NATSHandler.cpp @@ -12,7 +12,7 @@ namespace DB static const auto MAX_THREAD_WORK_DURATION_MS = 60000; -NATSHandler::NATSHandler(uv_loop_t * loop_, Poco::Logger * log_) : +NATSHandler::NATSHandler(uv_loop_t * loop_, LoggerPtr log_) : loop(loop_), log(log_), loop_running(false), diff --git a/src/Storages/NATS/NATSHandler.h b/src/Storages/NATS/NATSHandler.h index e3894c888a3c..6f9ec398cfae 100644 --- a/src/Storages/NATS/NATSHandler.h +++ b/src/Storages/NATS/NATSHandler.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { @@ -23,7 +23,7 @@ using LockPtr = std::unique_ptr>; class NATSHandler { public: - NATSHandler(uv_loop_t * loop_, Poco::Logger * log_); + NATSHandler(uv_loop_t * loop_, LoggerPtr log_); ~NATSHandler(); @@ -47,7 +47,7 @@ class NATSHandler private: uv_loop_t * loop; natsOptions * opts = nullptr; - Poco::Logger * log; + LoggerPtr log; std::atomic loop_running; std::atomic loop_state; diff --git a/src/Storages/NATS/NATSProducer.cpp b/src/Storages/NATS/NATSProducer.cpp index a8510149baf2..fb8abb016f80 100644 --- a/src/Storages/NATS/NATSProducer.cpp +++ b/src/Storages/NATS/NATSProducer.cpp @@ -23,7 +23,7 @@ NATSProducer::NATSProducer( const NATSConfiguration & configuration_, const String & subject_, std::atomic & shutdown_called_, - Poco::Logger * log_) + LoggerPtr log_) : AsynchronousMessageProducer(log_) , connection(configuration_, log_) , subject(subject_) diff --git a/src/Storages/NATS/NATSProducer.h b/src/Storages/NATS/NATSProducer.h index 0303d05969b2..6923553a551b 100644 --- a/src/Storages/NATS/NATSProducer.h +++ b/src/Storages/NATS/NATSProducer.h @@ -20,7 +20,7 @@ class NATSProducer : public AsynchronousMessageProducer const NATSConfiguration & configuration_, const String & subject_, std::atomic & shutdown_called_, - Poco::Logger * log_); + LoggerPtr log_); void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 9cb1fbd85061..2af9a9f974f9 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -59,7 +59,7 @@ StorageNATS::StorageNATS( , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) , num_consumers(nats_settings->nats_num_consumers.value) , max_rows_per_message(nats_settings->nats_max_rows_per_message) - , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) + , log(getLogger("StorageNATS (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , is_attach(is_attach_) diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 16a162b85008..882119f5cdbc 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ 
-78,7 +78,7 @@ class StorageNATS final : public IStorage, WithContext size_t num_consumers; size_t max_rows_per_message; - Poco::Logger * log; + LoggerPtr log; NATSConnectionManagerPtr connection; /// Connection for all consumers NATSConfiguration configuration; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index b24421094098..f99ebf517928 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -51,7 +51,7 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( bool schema_as_a_part_of_table_name_, StorageInfos storages_info_, const String & name_for_logger) - : log(&Poco::Logger::get("PostgreSQLReplicaConsumer(" + name_for_logger + ")")) + : log(getLogger("PostgreSQLReplicaConsumer(" + name_for_logger + ")")) , context(context_) , replication_slot_name(replication_slot_name_) , publication_name(publication_name_) @@ -76,7 +76,7 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( } -MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info, Poco::Logger * log_) +MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info, LoggerPtr log_) : storage(storage_info.storage) , table_description(storage_info.storage->getInMemoryMetadataPtr()->getSampleBlock()) , columns_attributes(storage_info.attributes) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 3e95c1cd7de5..972c03e50d86 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -32,7 +32,7 @@ class MaterializedPostgreSQLConsumer private: struct StorageData { - explicit StorageData(const StorageInfo & storage_info, Poco::Logger * log_); + explicit StorageData(const StorageInfo & storage_info, LoggerPtr log_); size_t getColumnsNum() const { return table_description.sample_block.columns(); } @@ -137,7 +137,7 @@ class MaterializedPostgreSQLConsumer return (static_cast(upper_half) << 32) + lower_half; } - Poco::Logger * log; + LoggerPtr log; ContextPtr context; const std::string replication_slot_name, publication_name; diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 43de2069b195..2bb1e2dde0d7 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -128,7 +128,7 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( const MaterializedPostgreSQLSettings & replication_settings, bool is_materialized_postgresql_database_) : WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("PostgreSQLReplicationHandler")) + , log(getLogger("PostgreSQLReplicationHandler")) , is_attach(is_attach_) , postgres_database(postgres_database_) , postgres_schema(replication_settings.materialized_postgresql_schema) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 5d426b3c512d..5c519053d844 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -102,7 +102,7 @@ friend class TemporaryReplicationSlot; void assertInitialized() const; - Poco::Logger * log; + LoggerPtr log; /// If it is not attach, i.e. 
a create query, then if publication already exists - always drop it. bool is_attach; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 0faf553797ad..f13cb820ec35 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -60,7 +60,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( std::unique_ptr replication_settings) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name_) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name_) + ")")) , is_materialized_postgresql_database(false) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -101,7 +101,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( const String & postgres_table_name) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -120,7 +120,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( const String & postgres_table_name) : IStorage(StorageID(nested_storage_->getStorageID().database_name, nested_storage_->getStorageID().table_name)) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(true) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -141,7 +141,7 @@ StoragePtr StorageMaterializedPostgreSQL::createTemporary() const auto tmp_storage = DatabaseCatalog::instance().tryGetTable(tmp_table_id, nested_context); if (tmp_storage) { - LOG_TRACE(&Poco::Logger::get("MaterializedPostgreSQLStorage"), "Temporary table {} already exists, dropping", tmp_table_id.getNameForLogs()); + LOG_TRACE(getLogger("MaterializedPostgreSQLStorage"), "Temporary table {} already exists, dropping", tmp_table_id.getNameForLogs()); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), getContext(), tmp_table_id, /* sync */true); } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index bebbb74ddd11..9c9418a8caa9 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -142,7 +142,7 @@ class StorageMaterializedPostgreSQL final : public IStorage, WithContext String getNestedTableName() const; - Poco::Logger * log; + LoggerPtr log; /// Not nullptr only for single MaterializedPostgreSQL storage, because for MaterializedPostgreSQL /// database engine there is one replication handler for all tables. 
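Note on the broken-projection handling introduced in checkDataPart.cpp and ReplicatedMergeTreePartCheckThread.cpp above: checkDataPart() now reports projection damage through an is_broken_projection out-parameter and, when throw_on_broken_projection is set, raises a single BROKEN_PROJECTION exception aggregating every failed projection; when the flag is not set, the broken projection is instead dropped from the returned checksums and only the out-parameter is raised. The fragment below is a condensed restatement of how checkPartImpl() consumes that contract, with the retry, logging and ZooKeeper bookkeeping elided; it is illustrative only, not part of the patch.

    bool is_broken_projection = false;
    try
    {
        checkDataPart(
            part,
            /* require_checksums */ true,
            is_broken_projection,
            [this] { return need_stop.load(); },
            throw_on_broken_projection);
    }
    catch (...)
    {
        if (isRetryableException(std::current_exception()))
            throw;

        if (is_broken_projection)
        {
            // Only a projection is damaged: log it and keep the part (DoNothing).
        }
        else
        {
            // The part itself looks broken: schedule a re-fetch from another replica (TryFetchMissing).
        }
    }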
diff --git a/src/Storages/RabbitMQ/RabbitMQConnection.cpp b/src/Storages/RabbitMQ/RabbitMQConnection.cpp index 13d065774a27..98ceba42676b 100644 --- a/src/Storages/RabbitMQ/RabbitMQConnection.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConnection.cpp @@ -11,7 +11,7 @@ static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 20; -RabbitMQConnection::RabbitMQConnection(const RabbitMQConfiguration & configuration_, Poco::Logger * log_) +RabbitMQConnection::RabbitMQConnection(const RabbitMQConfiguration & configuration_, LoggerPtr log_) : configuration(configuration_) , log(log_) , event_handler(loop.getLoop(), log) diff --git a/src/Storages/RabbitMQ/RabbitMQConnection.h b/src/Storages/RabbitMQ/RabbitMQConnection.h index 698230b16f4a..5adb64561948 100644 --- a/src/Storages/RabbitMQ/RabbitMQConnection.h +++ b/src/Storages/RabbitMQ/RabbitMQConnection.h @@ -22,7 +22,7 @@ struct RabbitMQConfiguration class RabbitMQConnection { public: - RabbitMQConnection(const RabbitMQConfiguration & configuration_, Poco::Logger * log_); + RabbitMQConnection(const RabbitMQConfiguration & configuration_, LoggerPtr log_); bool isConnected(); @@ -51,7 +51,7 @@ class RabbitMQConnection void disconnectImpl(bool immediately = false); RabbitMQConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; UVLoop loop; /// Preserve order of destruction here: diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index f6facc04212e..1843bebe3c7f 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -24,7 +24,7 @@ RabbitMQConsumer::RabbitMQConsumer( std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_) : event_handler(event_handler_) , queues(queues_) diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index 89dfa060eecb..c78b33bfc7cc 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -32,7 +32,7 @@ class RabbitMQConsumer std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_); struct CommitInfo @@ -88,7 +88,7 @@ class RabbitMQConsumer const String channel_base; const size_t channel_id_base; - Poco::Logger * log; + LoggerPtr log; std::atomic stopped; String channel_id; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 745af0d20e3a..be352f26f7be 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -8,7 +8,7 @@ namespace DB /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop and handler). 
*/ -RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : +RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, LoggerPtr log_) : AMQP::LibUvHandler(loop_), loop(loop_), log(log_), diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 4223732a4a07..244692cf8009 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -24,7 +24,7 @@ class RabbitMQHandler : public AMQP::LibUvHandler { public: - RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_); + RabbitMQHandler(uv_loop_t * loop_, LoggerPtr log_); void onError(AMQP::TcpConnection * connection, const char * message) override; void onReady(AMQP::TcpConnection * connection) override; @@ -50,7 +50,7 @@ class RabbitMQHandler : public AMQP::LibUvHandler private: uv_loop_t * loop; - Poco::Logger * log; + LoggerPtr log; std::atomic connection_running, loop_running; std::atomic loop_state; diff --git a/src/Storages/RabbitMQ/RabbitMQProducer.cpp b/src/Storages/RabbitMQ/RabbitMQProducer.cpp index 246569060d00..7ad83213b9b1 100644 --- a/src/Storages/RabbitMQ/RabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQProducer.cpp @@ -31,7 +31,7 @@ RabbitMQProducer::RabbitMQProducer( const size_t channel_id_base_, const bool persistent_, std::atomic & shutdown_called_, - Poco::Logger * log_) + LoggerPtr log_) : AsynchronousMessageProducer(log_) , connection(configuration_, log_) , routing_keys(routing_keys_) diff --git a/src/Storages/RabbitMQ/RabbitMQProducer.h b/src/Storages/RabbitMQ/RabbitMQProducer.h index 70afbbb9b903..a790eda0d085 100644 --- a/src/Storages/RabbitMQ/RabbitMQProducer.h +++ b/src/Storages/RabbitMQ/RabbitMQProducer.h @@ -24,7 +24,7 @@ class RabbitMQProducer : public AsynchronousMessageProducer const size_t channel_id_base_, const bool persistent_, std::atomic & shutdown_called_, - Poco::Logger * log_); + LoggerPtr log_); void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 793064c10f88..3cec448fc115 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -70,7 +70,7 @@ RabbitMQSource::RabbitMQSource( , ack_in_suffix(ack_in_suffix_) , non_virtual_header(std::move(headers.first)) , virtual_header(std::move(headers.second)) - , log(&Poco::Logger::get("RabbitMQSource")) + , log(getLogger("RabbitMQSource")) , max_execution_time_ms(max_execution_time_) { storage.incrementReader(); diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index a25b3d502220..21d059bfae2e 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -47,7 +47,7 @@ class RabbitMQSource : public ISource const Block non_virtual_header; const Block virtual_header; - Poco::Logger * log; + LoggerPtr log; RabbitMQConsumerPtr consumer; uint64_t max_execution_time_ms = 0; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fce2d775b157..868f48d0b7d3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -69,7 +69,7 @@ StorageRabbitMQ::StorageRabbitMQ( ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach_) + bool is_attach) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , 
rabbitmq_settings(std::move(rabbitmq_settings_)) @@ -86,12 +86,11 @@ StorageRabbitMQ::StorageRabbitMQ( , persistent(rabbitmq_settings->rabbitmq_persistent.value) , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) - , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , log(getLogger("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(rabbitmq_settings->rabbitmq_empty_queue_backoff_start_ms) - , is_attach(is_attach_) { const auto & config = getContext()->getConfigRef(); @@ -318,10 +317,11 @@ void StorageRabbitMQ::connectionFunc() try { if (connection->reconnect()) + { initRabbitMQ(); - - streaming_task->scheduleAfter(RESCHEDULE_MS); - return; + streaming_task->scheduleAfter(RESCHEDULE_MS); + return; + } } catch (...) { @@ -373,57 +373,37 @@ void StorageRabbitMQ::initRabbitMQ() } else { - try - { - auto rabbit_channel = connection->createChannel(); - - /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers + auto rabbit_channel = connection->createChannel(); - initExchange(*rabbit_channel); - bindExchange(*rabbit_channel); + /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers - for (const auto i : collections::range(0, num_queues)) - bindQueue(i + 1, *rabbit_channel); + initExchange(*rabbit_channel); + bindExchange(*rabbit_channel); - if (queues.size() != num_queues) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Expected all queues to be initialized (but having {}/{})", - queues.size(), num_queues); - } + for (const auto i : collections::range(0, num_queues)) + bindQueue(i + 1, *rabbit_channel); - LOG_TRACE(log, "RabbitMQ setup completed"); - rabbit_channel->close(); - } - catch (...) + if (queues.size() != num_queues) { - tryLogCurrentException(log); - if (is_attach) - return; /// A user will have to reattach the table. - throw; + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected all queues to be initialized (but having {}/{})", + queues.size(), num_queues); } + + LOG_TRACE(log, "RabbitMQ setup completed"); + rabbit_channel->close(); } LOG_TRACE(log, "Registering {} conumers", num_consumers); for (size_t i = 0; i < num_consumers; ++i) { - try - { - auto consumer = createConsumer(); - consumer->updateChannel(*connection); - consumers_ref.push_back(consumer); - pushConsumer(consumer); - ++num_created_consumers; - } - catch (...) 
- { - if (!is_attach) - throw; - - tryLogCurrentException(log); - } + auto consumer = createConsumer(); + consumer->updateChannel(*connection); + consumers_ref.push_back(consumer); + pushConsumer(consumer); + ++num_created_consumers; } LOG_TRACE(log, "Registered {}/{} conumers", num_created_consumers, num_consumers); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 120930cf01d1..696734617be3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -27,7 +27,7 @@ class StorageRabbitMQ final: public IStorage, WithContext ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach_); + bool is_attach); std::string getName() const override { return "RabbitMQ"; } @@ -102,7 +102,7 @@ class StorageRabbitMQ final: public IStorage, WithContext bool use_user_setup; bool hash_exchange; - Poco::Logger * log; + LoggerPtr log; RabbitMQConnectionPtr connection; /// Connection for all consumers RabbitMQConfiguration configuration; @@ -158,10 +158,9 @@ class StorageRabbitMQ final: public IStorage, WithContext size_t read_attempts = 0; mutable bool drop_table = false; - bool is_attach; RabbitMQConsumerPtr createConsumer(); - bool initialized = false; + std::atomic initialized = false; /// Functions working in the background void streamingToViewsFunc(); diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp index f49e1d6f25c5..ac80ded57923 100644 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -129,11 +129,14 @@ S3QueueFilesMetadata::S3QueueFilesMetadata(const fs::path & zookeeper_path_, con , max_loading_retries(settings_.s3queue_loading_retries.value) , min_cleanup_interval_ms(settings_.s3queue_cleanup_interval_min_ms.value) , max_cleanup_interval_ms(settings_.s3queue_cleanup_interval_max_ms.value) + , shards_num(settings_.s3queue_total_shards_num) + , threads_per_shard(settings_.s3queue_processing_threads_num) , zookeeper_processing_path(zookeeper_path_ / "processing") , zookeeper_processed_path(zookeeper_path_ / "processed") , zookeeper_failed_path(zookeeper_path_ / "failed") + , zookeeper_shards_path(zookeeper_path_ / "shards") , zookeeper_cleanup_lock_path(zookeeper_path_ / "cleanup_lock") - , log(&Poco::Logger::get("S3QueueFilesMetadata")) + , log(getLogger("S3QueueFilesMetadata")) { if (mode == S3QueueMode::UNORDERED && (max_set_size || max_set_age_sec)) { @@ -197,6 +200,123 @@ S3QueueFilesMetadata::NodeMetadata S3QueueFilesMetadata::createNodeMetadata( return metadata; } +bool S3QueueFilesMetadata::isShardedProcessing() const +{ + return getProcessingIdsNum() > 1 && mode == S3QueueMode::ORDERED; +} + +size_t S3QueueFilesMetadata::registerNewShard() +{ + if (!isShardedProcessing()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot register a new shard, because processing is not sharded"); + } + + const auto zk_client = getZooKeeper(); + zk_client->createAncestors(zookeeper_shards_path / ""); + + std::string shard_node_path; + size_t shard_id = 0; + for (size_t i = 0; i < shards_num; ++i) + { + const auto node_path = getZooKeeperPathForShard(i); + auto err = zk_client->tryCreate(node_path, "", zkutil::CreateMode::Persistent); + if (err == Coordination::Error::ZOK) + { + shard_node_path = node_path; + shard_id = i; + break; + } + else if (err == Coordination::Error::ZNODEEXISTS) + continue; + else + throw 
Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected error: {}", magic_enum::enum_name(err)); + } + + if (shard_node_path.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to register a new shard"); + + LOG_TRACE(log, "Using shard {} (zk node: {})", shard_id, shard_node_path); + return shard_id; +} + +std::string S3QueueFilesMetadata::getZooKeeperPathForShard(size_t shard_id) const +{ + return zookeeper_shards_path / ("shard" + toString(shard_id)); +} + +void S3QueueFilesMetadata::registerNewShard(size_t shard_id) +{ + if (!isShardedProcessing()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot register a new shard, because processing is not sharded"); + } + + const auto zk_client = getZooKeeper(); + const auto node_path = getZooKeeperPathForShard(shard_id); + zk_client->createAncestors(node_path); + + auto err = zk_client->tryCreate(node_path, "", zkutil::CreateMode::Persistent); + if (err != Coordination::Error::ZOK) + { + if (err == Coordination::Error::ZNODEEXISTS) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot register shard {}: already exists", shard_id); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected error: {}", magic_enum::enum_name(err)); + } +} + +bool S3QueueFilesMetadata::isShardRegistered(size_t shard_id) +{ + const auto zk_client = getZooKeeper(); + const auto node_path = getZooKeeperPathForShard(shard_id); + return zk_client->exists(node_path); +} + +void S3QueueFilesMetadata::unregisterShard(size_t shard_id) +{ + if (!isShardedProcessing()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot unregister a shard, because processing is not sharded"); + } + + const auto zk_client = getZooKeeper(); + const auto node_path = getZooKeeperPathForShard(shard_id); + zk_client->remove(node_path); +} + +size_t S3QueueFilesMetadata::getProcessingIdsNum() const +{ + return shards_num * threads_per_shard; +} + +std::vector S3QueueFilesMetadata::getProcessingIdsForShard(size_t shard_id) const +{ + std::vector res(threads_per_shard); + std::iota(res.begin(), res.end(), shard_id * threads_per_shard); + return res; +} + +bool S3QueueFilesMetadata::isProcessingIdBelongsToShard(size_t id, size_t shard_id) const +{ + return shard_id * threads_per_shard <= id && id < (shard_id + 1) * threads_per_shard; +} + +size_t S3QueueFilesMetadata::getIdForProcessingThread(size_t thread_id, size_t shard_id) const +{ + return shard_id * threads_per_shard + thread_id; +} + +size_t S3QueueFilesMetadata::getProcessingIdForPath(const std::string & path) const +{ + return sipHash64(path) % getProcessingIdsNum(); +} + S3QueueFilesMetadata::ProcessingNodeHolderPtr S3QueueFilesMetadata::trySetFileAsProcessing(const std::string & path) { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueSetFileProcessingMicroseconds); @@ -212,16 +332,24 @@ S3QueueFilesMetadata::ProcessingNodeHolderPtr S3QueueFilesMetadata::trySetFileAs std::lock_guard lock(file_status->metadata_lock); switch (file_status->state) { - case FileStatus::State::Processing: [[fallthrough]]; + case FileStatus::State::Processing: + { + LOG_TEST(log, "File {} is already processing", path); + return {}; + } case FileStatus::State::Processed: { + LOG_TEST(log, "File {} is already processed", path); return {}; } case FileStatus::State::Failed: { /// If max_loading_retries == 0, file is not retriable. if (max_loading_retries == 0) + { + LOG_TEST(log, "File {} is failed and processing retries are disabled", path); return {}; + } /// Otherwise file_status->retries is also cached. 
/// In case file_status->retries >= max_loading_retries we can fully rely that it is true @@ -230,7 +358,10 @@ S3QueueFilesMetadata::ProcessingNodeHolderPtr S3QueueFilesMetadata::trySetFileAs /// (another server could have done a try after we cached retries value), /// so check with zookeeper here. if (file_status->retries >= max_loading_retries) + { + LOG_TEST(log, "File {} is failed and processing retries are exceeeded", path); return {}; + } break; } @@ -284,35 +415,31 @@ S3QueueFilesMetadata::ProcessingNodeHolderPtr S3QueueFilesMetadata::trySetFileAs if (!file_status->processing_start_time) file_status->processing_start_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - break; + return processing_node_holder; } case SetFileProcessingResult::AlreadyProcessed: { std::lock_guard lock(file_status->metadata_lock); file_status->state = FileStatus::State::Processed; - break; + return {}; } case SetFileProcessingResult::AlreadyFailed: { std::lock_guard lock(file_status->metadata_lock); file_status->state = FileStatus::State::Failed; - break; + return {}; } case SetFileProcessingResult::ProcessingByOtherNode: { /// We cannot save any local state here, see comment above. - break; + return {}; } } - - if (result == SetFileProcessingResult::Success) - return processing_node_holder; - - return {}; } std::pair S3QueueFilesMetadata::trySetFileAsProcessingForUnorderedMode(const std::string & path, const FileStatusPtr & file_status) + S3QueueFilesMetadata::ProcessingNodeHolderPtr> +S3QueueFilesMetadata::trySetFileAsProcessingForUnorderedMode(const std::string & path, const FileStatusPtr & file_status) { /// In one zookeeper transaction do the following: /// 1. check that corresponding persistent nodes do not exist in processed/ and failed/; @@ -339,7 +466,8 @@ std::pair(node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); + auto holder = std::make_unique( + node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); return std::pair{SetFileProcessingResult::Success, std::move(holder)}; } @@ -362,7 +490,8 @@ std::pair S3QueueFilesMetadata::trySetFileAsProcessingForOrderedMode(const std::string & path, const FileStatusPtr & file_status) + S3QueueFilesMetadata::ProcessingNodeHolderPtr> +S3QueueFilesMetadata::trySetFileAsProcessingForOrderedMode(const std::string & path, const FileStatusPtr & file_status) { /// Same as for Unordered mode. /// The only difference is the check if the file is already processed. @@ -385,10 +514,15 @@ std::pairget(zookeeper_processed_path, &processed_node_stat); + auto processed_node = isShardedProcessing() + ? 
zookeeper_processed_path / toString(getProcessingIdForPath(path)) + : zookeeper_processed_path; + NodeMetadata processed_node_metadata; - if (!data.empty()) + Coordination::Stat processed_node_stat; + std::string data; + auto processed_node_exists = zk_client->tryGet(processed_node, data, &processed_node_stat); + if (processed_node_exists && !data.empty()) processed_node_metadata = NodeMetadata::fromString(data); auto max_processed_file_path = processed_node_metadata.file_path; @@ -403,13 +537,25 @@ std::pairtryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - auto holder = std::make_unique(node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); + auto holder = std::make_unique( + node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); + + LOG_TEST(log, "File {} is ready to be processed", path); return std::pair{SetFileProcessingResult::Success, std::move(holder)}; } @@ -491,20 +637,31 @@ void S3QueueFilesMetadata::setFileProcessedForUnorderedMode(ProcessingNodeHolder "this could be a result of expired zookeeper session", path); } + void S3QueueFilesMetadata::setFileProcessedForOrderedMode(ProcessingNodeHolderPtr holder) +{ + auto processed_node_path = isShardedProcessing() + ? zookeeper_processed_path / toString(getProcessingIdForPath(holder->path)) + : zookeeper_processed_path; + + return setFileProcessedForOrderedModeImpl(holder->path, holder, processed_node_path); +} + +void S3QueueFilesMetadata::setFileProcessedForOrderedModeImpl( + const std::string & path, ProcessingNodeHolderPtr holder, const std::string & processed_node_path) { /// Update a persistent node in /processed and remove ephemeral node from /processing. - const auto & path = holder->path; const auto node_name = getNodeName(path); const auto node_metadata = createNodeMetadata(path).toString(); const auto zk_client = getZooKeeper(); + LOG_TEST(log, "Setting file `{}` as processed (at {})", path, processed_node_path); while (true) { std::string res; Coordination::Stat stat; - bool exists = zk_client->tryGet(zookeeper_processed_path, res, &stat); + bool exists = zk_client->tryGet(processed_node_path, res, &stat); Coordination::Requests requests; if (exists) { @@ -513,39 +670,41 @@ void S3QueueFilesMetadata::setFileProcessedForOrderedMode(ProcessingNodeHolderPt auto metadata = NodeMetadata::fromString(res); if (metadata.file_path >= path) { - /// Here we get in the case that maximum processed file is bigger than ours. - /// This is possible to achieve in case of parallel processing - /// but for local processing we explicitly disable parallel mode and do everything in a single thread - /// (see constructor of StorageS3Queue where s3queue_processing_threads_num is explicitly set to 1 in case of Ordered mode). - /// Nevertheless, in case of distributed processing we cannot do anything with parallelism. - /// What this means? - /// It means that in scenario "distributed processing + Ordered mode" - /// a setting s3queue_loading_retries will not work. It is possible to fix, it is in TODO. - - /// Return because there is nothing to change, - /// the max processed file is already bigger than ours. 
+ LOG_TRACE(log, "File {} is already processed, current max processed file: {}", path, metadata.file_path); return; } } - requests.push_back(zkutil::makeSetRequest(zookeeper_processed_path, node_metadata, stat.version)); + requests.push_back(zkutil::makeSetRequest(processed_node_path, node_metadata, stat.version)); } else { - requests.push_back(zkutil::makeCreateRequest(zookeeper_processed_path, node_metadata, zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeCreateRequest(processed_node_path, node_metadata, zkutil::CreateMode::Persistent)); } Coordination::Responses responses; - if (holder->remove(&requests, &responses)) + if (holder) { - LOG_TEST(log, "Moved file `{}` to processed", path); - if (max_loading_retries) - zk_client->tryRemove(zookeeper_failed_path / (node_name + ".retriable"), -1); - return; + if (holder->remove(&requests, &responses)) + { + LOG_TEST(log, "Moved file `{}` to processed", path); + if (max_loading_retries) + zk_client->tryRemove(zookeeper_failed_path / (node_name + ".retriable"), -1); + return; + } + } + else + { + auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + return; } /// Failed to update max processed node, retry. if (!responses.empty() && responses[0]->error != Coordination::Error::ZOK) + { + LOG_TRACE(log, "Failed to update processed node ({}). Will retry.", magic_enum::enum_name(responses[0]->error)); continue; + } LOG_WARNING(log, "Cannot set file ({}) as processed since processing node " "does not exist with expected processing id does not exist, " @@ -554,6 +713,22 @@ void S3QueueFilesMetadata::setFileProcessedForOrderedMode(ProcessingNodeHolderPt } } +void S3QueueFilesMetadata::setFileProcessed(const std::string & path, size_t shard_id) +{ + if (mode != S3QueueMode::ORDERED) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can set file as preprocessed only for Ordered mode"); + + if (isShardedProcessing()) + { + for (const auto & processor : getProcessingIdsForShard(shard_id)) + setFileProcessedForOrderedModeImpl(path, nullptr, zookeeper_processed_path / toString(processor)); + } + else + { + setFileProcessedForOrderedModeImpl(path, nullptr, zookeeper_processed_path); + } +} + void S3QueueFilesMetadata::setFileFailed(ProcessingNodeHolderPtr holder, const String & exception_message) { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueSetFileFailedMicroseconds); @@ -689,7 +864,7 @@ S3QueueFilesMetadata::ProcessingNodeHolder::ProcessingNodeHolder( , path(path_) , zk_node_path(zk_node_path_) , processing_id(processing_id_) - , log(&Poco::Logger::get("ProcessingNodeHolder")) + , log(getLogger("ProcessingNodeHolder")) { } diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.h b/src/Storages/S3Queue/S3QueueFilesMetadata.h index f3be7c5c3a0d..9301ea7ceb8c 100644 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.h +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.h @@ -42,6 +42,7 @@ class S3QueueFilesMetadata ~S3QueueFilesMetadata(); void setFileProcessed(ProcessingNodeHolderPtr holder); + void setFileProcessed(const std::string & path, size_t shard_id); void setFileFailed(ProcessingNodeHolderPtr holder, const std::string & exception_message); @@ -80,6 +81,38 @@ class S3QueueFilesMetadata void deactivateCleanupTask(); + /// Should the table use sharded processing? + /// We use sharded processing for Ordered mode of S3Queue table. + /// It allows to parallelize processing within a single server + /// and to allow distributed processing. 
+ bool isShardedProcessing() const; + + /// Register a new shard for processing. + /// Return a shard id of registered shard. + size_t registerNewShard(); + /// Register a new shard for processing by given id. + /// Throws exception if shard by this id is already registered. + void registerNewShard(size_t shard_id); + /// Unregister shard from keeper. + void unregisterShard(size_t shard_id); + bool isShardRegistered(size_t shard_id); + + /// Total number of processing ids. + /// A processing id identifies a single processing thread. + /// There might be several processing ids per shard. + size_t getProcessingIdsNum() const; + /// Get processing ids identified with requested shard. + std::vector getProcessingIdsForShard(size_t shard_id) const; + /// Check if given processing id belongs to a given shard. + bool isProcessingIdBelongsToShard(size_t id, size_t shard_id) const; + /// Get a processing id for processing thread by given thread id. + /// thread id is a value in range [0, threads_per_shard]. + size_t getIdForProcessingThread(size_t thread_id, size_t shard_id) const; + + /// Calculate which processing id corresponds to a given file path. + /// The file will be processed by a thread related to this processing id. + size_t getProcessingIdForPath(const std::string & path) const; + private: const S3QueueMode mode; const UInt64 max_set_size; @@ -87,13 +120,16 @@ class S3QueueFilesMetadata const UInt64 max_loading_retries; const size_t min_cleanup_interval_ms; const size_t max_cleanup_interval_ms; + const size_t shards_num; + const size_t threads_per_shard; const fs::path zookeeper_processing_path; const fs::path zookeeper_processed_path; const fs::path zookeeper_failed_path; + const fs::path zookeeper_shards_path; const fs::path zookeeper_cleanup_lock_path; - Poco::Logger * log; + LoggerPtr log; std::atomic_bool shutdown = false; BackgroundSchedulePool::TaskHolder task; @@ -104,6 +140,10 @@ class S3QueueFilesMetadata void setFileProcessedForOrderedMode(ProcessingNodeHolderPtr holder); void setFileProcessedForUnorderedMode(ProcessingNodeHolderPtr holder); + std::string getZooKeeperPathForShard(size_t shard_id) const; + + void setFileProcessedForOrderedModeImpl( + const std::string & path, ProcessingNodeHolderPtr holder, const std::string & processed_node_path); enum class SetFileProcessingResult { @@ -117,8 +157,7 @@ class S3QueueFilesMetadata struct NodeMetadata { - std::string file_path; - UInt64 last_processed_timestamp = 0; + std::string file_path; UInt64 last_processed_timestamp = 0; std::string last_exception; UInt64 retries = 0; std::string processing_id; /// For ephemeral processing node. @@ -169,7 +208,7 @@ class S3QueueFilesMetadata::ProcessingNodeHolder std::string zk_node_path; std::string processing_id; bool removed = false; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h index 66fe9b4ce319..c26e973a1c0b 100644 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -22,6 +22,7 @@ class ASTStorage; M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \ M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ + M(String, s3queue_last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ M(UInt32, s3queue_tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ M(UInt32, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ M(UInt32, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ @@ -29,6 +30,8 @@ class ASTStorage; M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ + M(UInt32, s3queue_total_shards_num, 1, "Value 0 means disabled", 0) \ + M(UInt32, s3queue_current_shard_num, 0, "", 0) \ #define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 27bec039f96c..b4f5f957f763 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes { extern const int S3_ERROR; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( @@ -42,33 +43,112 @@ StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( StorageS3QueueSource::FileIterator::FileIterator( std::shared_ptr metadata_, std::unique_ptr glob_iterator_, + size_t current_shard_, std::atomic & shutdown_called_) : metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) + , log(&Poco::Logger::get("StorageS3QueueSource")) + , sharded_processing(metadata->isShardedProcessing()) + , current_shard(current_shard_) { + if (sharded_processing) + { + for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) + sharded_keys.emplace(id, std::deque{}); + } } -StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next() +StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(size_t idx) { while (!shutdown_called) { - KeyWithInfoPtr val = glob_iterator->next(); + KeyWithInfoPtr val{nullptr}; + + { + std::unique_lock lk(sharded_keys_mutex, std::defer_lock); + if (sharded_processing) + { + /// To make sure order on keys in each shard in sharded_keys + /// we need to check sharded_keys and to next() under lock. 
+ lk.lock(); + + if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + { + auto & keys = it->second; + if (!keys.empty()) + { + val = keys.front(); + keys.pop_front(); + } + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Processing id {} does not exist (Expected ids: {})", + idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + } + } + + if (!val) + { + val = glob_iterator->next(); + if (val && sharded_processing) + { + const auto processing_id_for_key = metadata->getProcessingIdForPath(val->key); + if (idx != processing_id_for_key) + { + if (metadata->isProcessingIdBelongsToShard(processing_id_for_key, current_shard)) + { + LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", + val->key, processing_id_for_key, sharded_keys.size()); + + if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + { + it->second.push_back(val); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Processing id {} does not exist (Expected ids: {})", + idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + } + } + continue; + } + } + } + } if (!val) return {}; if (shutdown_called) { - LOG_TEST(&Poco::Logger::get("StorageS3QueueSource"), "Shutdown was called, stopping file iterator"); + LOG_TEST(log, "Shutdown was called, stopping file iterator"); + return {}; + } + + auto processing_holder = metadata->trySetFileAsProcessing(val->key); + if (shutdown_called) + { + LOG_TEST(log, "Shutdown was called, stopping file iterator"); return {}; } - if (auto processing_holder = metadata->trySetFileAsProcessing(val->key); - processing_holder && !shutdown_called) + LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->key, idx); + + if (processing_holder) { return std::make_shared(val->key, val->info, processing_holder); } + else if (sharded_processing + && metadata->getFileStatus(val->key)->state == S3QueueFilesMetadata::FileStatus::State::Processing) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "File {} is processing by someone else in sharded processing. " + "It is a bug", val->key); + } } return {}; } @@ -83,6 +163,7 @@ StorageS3QueueSource::StorageS3QueueSource( const Block & header_, std::unique_ptr internal_source_, std::shared_ptr files_metadata_, + size_t processing_id_, const S3QueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, @@ -91,11 +172,12 @@ StorageS3QueueSource::StorageS3QueueSource( const std::atomic & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - Poco::Logger * log_) + LoggerPtr log_) : ISource(header_) , WithContext(context_) , name(std::move(name_)) , action(action_) + , processing_id(processing_id_) , files_metadata(files_metadata_) , internal_source(std::move(internal_source_)) , requested_virtual_columns(requested_virtual_columns_) @@ -123,7 +205,7 @@ void StorageS3QueueSource::lazyInitialize() if (initialized) return; - internal_source->lazyInitialize(); + internal_source->lazyInitialize(processing_id); reader = std::move(internal_source->reader); if (reader) reader_future = std::move(internal_source->reader_future); @@ -249,7 +331,7 @@ Chunk StorageS3QueueSource::generate() /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
internal_source->create_reader_pool.wait(); - reader_future = internal_source->createReaderAsync(); + reader_future = internal_source->createReaderAsync(processing_id); } return {}; diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 542f8e8fd8c7..8fc7305ea085 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -38,12 +38,16 @@ class StorageS3QueueSource : public ISource, WithContext class FileIterator : public IIterator { public: - FileIterator(std::shared_ptr metadata_, std::unique_ptr glob_iterator_, std::atomic & shutdown_called_); + FileIterator( + std::shared_ptr metadata_, + std::unique_ptr glob_iterator_, + size_t current_shard_, + std::atomic & shutdown_called_); /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - KeyWithInfoPtr next() override; + KeyWithInfoPtr next(size_t idx) override; size_t estimatedKeysCount() override; @@ -52,6 +56,12 @@ class StorageS3QueueSource : public ISource, WithContext const std::unique_ptr glob_iterator; std::atomic & shutdown_called; std::mutex mutex; + Poco::Logger * log; + + const bool sharded_processing; + const size_t current_shard; + std::unordered_map> sharded_keys; + std::mutex sharded_keys_mutex; }; StorageS3QueueSource( @@ -59,6 +69,7 @@ class StorageS3QueueSource : public ISource, WithContext const Block & header_, std::unique_ptr internal_source_, std::shared_ptr files_metadata_, + size_t processing_id_, const S3QueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, @@ -67,7 +78,7 @@ class StorageS3QueueSource : public ISource, WithContext const std::atomic & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - Poco::Logger * log_); + LoggerPtr log_); ~StorageS3QueueSource() override; @@ -80,6 +91,7 @@ class StorageS3QueueSource : public ISource, WithContext private: const String name; const S3QueueAction action; + const size_t processing_id; const std::shared_ptr files_metadata; const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; @@ -89,7 +101,7 @@ class StorageS3QueueSource : public ISource, WithContext const StorageID storage_id; RemoveFileFunc remove_file_func; - Poco::Logger * log; + LoggerPtr log; using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 104f70224b63..3ee2594135dd 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -16,8 +16,22 @@ namespace DB namespace ErrorCodes { extern const int METADATA_MISMATCH; + extern const int BAD_ARGUMENTS; } +namespace +{ + S3QueueMode modeFromString(const std::string & mode) + { + if (mode == "ordered") + return S3QueueMode::ORDERED; + if (mode == "unordered") + return S3QueueMode::UNORDERED; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected S3Queue mode: {}", mode); + } +} + + S3QueueTableMetadata::S3QueueTableMetadata( const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, @@ -28,10 +42,11 @@ S3QueueTableMetadata::S3QueueTableMetadata( mode = engine_settings.mode.toString(); s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit; s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; + 
s3queue_total_shards_num = engine_settings.s3queue_total_shards_num; + s3queue_processing_threads_num = engine_settings.s3queue_processing_threads_num; columns = storage_metadata.getColumns().toString(); } - String S3QueueTableMetadata::toString() const { Poco::JSON::Object json; @@ -39,6 +54,8 @@ String S3QueueTableMetadata::toString() const json.set("mode", mode); json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit); json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec); + json.set("s3queue_total_shards_num", s3queue_total_shards_num); + json.set("s3queue_processing_threads_num", s3queue_processing_threads_num); json.set("format_name", format_name); json.set("columns", columns); @@ -58,6 +75,10 @@ void S3QueueTableMetadata::read(const String & metadata_str) s3queue_tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); format_name = json->getValue("format_name"); columns = json->getValue("columns"); + if (json->has("s3queue_total_shards_num")) + s3queue_total_shards_num = json->getValue("s3queue_total_shards_num"); + if (json->has("s3queue_processing_threads_num")) + s3queue_processing_threads_num = json->getValue("s3queue_processing_threads_num"); } S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) @@ -67,7 +88,6 @@ S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) return metadata; } - void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const { if (after_processing != from_zk.after_processing) @@ -83,8 +103,8 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in engine mode. " "Stored in ZooKeeper: {}, local: {}", - DB::toString(from_zk.mode), - DB::toString(mode)); + from_zk.mode, + mode); if (s3queue_tracked_files_limit != from_zk.s3queue_tracked_files_limit) throw Exception( @@ -109,6 +129,28 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata "Stored in ZooKeeper: {}, local: {}", from_zk.format_name, format_name); + + if (modeFromString(mode) == S3QueueMode::ORDERED) + { + if (s3queue_processing_threads_num != from_zk.s3queue_processing_threads_num) + { + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in s3queue_processing_threads_num setting. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_processing_threads_num, + s3queue_processing_threads_num); + } + if (s3queue_total_shards_num != from_zk.s3queue_total_shards_num) + { + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in s3queue_total_shards_num setting. 
" + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_total_shards_num, + s3queue_total_shards_num); + } + } } void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index f15665692c4b..30642869930f 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -23,6 +23,8 @@ struct S3QueueTableMetadata String mode; UInt64 s3queue_tracked_files_limit; UInt64 s3queue_tracked_file_ttl_sec; + UInt64 s3queue_total_shards_num; + UInt64 s3queue_processing_threads_num; S3QueueTableMetadata() = default; S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index bc33e8cf2a90..0723205b5446 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -75,14 +75,8 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); } - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, Poco::Logger * log) + void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings) { - if (s3queue_settings.mode == S3QueueMode::ORDERED && s3queue_settings.s3queue_processing_threads_num > 1) - { - LOG_WARNING(log, "Parallel processing is not yet supported for Ordered mode"); - s3queue_settings.s3queue_processing_threads_num = 1; - } - if (!s3queue_settings.s3queue_processing_threads_num) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); @@ -110,7 +104,8 @@ StorageS3Queue::StorageS3Queue( const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, - std::optional format_settings_) + std::optional format_settings_, + ASTStorage * engine_args) : IStorage(table_id_) , WithContext(context_) , s3queue_settings(std::move(s3queue_settings_)) @@ -119,7 +114,7 @@ StorageS3Queue::StorageS3Queue( , configuration{configuration_} , format_settings(format_settings_) , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")")) + , log(getLogger("StorageS3Queue (" + table_id_.table_name + ")")) { if (configuration.url.key.empty()) { @@ -134,7 +129,7 @@ StorageS3Queue::StorageS3Queue( throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), log); + checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); configuration.update(context_); FormatFactory::instance().checkFormatName(configuration.format); @@ -160,19 +155,36 @@ StorageS3Queue::StorageS3Queue( LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); - /// Get metadata manager from S3QueueMetadataFactory, - /// it will increase the ref count for the metadata object. - /// The ref count is decreased when StorageS3Queue::drop() method is called. - files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); try { createOrCheckMetadata(storage_metadata); } catch (...) 
{ - S3QueueMetadataFactory::instance().remove(zk_path); throw; } + + /// Get metadata manager from S3QueueMetadataFactory, + /// it will increase the ref count for the metadata object. + /// The ref count is decreased when StorageS3Queue::drop() method is called. + files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); + + if (files_metadata->isShardedProcessing()) + { + if (!s3queue_settings->s3queue_current_shard_num.changed) + { + s3queue_settings->s3queue_current_shard_num = static_cast(files_metadata->registerNewShard()); + engine_args->settings->changes.setSetting("s3queue_current_shard_num", s3queue_settings->s3queue_current_shard_num.value); + } + else if (!files_metadata->isShardRegistered(s3queue_settings->s3queue_current_shard_num)) + { + files_metadata->registerNewShard(s3queue_settings->s3queue_current_shard_num); + } + } + if (s3queue_settings->mode == S3QueueMode::ORDERED && !s3queue_settings->s3queue_last_processed_path.value.empty()) + { + files_metadata->setFileProcessed(s3queue_settings->s3queue_last_processed_path.value, s3queue_settings->s3queue_current_shard_num); + } } void StorageS3Queue::startup() @@ -186,6 +198,7 @@ void StorageS3Queue::shutdown(bool is_drop) table_is_being_dropped = is_drop; shutdown_called = true; + LOG_TRACE(log, "Shutting down storage..."); if (task) { task->deactivate(); @@ -194,8 +207,16 @@ void StorageS3Queue::shutdown(bool is_drop) if (files_metadata) { files_metadata->deactivateCleanupTask(); + + if (is_drop && files_metadata->isShardedProcessing()) + { + files_metadata->unregisterShard(s3queue_settings->s3queue_current_shard_num); + LOG_TRACE(log, "Unregistered shard {} from zookeeper", s3queue_settings->s3queue_current_shard_num); + } + files_metadata.reset(); } + LOG_TRACE(log, "Shut down storage"); } void StorageS3Queue::drop() @@ -220,14 +241,12 @@ class ReadFromS3Queue : public SourceStepWithFilter ReadFromFormatInfo info_, std::shared_ptr storage_, ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) + size_t max_block_size_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) , info(std::move(info_)) , storage(std::move(storage_)) , context(std::move(context_)) , max_block_size(max_block_size_) - , num_streams(num_streams_) { } @@ -236,7 +255,6 @@ class ReadFromS3Queue : public SourceStepWithFilter std::shared_ptr storage; ContextPtr context; size_t max_block_size; - size_t num_streams; std::shared_ptr iterator; @@ -254,7 +272,7 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) void ReadFromS3Queue::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -270,7 +288,7 @@ void StorageS3Queue::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - size_t num_streams) + size_t) { if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) { @@ -292,8 +310,7 @@ void StorageS3Queue::read( read_from_format_info, std::move(this_ptr), local_context, - max_block_size, - num_streams); + max_block_size); query_plan.addStep(std::move(reading)); } @@ -301,11 +318,15 @@ void StorageS3Queue::read( void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - 
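When sharded processing is enabled and s3queue_current_shard_num was not set explicitly, the constructor now acquires a fresh shard id from the shared metadata and persists it back into the table's SETTINGS through the new ASTStorage * engine_args argument; on DROP the shard is unregistered in shutdown(). A condensed sketch of that flow (the method names are the ones added in this diff, the surrounding scaffolding is trimmed):

// Condensed registration flow from the constructor above.
if (files_metadata->isShardedProcessing())
{
    if (!s3queue_settings->s3queue_current_shard_num.changed)
    {
        /// First start: acquire a shard id and persist it in the table definition.
        s3queue_settings->s3queue_current_shard_num = static_cast<UInt32>(files_metadata->registerNewShard());
        engine_args->settings->changes.setSetting(
            "s3queue_current_shard_num", s3queue_settings->s3queue_current_shard_num.value);
    }
    else if (!files_metadata->isShardRegistered(s3queue_settings->s3queue_current_shard_num))
    {
        /// The shard id was configured explicitly but is not registered yet.
        files_metadata->registerNewShard(s3queue_settings->s3queue_current_shard_num);
    }
}
/// ...and in shutdown(is_drop): files_metadata->unregisterShard(s3queue_settings->s3queue_current_shard_num).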
const size_t adjusted_num_streams = std::min(num_streams, storage->s3queue_settings->s3queue_processing_threads_num); + const size_t adjusted_num_streams = storage->s3queue_settings->s3queue_processing_threads_num; createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) - pipes.emplace_back(storage->createSource(info, iterator, max_block_size, context)); + pipes.emplace_back(storage->createSource( + info, + iterator, + storage->files_metadata->getIdForProcessingThread(i, storage->s3queue_settings->s3queue_current_shard_num), + max_block_size, context)); auto pipe = Pipe::unitePipes(std::move(pipes)); if (pipe.empty()) @@ -320,6 +341,7 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const std::shared_ptr StorageS3Queue::createSource( const ReadFromFormatInfo & info, std::shared_ptr file_iterator, + size_t processing_id, size_t max_block_size, ContextPtr local_context) { @@ -359,7 +381,7 @@ std::shared_ptr StorageS3Queue::createSource( auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; return std::make_shared( getName(), info.source_header, std::move(internal_source), - files_metadata, after_processing, file_deleter, info.requested_virtual_columns, + files_metadata, processing_id, after_processing, file_deleter, info.requested_virtual_columns, local_context, shutdown_called, table_is_being_dropped, s3_queue_log, getStorageID(), log); } @@ -463,7 +485,8 @@ bool StorageS3Queue::streamToViews() for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) { auto source = createSource( - read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); + read_from_format_info, file_iterator, files_metadata->getIdForProcessingThread(i, s3queue_settings->s3queue_current_shard_num), + DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); pipes.emplace_back(std::move(source)); } @@ -566,7 +589,7 @@ std::shared_ptr StorageS3Queue::createFileIterator auto glob_iterator = std::make_unique( *configuration.client, configuration.url, predicate, virtual_columns, local_context, /* read_keys */nullptr, configuration.request_settings); - return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called); + return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) @@ -600,7 +623,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) if (user_format_settings.has(change.name)) user_format_settings.set(change.name, change.value); else - LOG_TRACE(&Poco::Logger::get("StorageS3"), "Remove: {}", change.name); + LOG_TRACE(getLogger("StorageS3"), "Remove: {}", change.name); args.storage_def->settings->changes.removeSetting(change.name); } @@ -624,7 +647,8 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) args.constraints, args.comment, args.getContext(), - format_settings); + format_settings, + args.storage_def); }, { .supports_settings = true, diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 3d3594dc2ab3..fd3b4bb4914c 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace Aws::S3 @@ -35,7 +36,8 @@ class StorageS3Queue : public IStorage, WithContext const ConstraintsDescription & constraints_, const String 
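Both the read pipeline and the background streamToViews() path now bind each stream to a fixed processing id through files_metadata->getIdForProcessingThread(i, current_shard), so every processor consumes a disjoint subset of keys. A sketch of the wiring, assuming the shard-major id layout from the earlier sketches:

// Illustrative wiring of processing threads to sources; the real code asks
// files_metadata->getIdForProcessingThread(i, current_shard) for the id.
const size_t threads = s3queue_settings->s3queue_processing_threads_num;
const size_t shard = s3queue_settings->s3queue_current_shard_num;

Pipes pipes;
for (size_t i = 0; i < threads; ++i)
{
    const size_t processing_id = shard * threads + i;  /// assumption: shard-major layout
    pipes.emplace_back(createSource(read_from_format_info, file_iterator, processing_id, max_block_size, context));
}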
& comment, ContextPtr context_, - std::optional format_settings_); + std::optional format_settings_, + ASTStorage * engine_args); String getName() const override { return "S3Queue"; } @@ -79,7 +81,7 @@ class StorageS3Queue : public IStorage, WithContext std::atomic shutdown_called = false; std::atomic table_is_being_dropped = false; - Poco::Logger * log; + LoggerPtr log; void startup() override; void shutdown(bool is_drop) override; @@ -91,6 +93,7 @@ class StorageS3Queue : public IStorage, WithContext std::shared_ptr createSource( const ReadFromFormatInfo & info, std::shared_ptr file_iterator, + size_t processing_id, size_t max_block_size, ContextPtr local_context); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index cd841a1a6733..01c31eab2b1e 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -707,7 +707,7 @@ class ReadFromAzureBlob : public SourceStepWithFilter void ReadFromAzureBlob::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb69..6fc3c5ce5920 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -319,7 +319,7 @@ class StorageAzureBlobSource : public ISource, WithContext ReaderHolder reader; - Poco::Logger * log = &Poco::Logger::get("StorageAzureBlobSource"); + LoggerPtr log = getLogger("StorageAzureBlobSource"); ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a6372577fb05..1d587512f38f 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -38,7 +38,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( const ConstraintsDescription & constraints_, ContextPtr context_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , configuration{configuration_} , object_storage(std::move(object_storage_)) { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 6f4b1563a46b..d5c135bb81dc 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -137,7 +137,7 @@ StorageBuffer::StorageBuffer( , flush_thresholds(flush_thresholds_) , destination_id(destination_id_) , allow_materialized(allow_materialized_) - , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")")) + , log(getLogger("StorageBuffer (" + table_id_.getFullTableName() + ")")) , bg_pool(getContext()->getBufferFlushSchedulePool()) { StorageInMemoryMetadata storage_metadata; @@ -433,7 +433,7 @@ void StorageBuffer::read( } -static void appendBlock(Poco::Logger * log, const Block & from, Block & to) +static void appendBlock(LoggerPtr log, const Block & from, Block & to) { size_t rows = from.rows(); size_t old_rows = to.rows(); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 
ef646a125483..47f6239b1734 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -166,7 +166,7 @@ friend class BufferSink; Writes lifetime_writes; Writes total_writes; - Poco::Logger * log; + LoggerPtr log; void flushAllBuffers(bool check_thresholds = true); bool flushBuffer(Buffer & buffer, bool check_thresholds, bool locked = false); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 987ea4a4957f..f711ed5ac92a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -329,7 +329,7 @@ StorageDistributed::StorageDistributed( , remote_database(remote_database_) , remote_table(remote_table_) , remote_table_function_ptr(remote_table_function_ptr_) - , log(&Poco::Logger::get("StorageDistributed (" + id_.table_name + ")")) + , log(getLogger("StorageDistributed (" + id_.table_name + ")")) , owned_cluster(std::move(owned_cluster_)) , cluster_name(getContext()->getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) @@ -779,18 +779,11 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, } else { - auto resolved_remote_storage_id = remote_storage_id; - // In case of cross-replication we don't know what database is used for the table. - // `storage_id.hasDatabase()` can return false only on the initiator node. - // Each shard will use the default database (in the case of cross-replication shards may have different defaults). - if (remote_storage_id.hasDatabase()) - resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options); - auto storage = std::make_shared(resolved_remote_storage_id, ColumnsDescription{column_names_and_types}); + auto storage = std::make_shared(remote_storage_id, ColumnsDescription{column_names_and_types}); auto table_node = std::make_shared(std::move(storage), query_context); if (table_expression_modifiers) @@ -1102,7 +1095,7 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) if (!source) return nullptr; - return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes, {}, context); + return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes); } @@ -1597,7 +1590,7 @@ ClusterPtr StorageDistributed::skipUnusedShardsWithAnalyzer( if (nodes.empty()) return nullptr; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, {}, local_context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes); size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit; if (!limit || limit > SSIZE_MAX) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index b7ed85e87df3..161a5983f941 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -238,7 +238,7 @@ class StorageDistributed final : public IStorage, WithContext String remote_table; ASTPtr remote_table_function_ptr; - Poco::Logger * log; + LoggerPtr log; /// Used to implement TableFunctionRemote. 
std::shared_ptr owned_cluster; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 2acbf3f46106..e475211deb39 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -80,7 +80,7 @@ StorageExecutable::StorageExecutable( : IStorage(table_id_) , settings(settings_) , input_queries(input_queries_) - , log(settings.is_executable_pool ? &Poco::Logger::get("StorageExecutablePool") : &Poco::Logger::get("StorageExecutable")) + , log(settings.is_executable_pool ? getLogger("StorageExecutablePool") : getLogger("StorageExecutable")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 37455385675c..2be2a84ab497 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -45,7 +45,7 @@ class StorageExecutable final : public IStorage private: ExecutableSettings settings; std::vector input_queries; - Poco::Logger * log; + LoggerPtr log; std::unique_ptr coordinator; }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9f864813de9a..0d9e79d1d54f 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -1040,7 +1041,7 @@ void StorageFileSource::beforeDestroy() catch (const std::exception & e) { // Cannot throw exception from destructor, will write only error - LOG_ERROR(&Poco::Logger::get("~StorageFileSource"), "Failed to rename file {}: {}", file_path_ref, e.what()); + LOG_ERROR(getLogger("~StorageFileSource"), "Failed to rename file {}: {}", file_path_ref, e.what()); continue; } } @@ -1352,7 +1353,7 @@ class ReadFromFile : public SourceStepWithFilter void ReadFromFile::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6c..2955eb0f1aac 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -163,7 +163,7 @@ class StorageFile final : public IStorage mutable std::shared_timed_mutex rwlock; - Poco::Logger * log = &Poco::Logger::get("StorageFile"); + LoggerPtr log = getLogger("StorageFile"); /// Total number of bytes to read (sums for multiple files in case of globs). Needed for progress bar. 
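A side note on the logging change that runs through the rest of this diff: raw Poco::Logger * members are being replaced with LoggerPtr obtained from getLogger(...). A minimal usage sketch, assuming LoggerPtr is the shared-ownership logger alias from Common/Logger.h and that the existing LOG_* macros accept it unchanged ("StorageFoo" is a placeholder name):

#include <Common/logger_useful.h>

/// Before: Poco::Logger * log = &Poco::Logger::get("StorageFoo");
/// After:
LoggerPtr log = getLogger("StorageFoo");
LOG_TRACE(log, "Initialized storage {}", "StorageFoo");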
size_t total_bytes_to_read = 0; diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index c12124f1e07e..0cc961bb464d 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -34,7 +34,7 @@ StorageFileCluster::StorageFileCluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , filename(filename_) , format_name(format_name_) , compression_method(compression_method_) diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index efe446a8ccd8..b9e082c0b224 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -104,7 +104,7 @@ void StorageJoin::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPt if (disk->exists(path)) disk->removeRecursive(path); else - LOG_INFO(&Poco::Logger::get("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName()); + LOG_INFO(getLogger("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName()); disk->createDirectories(path); disk->createDirectories(fs::path(path) / "tmp/"); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 8914838afed3..80abaa3ea2d2 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -321,7 +321,7 @@ StorageKeeperMap::StorageKeeperMap( , primary_key(primary_key_) , zookeeper_name(zkutil::extractZooKeeperName(zk_root_path_)) , keys_limit(keys_limit_) - , log(&Poco::Logger::get(fmt::format("StorageKeeperMap ({})", table_id.getNameForLogs()))) + , log(getLogger(fmt::format("StorageKeeperMap ({})", table_id.getNameForLogs()))) { std::string path_prefix = context_->getConfigRef().getString("keeper_map_path_prefix", ""); if (path_prefix.empty()) @@ -776,7 +776,7 @@ void StorageKeeperMap::backupData(BackupEntriesCollector & backup_entries_collec auto with_retries = std::make_shared ( - &Poco::Logger::get(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), + getLogger(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(backup_entries_collector.getContext()), backup_entries_collector.getContext()->getProcessListElement(), @@ -808,7 +808,7 @@ void StorageKeeperMap::restoreDataFromBackup(RestorerFromBackup & restorer, cons auto with_retries = std::make_shared ( - &Poco::Logger::get(fmt::format("StorageKeeperMapRestore ({})", getStorageID().getNameForLogs())), + getLogger(fmt::format("StorageKeeperMapRestore ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(restorer.getContext()), restorer.getContext()->getProcessListElement(), diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index aa9687243d88..9dca96a24a36 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -146,7 +146,7 @@ class StorageKeeperMap final : public IStorage, public IKeyValueEntity, WithCont mutable std::mutex init_mutex; mutable std::optional table_is_valid; - Poco::Logger * log; + LoggerPtr log; }; } diff --git 
a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 050e76c9205c..bfe75e61bcde 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -523,7 +523,7 @@ void StorageMaterializedView::backupData(BackupEntriesCollector & backup_entries if (auto table = tryGetTargetTable()) table->backupData(backup_entries_collector, data_path_in_backup, partitions); else - LOG_WARNING(&Poco::Logger::get("StorageMaterializedView"), + LOG_WARNING(getLogger("StorageMaterializedView"), "Inner table does not exist, will not backup any data"); } } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 0011e3c57a23..02d81eda59a8 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -79,11 +79,11 @@ class MemorySink : public SinkToStorage for (const auto & elem : block) compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); - new_blocks.emplace_back(compressed_block); + new_blocks.push_back(std::move(compressed_block)); } else { - new_blocks.emplace_back(block); + new_blocks.push_back(std::move(block)); } } @@ -472,9 +472,21 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat while (auto block = block_in.read()) { - new_bytes += block.bytes(); - new_rows += block.rows(); - new_blocks.push_back(std::move(block)); + if (compress) + { + Block compressed_block; + for (const auto & elem : block) + compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); + + new_blocks.push_back(std::move(compressed_block)); + } + else + { + new_blocks.push_back(std::move(block)); + } + + new_bytes += new_blocks.back().bytes(); + new_rows += new_blocks.back().rows(); } } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0d67403fa2f2..8d75382c91c9 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -982,7 +982,7 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables( if (!filter_by_database_virtual_column && !filter_by_table_virtual_column) return res; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); if (!filter_actions_dag) return res; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index fbdde15c2af9..0f75c726bcef 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -5,9 +5,9 @@ #include #include -#include #include #include +#include #include "Common/Exception.h" #include #include @@ -20,27 +20,30 @@ #include #include #include +#include #include -#include #include #include #include #include #include -#include #include +#include #include #include #include #include #include -#include -#include -#include +#include #include +#include +#include +#include +#include #include -#include #include +#include +#include #include #include #include @@ -65,6 +68,7 @@ namespace ErrorCodes extern const int NO_SUCH_DATA_PART; extern const int ABORTED; extern const int SUPPORT_IS_DISABLED; + extern const int TABLE_IS_READ_ONLY; } namespace ActionLocks @@ -74,7 +78,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove; } -static MergeTreeTransactionPtr tryGetTransactionForMutation(const MergeTreeMutationEntry & mutation, Poco::Logger * log = nullptr) +static MergeTreeTransactionPtr tryGetTransactionForMutation(const MergeTreeMutationEntry & mutation, 
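StorageMemory now re-applies column compression when blocks are restored from a backup, so a table created with the compress setting keeps its blocks compressed after RESTORE, and the byte/row counters are taken from the block that actually gets stored. The per-block step shared by the sink and the restore path boils down to this loop (trimmed from the code above):

// Compress every column of a block before keeping it in memory.
Block compressBlockColumns(const Block & block)
{
    Block compressed;
    for (const auto & elem : block)
        compressed.insert({elem.column->compress(), elem.type, elem.name});
    return compressed;
}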
LoggerPtr log = nullptr) { assert(!mutation.tid.isEmpty()); if (mutation.tid.isPrehistoric()) @@ -294,6 +298,8 @@ std::optional StorageMergeTree::totalBytesUncompressed(const Settings &) SinkToStoragePtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { + assertNotReadonly(); + const auto & settings = local_context->getSettingsRef(); return std::make_shared( *this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); @@ -319,9 +325,6 @@ void StorageMergeTree::checkTableCanBeDropped(ContextPtr query_context) const void StorageMergeTree::drop() { shutdown(true); - /// In case there is read-only disk we cannot allow to call dropAllData(), but dropping tables is allowed. - if (isStaticStorage()) - return; dropAllData(); } @@ -330,6 +333,8 @@ void StorageMergeTree::alter( ContextPtr local_context, AlterLockHolder & table_lock_holder) { + assertNotReadonly(); + if (local_context->getCurrentTransaction() && local_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER METADATA is not supported inside transactions"); @@ -620,6 +625,8 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn) void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { + assertNotReadonly(); + delayMutationOrThrowIfNeeded(nullptr, query_context); /// Validate partition IDs (if any) before starting mutation @@ -683,7 +690,7 @@ std::optional StorageMergeTree::getIncompleteMutationsS const auto & mutation_entry = current_mutation_it->second; - auto txn = tryGetTransactionForMutation(mutation_entry, log); + auto txn = tryGetTransactionForMutation(mutation_entry, log.load()); /// There's no way a transaction may finish before a mutation that was started by the transaction. /// But sometimes we need to check status of an unrelated mutation, in this case we don't care about transactions. 
assert(txn || mutation_entry.tid.isPrehistoric() || from_another_mutation); @@ -810,6 +817,8 @@ std::vector StorageMergeTree::getMutationsStatus() cons CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { + assertNotReadonly(); + LOG_TRACE(log, "Killing mutation {}", mutation_id); UInt64 mutation_version = MergeTreeMutationEntry::tryParseFileName(mutation_id); if (!mutation_version) @@ -829,7 +838,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) if (!to_kill) return CancellationCode::NotFound; - if (auto txn = tryGetTransactionForMutation(*to_kill, log)) + if (auto txn = tryGetTransactionForMutation(*to_kill, log.load())) { LOG_TRACE(log, "Cancelling transaction {} which had started mutation {}", to_kill->tid, mutation_id); TransactionLog::instance().rollbackTransaction(txn); @@ -1222,7 +1231,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( if (!part->version.isVisible(first_mutation_tid.start_csn, first_mutation_tid)) continue; - txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + txn = tryGetTransactionForMutation(mutations_begin_it->second, log.load()); if (!txn) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find transaction {} that has started mutation {} " "that is going to be applied to part {}", @@ -1520,6 +1529,8 @@ bool StorageMergeTree::optimize( bool cleanup, ContextPtr local_context) { + assertNotReadonly(); + if (deduplicate) { if (deduplicate_by_columns.empty()) @@ -1765,6 +1776,8 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr query_context, TableExclusiveLockHolder &) { + assertNotReadonly(); + { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. 
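StorageMergeTree's mutating entry points (INSERT, ALTER, mutations and their kills, OPTIMIZE, TRUNCATE, partition manipulation) now start with assertNotReadonly(), which throws TABLE_IS_READ_ONLY when the table is backed by static (read-only) storage; drop() meanwhile no longer special-cases static storage and always calls dropAllData(). The pattern at each call site is simply:

/// Every guarded entry point begins the same way, e.g. mutate():
void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context)
{
    assertNotReadonly();   /// throws ErrorCodes::TABLE_IS_READ_ONLY on static storage
    delayMutationOrThrowIfNeeded(nullptr, query_context);
    /// ... the rest of the method is unchanged ...
}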
@@ -2039,6 +2052,8 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr local_context) { + assertNotReadonly(); + auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto merges_blocker = stopMergesAndWait(); @@ -2049,41 +2064,73 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, local_context); + String partition_id = src_data.getPartitionIDFromQuery(partition, local_context); DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); + + bool attach_empty_partition = !replace && src_parts.empty(); + if (attach_empty_partition) + return; + MutableDataPartsVector dst_parts; std::vector dst_parts_locks; static const String TMP_PREFIX = "tmp_replace_from_"; - for (const DataPartPtr & src_part : src_parts) + const auto my_partition_expression = my_metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); + const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different && !src_parts.empty()) + MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts); + + for (DataPartPtr & src_part : src_parts) { if (!canReplacePartition(src_part)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", partition_id, src_part->name); + IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; /// This will generate unique name in scope of current server process. 
- Int64 temp_index = insert_increment.get(); - MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); + auto index = insert_increment.get(); - IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - my_metadata_snapshot, - clone_params, - local_context->getReadSettings(), - local_context->getWriteSettings()); - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); - } + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( + src_part, my_metadata_snapshot, local_context); + + auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + src_part, + new_partition, + new_partition.getID(*this), + new_min_max_index, + TMP_PREFIX, + my_metadata_snapshot, + clone_params, + local_context, + index, + index); - /// ATTACH empty part set - if (!replace && dst_parts.empty()) - return; + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings()); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + } MergeTreePartInfo drop_range; if (replace) @@ -2284,11 +2331,12 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { /// If the checksums file is not present, calculate the checksums and write them to disk. static constexpr auto checksums_path = "checksums.txt"; + bool noop; if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path)) { try { - auto calculated_checksums = checkDataPart(part, false); + auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); calculated_checksums.checkEqual(part->checksums, true); auto & part_mutable = const_cast(*part); @@ -2309,7 +2357,7 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt { try { - checkDataPart(part, true); + checkDataPart(part, true, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); return CheckResult(part->name, true, ""); } catch (...) 
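ATTACH/REPLACE PARTITION FROM now accepts a source table whose partition key differs from the destination: the partition id is resolved against the source table, MergeTreePartitionCompatibilityVerifier::verify() checks that the keys are compatible, and each part is cloned with a recomputed partition value and min-max index. The per-part branch condenses to the following (names are the ones introduced in this diff, argument lists trimmed):

// Condensed per-part decision from replacePartitionFrom above.
const bool different_partition_key =
    queryToStringNullable(my_metadata_snapshot->getPartitionKeyAST())
        != queryToStringNullable(source_metadata_snapshot->getPartitionKeyAST());

if (different_partition_key)
{
    auto [new_partition, new_min_max_index] =
        createPartitionAndMinMaxIndexFromSourcePart(src_part, my_metadata_snapshot, local_context);
    auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
        src_part, new_partition, new_partition.getID(*this), new_min_max_index,
        TMP_PREFIX, my_metadata_snapshot, clone_params, local_context, index, index);
}
else
{
    MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
    auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
        src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, clone_params,
        local_context->getReadSettings(), local_context->getWriteSettings());
}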
@@ -2437,6 +2485,12 @@ PreparedSetsCachePtr StorageMergeTree::getPreparedSetsCache(Int64 mutation_id) return cache; } +void StorageMergeTree::assertNotReadonly() const +{ + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); +} + void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock &) { part->info.min_block = part->info.max_block = increment.get(); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 51bf6aa42e72..359fa1d262d7 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -273,6 +273,8 @@ class StorageMergeTree final : public MergeTreeData PreparedSetsCachePtr getPreparedSetsCache(Int64 mutation_id); + void assertNotReadonly() const; + friend class MergeTreeSink; friend class MergeTreeData; friend class MergePlainMergeTreeTask; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 76a439eabafc..da391909dff4 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -55,7 +55,7 @@ StorageMySQL::StorageMySQL( , on_duplicate_clause{on_duplicate_clause_} , mysql_settings(mysql_settings_) , pool(std::make_shared(pool_)) - , log(&Poco::Logger::get("StorageMySQL (" + table_id_.table_name + ")")) + , log(getLogger("StorageMySQL (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index 5303117cf5c8..daabd66a5309 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -92,7 +92,7 @@ class StorageMySQL final : public IStorage, WithContext mysqlxx::PoolWithFailoverPtr pool; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 8fe2a161dba4..9379cb5a1c6c 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -72,7 +72,7 @@ StoragePostgreSQL::StoragePostgreSQL( , remote_table_schema(remote_table_schema_) , on_conflict(on_conflict_) , pool(std::move(pool_)) - , log(&Poco::Logger::get("StoragePostgreSQL (" + table_id_.table_name + ")")) + , log(getLogger("StoragePostgreSQL (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 725a935aa46c..1ed4f7a7611b 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -79,7 +79,7 @@ class StoragePostgreSQL final : public IStorage String on_conflict; postgres::PoolWithFailoverPtr pool; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index ddb1b62c7b03..83bb3c606c92 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -206,7 +206,7 @@ StorageRedis::StorageRedis( , WithContext(context_->getGlobalContext()) , table_id(table_id_) , configuration(configuration_) - , log(&Poco::Logger::get("StorageRedis")) + , log(getLogger("StorageRedis")) , primary_key(primary_key_) { pool = std::make_shared(configuration.pool_size); diff --git a/src/Storages/StorageRedis.h b/src/Storages/StorageRedis.h index a525a4ed7de9..a0eb2bfa580d 100644 --- a/src/Storages/StorageRedis.h +++ b/src/Storages/StorageRedis.h @@ -74,7 +74,7 @@ class StorageRedis : public IStorage, public IKeyValueEntity, WithContext StorageID table_id; RedisConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; 
RedisPoolPtr pool; const String primary_key; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 715cbab9eeae..6bd57cc4d6d3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1,6 +1,5 @@ #include -#include #include #include @@ -26,22 +25,18 @@ #include -#include #include #include #include -#include #include -#include #include #include -#include #include -#include #include #include #include -#include +#include +#include #include #include #include @@ -53,9 +48,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -63,21 +60,16 @@ #include #include #include -#include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include #include #include @@ -105,9 +97,6 @@ #include #include -#include - -#include #include #include @@ -320,7 +309,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( attach, [this] (const std::string & name) { enqueuePartForCheck(name); }) , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log)) + , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log.load())) , replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -812,7 +801,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr else { auto metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(drop_lock_path, *zookeeper); - if (!removeTableNodesFromZooKeeper(zookeeper, zookeeper_path, metadata_drop_lock, log)) + if (!removeTableNodesFromZooKeeper(zookeeper, zookeeper_path, metadata_drop_lock, log.load())) { /// Someone is recursively removing table right now, we cannot create new table until old one is removed continue; @@ -1096,6 +1085,8 @@ void StorageReplicatedMergeTree::drop() /// Table can be shut down, restarting thread is not active /// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice. zookeeper = getZooKeeperIfTableShutDown(); + /// Update zookeeper client, since existing may be expired, while ZooKeeper is required inside dropAllData(). + current_zookeeper = zookeeper; /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. 
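StorageReplicatedMergeTree::drop() now refreshes the cached ZooKeeper client before proceeding, because the session obtained earlier may have expired while dropAllData() still needs a working one. The relevant lines condense to:

/// The table can already be shut down with the restarting thread inactive,
/// so fetch a client explicitly instead of relying on getZooKeeper().
zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown();
/// Update the cached client as well: the existing one may be expired,
/// while ZooKeeper is required inside dropAllData().
current_zookeeper = zookeeper;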
if (!zookeeper) @@ -1128,12 +1119,12 @@ void StorageReplicatedMergeTree::drop() if (lost_part_count > 0) LOG_INFO(log, "Dropping table with non-zero lost_part_count equal to {}", lost_part_count); } - dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper); + dropReplica(zookeeper, zookeeper_path, replica_name, log.load(), getSettings(), &has_metadata_in_zookeeper); } } void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) + LoggerPtr logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) { if (zookeeper->expired()) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); @@ -1251,7 +1242,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con } } -void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, Poco::Logger * logger) +void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger) { zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown(); @@ -1266,7 +1257,7 @@ void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, - const String & zookeeper_path, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, Poco::Logger * logger) + const String & zookeeper_path, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, LoggerPtr logger) { bool completely_removed = false; @@ -2713,16 +2704,48 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, - TMP_PREFIX + "clone_", - part_desc->new_part_info, - metadata_snapshot, - clone_params, - getContext()->getReadSettings(), - getContext()->getWriteSettings()); - part_desc->res_part = std::move(res_part); - part_desc->temporary_part_lock = std::move(temporary_part_lock); + + const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_table->getInMemoryMetadataPtr()->getPartitionKeyAST(); + + const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( + part_desc->src_table_part, metadata_snapshot, getContext()); + + auto partition_id = new_partition.getID(*this); + + auto [res_part, temporary_part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + part_desc->src_table_part, + new_partition, + partition_id, + new_min_max_index, + TMP_PREFIX + "clone_", + metadata_snapshot, + clone_params, + getContext(), + part_desc->new_part_info.min_block, + part_desc->new_part_info.max_block); + + part_desc->res_part = std::move(res_part); + part_desc->temporary_part_lock = 
std::move(temporary_part_lock); + } + else + { + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( + part_desc->src_table_part, + TMP_PREFIX + "clone_", + part_desc->new_part_info, + metadata_snapshot, + clone_params, + getContext()->getReadSettings(), + getContext()->getWriteSettings()); + + part_desc->res_part = std::move(res_part); + part_desc->temporary_part_lock = std::move(temporary_part_lock); + } } else if (!part_desc->replica.empty()) { @@ -4181,7 +4204,7 @@ void StorageReplicatedMergeTree::startBeingLeader() return; } - zkutil::checkNoOldLeaders(log, *zookeeper, fs::path(zookeeper_path) / "leader_election"); + zkutil::checkNoOldLeaders(log.load(), *zookeeper, fs::path(zookeeper_path) / "leader_election"); LOG_INFO(log, "Became leader"); is_leader = true; @@ -4275,7 +4298,7 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(St auto zookeeper = getZooKeeperIfTableShutDown(); - auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); + auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log.load()); if (unique_parts_set.empty()) { LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts"); @@ -4316,7 +4339,7 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(St LOG_INFO(log, "Successfully waited all the parts"); } -std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) +std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, LoggerPtr log_) { if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) { @@ -4659,6 +4682,9 @@ bool StorageReplicatedMergeTree::fetchPart( zkutil::ZooKeeper::Ptr zookeeper_, bool try_fetch_shared) { + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); + auto zookeeper = zookeeper_ ? 
zookeeper_ : getZooKeeper(); const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -5496,6 +5522,8 @@ void StorageReplicatedMergeTree::assertNotReadonly() const { if (is_readonly) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode (replica path: {})", replica_path); + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); } @@ -5504,6 +5532,8 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con if (!initialization_done) throw Exception(ErrorCodes::NOT_INITIALIZED, "Table is not initialized yet"); + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); /// If table is read-only because it doesn't have metadata in zk yet, then it's not possible to insert into it /// Without this check, we'll write data parts on disk, and afterwards will remove them since we'll fail to commit them into zk /// In case of remote storage like s3, it'll generate unnecessary PUT requests @@ -7853,11 +7883,22 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, query_context); + String partition_id = src_data.getPartitionIDFromQuery(partition, query_context); /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); + bool attach_empty_partition = !replace && src_all_parts.empty(); + if (attach_empty_partition) + return; + + const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); + const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different && !src_all_parts.empty()) + MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_all_parts); + LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; @@ -7912,6 +7953,18 @@ void StorageReplicatedMergeTree::replacePartitionFrom( "Cannot replace partition '{}' because part '{}" "' has inconsistent granularity with table", partition_id, src_part->name); + IMergeTreeDataPart::MinMaxIndex min_max_index = *src_part->minmax_idx; + MergeTreePartition merge_tree_partition = src_part->partition; + + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(src_part, metadata_snapshot, query_context); + + merge_tree_partition = new_partition; + min_max_index = new_min_max_index; + partition_id = merge_tree_partition.getID(*this); + } + String hash_hex = src_part->checksums.getTotalChecksumHex(); const bool is_duplicated_part = replaced_parts.contains(hash_hex); replaced_parts.insert(hash_hex); @@ -7930,27 +7983,52 @@ void StorageReplicatedMergeTree::replacePartitionFrom( continue; } - UInt64 index = lock->getNumber(); - MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || 
dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + + UInt64 index = lock->getNumber(); + IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); + + if (is_partition_exp_different) + { + auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + src_part, + merge_tree_partition, + partition_id, + min_max_index, + TMP_PREFIX, + metadata_snapshot, + clone_params, + query_context, + index, + index); + + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); + + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + src_parts.emplace_back(src_part); - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8879,12 +8957,11 @@ IStorage::DataValidationTasksPtr StorageReplicatedMergeTree::getCheckTaskList( std::optional StorageReplicatedMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list) { - if (auto part = assert_cast(check_task_list.get())->next()) { try { - return CheckResult(part_check_thread.checkPartAndFix(part->name)); + return part_check_thread.checkPartAndFix(part->name, /* recheck_after */nullptr, /* throw_on_broken_projection */true); } catch (const Exception & ex) { @@ -9348,7 +9425,7 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return unlockSharedDataByID( part.getUniqueId(), shared_id, part.info, replica_name, - part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); + part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log.load(), zookeeper_path, format_version); } namespace @@ -9364,7 +9441,7 @@ namespace /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. 
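/// A self-contained sketch of the naming relationship the comment above relies on: a mutated part
/// such as all_0_0_0_1 covers the same block range as its parent all_0_0_0 and usually hardlinks the
/// parent's blobs, so the parent's blobs may only be removed once no replica still holds a zero-copy
/// lock on the parent. The parser below assumes the common layout <partition>_<min>_<max>_<level>[_<mutation>]
/// and is only an illustration, not the real MergeTreePartInfo code.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct PartName { std::string partition; int min_block = 0; int max_block = 0; int level = 0; int mutation = 0; };

PartName parsePartName(const std::string & name)
{
    std::vector<std::string> tokens;
    std::stringstream stream(name);
    for (std::string token; std::getline(stream, token, '_');)
        tokens.push_back(token);
    PartName res;
    res.partition = tokens[0];
    res.min_block = std::stoi(tokens[1]);
    res.max_block = std::stoi(tokens[2]);
    res.level = std::stoi(tokens[3]);
    if (tokens.size() > 4)
        res.mutation = std::stoi(tokens[4]);
    return res;
}

/// Roughly: same partition and block range, but a newer mutation version.
bool looksLikeMutationChild(const PartName & child, const PartName & parent)
{
    return child.partition == parent.partition && child.min_block == parent.min_block
        && child.max_block == parent.max_block && child.mutation > parent.mutation;
}

int main()
{
    std::cout << looksLikeMutationChild(parsePartName("all_0_0_0_1"), parsePartName("all_0_0_0")) << '\n'; /// prints 1
}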
-std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, Poco::Logger * log) +std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, LoggerPtr log) { NameSet files_not_to_remove; @@ -9455,7 +9532,7 @@ std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionP std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const MergeTreePartInfo & part_info, const String & replica_name_, const std::string & disk_type, const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const MergeTreeSettings & settings, - Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) + LoggerPtr logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) { boost::replace_all(part_id, "/", "_"); @@ -10143,7 +10220,7 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( size_t failed_op = zkutil::getFailedOpIndex(error, responses); if (ops[failed_op]->getPath() == zookeeper_node) { - LOG_WARNING(&Poco::Logger::get("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss", zookeeper_node); + LOG_WARNING(getLogger("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss", zookeeper_node); replace_existing_lock = true; continue; } @@ -10201,7 +10278,7 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St detached_replica_name, disk->getDataSourceDescription().toString(), std::make_shared(zookeeper), local_context->getReplicatedMergeTreeSettings(), - &Poco::Logger::get("StorageReplicatedMergeTree"), + getLogger("StorageReplicatedMergeTree"), detached_zookeeper_path, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING); @@ -10301,7 +10378,7 @@ void StorageReplicatedMergeTree::backupData( bool exists = false; Strings mutation_ids; { - ZooKeeperRetriesControl retries_ctl("getMutations", log, zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutations", log.load(), zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) @@ -10320,7 +10397,7 @@ void StorageReplicatedMergeTree::backupData( bool mutation_id_exists = false; String mutation; - ZooKeeperRetriesControl retries_ctl("getMutation", log, zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutation", log.load(), zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 3c3c2f56fe2d..c682b1ec88d0 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -255,13 +255,13 @@ class StorageReplicatedMergeTree final : public MergeTreeData /** Remove a specific replica from zookeeper. 
*/ static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); + LoggerPtr logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); - void dropReplica(const String & drop_zookeeper_path, const String & drop_replica, Poco::Logger * logger); + void dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger); /// Removes table from ZooKeeper after the last replica was dropped static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, - const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, Poco::Logger * logger); + const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, LoggerPtr logger); /// Schedules job to execute in background pool (merge, mutate, drop range and so on) bool scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) override; @@ -308,7 +308,7 @@ class StorageReplicatedMergeTree final : public MergeTreeData const std::string & disk_type, const ZooKeeperWithFaultInjectionPtr & zookeeper_, const MergeTreeSettings & settings, - Poco::Logger * logger, + LoggerPtr logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version); @@ -773,7 +773,7 @@ class StorageReplicatedMergeTree final : public MergeTreeData String findReplicaHavingCoveringPart(LogEntry & entry, bool active); bool findReplicaHavingCoveringPart(const String & part_name, bool active); String findReplicaHavingCoveringPartImplLowLevel(LogEntry * entry, const String & part_name, String & found_part_name, bool active); - static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_); + static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, LoggerPtr log_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. 
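/// The signatures above consistently swap raw Poco::Logger * for a LoggerPtr obtained via getLogger(...).
/// A minimal stand-in sketch of that shared-ownership pattern follows; the Logger struct and the registry
/// here are simplified placeholders for illustration, not the real logging classes.
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Logger
{
    std::string name;
    void information(const std::string & message) const { std::cout << name << ": " << message << '\n'; }
};

using LoggerPtr = std::shared_ptr<Logger>;

/// Returns a shared pointer, so callers can safely keep the logger for the lifetime of their object.
LoggerPtr getLogger(const std::string & name)
{
    static std::mutex mutex;
    static std::map<std::string, LoggerPtr> registry;
    std::lock_guard<std::mutex> lock(mutex);
    auto & logger = registry[name];
    if (!logger)
        logger = std::make_shared<Logger>(Logger{name});
    return logger;
}

int main()
{
    LoggerPtr log = getLogger("StorageReplicatedMergeTree");
    log->information("Became leader");
}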
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index c376af5a3d7b..4fde6fd04f37 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -244,7 +244,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext fillInternalBufferAssumeLocked(); } - KeyWithInfoPtr next() + KeyWithInfoPtr next(size_t) { std::lock_guard lock(mutex); return nextAssumeLocked(); @@ -436,9 +436,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( { } -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next() +StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT { - return pimpl->next(); + return pimpl->next(idx); } size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() @@ -471,7 +471,7 @@ class StorageS3Source::KeysIterator::Impl } } - KeyWithInfoPtr next() + KeyWithInfoPtr next(size_t) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) @@ -516,9 +516,9 @@ StorageS3Source::KeysIterator::KeysIterator( { } -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next() +StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT { - return pimpl->next(); + return pimpl->next(idx); } size_t StorageS3Source::KeysIterator::estimatedKeysCount() @@ -545,7 +545,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator( buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); } -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next() +StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= buffer.size()) @@ -599,23 +599,23 @@ StorageS3Source::StorageS3Source( { } -void StorageS3Source::lazyInitialize() +void StorageS3Source::lazyInitialize(size_t idx) { if (initialized) return; - reader = createReader(); + reader = createReader(idx); if (reader) - reader_future = createReaderAsync(); + reader_future = createReaderAsync(idx); initialized = true; } -StorageS3Source::ReaderHolder StorageS3Source::createReader() +StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) { KeyWithInfoPtr key_with_info; do { - key_with_info = (*file_iterator)(); + key_with_info = file_iterator->next(idx); if (!key_with_info || key_with_info->key.empty()) return {}; @@ -689,9 +689,9 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; } -std::future StorageS3Source::createReaderAsync() +std::future StorageS3Source::createReaderAsync(size_t idx) { - return create_reader_scheduler([this] { return createReader(); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); } std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) @@ -1160,7 +1160,7 @@ void StorageS3::read( void ReadFromStorageS3Step::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, local_context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1194,7 +1194,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const 
size_t max_threads = local_context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); + LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); Pipes pipes; pipes.reserve(num_streams); @@ -1347,7 +1347,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, } for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(&Poco::Logger::get("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); + LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); } StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb2..81a03cc5ad56 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -61,7 +61,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext { public: virtual ~IIterator() = default; - virtual KeyWithInfoPtr next() = 0; + virtual KeyWithInfoPtr next(size_t idx = 0) = 0; /// NOLINT /// Estimates how many streams we need to process all files. /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys. @@ -85,7 +85,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext const S3Settings::RequestSettings & request_settings_ = {}, std::function progress_callback_ = {}); - KeyWithInfoPtr next() override; + KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT size_t estimatedKeysCount() override; private: @@ -106,7 +106,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext KeysWithInfo * read_keys = nullptr, std::function progress_callback_ = {}); - KeyWithInfoPtr next() override; + KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT size_t estimatedKeysCount() override; private: @@ -120,7 +120,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext public: explicit ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); - KeyWithInfoPtr next() override; + KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT size_t estimatedKeysCount() override; private: @@ -242,7 +242,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext size_t max_parsing_threads = 1; bool need_only_count; - Poco::Logger * log = &Poco::Logger::get("StorageS3Source"); + LoggerPtr log = getLogger("StorageS3Source"); ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; @@ -253,11 +253,11 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition /// is set before createReader is invoked for key_condition is read in createReader. - void lazyInitialize(); + void lazyInitialize(size_t idx = 0); /// Recreate ReadBuffer and Pipeline for each file. 
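/// The iterator interface above gains a reader index: next(size_t idx = 0). A self-contained sketch of
/// the shape of that API follows; KeyWithInfo and the iterator here are stand-ins for illustration, not
/// the StorageS3 classes themselves.
#include <atomic>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct KeyWithInfo { std::string key; };
using KeyWithInfoPtr = std::shared_ptr<KeyWithInfo>;

struct IIterator
{
    virtual ~IIterator() = default;
    /// idx identifies the calling reader; implementations that don't need it simply ignore it.
    virtual KeyWithInfoPtr next(size_t idx = 0) = 0;
};

class KeysIterator : public IIterator
{
public:
    explicit KeysIterator(std::vector<std::string> keys_) : keys(std::move(keys_)) {}

    KeyWithInfoPtr next(size_t /*idx*/) override
    {
        /// Hand out keys in order, safely from multiple readers.
        size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
        if (current_index >= keys.size())
            return {};
        return std::make_shared<KeyWithInfo>(KeyWithInfo{keys[current_index]});
    }

private:
    std::vector<std::string> keys;
    std::atomic<size_t> index{0};
};

int main()
{
    KeysIterator iterator({"data/part0.csv", "data/part1.csv"});
    while (auto key = iterator.next(/*idx=*/0))
        std::cout << key->key << '\n';
}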
- ReaderHolder createReader(); - std::future createReaderAsync(); + ReaderHolder createReader(size_t idx = 0); + std::future createReaderAsync(size_t idx = 0); std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index e1738056e9d7..25c2b42b766f 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -40,7 +40,7 @@ StorageS3Cluster::StorageS3Cluster( const ConstraintsDescription & constraints_, ContextPtr context_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , s3_configuration{configuration_} { context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d5db5763da91..85c5e16a1bf9 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -42,7 +42,7 @@ StorageSQLite::StorageSQLite( , remote_table_name(remote_table_name_) , database_path(database_path_) , sqlite_db(sqlite_db_) - , log(&Poco::Logger::get("StorageSQLite (" + table_id_.table_name + ")")) + , log(getLogger("StorageSQLite (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index 9da040cbd5c5..baacdfb48997 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -50,7 +50,7 @@ class StorageSQLite final : public IStorage, public WithContext String remote_table_name; String database_path; SQLitePtr sqlite_db; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 1b0db1da8005..7d7f3113cdbd 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -218,7 +218,7 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn if (disk->exists(path)) disk->removeRecursive(path); else - LOG_INFO(&Poco::Logger::get("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName()); + LOG_INFO(getLogger("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName()); disk->createDirectories(path); disk->createDirectories(fs::path(path) / "tmp/"); @@ -284,7 +284,7 @@ void StorageSetOrJoinBase::restoreFromFile(const String & file_path) finishInsert(); /// TODO Add speed, compressed bytes, data volume in memory, compression ratio ... Generalize all statistics logging in project. - LOG_INFO(&Poco::Logger::get("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. State has {} unique rows.", + LOG_INFO(getLogger("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. 
State has {} unique rows.", file_path, info.rows, ReadableSize(info.bytes), getSize(ctx)); } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 91f6246d1018..359f142949f2 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -277,7 +277,7 @@ StorageStripeLog::StorageStripeLog( , index_file_path(table_path + "index.mrk") , file_checker(disk, table_path + "sizes.json") , max_compress_block_size(context_->getSettings().max_compress_block_size) - , log(&Poco::Logger::get("StorageStripeLog")) + , log(getLogger("StorageStripeLog")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index a05117a9ad59..c7f3e7e21e6d 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -123,7 +123,7 @@ friend class StripeLogSink; mutable std::shared_timed_mutex rwlock; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0ba72af6fc08..433f4ed77001 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -930,7 +930,7 @@ class ReadFromURL : public SourceStepWithFilter void ReadFromURL::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1284,7 +1284,7 @@ StorageURLWithFailover::StorageURLWithFailover( { Poco::URI poco_uri(uri_option); context_->getRemoteHostFilter().checkURL(poco_uri); - LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); + LOG_DEBUG(getLogger("StorageURLDistributed"), "Adding URL option: {}", uri_option); uri_options.emplace_back(uri_option); } } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index a0b5fcd6f285..2365887983d7 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -45,7 +45,7 @@ StorageURLCluster::StorageURLCluster( const ConstraintsDescription & constraints_, const StorageURL::Configuration & configuration_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , uri(uri_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a274b1ba4db8..259abefb00fd 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -45,7 +45,7 @@ StorageXDBC::StorageXDBC( , bridge_helper(bridge_helper_) , remote_database_name(remote_database_name_) , remote_table_name(remote_table_name_) - , log(&Poco::Logger::get("Storage" + bridge_helper->getName())) + , log(getLogger("Storage" + bridge_helper->getName())) { uri = bridge_helper->getMainURI().toString(); } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index fe678785dc28..cba15a832267 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -47,7 +47,7 @@ class StorageXDBC : public IStorageURLBase std::string remote_database_name; std::string 
remote_table_name; - Poco::Logger * log; + LoggerPtr log; std::string getReadMethod() const override; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index fbd5afd32745..b5a985fec9bf 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -109,6 +109,7 @@ const char * auto_contributors[] { "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", + "Aliaksei Khatskevich", "Alina Terekhova", "Amesaru", "Amila Welihinda", @@ -179,6 +180,7 @@ const char * auto_contributors[] { "Arsen Hakobyan", "Arslan G", "ArtCorp", + "Artem Alperin", "Artem Andreenko", "Artem Gavrilov", "Artem Hnilov", @@ -223,7 +225,9 @@ const char * auto_contributors[] { "Bill", "Bin Xie", "BiteTheDDDDt", + "Blacksmith", "BlahGeek", + "Blargian", "Bo Lu", "Bogdan", "Bogdan Voronin", @@ -373,6 +377,7 @@ const char * auto_contributors[] { "Evgeny Kruglov", "Evgeny Markov", "Ewout", + "Eyal Halpern Shalev", "FArthur-cmd", "FFFFFFFHHHHHHH", "FFish", @@ -513,6 +518,7 @@ const char * auto_contributors[] { "Javi santana bot", "JaySon", "JaySon-Huang", + "Jayme Bird", "Jean Baptiste Favre", "Jeffrey Dang", "Jens Hoevenaars", @@ -613,6 +619,7 @@ const char * auto_contributors[] { "Lewinma", "Li Shuai", "Li Yin", + "Lino Uruñuela", "Lirikl", "Liu Cong", "LiuCong", @@ -636,6 +643,7 @@ const char * auto_contributors[] { "MagiaGroz", "Maks Skorokhod", "Maksim", + "Maksim Alekseev", "Maksim Buren", "Maksim Fedotov", "Maksim Kita", @@ -653,6 +661,7 @@ const char * auto_contributors[] { "Mariano Benítez Mulet", "Mark Andreev", "Mark Frost", + "Mark Needham", "Mark Papadakis", "Mark Polokhov", "Maroun Maroun", @@ -662,6 +671,7 @@ const char * auto_contributors[] { "Martijn Bakker", "Marvin Taschenberger", "Masha", + "Mathieu Rey", "Matthew Peveler", "Matwey V. 
Kornilov", "Max", @@ -733,6 +743,7 @@ const char * auto_contributors[] { "Mingliang Pan", "Misko Lee", "Misz606", + "MochiXu", "Mohamad Fadhil", "Mohammad Arab Anvari", "Mohammad Hossein Sekhavat", @@ -780,6 +791,7 @@ const char * auto_contributors[] { "Nikolai Sorokin", "Nikolay", "Nikolay Degterinsky", + "Nikolay Edigaryev", "Nikolay Kirsh", "Nikolay Semyachkin", "Nikolay Shcheglov", @@ -876,6 +888,7 @@ const char * auto_contributors[] { "Roman Bug", "Roman Chyrva", "Roman G", + "Roman Glinskikh", "Roman Heinrich", "Roman Lipovsky", "Roman Nikolaev", @@ -948,6 +961,7 @@ const char * auto_contributors[] { "Seyed Mehrshad Hosseini", "Shane Andrade", "Shani Elharrar", + "Shaun Struwig", "Sherry Wang", "Shoh Jahon", "Shri Bodas", @@ -1015,6 +1029,7 @@ const char * auto_contributors[] { "Tian Xinhui", "Tiaonmmn", "Tigran Khudaverdyan", + "Tim Liou", "Tim Windelschmidt", "Timur Magomedov", "Timur Solodovnikov", @@ -1109,6 +1124,7 @@ const char * auto_contributors[] { "Wang Fenjin", "WangZengrui", "Wangyang Guo", + "Waterkin", "Weiqing Xu", "William Shallum", "Winter Zhang", @@ -1152,6 +1168,7 @@ const char * auto_contributors[] { "Yury Stankevich", "Yusuke Tanaka", "Zach Naimon", + "Zheng Miao", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1213,6 +1230,7 @@ const char * auto_contributors[] { "attack204", "auxten", "avasiliev", + "avinzhang", "avogar", "avoiderboi", "avsharapov", @@ -1253,6 +1271,7 @@ const char * auto_contributors[] { "chengy8934", "chenjian", "chenqi", + "chenwei", "chenxing-xc", "chenxing.xc", "chertus", @@ -1301,6 +1320,7 @@ const char * auto_contributors[] { "ducle.canh", "eaxdev", "edef", + "edpyt", "eejoin", "egatov", "ekrasikov", @@ -1540,6 +1560,7 @@ const char * auto_contributors[] { "mlkui", "mnkonkova", "mo-avatar", + "mochi", "monchickey", "morty", "moscas", @@ -1671,6 +1692,7 @@ const char * auto_contributors[] { "sundy-li", "sundyli", "sunlisheng", + "sunny19930321", "svladykin", "tai", "taichong", diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 0ffed6c97718..51ecb8f17cae 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -54,7 +54,7 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return {}; guard.reset(); - LOG_TRACE(&Poco::Logger::get("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID()); + LOG_TRACE(getLogger("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID()); } ASTPtr ast = database->getCreateDatabaseQuery(); diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 9c3a075b2c15..15543208dd9e 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -77,7 +77,7 @@ void fillJemallocBins(MutableColumns & res_columns) void fillJemallocBins(MutableColumns &) { - LOG_INFO(&Poco::Logger::get("StorageSystemJemallocBins"), "jemalloc is not enabled"); + LOG_INFO(getLogger("StorageSystemJemallocBins"), "jemalloc is not enabled"); } #endif // USE_JEMALLOC diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 016705f4e663..b1494f2ba98d 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -83,7 +83,11 @@ 
StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & tab {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())} + {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + + {"is_broken", std::make_shared()}, + {"exception_code", std::make_shared()}, + {"exception", std::make_shared()}, } ) { @@ -272,12 +276,38 @@ void StorageSystemProjectionParts::processNextStorage( add_ttl_info_map(part->ttl_infos.moves_ttl); if (columns_mask[src_index++]) - columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); + { + if (part->default_codec) + columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); + else + columns[res_index++]->insertDefault(); + } add_ttl_info_map(part->ttl_infos.recompression_ttl); add_ttl_info_map(part->ttl_infos.group_by_ttl); add_ttl_info_map(part->ttl_infos.rows_where_ttl); + { + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->is_broken.load(std::memory_order_relaxed)); + + if (part->is_broken) + { + std::lock_guard lock(part->broken_reason_mutex); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->exception_code); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->exception); + } + else + { + if (columns_mask[src_index++]) + columns[res_index++]->insertDefault(); + if (columns_mask[src_index++]) + columns[res_index++]->insertDefault(); + } + } + /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread if (has_state_column) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index d9a120954434..eeb3db342b49 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -56,12 +56,12 @@ class StatusRequestsPool /// Used to assign unique incremental ids to requests. 
UInt64 request_id TSA_GUARDED_BY(mutex) = 0; - Poco::Logger * log; + LoggerPtr log; public: explicit StatusRequestsPool(size_t max_threads) : thread_pool(CurrentMetrics::SystemReplicasThreads, CurrentMetrics::SystemReplicasThreadsActive, CurrentMetrics::SystemReplicasThreadsScheduled, max_threads) - , log(&Poco::Logger::get("StatusRequestsPool")) + , log(getLogger("StatusRequestsPool")) {} ~StatusRequestsPool() diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index e02d4bf1733b..82a5fd4e33f6 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -173,7 +173,7 @@ bool wait(int timeout_ms) } using ThreadIdToName = std::unordered_map>; -ThreadIdToName getFilteredThreadNames(const ActionsDAG::Node * predicate, ContextPtr context, const PaddedPODArray & thread_ids, Poco::Logger * log) +ThreadIdToName getFilteredThreadNames(const ActionsDAG::Node * predicate, ContextPtr context, const PaddedPODArray & thread_ids, LoggerPtr log) { ThreadIdToName tid_to_name; MutableColumnPtr all_thread_names = ColumnString::create(); @@ -274,7 +274,7 @@ bool isSignalBlocked(UInt64 tid, int signal) class StackTraceSource : public ISource { public: - StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, Poco::Logger * log_) + StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) : ISource(header_) , context(context_) , header(std::move(header_)) @@ -426,7 +426,7 @@ class StackTraceSource : public ISource bool send_signal = false; bool read_thread_names = false; - Poco::Logger * log; + LoggerPtr log; std::filesystem::directory_iterator proc_it; std::filesystem::directory_iterator end; @@ -463,7 +463,7 @@ class ReadFromSystemStackTrace : public SourceStepWithFilter void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); Pipe pipe(std::make_shared( column_names, getOutputStream().header, @@ -481,7 +481,7 @@ class ReadFromSystemStackTrace : public SourceStepWithFilter ASTPtr && query_, ContextPtr context_, size_t max_block_size_, - Poco::Logger * log_) + LoggerPtr log_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) , column_names(column_names_) , query(query_) @@ -496,7 +496,7 @@ class ReadFromSystemStackTrace : public SourceStepWithFilter ASTPtr query; ContextPtr context; size_t max_block_size; - Poco::Logger * log; + LoggerPtr log; }; } @@ -504,7 +504,7 @@ class ReadFromSystemStackTrace : public SourceStepWithFilter StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) : IStorage(table_id_) - , log(&Poco::Logger::get("StorageSystemStackTrace")) + , log(getLogger("StorageSystemStackTrace")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index 18216cea1bd3..ce1b7f8ccd2a 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -38,7 +38,7 @@ class StorageSystemStackTrace final : public IStorage bool 
isSystemStorage() const override { return true; } protected: - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 92ae643db556..e0d2dd03e78f 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -693,10 +693,15 @@ class ReadFromSystemTables : public SourceStepWithFilter { } + void applyFilters() override; + private: ContextPtr context; std::vector columns_mask; size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; }; void StorageSystemTables::read( @@ -723,16 +728,19 @@ void StorageSystemTables::read( query_plan.addStep(std::move(reading)); } -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromSystemTables::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - ColumnPtr filtered_databases_column = getFilteredDatabases(predicate, context); - ColumnPtr filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ Pipe pipe(std::make_shared( std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); pipeline.init(std::move(pipe)); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 37fe9074950c..7a2b830b0883 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -426,7 +428,30 @@ void ReadFromSystemZooKeeper::applyFilters() void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) { - zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); + QueryStatusPtr query_status = context->getProcessListElement(); + + const auto & settings = context->getSettingsRef(); + /// Use insert settings for now in order not to introduce new settings. + /// Hopefully insert settings will also be unified and replaced with some generic retry settings. 
+ ZooKeeperRetriesInfo retries_seetings( + settings.insert_keeper_max_retries, + settings.insert_keeper_retry_initial_backoff_ms, + settings.insert_keeper_retry_max_backoff_ms); + + ZooKeeperWithFaultInjection::Ptr zookeeper; + /// Handles reconnects when needed + auto get_zookeeper = [&] () + { + if (!zookeeper || zookeeper->expired()) + { + zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + context->getZooKeeper(), + "", nullptr); + } + return zookeeper; + }; if (paths.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -448,6 +473,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) std::unordered_set added; while (!paths.empty()) { + if (query_status) + query_status->checkTimeLimit(); + list_tasks.clear(); std::vector paths_to_list; while (!paths.empty() && static_cast(list_tasks.size()) < max_inflight_requests) @@ -470,7 +498,10 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) paths_to_list.emplace_back(task.path_corrected); list_tasks.emplace_back(std::move(task)); } - auto list_responses = zookeeper->tryGetChildren(paths_to_list); + + zkutil::ZooKeeper::MultiTryGetChildrenResponse list_responses; + ZooKeeperRetriesControl("", nullptr, retries_seetings, query_status).retryLoop( + [&]() { list_responses = get_zookeeper()->tryGetChildren(paths_to_list); }); struct GetTask { @@ -514,7 +545,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) } } - auto get_responses = zookeeper->tryGet(paths_to_get); + zkutil::ZooKeeper::MultiTryGetResponse get_responses; + ZooKeeperRetriesControl("", nullptr, retries_seetings, query_status).retryLoop( + [&]() { get_responses = get_zookeeper()->tryGet(paths_to_get); }); for (size_t i = 0, size = get_tasks.size(); i < size; ++i) { diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index 4945e1b56fac..dd1d64973d12 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -63,7 +63,7 @@ class UVLoop : public boost::noncopyable private: std::unique_ptr loop_ptr; - Poco::Logger * log = &Poco::Logger::get("UVLoop"); + LoggerPtr log = getLogger("UVLoop"); static void onUVWalkClosingCallback(uv_handle_t * handle, void *) { diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index e845e03d122a..33ff6e7104ff 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -211,7 +212,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name); if (!expr_const_node) return false; - auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}, {}, context); + auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}); const auto & nodes = filter_actions->getNodes(); bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node) { diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index f9ba8e9717f9..0764685cb07a 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1160,7 +1160,7 @@ StorageWindowView::StorageWindowView( bool attach_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", 
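/// The fillData() changes above wrap each ZooKeeper request in ZooKeeperRetriesControl::retryLoop,
/// configured from the insert_keeper_* settings. A generic, self-contained sketch of such a
/// retry-with-backoff loop follows; it only illustrates the idea and does not reproduce the actual
/// ZooKeeperRetriesControl behaviour (fault injection, query status checks, etc.).
#include <algorithm>
#include <chrono>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <thread>

struct RetriesInfo
{
    size_t max_retries = 20;
    size_t initial_backoff_ms = 100;
    size_t max_backoff_ms = 10000;
};

void retryLoop(const RetriesInfo & info, const std::function<void()> & action)
{
    size_t backoff_ms = info.initial_backoff_ms;
    for (size_t attempt = 0;; ++attempt)
    {
        try
        {
            action();
            return;
        }
        catch (const std::exception & e)
        {
            if (attempt + 1 >= info.max_retries)
                throw;
            std::cerr << "Retrying after error: " << e.what() << '\n';
            std::this_thread::sleep_for(std::chrono::milliseconds(backoff_ms));
            backoff_ms = std::min(backoff_ms * 2, info.max_backoff_ms);
        }
    }
}

int main()
{
    size_t calls = 0;
    retryLoop({}, [&]
    {
        if (++calls < 3)
            throw std::runtime_error("session expired");
        std::cout << "succeeded on attempt " << calls << '\n';
    });
}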
table_id_.database_name, table_id_.table_name))) + , log(getLogger(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) , fire_signal_timeout_s(context_->getSettingsRef().wait_for_window_view_fire_signal_timeout.totalSeconds()) , clean_interval_usec(context_->getSettingsRef().window_view_clean_interval.totalMicroseconds()) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index de8f880c6022..969fda8f78e2 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -177,7 +177,7 @@ class StorageWindowView final : public IStorage, WithContext const Block & getOutputHeader() const; private: - Poco::Logger * log; + LoggerPtr log; /// Stored query, e.g. SELECT * FROM * GROUP BY tumble(now(), *) ASTPtr select_query; diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index d88850875324..e840d5fc8bee 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -46,7 +46,7 @@ class TableFunctionHive : public ITableFunction void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; private: - Poco::Logger * logger = &Poco::Logger::get("TableFunctionHive"); + LoggerPtr logger = getLogger("TableFunctionHive"); String cluster_name; String hive_metastore_url; diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index d85249637767..961e5683fe2a 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -34,7 +34,7 @@ class ITableFunctionDataLake : public TableFunction columns = parseColumnsListFromString(TableFunction::configuration.structure, context); StoragePtr storage = Storage::create( - TableFunction::configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), + TableFunction::configuration, context, false, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt); storage->startup(); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 7de2d5a9fdd1..141e51badbb5 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -164,6 +164,9 @@ endif () if (ENABLE_OPENSSL) set(USE_OPENSSL_INTREE 1) endif () +if (TARGET ch_contrib::ssh) + set(USE_SSH 1) +endif() if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 23f22209451a..e1d4de59a230 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,7 +1,6 @@ test_access_for_functions/test.py::test_access_rights_for_function test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups -test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary test_distributed_backward_compatability/test.py::test_distributed_in_tuple test_distributed_type_object/test.py::test_distributed_type_object test_executable_table_function/test.py::test_executable_function_input_python diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index a84f912f3711..d944dd96e25f 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -7,7 +7,6 @@ 01214_test_storage_merge_aliases_with_where 
01244_optimize_distributed_group_by_sharding_key 01268_shard_avgweighted -01495_subqueries_in_with_statement 01560_merge_distributed_join 01584_distributed_buffer_cannot_find_column 01624_soft_constraints diff --git a/tests/ci/cache_utils.py b/tests/ci/cache_utils.py index ccb2049d1e6f..062207fadd1b 100644 --- a/tests/ci/cache_utils.py +++ b/tests/ci/cache_utils.py @@ -116,6 +116,7 @@ def __init__( self.s3_helper = s3_helper def _download(self, url: str, ignore_error: bool = False) -> None: + self.temp_path.mkdir(parents=True, exist_ok=True) compressed_cache = self.temp_path / self.archive_name try: if url.startswith("file://"): diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 067aa6173fcd..52879a1a7781 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1,5 +1,7 @@ import argparse import concurrent.futures +from dataclasses import asdict, dataclass +from enum import Enum import json import logging import os @@ -7,10 +9,21 @@ import subprocess import sys from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional +import time +from typing import Any, Dict, List, Optional, Sequence, Union import docker_images_helper -from ci_config import CI_CONFIG, Labels +import upload_result_helper +from build_check import get_release_or_pr +from ci_config import CI_CONFIG, Build, Labels, JobNames +from ci_utils import GHActions, is_hex +from clickhouse_helper import ( + CiLogsCredentials, + ClickHouseHelper, + get_instance_id, + get_instance_type, + prepare_tests_results_for_clickhouse, +) from commit_status_helper import ( CommitStatusData, RerunHelper, @@ -24,6 +37,7 @@ from env_helper import ( CI, GITHUB_JOB_API_URL, + GITHUB_RUN_URL, REPO_COPY, REPORT_PATH, S3_BUILDS_BUCKET, @@ -36,18 +50,589 @@ from pr_info import PRInfo from report import SUCCESS, BuildResult, JobReport from s3_helper import S3Helper -from clickhouse_helper import ( - CiLogsCredentials, - ClickHouseHelper, - get_instance_id, - get_instance_type, - prepare_tests_results_for_clickhouse, -) -from build_check import get_release_or_pr -import upload_result_helper from version_helper import get_version_from_repo +@dataclass +class PendingState: + updated_at: float + run_url: str + + +class CiCache: + """ + CI cache is a bunch of records. Record is a file stored under special location on s3. 
+ The file name has following format + + _[]--___.ci + + RECORD_TYPE: + SUCCESSFUL - for successfuly finished jobs + PENDING - for pending jobs + + ATTRIBUTES: + release - for jobs being executed on the release branch including master branch (not a PR branch) + """ + + _S3_CACHE_PREFIX = "CI_cache_v1" + _CACHE_BUILD_REPORT_PREFIX = "build_report" + _RECORD_FILE_EXTENSION = ".ci" + _LOCAL_CACHE_PATH = Path(TEMP_PATH) / "ci_cache" + _ATTRIBUTE_RELEASE = "release" + # divider symbol 1 + _DIV1 = "--" + # divider symbol 2 + _DIV2 = "_" + assert _DIV1 != _DIV2 + + class RecordType(Enum): + SUCCESSFUL = "successful" + PENDING = "pending" + + @dataclass + class Record: + record_type: "CiCache.RecordType" + job_name: str + job_digest: str + batch: int + num_batches: int + release_branch: bool + file: str = "" + + def to_str_key(self): + """other fields must not be included in the hash str""" + return "_".join( + [self.job_name, self.job_digest, str(self.batch), str(self.num_batches)] + ) + + class JobType(Enum): + DOCS = "DOCS" + SRCS = "SRCS" + + @classmethod + def is_docs_job(cls, job_name: str) -> bool: + return job_name == JobNames.DOCS_CHECK + + @classmethod + def is_srcs_job(cls, job_name: str) -> bool: + return not cls.is_docs_job(job_name) + + @classmethod + def get_type_by_name(cls, job_name: str) -> "CiCache.JobType": + res = cls.SRCS + if cls.is_docs_job(job_name): + res = cls.DOCS + elif cls.is_srcs_job(job_name): + res = cls.SRCS + else: + assert False + return res + + def __init__( + self, + s3: S3Helper, + job_digests: Dict[str, str], + ): + self.s3 = s3 + self.job_digests = job_digests + self.cache_s3_paths = { + job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self.job_digests[self._get_reference_job_name(job_type)]}/" + for job_type in self.JobType + } + self.s3_record_prefixes = { + record_type: record_type.value for record_type in self.RecordType + } + self.records: Dict["CiCache.RecordType", Dict[str, "CiCache.Record"]] = { + record_type: {} for record_type in self.RecordType + } + + self.cache_updated = False + self.cache_data_fetched = True + if not self._LOCAL_CACHE_PATH.exists(): + self._LOCAL_CACHE_PATH.mkdir(parents=True, exist_ok=True) + + def _get_reference_job_name(self, job_type: JobType) -> str: + res = Build.PACKAGE_RELEASE + if job_type == self.JobType.DOCS: + res = JobNames.DOCS_CHECK + elif job_type == self.JobType.SRCS: + res = Build.PACKAGE_RELEASE + else: + assert False + return res + + def _get_record_file_name( + self, + record_type: RecordType, + job_name: str, + batch: int, + num_batches: int, + release_branch: bool, + ) -> str: + prefix = self.s3_record_prefixes[record_type] + prefix_extended = ( + self._DIV2.join([prefix, self._ATTRIBUTE_RELEASE]) + if release_branch + else prefix + ) + assert self._DIV1 not in job_name, f"Invalid job name {job_name}" + job_name = self._DIV2.join( + [job_name, self.job_digests[job_name], str(batch), str(num_batches)] + ) + file_name = self._DIV1.join([prefix_extended, job_name]) + file_name += self._RECORD_FILE_EXTENSION + return file_name + + def _get_record_s3_path(self, job_name: str) -> str: + return self.cache_s3_paths[self.JobType.get_type_by_name(job_name)] + + def _parse_record_file_name( + self, record_type: RecordType, file_name: str + ) -> Optional["CiCache.Record"]: + # validate filename + if ( + not file_name.endswith(self._RECORD_FILE_EXTENSION) + or not len(file_name.split(self._DIV1)) == 2 + ): + print("ERROR: wrong file name format") + return None + + file_name = 
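# A minimal, self-contained sketch (hypothetical helpers, not code from this patch) of how
# _get_record_file_name above composes a record name and how _parse_record_file_name reads it back:
# roughly <record_type>[_release]--<job_name>_<job_digest>_<batch>_<num_batches>.ci.
# The job name and digest below are made up for illustration.
def compose_record_name(record_type: str, job_name: str, digest: str,
                        batch: int, num_batches: int, release_branch: bool) -> str:
    prefix = f"{record_type}_release" if release_branch else record_type
    suffix = "_".join([job_name, digest, str(batch), str(num_batches)])
    return f"{prefix}--{suffix}.ci"


def parse_record_name(file_name: str):
    prefix, suffix = file_name.removesuffix(".ci").split("--")
    fields = suffix.split("_")
    # the job name itself may contain "_", so only the last three fields are fixed
    return "_".join(fields[:-3]), fields[-3], int(fields[-2]), int(fields[-1]), prefix


name = compose_record_name("successful", "Integration tests (asan)", "0123abcd", 0, 4, release_branch=True)
assert name == "successful_release--Integration tests (asan)_0123abcd_0_4.ci"
assert parse_record_name(name) == ("Integration tests (asan)", "0123abcd", 0, 4, "successful_release")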
file_name.removesuffix(self._RECORD_FILE_EXTENSION) + release_branch = False + + prefix_extended, job_suffix = file_name.split(self._DIV1) + record_type_and_attribute = prefix_extended.split(self._DIV2) + + # validate filename prefix + failure = False + if not 0 < len(record_type_and_attribute) <= 2: + print("ERROR: wrong file name prefix") + failure = True + if ( + len(record_type_and_attribute) > 1 + and record_type_and_attribute[1] != self._ATTRIBUTE_RELEASE + ): + print("ERROR: wrong record attribute") + failure = True + if record_type_and_attribute[0] != self.s3_record_prefixes[record_type]: + print("ERROR: wrong record type") + failure = True + if failure: + return None + + if ( + len(record_type_and_attribute) > 1 + and record_type_and_attribute[1] == self._ATTRIBUTE_RELEASE + ): + release_branch = True + + job_properties = job_suffix.split(self._DIV2) + job_name, job_digest, batch, num_batches = ( + self._DIV2.join(job_properties[:-3]), + job_properties[-3], + int(job_properties[-2]), + int(job_properties[-1]), + ) + + if not is_hex(job_digest): + print("ERROR: wrong record job digest") + return None + + record = self.Record( + record_type, + job_name, + job_digest, + batch, + num_batches, + release_branch, + file="", + ) + return record + + def update(self): + """ + Pulls cache records from s3. Only records name w/o content. + """ + for record_type in self.RecordType: + prefix = self.s3_record_prefixes[record_type] + cache_list = self.records[record_type] + for job_type in self.JobType: + path = self.cache_s3_paths[job_type] + records = self.s3.list_prefix(f"{path}{prefix}", S3_BUILDS_BUCKET) + records = [record.split("/")[-1] for record in records] + GHActions.print_in_group( + f"Cache records: [{record_type}] in [{job_type.value}]", records + ) + for file in records: + record = self._parse_record_file_name( + record_type=record_type, file_name=file + ) + if not record: + print(f"ERROR: failed to parse cache record [{file}]") + continue + if ( + record.job_name not in self.job_digests + or self.job_digests[record.job_name] != record.job_digest + ): + # skip records we are not interested in + continue + + if record.to_str_key() not in cache_list: + cache_list[record.to_str_key()] = record + self.cache_data_fetched = False + elif ( + not cache_list[record.to_str_key()].release_branch + and record.release_branch + ): + # replace a non-release record with a release one + cache_list[record.to_str_key()] = record + self.cache_data_fetched = False + + self.cache_updated = True + return self + + def fetch_records_data(self): + """ + Pulls CommitStatusData for all cached jobs from s3 + """ + if not self.cache_updated: + self.update() + + if self.cache_data_fetched: + # there are no record w/o underling data - no need to fetch + return self + + # clean up + for file in self._LOCAL_CACHE_PATH.glob("*.ci"): + file.unlink() + + # download all record files + for job_type in self.JobType: + path = self.cache_s3_paths[job_type] + for record_type in self.RecordType: + prefix = self.s3_record_prefixes[record_type] + _ = self.s3.download_files( + bucket=S3_BUILDS_BUCKET, + s3_path=f"{path}{prefix}", + file_suffix=self._RECORD_FILE_EXTENSION, + local_directory=self._LOCAL_CACHE_PATH, + ) + + # validate we have files for all records and save file names meanwhile + for record_type in self.RecordType: + record_list = self.records[record_type] + for _, record in record_list.items(): + record_file_name = self._get_record_file_name( + record_type, + record.job_name, + record.batch, + record.num_batches, + 
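# An illustrative sketch of the merge rule in update() above: one record is kept per cache key, and a
# record carrying the "release" attribute replaces a previously seen non-release record for the same
# key, never the other way around. Plain dicts stand in for CiCache state; the key is made up.
records: dict = {}


def add_record(key: str, release_branch: bool) -> None:
    existing = records.get(key)
    if existing is None or (release_branch and not existing["release_branch"]):
        records[key] = {"release_branch": release_branch}


add_record("Style check_abc123_0_1", release_branch=False)
add_record("Style check_abc123_0_1", release_branch=True)   # release record wins
add_record("Style check_abc123_0_1", release_branch=False)  # and is not downgraded back
assert records["Style check_abc123_0_1"]["release_branch"] is True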
record.release_branch, + ) + assert ( + self._LOCAL_CACHE_PATH / record_file_name + ).is_file(), f"BUG. Record file must be present: {self._LOCAL_CACHE_PATH / record_file_name}" + record.file = record_file_name + + self.cache_data_fetched = True + return self + + def exist( + self, + record_type: "CiCache.RecordType", + job: str, + batch: int, + num_batches: int, + release_branch: bool, + ) -> bool: + if not self.cache_updated: + self.update() + record_key = self.Record( + record_type, + job, + self.job_digests[job], + batch, + num_batches, + release_branch, + ).to_str_key() + res = record_key in self.records[record_type] + if release_branch: + return res and self.records[record_type][record_key].release_branch + else: + return res + + def push( + self, + record_type: "CiCache.RecordType", + job: str, + batches: Union[int, Sequence[int]], + num_batches: int, + status: Union[CommitStatusData, PendingState], + release_branch: bool = False, + ) -> None: + """ + Pushes a cache record (CommitStatusData) + @release_branch adds "release" attribute to a record + """ + if isinstance(batches, int): + batches = [batches] + for batch in batches: + record_file = self._LOCAL_CACHE_PATH / self._get_record_file_name( + record_type, job, batch, num_batches, release_branch + ) + record_s3_path = self._get_record_s3_path(job) + if record_type == self.RecordType.SUCCESSFUL: + assert isinstance(status, CommitStatusData) + status.dump_to_file(record_file) + elif record_type == self.RecordType.PENDING: + assert isinstance(status, PendingState) + with open(record_file, "w") as json_file: + json.dump(asdict(status), json_file) + else: + assert False + + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET, + file_path=record_file, + s3_path=record_s3_path + record_file.name, + ) + record = self.Record( + record_type, + job, + self.job_digests[job], + batch, + num_batches, + release_branch, + file=record_file.name, + ) + if ( + record.release_branch + or record.to_str_key() not in self.records[record_type] + ): + self.records[record_type][record.to_str_key()] = record + + def get( + self, record_type: "CiCache.RecordType", job: str, batch: int, num_batches: int + ) -> Optional[Union[CommitStatusData, PendingState]]: + """ + Gets a cache record data for a job, or None if a cache miss + """ + + if not self.cache_data_fetched: + self.fetch_records_data() + + record_key = self.Record( + record_type, + job, + self.job_digests[job], + batch, + num_batches, + release_branch=False, + ).to_str_key() + + if record_key not in self.records[record_type]: + return None + + record_file_name = self.records[record_type][record_key].file + + res = CommitStatusData.load_from_file( + self._LOCAL_CACHE_PATH / record_file_name + ) # type: CommitStatusData + + return res + + def delete( + self, + record_type: "CiCache.RecordType", + job: str, + batch: int, + num_batches: int, + release_branch: bool, + ) -> None: + """ + deletes record from the cache + """ + raise NotImplementedError("Let's try make cache push-and-read-only") + # assert ( + # record_type == self.RecordType.PENDING + # ), "FIXME: delete is supported for pending records only" + # record_file_name = self._get_record_file_name( + # self.RecordType.PENDING, + # job, + # batch, + # num_batches, + # release_branch=release_branch, + # ) + # record_s3_path = self._get_record_s3_path(job) + # self.s3.delete_file_from_s3(S3_BUILDS_BUCKET, record_s3_path + record_file_name) + + # record_key = self.Record( + # record_type, + # job, + # self.job_digests[job], + # batch, + # num_batches, 
+ # release_branch=False, + # ).to_str_key() + + # if record_key in self.records[record_type]: + # del self.records[record_type][record_key] + + def is_successful( + self, job: str, batch: int, num_batches: int, release_branch: bool + ) -> bool: + """ + checks if a given job have already been done successfuly + """ + return self.exist( + self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch + ) + + def is_pending( + self, job: str, batch: int, num_batches: int, release_branch: bool + ) -> bool: + """ + check pending record in the cache for a given job + @release_branch - checks that "release" attribute is set for a record + """ + if self.is_successful(job, batch, num_batches, release_branch): + # successful record is present - not pending + return False + + return self.exist( + self.RecordType.PENDING, job, batch, num_batches, release_branch + ) + + def push_successful( + self, + job: str, + batch: int, + num_batches: int, + job_status: CommitStatusData, + release_branch: bool = False, + ) -> None: + """ + Pushes a cache record (CommitStatusData) + @release_branch adds "release" attribute to a record + """ + self.push( + self.RecordType.SUCCESSFUL, + job, + [batch], + num_batches, + job_status, + release_branch, + ) + + def push_pending( + self, job: str, batches: List[int], num_batches: int, release_branch: bool + ) -> None: + """ + pushes pending record for a job to the cache + """ + pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) + self.push( + self.RecordType.PENDING, + job, + batches, + num_batches, + pending_state, + release_branch, + ) + + def get_successful( + self, job: str, batch: int, num_batches: int + ) -> Optional[CommitStatusData]: + """ + Gets a cache record (CommitStatusData) for a job, or None if a cache miss + """ + res = self.get(self.RecordType.SUCCESSFUL, job, batch, num_batches) + assert res is None or isinstance(res, CommitStatusData) + return res + + def delete_pending( + self, job: str, batch: int, num_batches: int, release_branch: bool + ) -> None: + """ + deletes pending record from the cache + """ + self.delete(self.RecordType.PENDING, job, batch, num_batches, release_branch) + + def download_build_reports(self, file_prefix: str = "") -> List[str]: + """ + not ideal class for this method, + but let it be as we store build reports in CI cache directory on s3 + and CiCache knows where exactly + + @file_prefix allows to filter out reports by git head_ref + """ + report_path = Path(REPORT_PATH) + report_path.mkdir(exist_ok=True, parents=True) + path = ( + self._get_record_s3_path(Build.PACKAGE_RELEASE) + + self._CACHE_BUILD_REPORT_PREFIX + ) + if file_prefix: + path += "_" + file_prefix + reports_files = self.s3.download_files( + bucket=S3_BUILDS_BUCKET, + s3_path=path, + file_suffix=".json", + local_directory=report_path, + ) + return reports_files + + def upload_build_report(self, build_result: BuildResult) -> str: + result_json_path = build_result.write_json(Path(TEMP_PATH)) + s3_path = ( + self._get_record_s3_path(Build.PACKAGE_RELEASE) + result_json_path.name + ) + return self.s3.upload_file( + bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path + ) + + # def await_jobs(self, jobs_with_params: Dict[str, Dict[str, Any]]) -> List[str]: + # if not jobs_with_params: + # return [] + # print(f"Start awaiting jobs [{list(jobs_with_params)}]") + # poll_interval_sec = 180 + # start_at = int(time.time()) + # TIMEOUT = 3000 + # expired_sec = 0 + # done_jobs = [] # type: List[str] + # while expired_sec < TIMEOUT and 
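# A hypothetical usage sketch of the query/push API above: skip a job when a successful record already
# exists (on release branches only a record with the "release" attribute counts), otherwise record it
# as pending, run it, and push the result on success. `ci_cache`, `run_job` and the status object are
# placeholders, not names from this patch.
def process_job(ci_cache, job: str, batch: int, num_batches: int,
                on_release_branch: bool, run_job) -> None:
    if ci_cache.is_successful(job, batch, num_batches, on_release_branch):
        print(f"Job [{job}] is already done - skipping")
        return
    ci_cache.push_pending(job, [batch], num_batches, on_release_branch)
    status = run_job()  # expected to behave like CommitStatusData
    if status.is_ok():
        ci_cache.push_successful(job, batch, num_batches, status, on_release_branch)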
jobs_with_params: + # time.sleep(poll_interval_sec) + # self.update() + # pending_finished: List[str] = [] + # for job_name in jobs_with_params: + # num_batches = jobs_with_params[job_name]["num_batches"] + # for batch in jobs_with_params[job_name]["batches"]: + # if self.is_pending(job_name, batch, num_batches): + # continue + # print( + # f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" + # ) + # pending_finished.append(job_name) + # if pending_finished: + # # restart timer + # start_at = int(time.time()) + # expired_sec = 0 + # # remove finished jobs from awaiting list + # for job in pending_finished: + # del jobs_with_params[job] + # done_jobs.append(job) + # else: + # expired_sec = int(time.time()) - start_at + # print(f" ...awaiting continues... time left [{TIMEOUT - expired_sec}]") + # if done_jobs: + # print( + # f"Awaiting OK. Left jobs: [{list(jobs_with_params)}], finished jobs: [{done_jobs}]" + # ) + # else: + # print("Awaiting FAILED. No job has finished.") + # return done_jobs + + def get_check_name(check_name: str, batch: int, num_batches: int) -> str: res = check_name if num_batches > 1: @@ -155,11 +740,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: default=False, help="will create run config for rebuilding all dockers, used in --configure action (for nightly docker job)", ) + # FIXME: remove, not used parser.add_argument( "--rebuild-all-binaries", action="store_true", default=False, - help="will create run config without skipping build jobs in any case, used in --configure action (for release branches)", + help="[DEPRECATED. to be removed, once no wf use it] will create run config without skipping build jobs in any case, used in --configure action (for release branches)", ) parser.add_argument( "--commit-message", @@ -169,23 +755,8 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: return parser.parse_args() -def get_file_flag_name( - job_name: str, digest: str, batch: int = 0, num_batches: int = 1 -) -> str: - if num_batches < 2: - return f"job_{job_name}_{digest}.ci" - else: - return f"job_{job_name}_{digest}_{batch}_{num_batches}.ci" - - -def get_s3_path(build_digest: str) -> str: - return f"CI_data/BUILD-{build_digest}/" - - -def get_s3_path_docs(digest: str) -> str: - return f"CI_data/DOCS-{digest}/" - - +# FIXME: rewrite the docker job as regular reusable_test job and move interaction with docker hub inside job script +# that way run config will be more clean, workflow more generic and less api calls to dockerhub def check_missing_images_on_dockerhub( image_name_tag: Dict[str, str], arch: Optional[str] = None ) -> Dict[str, str]: @@ -260,29 +831,112 @@ def run_docker_command( return result -def _check_and_update_for_early_style_check(run_config: dict) -> None: +def _pre_action(s3, indata, pr_info): + CommitStatusData.cleanup() + JobReport.cleanup() + BuildResult.cleanup() + ci_cache = CiCache(s3, indata["jobs_data"]["digests"]) + + # for release/master branches reports must be from the same branches + report_prefix = pr_info.head_ref if pr_info.number == 0 else "" + reports_files = ci_cache.download_build_reports(file_prefix=report_prefix) + print(f"Pre action done. 
Report files [{reports_files}] have been downloaded") + + +def _mark_success_action( + s3: S3Helper, + indata: Dict[str, Any], + pr_info: PRInfo, + job: str, + batch: int, +) -> None: + ci_cache = CiCache(s3, indata["jobs_data"]["digests"]) + job_config = CI_CONFIG.get_job_config(job) + num_batches = job_config.num_batches + # if batch is not provided - set to 0 + batch = 0 if batch == -1 else batch + assert ( + 0 <= batch < num_batches + ), f"--batch must be provided and in range [0, {num_batches}) for {job}" + + # FIXME: find generic design for propagating and handling job status (e.g. stop using statuses in GH api) + # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data + if CI_CONFIG.is_build_job(job): + # there is no status for build jobs + # create dummy success to mark it as done + # FIXME: consider creating commit status for build jobs too, to treat everything the same way + CommitStatusData("success", "dummy description", "dummy_url").dump_status() + + job_status = None + if CommitStatusData.exist(): + # normal scenario + job_status = CommitStatusData.load_status() + else: + # apparently exit after rerun-helper check + # do nothing, exit without failure + print(f"ERROR: no status file for job [{job}]") + + if job_config.run_always or job_config.run_by_label: + print(f"Job [{job}] runs always or by label in CI - do not cache") + else: + if pr_info.is_master(): + pass + # delete method is disabled for ci_cache. need it? + # pending enabled for master branch jobs only + # ci_cache.delete_pending(job, batch, num_batches, release_branch=True) + if job_status and job_status.is_ok(): + ci_cache.push_successful( + job, batch, num_batches, job_status, pr_info.is_release_branch() + ) + print(f"Job [{job}] is ok") + elif job_status: + print(f"Job [{job}] is not ok, status [{job_status.status}]") + + +def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> None: + if outfile: + with open(outfile, "w") as f: + if isinstance(result, str): + print(result, file=f) + elif isinstance(result, dict): + print(json.dumps(result, indent=2 if pretty else None), file=f) + else: + raise AssertionError(f"Unexpected type for 'res': {type(result)}") + else: + if isinstance(result, str): + print(result) + elif isinstance(result, dict): + print(json.dumps(result, indent=2 if pretty else None)) + else: + raise AssertionError(f"Unexpected type for 'res': {type(result)}") + + +def _check_and_update_for_early_style_check(jobs_data: dict, docker_data: dict) -> None: """ This is temporary hack to start style check before docker build if possible FIXME: need better solution to do style check as soon as possible and as fast as possible w/o dependency on docker job """ - jobs_to_do = run_config.get("jobs_data", {}).get("jobs_to_do", []) - docker_to_build = run_config.get("docker_data", {}).get("missing_multi", []) + jobs_to_do = jobs_data.get("jobs_to_do", []) + docker_to_build = docker_data.get("missing_multi", []) if ( - "Style check" in jobs_to_do + JobNames.STYLE_CHECK in jobs_to_do and docker_to_build and "clickhouse/style-test" not in docker_to_build ): - index = jobs_to_do.index("Style check") + index = jobs_to_do.index(JobNames.STYLE_CHECK) jobs_to_do[index] = "Style check early" -def _update_config_for_docs_only(run_config: dict) -> None: - DOCS_CHECK_JOBS = ["Docs check", "Style check"] +def _update_config_for_docs_only(jobs_data: dict) -> None: + DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] print(f"NOTE: Will keep only docs 
related jobs: [{DOCS_CHECK_JOBS}]") - jobs_to_do = run_config.get("jobs_data", {}).get("jobs_to_do", []) - run_config["jobs_data"]["jobs_to_do"] = [ - job for job in jobs_to_do if job in DOCS_CHECK_JOBS - ] + jobs_to_do = jobs_data.get("jobs_to_do", []) + jobs_data["jobs_to_do"] = [job for job in jobs_to_do if job in DOCS_CHECK_JOBS] + jobs_data["jobs_to_wait"] = { + job: params + for job, params in jobs_data["jobs_to_wait"].items() + if job in DOCS_CHECK_JOBS + } def _configure_docker_jobs( @@ -351,14 +1005,11 @@ def _configure_docker_jobs( def _configure_jobs( - build_digest: str, - docs_digest: str, job_digester: JobDigester, s3: S3Helper, - rebuild_all_binaries: bool, - pr_labels: Iterable[str], + pr_info: PRInfo, commit_tokens: List[str], - ci_cache_enabled: bool, + ci_cache_disabled: bool, ) -> Dict: ## a. digest each item from the config job_digester = JobDigester() @@ -374,19 +1025,12 @@ def _configure_jobs( print(f" job [{job.rjust(50)}] has digest [{digest}]") print("::endgroup::") - ## b. check if we have something done - if ci_cache_enabled: - done_files = [] - else: - path = get_s3_path(build_digest) - done_files = s3.list_prefix(path) - done_files = [file.split("/")[-1] for file in done_files] - # print(f"S3 CI files for the build [{build_digest}]: {done_files}") - docs_path = get_s3_path_docs(docs_digest) - done_files_docs = s3.list_prefix(docs_path) - done_files_docs = [file.split("/")[-1] for file in done_files_docs] - # print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}") - done_files += done_files_docs + ## b. check what we need to run + ci_cache = None + if not ci_cache_disabled: + ci_cache = CiCache(s3, digests) + + jobs_to_wait: Dict[str, Dict[str, Any]] = {} for job in digests: digest = digests[job] @@ -394,22 +1038,37 @@ def _configure_jobs( num_batches: int = job_config.num_batches batches_to_do: List[int] = [] - if job_config.run_by_label: - # this job controlled by label, add to todo if it's labe is set in pr - if job_config.run_by_label in pr_labels: - for batch in range(num_batches): # type: ignore - batches_to_do.append(batch) - elif job_config.run_always: - # always add to todo - batches_to_do.append(batch) - else: - # this job controlled by digest, add to todo if it's not successfully done before - for batch in range(num_batches): # type: ignore - success_flag_name = get_file_flag_name(job, digest, batch, num_batches) - if success_flag_name not in done_files or ( - rebuild_all_binaries and CI_CONFIG.is_build_job(job) - ): + for batch in range(num_batches): # type: ignore + if job_config.pr_only and pr_info.is_release_branch(): + continue + if job_config.run_by_label: + # this job controlled by label, add to todo if its label is set in pr + if job_config.run_by_label in pr_info.labels: batches_to_do.append(batch) + elif job_config.run_always: + # always add to todo + batches_to_do.append(batch) + elif not ci_cache: + batches_to_do.append(batch) + elif not ci_cache.is_successful( + job, + batch, + num_batches, + release_branch=pr_info.is_release_branch() + and job_config.required_on_release_branch, + ): + # ci cache is enabled and job is not in the cache - add + batches_to_do.append(batch) + + # check if it's pending in the cache + if ci_cache.is_pending(job, batch, num_batches, release_branch=False): + if job in jobs_to_wait: + jobs_to_wait[job]["batches"].append(batch) + else: + jobs_to_wait[job] = { + "batches": [batch], + "num_batches": num_batches, + } if batches_to_do: jobs_to_do.append(job) @@ -420,11 +1079,11 @@ def _configure_jobs( 
else: jobs_to_skip.append(job) - ## c. check CI controlling labels commit messages - if pr_labels: + ## c. check CI controlling labels and commit messages + if pr_info.labels: jobs_requested_by_label = [] # type: List[str] ci_controlling_labels = [] # type: List[str] - for label in pr_labels: + for label in pr_info.labels: label_config = CI_CONFIG.get_label_config(label) if label_config: jobs_requested_by_label += label_config.run_jobs @@ -434,6 +1093,8 @@ def _configure_jobs( print( f" : following jobs will be executed: [{jobs_requested_by_label}]" ) + # so far there is only "do not test" label in the config that runs only Style check. + # check later if we need to filter out requested jobs using ci cache. right now we do it: jobs_to_do = [job for job in jobs_requested_by_label if job in jobs_to_do] if commit_tokens: @@ -482,68 +1143,39 @@ def _configure_jobs( "digests": digests, "jobs_to_do": jobs_to_do, "jobs_to_skip": jobs_to_skip, + "jobs_to_wait": jobs_to_wait, "jobs_params": { job: params for job, params in jobs_params.items() if job in jobs_to_do }, } -def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: +def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: if indata["ci_flags"][Labels.NO_CI_CACHE]: print("CI cache is disabled - skip restoring commit statuses from CI cache") return - - temp_path = Path(TEMP_PATH) - if not temp_path.exists(): - temp_path.mkdir(parents=True, exist_ok=True) - - # clean up before start - for file in temp_path.glob("*.ci"): - file.unlink() - - # download all metadata files - path = get_s3_path(indata["build"]) - files = s3.download_files( # type: ignore - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".ci", - local_directory=temp_path, - ) - print(f"CI metadata files [{files}]") - path = get_s3_path_docs(indata["docs"]) - files_docs = s3.download_files( # type: ignore - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".ci", - local_directory=temp_path, - ) - print(f"CI docs metadata files [{files_docs}]") - files += files_docs - - # parse CI metadata job_digests = indata["jobs_data"]["digests"] + ci_cache = CiCache(s3, job_digests).update().fetch_records_data() + # create GH status pr_info = PRInfo() commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha) - def run_create_status(job, digest, batch, num_batches): - success_flag_name = get_file_flag_name(job, digest, batch, num_batches) - if success_flag_name in files: - print(f"Going to re-create GH status for job [{job}] sha [{pr_info.sha}]") - job_status = CommitStatusData.load_from_file( - f"{TEMP_PATH}/{success_flag_name}" - ) # type: CommitStatusData - assert job_status.status == SUCCESS, "BUG!" - commit.create_status( - state=job_status.status, - target_url=job_status.report_url, - description=format_description( - f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: " - f"{job_status.description}" - ), - context=get_check_name(job, batch=batch, num_batches=num_batches), - ) - print(f"GH status re-created from file [{success_flag_name}]") + def _run_create_status(job: str, batch: int, num_batches: int) -> None: + job_status = ci_cache.get_successful(job, batch, num_batches) + if not job_status: + return + print(f"Going to re-create GH status for job [{job}] sha [{pr_info.sha}]") + assert job_status.status == "success", "BUG!" 
+ commit.create_status( + state=job_status.status, + target_url=job_status.report_url, + description=format_description( + f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: " + f"{job_status.description}" + ), + context=get_check_name(job, batch=batch, num_batches=num_batches), + ) with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] @@ -551,12 +1183,9 @@ def run_create_status(job, digest, batch, num_batches): if CI_CONFIG.is_build_job(job): # no GH status for build jobs continue - digest = job_digests[job] num_batches = CI_CONFIG.get_job_config(job).num_batches for batch in range(num_batches): - future = executor.submit( - run_create_status, job, digest, batch, num_batches - ) + future = executor.submit(_run_create_status, job, batch, num_batches) futures.append(future) done, _ = concurrent.futures.wait(futures) for future in done: @@ -568,11 +1197,6 @@ def run_create_status(job, digest, batch, num_batches): set_status_comment(commit, pr_info) print("... CI report update - done") - # clean up - ci_files = list(temp_path.glob("*.ci")) - for file in ci_files: - file.unlink() - def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" @@ -584,7 +1208,7 @@ def _fetch_commit_tokens(message: str) -> List[str]: def _upload_build_artifacts( pr_info: PRInfo, build_name: str, - build_digest: str, + ci_cache: CiCache, job_report: JobReport, s3: S3Helper, s3_destination: str, @@ -640,12 +1264,8 @@ def _upload_build_artifacts( head_ref=pr_info.head_ref, pr_number=pr_info.number, ) - result_json_path = build_result.write_json() - s3_path = get_s3_path(build_digest) + result_json_path.name - build_report_url = s3.upload_file( - bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path - ) - print(f"Report file [{result_json_path}] has been uploaded to [{build_report_url}]") + report_url = ci_cache.upload_build_report(build_result) + print(f"Report file has been uploaded to [{report_url}]") # Upload head master binaries static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name @@ -852,9 +1472,6 @@ def main() -> int: ### CONFIGURE action: start if args.configure: - docker_data = {} - git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") - # if '#no_merge_commit' is set in commit message - set git ref to PR branch head to avoid merge-commit tokens = [] ci_flags = { @@ -876,6 +1493,9 @@ def main() -> int: ci_flags[Labels.NO_CI_CACHE] = True print("NOTE: Disable CI Cache") + docker_data = {} + git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") + # let's get CH version version = get_version_from_repo(git=Git(True)).string print(f"Got CH version for this commit: [{version}]") @@ -893,17 +1513,13 @@ def main() -> int: CI_CONFIG.get_digest_config("package_release") ) docs_digest = job_digester.get_job_digest( - CI_CONFIG.get_digest_config("Docs check") + CI_CONFIG.get_digest_config(JobNames.DOCS_CHECK) ) jobs_data = ( _configure_jobs( - build_digest, - docs_digest, job_digester, s3, - # FIXME: add suport for master wf w/o rebuilds - args.rebuild_all_binaries or pr_info.is_master(), - pr_info.labels, + pr_info, tokens, ci_flags[Labels.NO_CI_CACHE], ) @@ -911,6 +1527,39 @@ def main() -> int: else {} ) + # FIXME: Early style check manipulates with job names might be not robust with await feature + if pr_info.number != 0 and not args.docker_digest_or_latest: + # FIXME: it runs style check before docker build if possible (style-check images is not changed) + # find a way to do style check always before docker build and others + 
_check_and_update_for_early_style_check(jobs_data, docker_data) + if args.skip_jobs and pr_info.has_changes_in_documentation_only(): + _update_config_for_docs_only(jobs_data) + + # TODO: await pending jobs + # wait for pending jobs to be finished, await_jobs is a long blocking call if any job has to be awaited + ci_cache = CiCache(s3, jobs_data["digests"]) + # awaited_jobs = ci_cache.await_jobs(jobs_data.get("jobs_to_wait", {})) + # for job in awaited_jobs: + # jobs_to_do = jobs_data["jobs_to_do"] + # if job in jobs_to_do: + # jobs_to_do.remove(job) + # else: + # assert False, "BUG" + + # set planned jobs as pending in the CI cache if on the master + if pr_info.is_master(): + for job in jobs_data["jobs_to_do"]: + config = CI_CONFIG.get_job_config(job) + if config.run_always or config.run_by_label: + continue + job_params = jobs_data["jobs_params"][job] + ci_cache.push_pending( + job, + job_params["batches"], + config.num_batches, + release_branch=pr_info.is_release_branch(), + ) + # conclude results result["git_ref"] = git_ref result["version"] = version @@ -919,45 +1568,12 @@ def main() -> int: result["ci_flags"] = ci_flags result["jobs_data"] = jobs_data result["docker_data"] = docker_data - if pr_info.number != 0 and not args.docker_digest_or_latest: - # FIXME: it runs style check before docker build if possible (style-check images is not changed) - # find a way to do style check always before docker build and others - _check_and_update_for_early_style_check(result) - if pr_info.has_changes_in_documentation_only(): - _update_config_for_docs_only(result) ### CONFIGURE action: end ### PRE action: start elif args.pre: - CommitStatusData.cleanup() - JobReport.cleanup() - BuildResult.cleanup() - assert indata, "Run config must be provided via --infile" - report_path = Path(REPORT_PATH) - report_path.mkdir(exist_ok=True, parents=True) - path = get_s3_path(indata["build"]) - reports_files = s3.download_files( # type: ignore - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".json", - local_directory=report_path, - ) - # for release/master branches reports must be created on the same branches - files = [] - if pr_info.number == 0: - for file in reports_files: - if pr_info.head_ref not in file: - # keep reports from the same branch only, if not in a PR - (report_path / file).unlink() - print(f"drop report: [{report_path / file}]") - else: - files.append(file) - reports_files = files - print( - f"Pre action done. 
Report files [{reports_files}] have been downloaded from [{path}] to [{report_path}]" - ) - ### PRE action: end + _pre_action(s3, indata, pr_info) ### RUN action: start elif args.run: @@ -989,6 +1605,9 @@ def main() -> int: print("::endgroup::") else: # this is a test job - check if GH commit status is present + + # rerun helper check + # FIXME: remove rerun_helper check and rely on ci cache only commit = get_commit( Github(get_best_robot_token(), per_page=100), pr_info.sha ) @@ -1001,6 +1620,38 @@ def main() -> int: print(status) print("::endgroup::") + # ci cache check + elif not indata["ci_flags"][Labels.NO_CI_CACHE]: + ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() + job_config = CI_CONFIG.get_job_config(check_name) + if ci_cache.is_successful( + check_name, + args.batch, + job_config.num_batches, + job_config.required_on_release_branch, + ): + job_status = ci_cache.get_successful( + check_name, args.batch, job_config.num_batches + ) + assert job_status, "BUG" + commit.create_status( + state=job_status.status, + target_url=job_status.report_url, + description=format_description( + f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: " + f"{job_status.description}" + ), + context=get_check_name( + check_name, + batch=args.batch, + num_batches=job_config.num_batches, + ), + ) + previous_status = job_status.status + print("::group::Commit Status Data") + print(job_status) + print("::endgroup::") + if previous_status: print( f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]" @@ -1015,15 +1666,15 @@ def main() -> int: ### POST action: start elif args.post: - assert ( - not CI_CONFIG.is_build_job(args.job_name) or indata - ), "--infile with config must be provided for POST action of a build type job [{args.job_name}]" job_report = JobReport.load() if JobReport.exist() else None if job_report: ch_helper = ClickHouseHelper() check_url = "" if CI_CONFIG.is_build_job(args.job_name): + assert ( + indata + ), "--infile with config must be provided for POST action of a build type job [{args.job_name}]" build_name = args.job_name s3_path_prefix = "/".join( ( @@ -1035,7 +1686,7 @@ def main() -> int: log_url = _upload_build_artifacts( pr_info, build_name, - build_digest=indata["build"], # type: ignore + ci_cache=CiCache(s3, indata["jobs_data"]["digests"]), job_report=job_report, s3=s3, s3_destination=s3_path_prefix, @@ -1112,80 +1763,16 @@ def main() -> int: ### MARK SUCCESS action: start elif args.mark_success: assert indata, "Run config must be provided via --infile" - job = args.job_name - job_config = CI_CONFIG.get_job_config(job) - num_batches = job_config.num_batches - assert ( - num_batches <= 1 or 0 <= args.batch < num_batches - ), f"--batch must be provided and in range [0, {num_batches}) for {job}" - - # FIXME: find generic design for propagating and handling job status (e.g. 
stop using statuses in GH api) - # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data - if CI_CONFIG.is_build_job(job): - # there is no status for build jobs - # create dummy success to mark it as done - job_status = CommitStatusData( - status="success", description="dummy status", report_url="dummy_url" - ) - else: - if not CommitStatusData.is_present(): - # apparently exit after rerun-helper check - # do nothing, exit without failure - print(f"ERROR: no status file for job [{job}]") - job_status = CommitStatusData( - status="dummy failure", - description="dummy status", - report_url="dummy_url", - ) - else: - # normal case - job_status = CommitStatusData.load_status() - - # Storing job data (report_url) to restore OK GH status on job results reuse - if job_config.run_always: - print(f"Job [{job}] runs always in CI - do not mark as done") - elif job_status.is_ok(): - success_flag_name = get_file_flag_name( - job, indata["jobs_data"]["digests"][job], args.batch, num_batches - ) - if not CI_CONFIG.is_docs_job(job): - path = get_s3_path(indata["build"]) + success_flag_name - else: - path = get_s3_path_docs(indata["docs"]) + success_flag_name - job_status.dump_to_file(success_flag_name) - _ = s3.upload_file( - bucket=S3_BUILDS_BUCKET, file_path=success_flag_name, s3_path=path - ) - os.remove(success_flag_name) - print( - f"Job [{job}] with digest [{indata['jobs_data']['digests'][job]}] {f'and batch {args.batch}/{num_batches}' if num_batches > 1 else ''} marked as successful. path: [{path}]" - ) - else: - print(f"Job [{job}] is not ok, status [{job_status.status}]") - ### MARK SUCCESS action: end + _mark_success_action(s3, indata, pr_info, args.job_name, args.batch) ### UPDATE GH STATUSES action: start elif args.update_gh_statuses: assert indata, "Run config must be provided via --infile" - _update_gh_statuses(indata=indata, s3=s3) - ### UPDATE GH STATUSES action: end + _update_gh_statuses_action(indata=indata, s3=s3) ### print results - if args.outfile: - with open(args.outfile, "w") as f: - if isinstance(result, str): - print(result, file=f) - elif isinstance(result, dict): - print(json.dumps(result, indent=2 if args.pretty else None), file=f) - else: - raise AssertionError(f"Unexpected type for 'res': {type(result)}") - else: - if isinstance(result, str): - print(result) - elif isinstance(result, dict): - print(json.dumps(result, indent=2 if args.pretty else None)) - else: - raise AssertionError(f"Unexpected type for 'res': {type(result)}") + _print_results(result, args.outfile, args.pretty) + return exit_code diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index ece7f2f7bae4..347de73b5ed3 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -6,11 +6,15 @@ from pathlib import Path from typing import Callable, Dict, Iterable, List, Literal, Optional, Union -from integration_test_images import IMAGES from ci_utils import WithIter +from integration_test_images import IMAGES class Labels(metaclass=WithIter): + """ + Label names or commit tokens in normalized form + """ + DO_NOT_TEST_LABEL = "do_not_test" NO_MERGE_COMMIT = "no_merge_commit" NO_CI_CACHE = "no_ci_cache" @@ -42,8 +46,9 @@ class Build(metaclass=WithIter): class JobNames(metaclass=WithIter): STYLE_CHECK = "Style check" - FAST_TEST = "Fast tests" - DOCKER_SERVER = "Docker server and keeper images" + FAST_TEST = "Fast test" + DOCKER_SERVER = "Docker server image" + DOCKER_KEEPER = "Docker keeper image" INSTALL_TEST_AMD = "Install packages (amd64)" INSTALL_TEST_ARM = 
"Install packages (arm64)" @@ -110,7 +115,6 @@ class JobNames(metaclass=WithIter): PERFORMANCE_TEST_AMD64 = "Performance Comparison" PERFORMANCE_TEST_ARM64 = "Performance Comparison Aarch64" - SQL_LANCER_TEST = "SQLancer (release)" SQL_LOGIC_TEST = "Sqllogic test (release)" SQLANCER = "SQLancer (release)" @@ -131,6 +135,8 @@ class JobNames(metaclass=WithIter): DOCS_CHECK = "Docs check" BUGFIX_VALIDATE = "tests bugfix validate check" + MARK_RELEASE_READY = "Mark Commit Release Ready" + # dynamically update JobName with Build jobs for attr_name in dir(Build): @@ -155,7 +161,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - class to configure different CI scenarious per GH label or commit message token + configures different CI scenarious per GH label """ run_jobs: Iterable[str] = frozenset() @@ -164,19 +170,26 @@ class to configure different CI scenarious per GH label or commit message token @dataclass class JobConfig: """ - contains config parameter relevant for job execution in CI workflow - @digest - configures digest calculation for the job - @run_command - will be triggered for the job if omited in CI workflow yml - @timeout - @num_batches - sets number of batches for multi-batch job + contains config parameters for job execution in CI workflow """ + # configures digest calculation for the job digest: DigestConfig = field(default_factory=DigestConfig) + # will be triggered for the job if omited in CI workflow yml run_command: str = "" + # job timeout timeout: Optional[int] = None + # sets number of batches for multi-batch job num_batches: int = 1 + # label that enables job in CI, if set digest won't be used run_by_label: str = "" + # to run always regardless of the job digest or/and label run_always: bool = False + # if the job needs to be run on the release branch, including master (e.g. building packages, docker server). + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skippable. + required_on_release_branch: bool = False + # job is for pr workflow only + pr_only: bool = False @dataclass @@ -193,6 +206,7 @@ class BuildConfig: static_binary_name: str = "" job_config: JobConfig = field( default_factory=lambda: JobConfig( + required_on_release_branch=True, digest=DigestConfig( include_paths=[ "./src", @@ -213,6 +227,12 @@ class BuildConfig: "./programs", "./packages", "./docker/packager/packager", + "./rust", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. 
+ # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", ], exclude_files=[".md"], docker=["clickhouse/binary-builder"], @@ -607,6 +627,8 @@ def validate(self) -> None: "tsan", "msan", "ubsan", + # skip build report jobs as not all builds will be done + "build check", ) ] ) @@ -773,16 +795,30 @@ def validate(self) -> None: ), }, other_jobs_configs={ + JobNames.MARK_RELEASE_READY: TestConfig( + "", job_config=JobConfig(required_on_release_branch=True) + ), JobNames.DOCKER_SERVER: TestConfig( "", job_config=JobConfig( + required_on_release_branch=True, digest=DigestConfig( include_paths=[ "tests/ci/docker_server.py", "./docker/server", + ] + ), + ), + ), + JobNames.DOCKER_KEEPER: TestConfig( + "", + job_config=JobConfig( + digest=DigestConfig( + include_paths=[ + "tests/ci/docker_server.py", "./docker/keeper", ] - ) + ), ), ), JobNames.DOCS_CHECK: TestConfig( @@ -797,11 +833,12 @@ def validate(self) -> None: JobNames.FAST_TEST: TestConfig( "", job_config=JobConfig( + pr_only=True, digest=DigestConfig( include_paths=["./tests/queries/0_stateless/"], exclude_files=[".md"], docker=["clickhouse/fasttest"], - ) + ), ), ), JobNames.STYLE_CHECK: TestConfig( @@ -916,7 +953,7 @@ def validate(self) -> None: Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), - JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( + JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), @@ -971,11 +1008,15 @@ def validate(self) -> None: ), JobNames.COMPATIBILITY_TEST: TestConfig( Build.PACKAGE_RELEASE, - job_config=JobConfig(digest=compatibility_check_digest), + job_config=JobConfig( + required_on_release_branch=True, digest=compatibility_check_digest + ), ), JobNames.COMPATIBILITY_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(digest=compatibility_check_digest), + job_config=JobConfig( + required_on_release_branch=True, digest=compatibility_check_digest + ), ), JobNames.UNIT_TEST: TestConfig( Build.BINARY_RELEASE, job_config=JobConfig(**unit_test_common_params) # type: ignore @@ -1117,16 +1158,22 @@ def __hash__(self) -> int: lambda x: x.startswith("Compatibility check"), ), CheckDescription( - "Docker image for servers", + JobNames.DOCKER_SERVER, + "The check to build and optionally push the mentioned image to docker hub", + lambda x: x.startswith("Docker server"), + ), + CheckDescription( + JobNames.DOCKER_KEEPER, "The check to build and optionally push the mentioned image to docker hub", - lambda x: x.startswith("Docker image") - and (x.endswith("building check") or x.endswith("build and push")), + lambda x: x.startswith("Docker keeper"), ), CheckDescription( - "Docs Check", "Builds and tests the documentation", lambda x: x == "Docs Check" + JobNames.DOCS_CHECK, + "Builds and tests the documentation", + lambda x: x == JobNames.DOCS_CHECK, ), CheckDescription( - "Fast test", + JobNames.FAST_TEST, "Normally this is the first check that is ran for a PR. 
It builds ClickHouse " 'and runs most of stateless functional tests, ' @@ -1134,7 +1181,7 @@ def __hash__(self) -> int: "Look at the report to see which tests fail, then reproduce the failure " 'locally as described here', - lambda x: x == "Fast test", + lambda x: x == JobNames.FAST_TEST, ), CheckDescription( "Flaky tests", @@ -1208,10 +1255,10 @@ def __hash__(self) -> int: lambda x: x.startswith("Stress test ("), ), CheckDescription( - "Style Check", + JobNames.STYLE_CHECK, "Runs a set of checks to keep the code style clean. If some of tests failed, " "see the related log from the report", - lambda x: x == "Style Check", + lambda x: x == JobNames.STYLE_CHECK, ), CheckDescription( "Unit tests", diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 3c267cff79db..7e2a3d117252 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,6 +1,6 @@ from contextlib import contextmanager import os -from typing import Union, Iterator +from typing import List, Union, Iterator from pathlib import Path @@ -17,3 +17,21 @@ def cd(path: Union[Path, str]) -> Iterator[None]: yield finally: os.chdir(oldpwd) + + +def is_hex(s): + try: + int(s, 16) + return True + except ValueError: + return False + + +class GHActions: + @staticmethod + def print_in_group(group_name: str, lines: Union[str, List[str]]) -> None: + lines = list(lines) + print(f"::group::{group_name}") + for line in lines: + print(line) + print("::endgroup::") diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 598eef9922e7..851a4cc5298c 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -350,7 +350,7 @@ def load_status(cls): # type: ignore return cls.load_from_file(STATUS_FILE_PATH) @classmethod - def is_present(cls) -> bool: + def exist(cls) -> bool: return STATUS_FILE_PATH.is_file() def dump_status(self) -> None: diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index a982cbc2a32e..400d4bc6ad5e 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -6,16 +6,13 @@ from pathlib import Path from docker_images_helper import get_docker_image, pull_image -from env_helper import TEMP_PATH, REPO_COPY +from env_helper import REPO_COPY, TEMP_PATH from pr_info import PRInfo -from report import JobReport, TestResults, TestResult +from report import JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen -NAME = "Docs Check" - - def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index c8ddcf250574..b558253ca95a 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -1,22 +1,20 @@ #!/usr/bin/env python3 import argparse +import csv import logging -import subprocess import os -import csv +import subprocess import sys from pathlib import Path from typing import Tuple from docker_images_helper import DockerImage, get_docker_image, pull_image -from env_helper import S3_BUILDS_BUCKET, TEMP_PATH, REPO_COPY +from env_helper import REPO_COPY, S3_BUILDS_BUCKET, TEMP_PATH from pr_info import FORCE_TESTS_LABEL, PRInfo from report import JobReport, TestResult, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -NAME = "Fast test" - # Will help to avoid errors like _csv.Error: field larger than field limit (131072) csv.field_size_limit(sys.maxsize) diff --git a/tests/ci/performance_comparison_check.py 
b/tests/ci/performance_comparison_check.py index 524da916a5e0..5a98fa06b393 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -29,7 +29,7 @@ from clickhouse_helper import get_instance_type, get_instance_id from stopwatch import Stopwatch from build_download_helper import download_builds_filter -from report import JobReport +from report import SUCCESS, JobReport IMAGE_NAME = "clickhouse/performance-comparison" @@ -223,7 +223,7 @@ def too_many_slow(msg): message = message_match.group(1).strip() # TODO: Remove me, always green mode for the first time, unless errors - status = "success" + status = SUCCESS if "errors" in message.lower() or too_many_slow(message.lower()): status = "failure" # TODO: Remove until here @@ -249,7 +249,7 @@ def too_many_slow(msg): check_name=check_name_with_group, ).dump() - if status == "error": + if status != SUCCESS: sys.exit(1) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index d1867ebf7863..744de7dea728 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,15 +3,20 @@ import logging import os from typing import Dict, List, Set, Union +from urllib.parse import quote +# isort: off +# for some reason this line moves to the end from unidiff import PatchSet # type: ignore +# isort: on + from build_download_helper import get_gh_api from env_helper import ( + GITHUB_EVENT_PATH, GITHUB_REPOSITORY, - GITHUB_SERVER_URL, GITHUB_RUN_URL, - GITHUB_EVENT_PATH, + GITHUB_SERVER_URL, ) FORCE_TESTS_LABEL = "force tests" @@ -282,7 +287,10 @@ def __init__( self.fetch_changed_files() def is_master(self) -> bool: - return self.number == 0 and self.base_ref == "master" + return self.number == 0 and self.head_ref == "master" + + def is_release_branch(self) -> bool: + return self.number == 0 def is_scheduled(self): return self.event_type == EventType.SCHEDULE @@ -295,9 +303,10 @@ def compare_pr_url(self, pr_object: dict) -> str: @staticmethod def compare_url(first: str, second: str) -> str: + """the first and second are URL encoded to not fail on '#' and other symbols""" return ( "https://api.github.com/repos/" - f"{GITHUB_REPOSITORY}/compare/{first}...{second}" + f"{GITHUB_REPOSITORY}/compare/{quote(first)}...{quote(second)}" ) def fetch_changed_files(self): diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 108aa7d1946f..a6312872c2a6 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,11 +1,14 @@ #!/usr/bin/env python3 import atexit -import sys import logging +import sys from typing import Tuple +# isort: off from github import Github +# isort: on + from commit_status_helper import ( CI_STATUS_NAME, create_ci_report, @@ -18,12 +21,12 @@ ) from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token -from pr_info import FORCE_TESTS_LABEL, PRInfo from lambda_shared_package.lambda_shared.pr import ( CATEGORY_TO_LABEL, TRUSTED_CONTRIBUTORS, check_pr_description, ) +from pr_info import FORCE_TESTS_LABEL, PRInfo from report import FAILURE TRUSTED_ORG_IDS = { @@ -146,7 +149,7 @@ def main(): ) post_commit_status( commit, - "failure", + FAILURE, url, format_description(description_error), PR_CHECK, @@ -170,6 +173,14 @@ def main(): # allow the workflow to continue if not can_run: + post_commit_status( + commit, + FAILURE, + "", + description, + PR_CHECK, + pr_info, + ) print("::notice ::Cannot run") sys.exit(1) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 616d645b5a67..bff53f00ad35 100644 --- a/tests/ci/s3_helper.py +++ 
b/tests/ci/s3_helper.py @@ -107,6 +107,9 @@ def _upload_file_to_s3( logging.info("Upload %s to %s. Meta: %s", file_path, url, metadata) return url + def delete_file_from_s3(self, bucket_name: str, s3_path: str) -> None: + self.client.delete_object(Bucket=bucket_name, Key=s3_path) + def upload_test_report_to_s3(self, file_path: Path, s3_path: str) -> str: if CI: return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4f791a5ee01a..785e29b23591 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -8,7 +8,6 @@ from pathlib import Path from typing import List, Tuple - from docker_images_helper import get_docker_image, pull_image from env_helper import REPO_COPY, TEMP_PATH from git_helper import GIT_PREFIX, git_runner @@ -17,8 +16,6 @@ from ssh import SSHKey from stopwatch import Stopwatch -NAME = "Style Check" - def process_result( result_directory: Path, diff --git a/tests/ci/test_ci_cache.py b/tests/ci/test_ci_cache.py new file mode 100644 index 000000000000..0f8acf2656c0 --- /dev/null +++ b/tests/ci/test_ci_cache.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python + +from hashlib import md5 +from pathlib import Path +import shutil +from typing import Dict, Set +import unittest +from ci_config import Build, JobNames +from s3_helper import S3Helper +from ci import CiCache +from digest_helper import JOB_DIGEST_LEN +from commit_status_helper import CommitStatusData +from env_helper import S3_BUILDS_BUCKET, TEMP_PATH + + +def _create_mock_digest_1(string): + return md5((string).encode("utf-8")).hexdigest()[:JOB_DIGEST_LEN] + + +def _create_mock_digest_2(string): + return md5((string + "+nonce").encode("utf-8")).hexdigest()[:JOB_DIGEST_LEN] + + +DIGESTS = {job: _create_mock_digest_1(job) for job in JobNames} +DIGESTS2 = {job: _create_mock_digest_2(job) for job in JobNames} + + +# pylint:disable=protected-access +class S3HelperTestMock(S3Helper): + def __init__(self) -> None: + super().__init__() + self.files_on_s3_paths = {} # type: Dict[str, Set[str]] + + # local path which is mocking remote s3 path with ci_cache + self.mock_remote_s3_path = Path(TEMP_PATH) / "mock_s3_path" + if not self.mock_remote_s3_path.exists(): + self.mock_remote_s3_path.mkdir(parents=True, exist_ok=True) + for file in self.mock_remote_s3_path.iterdir(): + file.unlink() + + def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET): + assert bucket == S3_BUILDS_BUCKET + file_prefix = Path(s3_prefix_path).name + path = str(Path(s3_prefix_path).parent) + return [ + path + "/" + file + for file in self.files_on_s3_paths[path] + if file.startswith(file_prefix) + ] + + def upload_file(self, bucket, file_path, s3_path): + assert bucket == S3_BUILDS_BUCKET + file_name = Path(file_path).name + assert ( + file_name in s3_path + ), f"Record file name [{file_name}] must be part of a path on s3 [{s3_path}]" + s3_path = str(Path(s3_path).parent) + if s3_path in self.files_on_s3_paths: + self.files_on_s3_paths[s3_path].add(file_name) + else: + self.files_on_s3_paths[s3_path] = set([file_name]) + shutil.copy(file_path, self.mock_remote_s3_path) + + def download_files(self, bucket, s3_path, file_suffix, local_directory): + assert bucket == S3_BUILDS_BUCKET + assert file_suffix == CiCache._RECORD_FILE_EXTENSION + assert local_directory == CiCache._LOCAL_CACHE_PATH + assert CiCache._S3_CACHE_PREFIX in s3_path + assert [job_type.value in s3_path for job_type in CiCache.JobType] + + # copying from mock remote path to local cache + for 
remote_record in self.mock_remote_s3_path.glob(f"*{file_suffix}"): + destination_file = CiCache._LOCAL_CACHE_PATH / remote_record.name + shutil.copy(remote_record, destination_file) + + +# pylint:disable=protected-access +class TestCiCache(unittest.TestCase): + def test_cache(self): + s3_mock = S3HelperTestMock() + ci_cache = CiCache(s3_mock, DIGESTS) + # immitate another CI run is using cache + ci_cache_2 = CiCache(s3_mock, DIGESTS2) + NUM_BATCHES = 10 + + DOCS_JOBS_NUM = 1 + assert len(set(job for job in JobNames)) == len(list(job for job in JobNames)) + NONDOCS_JOBS_NUM = len(set(job for job in JobNames)) - DOCS_JOBS_NUM + + PR_NUM = 123456 + status = CommitStatusData( + status="success", + report_url="dummy url", + description="OK OK OK", + sha="deadbeaf2", + pr_num=PR_NUM, + ) + + ### add some pending statuses for two batches and on non-release branch + for job in JobNames: + ci_cache.push_pending(job, [0, 1], NUM_BATCHES, release_branch=False) + ci_cache_2.push_pending(job, [0, 1], NUM_BATCHES, release_branch=False) + + ### add success status for 0 batch, non-release branch + for job in JobNames: + ci_cache.push_successful(job, 0, NUM_BATCHES, status, release_branch=False) + ci_cache_2.push_successful( + job, 0, NUM_BATCHES, status, release_branch=False + ) + + ### check all expected directories were created on s3 mock + expected_build_path_1 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_1(Build.PACKAGE_RELEASE)}" + expected_docs_path_1 = ( + f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_1(JobNames.DOCS_CHECK)}" + ) + expected_build_path_2 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_2(Build.PACKAGE_RELEASE)}" + expected_docs_path_2 = ( + f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_2(JobNames.DOCS_CHECK)}" + ) + self.assertCountEqual( + list(s3_mock.files_on_s3_paths.keys()), + [ + f"{CiCache._S3_CACHE_PREFIX}/{expected_build_path_1}", + f"{CiCache._S3_CACHE_PREFIX}/{expected_docs_path_1}", + f"{CiCache._S3_CACHE_PREFIX}/{expected_build_path_2}", + f"{CiCache._S3_CACHE_PREFIX}/{expected_docs_path_2}", + ], + ) + + ### check number of cache files is as expected + FILES_PER_JOB = 3 # 1 successful + 2 pending batches = 3 + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_build_path_1}" + ] + ), + NONDOCS_JOBS_NUM * FILES_PER_JOB, + ) + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_docs_path_1}" + ] + ), + DOCS_JOBS_NUM * FILES_PER_JOB, + ) + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_build_path_2}" + ] + ), + NONDOCS_JOBS_NUM * FILES_PER_JOB, + ) + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_docs_path_2}" + ] + ), + DOCS_JOBS_NUM * FILES_PER_JOB, + ) + + ### check statuses for all jobs in cache + for job in JobNames: + self.assertEqual( + ci_cache.is_successful(job, 0, NUM_BATCHES, release_branch=False), True + ) + self.assertEqual( + ci_cache.is_successful(job, 0, NUM_BATCHES, release_branch=True), False + ) + self.assertEqual( + ci_cache.is_successful( + job, batch=1, num_batches=NUM_BATCHES, release_branch=False + ), + False, + ) # false - it's pending + self.assertEqual( + ci_cache.is_successful( + job, + batch=NUM_BATCHES, + num_batches=NUM_BATCHES, + release_branch=False, + ), + False, + ) # false - no such record + self.assertEqual( + ci_cache.is_pending(job, 0, NUM_BATCHES, release_branch=False), False + ) # false, it's successful, success has more 
priority than pending + self.assertEqual( + ci_cache.is_pending(job, 1, NUM_BATCHES, release_branch=False), True + ) # true + self.assertEqual( + ci_cache.is_pending(job, 1, NUM_BATCHES, release_branch=True), False + ) # false, not pending job on release_branch + + status2 = ci_cache.get_successful(job, 0, NUM_BATCHES) + assert status2 and status2.pr_num == PR_NUM + status2 = ci_cache.get_successful(job, 1, NUM_BATCHES) + assert status2 is None + + ### add some more pending statuses for two batches and for a release branch + for job in JobNames: + ci_cache.push_pending( + job, batches=[0, 1], num_batches=NUM_BATCHES, release_branch=True + ) + + ### add success statuses for 0 batch and release branch + PR_NUM = 234 + status = CommitStatusData( + status="success", + report_url="dummy url", + description="OK OK OK", + sha="deadbeaf2", + pr_num=PR_NUM, + ) + for job in JobNames: + ci_cache.push_successful(job, 0, NUM_BATCHES, status, release_branch=True) + + ### check number of cache files is as expected + FILES_PER_JOB = 6 # 1 successful + 1 successful_release + 2 pending batches + 2 pending batches release = 6 + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_build_path_1}" + ] + ), + NONDOCS_JOBS_NUM * FILES_PER_JOB, + ) + self.assertEqual( + len( + s3_mock.files_on_s3_paths[ + f"{CiCache._S3_CACHE_PREFIX}/{expected_docs_path_1}" + ] + ), + DOCS_JOBS_NUM * FILES_PER_JOB, + ) + + ### check statuses + for job in JobNames: + self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) + self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) + self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) + self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, True), False) + self.assertEqual( + ci_cache.is_pending(job, 0, NUM_BATCHES, False), False + ) # it's success, not pending + self.assertEqual( + ci_cache.is_pending(job, 0, NUM_BATCHES, True), False + ) # it's success, not pending + self.assertEqual(ci_cache.is_pending(job, 1, NUM_BATCHES, False), True) + self.assertEqual(ci_cache.is_pending(job, 1, NUM_BATCHES, True), True) + + status2 = ci_cache.get_successful(job, 0, NUM_BATCHES) + assert status2 and status2.pr_num == PR_NUM + status2 = ci_cache.get_successful(job, 1, NUM_BATCHES) + assert status2 is None + + ### create new cache object and verify the same checks + ci_cache = CiCache(s3_mock, DIGESTS) + for job in JobNames: + self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) + self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) + self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) + self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, True), False) + self.assertEqual( + ci_cache.is_pending(job, 0, NUM_BATCHES, False), False + ) # it's success, not pending + self.assertEqual( + ci_cache.is_pending(job, 0, NUM_BATCHES, True), False + ) # it's success, not pending + self.assertEqual(ci_cache.is_pending(job, 1, NUM_BATCHES, False), True) + self.assertEqual(ci_cache.is_pending(job, 1, NUM_BATCHES, True), True) + + status2 = ci_cache.get_successful(job, 0, NUM_BATCHES) + assert status2 and status2.pr_num == PR_NUM + status2 = ci_cache.get_successful(job, 1, NUM_BATCHES) + assert status2 is None + + ### check some job values which are not in the cache + self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES + 1, False), False) + self.assertEqual( + ci_cache.is_successful(job, NUM_BATCHES - 1, NUM_BATCHES, 
False), False + ) + self.assertEqual(ci_cache.is_pending(job, 0, NUM_BATCHES + 1, False), False) + self.assertEqual( + ci_cache.is_pending(job, NUM_BATCHES - 1, NUM_BATCHES, False), False + ) + + +if __name__ == "__main__": + TestCiCache().test_cache() diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index e511d773577f..5e68f2d4b538 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 -from collections import namedtuple import fnmatch import json import time +from collections import namedtuple +from urllib.parse import quote import requests # type: ignore - from lambda_shared.pr import TRUSTED_CONTRIBUTORS from lambda_shared.token import get_cached_access_token @@ -129,7 +129,7 @@ def _exec_post_with_retry(url, token, data=None): def _get_pull_requests_from(repo_url, owner, branch, token): - url = f"{repo_url}/pulls?head={owner}:{branch}" + url = f"{repo_url}/pulls?head={quote(owner)}:{quote(branch)}" return _exec_get_with_retry(url, token) diff --git a/tests/config/config.d/block_number.xml b/tests/config/config.d/block_number.xml new file mode 100644 index 000000000000..b56f1f1afc25 --- /dev/null +++ b/tests/config/config.d/block_number.xml @@ -0,0 +1,6 @@ + + + + 0 + + diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index b87014d24859..b724d5dd87e3 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -4,6 +4,7 @@ 1 1 + 1 10000 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 18652826d831..1429dfff724b 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -4,11 +4,17 @@ s3 s3_disk/ - http://localhost:11111/test/common/ + http://localhost:11111/test/s3/ clickhouse clickhouse 20000 + + s3_plain + http://localhost:11111/test/s3_plain/ + clickhouse + clickhouse + cache s3_disk diff --git a/tests/config/install.sh b/tests/config/install.sh index a68a4c195019..cfe810cda843 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -64,6 +64,7 @@ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/config/users.d/readonly.xml b/tests/config/users.d/readonly.xml index 0fe1e3fe6d94..799de11decfc 100644 --- a/tests/config/users.d/readonly.xml +++ b/tests/config/users.d/readonly.xml @@ -9,7 +9,8 @@ - + + ::1 127.0.0.1 diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index cc2c2c5fcde8..f93b0efd8284 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -18,7 +18,8 @@ - + + ::1 127.0.0.1 diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c0b145b047a3..1d96563251bd 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3776,7 +3776,9 @@ def stop_clickhouse(self, stop_wait_sec=30, kill=False): except Exception as e: logging.warning(f"Stop ClickHouse raised an error {e}") - def start_clickhouse(self, start_wait_sec=60, retry_start=True): + def start_clickhouse( + self, start_wait_sec=60, retry_start=True, expected_to_fail=False + ): if not self.stay_alive: raise Exception( "ClickHouse can be started again only with stay_alive=True instance" @@ -3794,10 +3796,15 @@ def start_clickhouse(self, start_wait_sec=60, retry_start=True): ["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)], user=str(os.getuid()), ) + if expected_to_fail: + self.wait_start_failed(start_wait_sec + start_time - time.time()) + return time.sleep(1) continue else: logging.debug("Clickhouse process running.") + if expected_to_fail: + raise Exception("ClickHouse was expected not to be running.") try: self.wait_start(start_wait_sec + start_time - time.time()) return @@ -3849,6 +3856,30 @@ def wait_start(self, start_wait_sec): if last_err is not None: raise last_err + def wait_start_failed(self, start_wait_sec): + start_time = time.time() + while time.time() <= start_time + start_wait_sec: + pid = self.get_process_pid("clickhouse") + if pid is None: + return + time.sleep(1) + logging.error( + f"No time left to shutdown. Process is still running. Will dump threads." + ) + ps_clickhouse = self.exec_in_container( + ["bash", "-c", "ps -C clickhouse"], nothrow=True, user="root" + ) + logging.info(f"PS RESULT:\n{ps_clickhouse}") + pid = self.get_process_pid("clickhouse") + if pid is not None: + self.exec_in_container( + ["bash", "-c", f"gdb -batch -ex 'thread apply all bt full' -p {pid}"], + user="root", + ) + raise Exception( + "ClickHouse server is still running, but was expected to shutdown. Check logs." + ) + def restart_clickhouse(self, stop_start_wait_sec=60, kill=False): self.stop_clickhouse(stop_start_wait_sec, kill) self.start_clickhouse(stop_start_wait_sec) diff --git a/tests/integration/helpers/s3_tools.py b/tests/integration/helpers/s3_tools.py index 777b3394dc14..0c3538c3c39c 100644 --- a/tests/integration/helpers/s3_tools.py +++ b/tests/integration/helpers/s3_tools.py @@ -36,6 +36,14 @@ def get_file_contents(minio_client, bucket, s3_path): return data_str.decode() +def list_s3_objects(minio_client, bucket, prefix=""): + prefix_len = len(prefix) + return [ + obj.object_name[prefix_len:] + for obj in minio_client.list_objects(bucket, prefix=prefix, recursive=True) + ] + + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. 
diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.reference b/tests/integration/test_async_insert_adaptive_busy_timeout/__init__.py similarity index 100% rename from tests/queries/0_stateless/02696_inverted_idx_checksums.reference rename to tests/integration/test_async_insert_adaptive_busy_timeout/__init__.py diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/configs/users.xml b/tests/integration/test_async_insert_adaptive_busy_timeout/configs/users.xml new file mode 100644 index 000000000000..755dc4ac2696 --- /dev/null +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/configs/users.xml @@ -0,0 +1,14 @@ + + + + 1 + + + + + + + default + + + diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/configs/zookeeper_config.xml b/tests/integration/test_async_insert_adaptive_busy_timeout/configs/zookeeper_config.xml new file mode 100644 index 000000000000..18412349228f --- /dev/null +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/configs/zookeeper_config.xml @@ -0,0 +1,8 @@ + + + + zoo1 + 2181 + + + diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py new file mode 100644 index 000000000000..93319a56d0f3 --- /dev/null +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py @@ -0,0 +1,372 @@ +import copy +import logging +import pytest +import random +import timeit + +from math import floor +from multiprocessing import Pool +from itertools import repeat + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + + +node = cluster.add_instance( + "node", + main_configs=["configs/zookeeper_config.xml"], + user_configs=[ + "configs/users.xml", + ], + with_zookeeper=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +_query_settings = {"async_insert": 1, "wait_for_async_insert": 1} + + +def _generate_values(size, min_int, max_int, array_size_range): + gen_tuple = lambda _min_int, _max_int, _array_size_range: ( + random.randint(_min_int, _max_int), + [ + random.randint(_min_int, _max_int) + for _ in range(random.randint(*_array_size_range)) + ], + ) + + return map(lambda _: gen_tuple(min_int, max_int, array_size_range), range(size)) + + +def _insert_query(table_name, settings, *args, **kwargs): + settings_s = ", ".join("{}={}".format(k, settings[k]) for k in settings) + INSERT_QUERY = "INSERT INTO {} SETTINGS {} VALUES {}" + node.query( + INSERT_QUERY.format( + table_name, + settings_s, + ", ".join(map(str, _generate_values(*args, **kwargs))), + ) + ) + + +def _insert_queries_sequentially( + table_name, settings, iterations, max_values_size, array_size_range +): + for iter in range(iterations): + _insert_query( + table_name, + settings, + random.randint(1, max_values_size), + iter * max_values_size, + (iter + 1) * max_values_size - 1, + array_size_range, + ) + + +def _insert_queries_in_parallel( + table_name, settings, thread_num, tasks, max_values_size, array_size_range +): + sizes = [random.randint(1, max_values_size) for _ in range(tasks)] + min_ints = [iter * max_values_size for iter in range(tasks)] + max_ints = [(iter + 1) * max_values_size - 1 for iter in range(tasks)] + with Pool(thread_num) as p: + p.starmap( + _insert_query, + zip( + repeat(table_name), + repeat(settings), + sizes, + min_ints, + max_ints, + repeat(array_size_range), + ), + ) + + +def 
test_with_merge_tree(): + table_name = "async_insert_mt_table" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + table_name + ) + ) + + _insert_queries_sequentially( + table_name, + _query_settings, + iterations=100, + max_values_size=1000, + array_size_range=[10, 50], + ) + + node.query("DROP TABLE IF EXISTS {}".format(table_name)) + + +def test_with_merge_tree_multithread(): + thread_num = 15 + table_name = "async_insert_mt_multithread_table" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + table_name + ) + ) + + _insert_queries_in_parallel( + table_name, + _query_settings, + thread_num=15, + tasks=1000, + max_values_size=1000, + array_size_range=[10, 15], + ) + + node.query("DROP TABLE IF EXISTS {}".format(table_name)) + + +def test_with_replicated_merge_tree(): + table_name = "async_insert_replicated_mt_table" + + create_query = " ".join( + ( + "CREATE TABLE {} (a UInt64, b Array(UInt64))".format(table_name), + "ENGINE=ReplicatedMergeTree('/clickhouse/tables/test/{}', 'node')".format( + table_name + ), + "ORDER BY a", + ) + ) + + node.query(create_query) + + settings = _query_settings + _insert_queries_sequentially( + table_name, + settings, + iterations=100, + max_values_size=1000, + array_size_range=[10, 50], + ) + + node.query("DROP TABLE IF EXISTS {}".format(table_name)) + + +def test_with_replicated_merge_tree_multithread(): + thread_num = 15 + table_name = "async_insert_replicated_mt_multithread_table" + + create_query = " ".join( + ( + "CREATE TABLE {} (a UInt64, b Array(UInt64))".format(table_name), + "ENGINE=ReplicatedMergeTree('/clickhouse/tables/test/{}', 'node')".format( + table_name + ), + "ORDER BY a", + ) + ) + + node.query(create_query) + + _insert_queries_in_parallel( + table_name, + _query_settings, + thread_num=15, + tasks=1000, + max_values_size=1000, + array_size_range=[10, 15], + ) + + node.query("DROP TABLE IF EXISTS {}".format(table_name)) + + +# Ensure that the combined duration of inserts with adaptive timeouts is less than +# the combined duration for fixed timeouts. 
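+# The expectation rests on the settings used below: the fixed-timeout run always waits the
+# full async_insert_busy_timeout_ms=200 before flushing, while the adaptive run may shrink
+# its delay towards async_insert_busy_timeout_min_ms=10 under a steady stream of inserts.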
+def test_compare_sequential_inserts_durations_for_adaptive_and_fixed_async_timeouts(): + fixed_tm_table_name = "async_insert_mt_fixed_async_timeout" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + fixed_tm_table_name + ) + ) + + fixed_tm_settings = copy.copy(_query_settings) + fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0 + fixed_tm_settings["async_insert_busy_timeout_ms"] = 200 + + fixed_tm_run_duration = timeit.timeit( + lambda: _insert_queries_sequentially( + fixed_tm_table_name, + fixed_tm_settings, + iterations=100, + max_values_size=1000, + array_size_range=[10, 50], + ), + setup="pass", + number=3, + ) + + node.query("DROP TABLE IF EXISTS {}".format(fixed_tm_table_name)) + + logging.debug( + "Run duration with fixed asynchronous timeout is {} seconds".format( + fixed_tm_run_duration + ) + ) + + adaptive_tm_table_name = "async_insert_mt_adaptive_async_timeout" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + adaptive_tm_table_name + ) + ) + + adaptive_tm_settings = copy.copy(_query_settings) + adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10 + adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 1000 + + adaptive_tm_run_duration = timeit.timeit( + lambda: _insert_queries_sequentially( + adaptive_tm_table_name, + adaptive_tm_settings, + iterations=100, + max_values_size=1000, + array_size_range=[10, 50], + ), + setup="pass", + number=3, + ) + + logging.debug( + "Run duration with adaptive asynchronous timeout is {} seconds.".format( + adaptive_tm_run_duration + ) + ) + + node.query("DROP TABLE IF EXISTS {}".format(adaptive_tm_table_name)) + + assert adaptive_tm_run_duration <= fixed_tm_run_duration + + +# Ensure that the combined duration of inserts with adaptive timeouts is less than +# the combined duration for fixed timeouts. 
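+# In the parallel case below the adaptive timeout is capped by async_insert_busy_timeout_max_ms=200,
+# the same value as the fixed timeout, so even under concurrent load the adaptive run should
+# not be slower than the fixed-timeout run.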
+def test_compare_parallel_inserts_durations_for_adaptive_and_fixed_async_timeouts(): + fixed_tm_table_name = "async_insert_mt_fixed_async_timeout" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + fixed_tm_table_name + ) + ) + + fixed_tm_settings = copy.copy(_query_settings) + fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0 + fixed_tm_settings["async_insert_busy_timeout_ms"] = 200 + + fixed_tm_run_duration = timeit.timeit( + lambda: _insert_queries_in_parallel( + fixed_tm_table_name, + fixed_tm_settings, + thread_num=15, + tasks=1000, + max_values_size=1000, + array_size_range=[10, 50], + ), + setup="pass", + number=3, + ) + + node.query("DROP TABLE IF EXISTS {}".format(fixed_tm_table_name)) + + logging.debug( + "Run duration with fixed asynchronous timeout is {} seconds".format( + fixed_tm_run_duration + ) + ) + + adaptive_tm_table_name = "async_insert_mt_adaptive_async_timeout" + node.query( + "CREATE TABLE {} (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a".format( + adaptive_tm_table_name + ) + ) + + adaptive_tm_settings = copy.copy(_query_settings) + adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10 + adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 200 + + adaptive_tm_run_duration = timeit.timeit( + lambda: _insert_queries_in_parallel( + adaptive_tm_table_name, + adaptive_tm_settings, + thread_num=15, + tasks=100, + max_values_size=1000, + array_size_range=[10, 50], + ), + setup="pass", + number=3, + ) + + logging.debug( + "Run duration with adaptive asynchronous timeout is {} seconds.".format( + adaptive_tm_run_duration + ) + ) + + node.query("DROP TABLE IF EXISTS {}".format(adaptive_tm_table_name)) + + assert adaptive_tm_run_duration <= fixed_tm_run_duration + + +# Ensure that the delay converges to a minimum for sequential inserts and wait_for_async_insert=1. 
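+# The test below first inflates the timeout with a burst of parallel inserts (capped at
+# async_insert_busy_timeout_max_ms=2000), then switches to sequential inserts and checks
+# that the last 50 entries in system.asynchronous_insert_log report timeout_milliseconds
+# equal to async_insert_busy_timeout_min_ms=50 again.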
+def test_change_queries_frequency(): + table_name = "async_insert_mt_change_queries_frequencies" + + create_query = " ".join( + ( + "CREATE TABLE {} (a UInt64, b Array(UInt64))".format(table_name), + "ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_frequencies/{}', 'node')".format( + table_name + ), + "ORDER BY a", + ) + ) + + node.query(create_query) + + settings = copy.copy(_query_settings) + min_ms = 50 + settings["async_insert_busy_timeout_min_ms"] = min_ms + settings["async_insert_busy_timeout_max_ms"] = 2000 + + _insert_queries_in_parallel( + table_name, + settings, + thread_num=15, + tasks=2000, + max_values_size=1000, + array_size_range=[10, 15], + ) + + _insert_queries_sequentially( + table_name, + settings, + iterations=200, + max_values_size=1000, + array_size_range=[10, 50], + ) + + select_log_query = "SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 50" + res = node.query(select_log_query) + for line in res.splitlines(): + assert int(line) == min_ms + + node.query("DROP TABLE IF EXISTS {}".format(table_name)) diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference b/tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py similarity index 100% rename from tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference rename to tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml b/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml new file mode 100644 index 000000000000..b40730e9f7d5 --- /dev/null +++ b/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + + + diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/test.py b/tests/integration/test_attach_partition_distinct_expression_replicated/test.py new file mode 100644 index 000000000000..1d8ac4e9e370 --- /dev/null +++ b/tests/integration/test_attach_partition_distinct_expression_replicated/test.py @@ -0,0 +1,214 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +replica1 = cluster.add_instance( + "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) +replica2 = cluster.add_instance( + "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + except Exception as ex: + print(ex) + finally: + cluster.shutdown() + + +def cleanup(nodes): + for node in nodes: + node.query("DROP TABLE IF EXISTS source SYNC") + node.query("DROP TABLE IF EXISTS destination SYNC") + + +def create_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + partition_expression = ( + "toYYYYMMDD(timestamp)" if table_name == "source" else "toYYYYMM(timestamp)" + ) + node.query_with_retry( + """ + CREATE TABLE {table_name}(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY {partition_expression} + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + 
""".format( + table_name=table_name, + engine=engine, + partition_expression=partition_expression, + ) + ) + + +def test_both_replicated(start_cluster): + for node in [replica1, replica2]: + create_table(node, "source", True) + create_table(node, "destination", True) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination", + replica2.query(f"SELECT * FROM destination"), + ) + + cleanup([replica1, replica2]) + + +def test_only_destination_replicated(start_cluster): + create_table(replica1, "source", False) + create_table(replica1, "destination", True) + create_table(replica2, "destination", True) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination", + replica2.query(f"SELECT * FROM destination"), + ) + + cleanup([replica1, replica2]) + + +def test_both_replicated_partitioned_to_unpartitioned(start_cluster): + def create_tables(nodes): + for node in nodes: + source_engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" + ) + node.query( + """ + CREATE TABLE source(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp) + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=source_engine, + ) + ) + + destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" + node.query( + """ + CREATE TABLE destination(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY tuple() + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=destination_engine, + ) + ) + + create_tables([replica1, replica2]) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("INSERT INTO source VALUES ('2010-03-03 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100303' FROM source" + ) + + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY timestamp", + "2010-03-02 02:01:01\n2010-03-03 02:01:01\n", + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY timestamp", + replica2.query(f"SELECT * FROM destination ORDER BY timestamp"), + ) + + cleanup([replica1, replica2]) + + +def test_both_replicated_different_exp_same_id(start_cluster): + def create_tables(nodes): + for node in nodes: + source_engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" + ) + node.query( + """ + CREATE TABLE source(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY a % 3 + SETTINGS 
cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=source_engine, + ) + ) + + destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" + node.query( + """ + CREATE TABLE destination(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY a + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=destination_engine, + ) + ) + + create_tables([replica1, replica2]) + + replica1.query( + "INSERT INTO source (a, b, c, extra, sign) VALUES (1, 5, 9, 1000, 1)" + ) + replica1.query( + "INSERT INTO source (a, b, c, extra, sign) VALUES (2, 6, 10, 1000, 1)" + ) + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + + replica1.query(f"ALTER TABLE destination ATTACH PARTITION 1 FROM source") + replica1.query(f"ALTER TABLE destination ATTACH PARTITION 2 FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY a", + "1\t5\t9\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n2\t6\t10\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n", + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY a", + replica2.query(f"SELECT * FROM destination ORDER BY a"), + ) + + cleanup([replica1, replica2]) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 20f538cca58c..027c9736c323 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -1087,9 +1087,11 @@ def test_stop_other_host_during_backup(kill): status = node1.query(f"SELECT status FROM system.backups WHERE id='{id}'").strip() if kill: - assert status in ["BACKUP_CREATED", "BACKUP_FAILED"] + expected_statuses = ["BACKUP_CREATED", "BACKUP_FAILED"] else: - assert status == "BACKUP_CREATED" + expected_statuses = ["BACKUP_CREATED", "BACKUP_CANCELLED"] + + assert status in expected_statuses node2.start_clickhouse() diff --git a/tests/queries/0_stateless/02895_forbid_create_inverted_index.reference b/tests/integration/test_broken_projections/__init__.py similarity index 100% rename from tests/queries/0_stateless/02895_forbid_create_inverted_index.reference rename to tests/integration/test_broken_projections/__init__.py diff --git a/tests/integration/test_broken_projections/config.d/backups.xml b/tests/integration/test_broken_projections/config.d/backups.xml new file mode 100644 index 000000000000..4da8edffd67f --- /dev/null +++ b/tests/integration/test_broken_projections/config.d/backups.xml @@ -0,0 +1,13 @@ + + + + + local + /var/lib/clickhouse/disks/backups/ + + + + + backups + + diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py new file mode 100644 index 000000000000..4a4690a5d0a0 --- /dev/null +++ b/tests/integration/test_broken_projections/test.py @@ -0,0 +1,576 @@ +import time +import pytest +import logging +import string +import random +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["config.d/backups.xml"], + stay_alive=True, + with_zookeeper=True, + ) + + logging.info("Starting cluster...") + cluster.start() + 
logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def create_table(node, table, replica, data_prefix="", aggressive_merge=True): + if data_prefix == "": + data_prefix = table + + if aggressive_merge: + vertical_merge_algorithm_min_rows_to_activate = 1 + vertical_merge_algorithm_min_columns_to_activate = 1 + max_parts_to_merge_at_once = 3 + else: + vertical_merge_algorithm_min_rows_to_activate = 100000 + vertical_merge_algorithm_min_columns_to_activate = 100 + max_parts_to_merge_at_once = 3 + + node.query( + f""" + DROP TABLE IF EXISTS {table} SYNC; + CREATE TABLE {table} + ( + a String, + b String, + c Int64, + d Int64, + e Int64, + PROJECTION proj1 + ( + SELECT c ORDER BY d + ), + PROJECTION proj2 + ( + SELECT d ORDER BY c + ) + ) + ENGINE = ReplicatedMergeTree('/test_broken_projection_{data_prefix}/data/', '{replica}') ORDER BY a + SETTINGS min_bytes_for_wide_part = 0, + max_parts_to_merge_at_once={max_parts_to_merge_at_once}, + enable_vertical_merge_algorithm=0, + vertical_merge_algorithm_min_rows_to_activate = {vertical_merge_algorithm_min_rows_to_activate}, + vertical_merge_algorithm_min_columns_to_activate = {vertical_merge_algorithm_min_columns_to_activate}, + compress_primary_key=0; + """ + ) + + +def insert(node, table, offset, size): + node.query( + f""" + INSERT INTO {table} + SELECT number, number, number, number, number%2 FROM numbers({offset}, {size}) + SETTINGS insert_keeper_fault_injection_probability=0.0; + """ + ) + + +def get_parts(node, table): + return ( + node.query( + f""" + SELECT name + FROM system.parts + WHERE table='{table}' AND database=currentDatabase() AND active = 1 + ORDER BY name;" + """ + ) + .strip() + .split("\n") + ) + + +def bash(node, command): + node.exec_in_container(["bash", "-c", command], privileged=True, user="root") + + +def break_projection(node, table, part, parent_part, break_type): + part_path = node.query( + f""" + SELECT path + FROM system.projection_parts + WHERE table='{table}' + AND database=currentDatabase() + AND active=1 + AND part_name='{part}' + AND parent_name='{parent_part}' + ORDER BY modification_time DESC + LIMIT 1; + """ + ).strip() + + node.query( + f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')" + ) + + if break_type == "data": + bash(node, f"rm '{part_path}/d.bin'") + bash(node, f"rm '{part_path}/c.bin'") + elif break_type == "metadata": + bash(node, f"rm '{part_path}/columns.txt'") + elif break_type == "part": + bash(node, f"rm -r '{part_path}'") + + +def break_part(node, table, part): + part_path = node.query( + f""" + SELECT path + FROM system.parts + WHERE table='{table}' + AND database=currentDatabase() + AND active=1 + AND part_name='{part}' + ORDER BY modification_time DESC + LIMIT 1; + """ + ).strip() + + node.query( + f"select throwIf(substring('{part_path}', 1, 1) != '/', 'Path is relative: {part_path}')" + ) + bash(node, f"rm '{part_path}/columns.txt'") + + +def get_broken_projections_info(node, table): + return node.query( + f""" + SELECT parent_name, name, errors.name FROM + ( + SELECT parent_name, name, exception_code + FROM system.projection_parts + WHERE table='{table}' + AND database=currentDatabase() + AND is_broken = 1 + ) AS parts_info + INNER JOIN system.errors AS errors + ON parts_info.exception_code = errors.code + ORDER BY parent_name, name + """ + ).strip() + + +def get_projections_info(node, table): + return node.query( + f""" + SELECT parent_name, name, is_broken + FROM system.projection_parts + WHERE table='{table}' 
+ AND active = 1 + AND database=currentDatabase() + ORDER BY parent_name, name + """ + ).strip() + + +def optimize(node, table, final, no_wait): + query = f"OPTIMIZE TABLE {table}" + if final: + query += " FINAL" + if no_wait: + query += " SETTINGS alter_sync=0" + node.query(query) + + +def reattach(node, table): + node.query( + f""" + DETACH TABLE {table}; + ATTACH TABLE {table}; + """ + ) + + +def materialize_projection(node, table, proj): + node.query( + f"ALTER TABLE {table} MATERIALIZE PROJECTION {proj} SETTINGS mutations_sync=2" + ) + + +def check_table_full(node, table): + return node.query( + f"CHECK TABLE {table} SETTINGS check_query_single_value_result = 0;" + ).strip() + + +def random_str(length=6): + alphabet = string.ascii_lowercase + string.digits + return "".join(random.SystemRandom().choice(alphabet) for _ in range(length)) + + +def check(node, table, check_result, expect_broken_part="", expected_error=""): + if expect_broken_part == "proj1": + assert expected_error in node.query_and_get_error( + f"SELECT c FROM '{table}' WHERE d == 12 ORDER BY c" + ) + else: + query_id = node.query( + f"SELECT queryID() FROM (SELECT c FROM '{table}' WHERE d == 12 ORDER BY c)" + ).strip() + node.query("SYSTEM FLUSH LOGS") + res = node.query( + f""" + SELECT query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log + WHERE query_id='{query_id}' AND type='QueryFinish' + """ + ) + if res == "": + res = node.query( + """ + SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log ORDER BY query_start_time_microseconds DESC + """ + ) + print(f"LOG: {res}") + assert False + assert "proj1" in res + + if expect_broken_part == "proj2": + assert expected_error in node.query_and_get_error( + f"SELECT d FROM '{table}' WHERE c == 12 ORDER BY d" + ) + else: + query_id = node.query( + f"SELECT queryID() FROM (SELECT d FROM '{table}' WHERE c == 12 ORDER BY d)" + ).strip() + node.query("SYSTEM FLUSH LOGS") + res = node.query( + f""" + SELECT query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log + WHERE query_id='{query_id}' AND type='QueryFinish' + """ + ) + if res == "": + res = node.query( + """ + SELECT query_id, query, splitByChar('.', arrayJoin(projections))[-1] + FROM system.query_log ORDER BY query_start_time_microseconds DESC + """ + ) + print(f"LOG: {res}") + assert False + assert "proj2" in res + + assert check_result == int(node.query(f"CHECK TABLE {table}")) + + +def test_broken_ignored(cluster): + node = cluster.instances["node"] + + table_name = "test1" + create_table(node, table_name, 1) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + # Break metadata (columns.txt) file of projection 'proj1' + break_projection(node, table_name, "proj1", "all_2_2_0", "metadata") + + # Do select and after "check table" query. + # Select works because it does not read columns.txt. + # But expect check table result as 0. + check(node, table_name, 0) + + # Projection 'proj1' from part all_2_2_0 will now appear in broken parts info + # because it was marked broken during "check table" query. + assert "all_2_2_0\tproj1\tFILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name + ) + + # Check table query will also show a list of parts which have broken projections. 
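+    # (check_table_full runs CHECK TABLE with check_query_single_value_result=0, so the
+    # result lists every part and its status instead of a single 0/1 value.)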
+ assert "all_2_2_0" in check_table_full(node, table_name) + + # Break data file of projection 'proj2' for part all_2_2_0 + break_projection(node, table_name, "proj2", "all_2_2_0", "data") + + # It will not yet appear in broken projections info. + assert "proj2" not in get_broken_projections_info(node, table_name) + + # Select now fails with error "File doesn't exist" + check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST") + + # Projection 'proj2' from part all_2_2_0 will now appear in broken parts info. + assert "all_2_2_0\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + + # Second select works, because projection is now marked as broken. + check(node, table_name, 0) + + # Break data file of projection 'proj2' for part all_3_3_0 + break_projection(node, table_name, "proj2", "all_3_3_0", "data") + + # It will not yet appear in broken projections info. + assert "all_3_3_0" not in get_broken_projections_info(node, table_name) + + insert(node, table_name, 20, 5) + insert(node, table_name, 25, 5) + + # Part all_3_3_0 has 'proj' and 'proj2' projections, but 'proj2' is broken and server does NOT know it yet. + # Parts all_4_4_0 and all_5_5_0 have both non-broken projections. + # So a merge will be create for future part all_3_5_1. + # During merge it will fail to read from 'proj2' of part all_3_3_0 and proj2 will be marked broken. + # Merge will be retried and on second attempt it will succeed. + # The result part all_3_5_1 will have only 1 projection - 'proj', because + # it will skip 'proj2' as it will see that one part does not have it anymore in the set of valid projections. + optimize(node, table_name, 0, 1) + time.sleep(5) + + # table_uuid=node.query(f"SELECT uuid FROM system.tables WHERE table='{table_name}' and database=currentDatabase()").strip() + # assert 0 < int( + # node.query( + # f""" + # SYSTEM FLUSH LOGS; + # SELECT count() FROM system.text_log + # WHERE level='Error' + # AND logger_name='MergeTreeBackgroundExecutor' + # AND message like 'Exception while executing background task %{table_uuid}:all_3_5_1%%Cannot open file%proj2.proj/c.bin%' + # """) + # ) + + assert "all_3_3_0" in get_broken_projections_info(node, table_name) + check(node, table_name, 0) + + +def test_materialize_broken_projection(cluster): + node = cluster.instances["node"] + + table_name = "test2" + create_table(node, table_name, 1) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + break_projection(node, table_name, "proj1", "all_1_1_0", "metadata") + reattach(node, table_name) + + assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + + break_projection(node, table_name, "proj2", "all_1_1_0", "data") + reattach(node, table_name) + + assert "all_1_1_0\tproj2\tFILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name + ) + assert "Part all_1_1_0 has a broken projection proj2" in check_table_full( + node, table_name + ) + + materialize_projection(node, table_name, "proj1") + + assert "has a broken projection" not in check_table_full(node, table_name) + + +def test_broken_ignored_replicated(cluster): + node = cluster.instances["node"] + + table_name = "test3" + table_name2 = "test3_replica" + create_table(node, table_name, 1) + 
+ insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + check(node, table_name, 1) + + create_table(node, table_name2, 2, table_name) + check(node, table_name2, 1) + + break_projection(node, table_name, "proj1", "all_0_0_0", "data") + assert "Part all_0_0_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + + break_part(node, table_name, "all_0_0_0") + node.query(f"SYSTEM SYNC REPLICA {table_name}") + assert "has a broken projection" not in check_table_full(node, table_name) + + +def get_random_string(string_length=8): + alphabet = string.ascii_letters + string.digits + return "".join((random.choice(alphabet) for _ in range(string_length))) + + +def test_broken_projections_in_backups_1(cluster): + node = cluster.instances["node"] + + table_name = "test4" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + node.query("SYSTEM STOP MERGES") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + + break_projection(node, table_name, "proj1", "all_2_2_0", "data") + check(node, table_name, 0, "proj1", "FILE_DOESNT_EXIST") + + assert "all_2_2_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name + ) + + backup_name = f"b1-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + """ + ) + + node.query("SYSTEM STOP MERGES") + + check(node, table_name, 1) + assert "" == get_broken_projections_info(node, table_name) + + +def test_broken_projections_in_backups_2(cluster): + node = cluster.instances["node"] + + table_name = "test5" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + break_projection(node, table_name, "proj2", "all_2_2_0", "part") + check(node, table_name, 0, "proj2", "ErrnoException") + + assert "all_2_2_0\tproj2\tFILE_DOESNT_EXIST" == get_broken_projections_info( + node, table_name + ) + + assert "FILE_DOESNT_EXIST" in node.query_and_get_error( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', 'b2') + """ + ) + + materialize_projection(node, table_name, "proj2") + check(node, table_name, 1) + + backup_name = f"b3-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + 
""" + ) + check(node, table_name, 1) + + +def test_broken_projections_in_backups_3(cluster): + node = cluster.instances["node"] + + table_name = "test6" + create_table(node, table_name, 1, aggressive_merge=False, data_prefix=table_name) + + node.query("SYSTEM STOP MERGES") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + check(node, table_name, 1) + + break_projection(node, table_name, "proj1", "all_1_1_0", "part") + assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + node, table_name + ) + assert "all_1_1_0\tproj1\tFILE_DOESNT_EXIST" == get_broken_projections_info( + node, table_name + ) + + backup_name = f"b4-{get_random_string()}" + assert "BACKUP_CREATED" in node.query( + f""" + set backup_restore_keeper_fault_injection_probability=0.0; + backup table {table_name} to Disk('backups', '{backup_name}') settings check_projection_parts=false, allow_backup_broken_projections=true; + """ + ) + + assert "RESTORED" in node.query( + f""" + drop table {table_name} sync; + set backup_restore_keeper_fault_injection_probability=0.0; + restore table {table_name} from Disk('backups', '{backup_name}'); + """ + ) + + check(node, table_name, 0) + assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" == get_broken_projections_info( + node, table_name + ) diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py b/tests/integration/test_dictionaries_update_and_reload/test.py index 3d96d0b8dd47..648ea847afba 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -281,7 +281,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster): query_and_get_error = instance.query_and_get_error # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. 
- expected_error = "Table test.xypairs does not exist" + expected_error = "UNKNOWN_TABLE" update_error = "Could not update cache dictionary cache_xypairs now" assert expected_error in query_and_get_error( "SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))" diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index a71fdeff302b..4b175d188efc 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -172,7 +172,7 @@ def test_incorrect_usage(cluster): assert "Table is read-only" in result result = node2.query_and_get_error("OPTIMIZE TABLE test0 FINAL") - assert "Only read-only operations are supported" in result + assert "Table is in readonly mode due to static storage" in result node2.query("DROP TABLE test0 SYNC") diff --git a/tests/integration/test_parallel_replicas_invisible_parts/test.py b/tests/integration/test_parallel_replicas_invisible_parts/test.py index 1a95d2a468db..cab3fb46fe92 100644 --- a/tests/integration/test_parallel_replicas_invisible_parts/test.py +++ b/tests/integration/test_parallel_replicas_invisible_parts/test.py @@ -56,6 +56,8 @@ def _create_tables(table_name, table_size, index_granularity): """ ) + nodes[0].query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster_name} {table_name}") + nodes[0].query(f"SYSTEM STOP FETCHES ON CLUSTER {cluster_name} {table_name}") for node in nodes: diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index cec14b0af73a..bf64b57a7bff 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -40,7 +40,7 @@ def system_quota_usage(canonical): canonical_tsv = TSV(canonical) query = ( "SELECT quota_name, quota_key, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows," - "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " + "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time, max_failed_sequential_authentications " "FROM system.quota_usage ORDER BY duration" ) r = TSV(instance.query(query)) @@ -52,7 +52,7 @@ def system_quotas_usage(canonical): canonical_tsv = TSV(canonical) query = ( "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows, " - "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " + "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time, max_failed_sequential_authentications " "FROM system.quotas_usage ORDER BY quota_name, quota_key, duration" ) r = TSV(instance.query(query)) @@ -130,6 +130,7 @@ def test_quota_from_users_xml(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -156,6 +157,7 @@ def test_quota_from_users_xml(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -183,6 +185,7 @@ def test_quota_from_users_xml(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -211,6 +214,7 @@ def test_quota_from_users_xml(): 200, "\\N", "\\N", + "\\N", ] ] ) @@ -239,6 +243,7 @@ def test_quota_from_users_xml(): 400, "\\N", "\\N", + "\\N", ] ] ) @@ -285,6 +290,7 @@ def test_simpliest_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -313,6 +319,7 @@ def test_simpliest_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -351,6 +358,7 @@ 
def test_tracking_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -377,6 +385,7 @@ def test_tracking_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -405,6 +414,7 @@ def test_tracking_quota(): 200, "\\N", "\\N", + "\\N", ] ] ) @@ -433,6 +443,7 @@ def test_tracking_quota(): 400, "\\N", "\\N", + "\\N", ] ] ) @@ -456,7 +467,7 @@ def test_exceed_quota(): ] ) system_quota_limits( - [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]] + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N", "1"]] ) system_quota_usage( [ @@ -481,6 +492,7 @@ def test_exceed_quota(): 0, "\\N", "\\N", + "1", ] ] ) @@ -512,6 +524,7 @@ def test_exceed_quota(): 0, "\\N", "\\N", + "1", ] ] ) @@ -548,6 +561,7 @@ def test_exceed_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -574,6 +588,7 @@ def test_exceed_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -602,6 +617,7 @@ def test_exceed_quota(): 200, "\\N", "\\N", + "\\N", ] ] ) @@ -638,6 +654,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -664,6 +681,7 @@ def test_add_remove_interval(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -700,6 +718,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ], [ "myQuota", @@ -715,6 +734,7 @@ def test_add_remove_interval(): 20000, 120, "\\N", + "\\N", ], ] ) @@ -741,6 +761,7 @@ def test_add_remove_interval(): 0, "\\N", "\\N", + "\\N", ], [ "myQuota", @@ -763,6 +784,7 @@ def test_add_remove_interval(): 0, 20000, 120, + "\\N", ], ] ) @@ -791,6 +813,7 @@ def test_add_remove_interval(): 200, "\\N", "\\N", + "\\N", ], [ "myQuota", @@ -813,6 +836,7 @@ def test_add_remove_interval(): 200, 20000, 120, + "\\N", ], ] ) @@ -849,6 +873,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -875,6 +900,7 @@ def test_add_remove_interval(): 200, "\\N", "\\N", + "\\N", ] ] ) @@ -903,6 +929,7 @@ def test_add_remove_interval(): 400, "\\N", "\\N", + "\\N", ] ] ) @@ -947,6 +974,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -975,6 +1003,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1011,6 +1040,7 @@ def test_add_remove_interval(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1037,6 +1067,7 @@ def test_add_remove_interval(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1073,6 +1104,7 @@ def test_add_remove_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1100,6 +1132,7 @@ def test_add_remove_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1146,6 +1179,7 @@ def test_add_remove_quota(): "\\N", "\\N", "\\N", + "\\N", ], [ "myQuota2", @@ -1161,6 +1195,7 @@ def test_add_remove_quota(): 400000, 60, "\\N", + "3", ], [ "myQuota2", @@ -1176,6 +1211,7 @@ def test_add_remove_quota(): "\\N", 1800, "\\N", + "\\N", ], ] ) @@ -1203,6 +1239,7 @@ def test_add_remove_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1239,6 +1276,7 @@ def test_add_remove_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1266,6 +1304,7 @@ def test_add_remove_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1308,6 +1347,7 @@ def test_add_remove_quota(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1335,6 +1375,7 @@ def test_add_remove_quota(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1371,6 +1412,7 @@ def test_reload_users_xml_by_timer(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1397,7 +1439,7 @@ def test_reload_users_xml_by_timer(): assert_eq_with_retry( instance, "SELECT * FROM system.quota_limits", - [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]], + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N", "1"]], ) @@ -1447,15 +1489,15 @@ def 
test_dcl_introspection(): ) assert ( instance.query("SHOW CREATE QUOTA myQuota2") - == "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" + == "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, failed_sequential_authentications = 3, FOR INTERVAL 1 month MAX execution_time = 1800\n" ) assert ( instance.query("SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" - "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" + "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, failed_sequential_authentications = 3, FOR INTERVAL 1 month MAX execution_time = 1800\n" ) assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1478,13 +1520,13 @@ def test_dcl_management(): == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n" ) assert re.match( - "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1496,15 +1538,15 @@ def test_dcl_management(): == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n" ) assert re.match( - "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n" - "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\\t0\\t\\\\N\n" + "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\n", instance.query("SHOW QUOTA"), ) instance.query("SELECT * from test_table") assert re.match( - 
"qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n" - "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\\t0\\t\\\\N\n" + "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1518,7 +1560,7 @@ def test_dcl_management(): instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1528,13 +1570,13 @@ def test_dcl_management(): == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n" ) assert re.match( - "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) instance.query("SELECT * from test_table") assert re.match( - "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\n", instance.query("SHOW QUOTA"), ) @@ -1579,6 +1621,7 @@ def test_query_inserts(): "\\N", "\\N", "\\N", + "\\N", ] ] ) @@ -1605,6 +1648,7 @@ def test_query_inserts(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1632,6 +1676,7 @@ def test_query_inserts(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1663,6 +1708,7 @@ def test_query_inserts(): 0, "\\N", "\\N", + "\\N", ] ] ) @@ -1691,6 +1737,7 @@ def test_query_inserts(): 0, "\\N", "\\N", + "\\N", ] ] ) diff --git a/tests/integration/test_quota/tiny_limits.xml b/tests/integration/test_quota/tiny_limits.xml index 5821935bb6dd..b5014674f98b 100644 --- a/tests/integration/test_quota/tiny_limits.xml +++ b/tests/integration/test_quota/tiny_limits.xml @@ -12,6 +12,7 @@ 1 1 1 + 1 diff --git a/tests/integration/test_quota/two_quotas.xml b/tests/integration/test_quota/two_quotas.xml index 13872286dc60..e3b91b1bf434 100644 --- a/tests/integration/test_quota/two_quotas.xml +++ b/tests/integration/test_quota/two_quotas.xml @@ -18,6 +18,7 @@ 400000 400000 60 + 3 2629746 diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 9846e88a3c0e..b47f86a843d5 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -1398,3 +1398,47 @@ def restart_verify_not_readonly(): main_node.query("DROP DATABASE modify_comment_db SYNC") dummy_node.query("DROP DATABASE modify_comment_db SYNC") + + +def test_table_metadata_corruption(started_cluster): + main_node.query("DROP DATABASE IF EXISTS 
table_metadata_corruption") + dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + + main_node.query( + "CREATE DATABASE table_metadata_corruption ENGINE = Replicated('/clickhouse/databases/table_metadata_corruption', 'shard1', 'replica1');" + ) + dummy_node.query( + "CREATE DATABASE table_metadata_corruption ENGINE = Replicated('/clickhouse/databases/table_metadata_corruption', 'shard1', 'replica2');" + ) + + create_some_tables("table_metadata_corruption") + + main_node.query("SYSTEM SYNC DATABASE REPLICA table_metadata_corruption") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA table_metadata_corruption") + + # Server should handle this by throwing an exception during table loading, which should lead to server shutdown + corrupt = "sed --follow-symlinks -i 's/ReplicatedMergeTree/CorruptedMergeTree/' /var/lib/clickhouse/metadata/table_metadata_corruption/rmt1.sql" + + print(f"Corrupting metadata using `{corrupt}`") + dummy_node.stop_clickhouse(kill=True) + dummy_node.exec_in_container(["bash", "-c", corrupt]) + + query = ( + "SELECT name, uuid, create_table_query FROM system.tables WHERE database='table_metadata_corruption' AND name NOT LIKE '.inner_id.%' " + "ORDER BY name SETTINGS show_table_uuid_in_table_create_query_if_not_nil=1" + ) + expected = main_node.query(query) + + # We expect clickhouse server to shutdown without LOGICAL_ERRORs or deadlocks + dummy_node.start_clickhouse(expected_to_fail=True) + assert not dummy_node.contains_in_log("LOGICAL_ERROR") + + fix_corrupt = "sed --follow-symlinks -i 's/CorruptedMergeTree/ReplicatedMergeTree/' /var/lib/clickhouse/metadata/table_metadata_corruption/rmt1.sql" + print(f"Fix corrupted metadata using `{fix_corrupt}`") + dummy_node.exec_in_container(["bash", "-c", fix_corrupt]) + + dummy_node.start_clickhouse() + assert_eq_with_retry(dummy_node, query, expected) + + main_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 621d2b89fc5e..25f0b58e0f5f 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -26,8 +26,14 @@ from datetime import datetime from pyspark.sql.functions import monotonically_increasing_id, row_number from pyspark.sql.window import Window +from minio.deleteobjects import DeleteObject -from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents +from helpers.s3_tools import ( + prepare_s3_bucket, + upload_directory, + get_file_contents, + list_s3_objects, +) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -55,6 +61,7 @@ def started_cluster(): main_configs=["configs/config.d/named_collections.xml"], user_configs=["configs/users.d/users.xml"], with_minio=True, + stay_alive=True, ) logging.info("Starting cluster...") @@ -111,12 +118,12 @@ def get_delta_metadata(delta_metadata_file): return combined_json -def create_delta_table(node, table_name): +def create_delta_table(node, table_name, bucket="root"): node.query( f""" DROP TABLE IF EXISTS {table_name}; CREATE TABLE {table_name} - ENGINE=DeltaLake(s3, filename = '{table_name}/')""" + ENGINE=DeltaLake(s3, filename = '{table_name}/', url = 'http://minio1:9001/{bucket}/')""" ) @@ -401,3 +408,106 @@ def test_types(started_cluster): ["e", "Nullable(Bool)"], ] ) + + +def test_restart_broken(started_cluster): + instance = started_cluster.instances["node1"] + spark = 
started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken" + TABLE_NAME = "test_restart_broken" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}") + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + create_delta_table(instance, TABLE_NAME, bucket=bucket) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + +def test_restart_broken_table_function(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken2" + TABLE_NAME = "test_restart_broken_table_function" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}") + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} + AS deltaLake(s3, filename = '{TABLE_NAME}/', url = 'http://minio1:9001/{bucket}/')""" + ) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index 30962dc619c5..d9dee0541b00 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -27,8 +27,14 @@ from pyspark.sql.functions import monotonically_increasing_id, row_number from pyspark.sql.window import Window from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 +from minio.deleteobjects import DeleteObject -from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents +from helpers.s3_tools import ( + prepare_s3_bucket, + upload_directory, + get_file_contents, + list_s3_objects, +) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -61,6 +67,7 @@ def started_cluster(): main_configs=["configs/config.d/named_collections.xml"], user_configs=["configs/users.d/users.xml"], with_minio=True, + 
stay_alive=True, ) logging.info("Starting cluster...") @@ -135,12 +142,12 @@ def generate_data(spark, start, end): return df -def create_iceberg_table(node, table_name, format="Parquet"): +def create_iceberg_table(node, table_name, format="Parquet", bucket="root"): node.query( f""" DROP TABLE IF EXISTS {table_name}; CREATE TABLE {table_name} - ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format})""" + ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')""" ) @@ -558,3 +565,56 @@ def test_metadata_file_format_with_uuid(started_cluster, format_version): create_iceberg_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 500 + + +def test_restart_broken(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken2" + TABLE_NAME = "test_restart_broken_table_function" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_iceberg_from_file(spark, parquet_data_path, TABLE_NAME, format_version="1") + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + create_iceberg_table(instance, TABLE_NAME, bucket=bucket) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 6924f2e15089..b778e9fb5569 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -3538,3 +3538,14 @@ def test_rabbitmq_handle_error_mode_stream(rabbitmq_cluster): expected = "".join(sorted(expected)) assert broken_messages == expected + + +def test_attach_broken_table(rabbitmq_cluster): + instance.query( + "ATTACH TABLE rabbit_queue UUID '2d1cdf1a-f060-4a61-a7c9-5b59e59992c6' (`payload` String) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'nonexisting:5671', rabbitmq_format = 'JSONEachRow', rabbitmq_username = 'test', rabbitmq_password = 'test'" + ) + + error = instance.query_and_get_error("SELECT * FROM rabbit_queue") + assert "CANNOT_CONNECT_RABBITMQ" in error + error = instance.query_and_get_error("INSERT INTO rabbit_queue VALUES ('test')") + assert "CANNOT_CONNECT_RABBITMQ" in error diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 7d40060fec6c..810c4f29e9dd 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -89,6 +89,7 @@ def started_cluster(): "configs/zookeeper.xml", "configs/s3queue_log.xml", ], + stay_alive=True, ) cluster.add_instance( 
"instance2", @@ -98,6 +99,7 @@ def started_cluster(): main_configs=[ "configs/s3queue_log.xml", ], + stay_alive=True, ) logging.info("Starting cluster...") @@ -165,6 +167,7 @@ def create_table( file_format="CSV", auth=DEFAULT_AUTH, bucket=None, + expect_error=False, ): auth_params = ",".join(auth) bucket = started_cluster.minio_bucket if bucket is None else bucket @@ -184,6 +187,10 @@ def create_table( ENGINE = S3Queue('{url}', {auth_params}, {file_format}) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ + + if expect_error: + return node.query_and_get_error(create_query) + node.query(create_query) @@ -533,10 +540,7 @@ def test_multiple_tables_meta_mismatch(started_cluster): }, ) except QueryRuntimeException as e: - assert ( - "Metadata with the same `s3queue_zookeeper_path` was already created but with different settings" - in str(e) - ) + assert "Existing table metadata in ZooKeeper differs in engine mode" in str(e) failed = True assert failed is True @@ -960,3 +964,425 @@ def wait_all_processed(files_num): s3_clients_after = get_created_s3_clients_count() assert s3_clients_before == s3_clients_after + + +@pytest.mark.parametrize("mode", ["unordered", "ordered"]) +def test_processing_threads(started_cluster, mode): + node = started_cluster.instances["instance"] + table_name = f"processing_threads_{mode}" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 300 + processing_threads = 32 + + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": processing_threads, + }, + ) + create_mv(node, table_name, dst_table_name) + + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, row_num=1 + ) + + def get_count(table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(100): + if (get_count(f"{dst_table_name}")) == files_to_generate: + break + time.sleep(1) + + assert get_count(dst_table_name) == files_to_generate + + res = [ + list(map(int, l.split())) + for l in node.query( + f"SELECT column1, column2, column3 FROM {dst_table_name}" + ).splitlines() + ] + assert {tuple(v) for v in res} == set([tuple(i) for i in total_values]) + + if mode == "ordered": + zk = started_cluster.get_kazoo_client("zoo1") + processed_nodes = zk.get_children(f"{keeper_path}/processed/") + assert len(processed_nodes) == processing_threads + + +@pytest.mark.parametrize( + "mode, processing_threads", + [ + pytest.param("unordered", 1), + pytest.param("unordered", 8), + pytest.param("ordered", 1), + pytest.param("ordered", 8), + ], +) +def test_shards(started_cluster, mode, processing_threads): + node = started_cluster.instances["instance"] + table_name = f"test_shards_{mode}_{processing_threads}" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 300 + shards_num = 3 + + for i in range(shards_num): + table = f"{table_name}_{i + 1}" + dst_table = f"{dst_table_name}_{i + 1}" + create_table( + started_cluster, + node, + table, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": processing_threads, + "s3queue_total_shards_num": shards_num, + }, + ) + create_mv(node, table, dst_table) + + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, 
row_num=1 + ) + + def get_count(table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(100): + if ( + get_count(f"{dst_table_name}_1") + + get_count(f"{dst_table_name}_2") + + get_count(f"{dst_table_name}_3") + ) == files_to_generate: + break + time.sleep(1) + + if ( + get_count(f"{dst_table_name}_1") + + get_count(f"{dst_table_name}_2") + + get_count(f"{dst_table_name}_3") + ) != files_to_generate: + info = node.query( + f"SELECT * FROM system.s3queue WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" + ) + logging.debug(info) + assert False + + res1 = [ + list(map(int, l.split())) + for l in node.query( + f"SELECT column1, column2, column3 FROM {dst_table_name}_1" + ).splitlines() + ] + res2 = [ + list(map(int, l.split())) + for l in node.query( + f"SELECT column1, column2, column3 FROM {dst_table_name}_2" + ).splitlines() + ] + res3 = [ + list(map(int, l.split())) + for l in node.query( + f"SELECT column1, column2, column3 FROM {dst_table_name}_3" + ).splitlines() + ] + assert {tuple(v) for v in res1 + res2 + res3} == set( + [tuple(i) for i in total_values] + ) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count(f"{dst_table_name}_1") + + get_count(f"{dst_table_name}_2") + + get_count(f"{dst_table_name}_3") + ) == files_to_generate + + if mode == "ordered": + zk = started_cluster.get_kazoo_client("zoo1") + processed_nodes = zk.get_children(f"{keeper_path}/processed/") + assert len(processed_nodes) == shards_num * processing_threads + shard_nodes = zk.get_children(f"{keeper_path}/shards/") + assert len(shard_nodes) == shards_num + + +@pytest.mark.parametrize( + "mode, processing_threads", + [ + pytest.param("unordered", 1), + pytest.param("unordered", 8), + pytest.param("ordered", 1), + pytest.param("ordered", 8), + ], +) +def test_shards_distributed(started_cluster, mode, processing_threads): + node = started_cluster.instances["instance"] + node_2 = started_cluster.instances["instance2"] + table_name = f"test_shards_distributed_{mode}_{processing_threads}" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 300 + row_num = 50 + total_rows = row_num * files_to_generate + shards_num = 2 + + i = 0 + for instance in [node, node_2]: + create_table( + started_cluster, + instance, + table_name, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": processing_threads, + "s3queue_total_shards_num": shards_num, + }, + ) + i += 1 + + for instance in [node, node_2]: + create_mv(instance, table_name, dst_table_name) + + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, row_num=row_num + ) + + def get_count(node, table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(150): + if ( + get_count(node, dst_table_name) + get_count(node_2, dst_table_name) + ) == total_rows: + break + time.sleep(1) + + if ( + get_count(node, dst_table_name) + get_count(node_2, dst_table_name) + ) != total_rows: + info = node.query( + f"SELECT * FROM system.s3queue WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" + ) + logging.debug(info) + assert False + + get_query = f"SELECT column1, column2, column3 FROM {dst_table_name}" + res1 = [list(map(int, l.split())) for l in run_query(node, get_query).splitlines()] + res2 = [ + list(map(int, l.split())) for l 
in run_query(node_2, get_query).splitlines() + ] + + assert len(res1) + len(res2) == total_rows + + # Checking that all engines have made progress + assert len(res1) > 0 + assert len(res2) > 0 + + assert {tuple(v) for v in res1 + res2} == set([tuple(i) for i in total_values]) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count(node, dst_table_name) + get_count(node_2, dst_table_name) + ) == total_rows + + if mode == "ordered": + zk = started_cluster.get_kazoo_client("zoo1") + processed_nodes = zk.get_children(f"{keeper_path}/processed/") + assert len(processed_nodes) == shards_num * processing_threads + shard_nodes = zk.get_children(f"{keeper_path}/shards/") + assert len(shard_nodes) == shards_num + + node.restart_clickhouse() + time.sleep(10) + assert ( + get_count(node, dst_table_name) + get_count(node_2, dst_table_name) + ) == total_rows + + +def test_settings_check(started_cluster): + node = started_cluster.instances["instance"] + node_2 = started_cluster.instances["instance2"] + table_name = f"test_settings_check" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + mode = "ordered" + + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": 5, + "s3queue_total_shards_num": 2, + }, + ) + + assert ( + "Existing table metadata in ZooKeeper differs in s3queue_total_shards_num setting. Stored in ZooKeeper: 2, local: 3" + in create_table( + started_cluster, + node_2, + table_name, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": 5, + "s3queue_total_shards_num": 3, + }, + expect_error=True, + ) + ) + + assert ( + "Existing table metadata in ZooKeeper differs in s3queue_processing_threads_num setting. 
Stored in ZooKeeper: 5, local: 2" + in create_table( + started_cluster, + node_2, + table_name, + mode, + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": 2, + "s3queue_total_shards_num": 2, + }, + expect_error=True, + ) + ) + + assert "s3queue_current_shard_num = 0" in node.query( + f"SHOW CREATE TABLE {table_name}" + ) + + node.restart_clickhouse() + + assert "s3queue_current_shard_num = 0" in node.query( + f"SHOW CREATE TABLE {table_name}" + ) + + node.query(f"DROP TABLE {table_name} SYNC") + + +@pytest.mark.parametrize("processing_threads", [1, 5]) +def test_processed_file_setting(started_cluster, processing_threads): + node = started_cluster.instances["instance"] + table_name = f"test_processed_file_setting_{processing_threads}" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + create_table( + started_cluster, + node, + table_name, + "ordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": processing_threads, + "s3queue_last_processed_path": f"{files_path}/test_5.csv", + }, + ) + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + create_mv(node, table_name, dst_table_name) + + def get_count(): + return int(node.query(f"SELECT count() FROM {dst_table_name}")) + + expected_rows = 4 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + + assert expected_rows == get_count() + + node.restart_clickhouse() + time.sleep(10) + + expected_rows = 4 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + + assert expected_rows == get_count() + + +@pytest.mark.parametrize("processing_threads", [1, 5]) +def test_processed_file_setting_distributed(started_cluster, processing_threads): + node = started_cluster.instances["instance"] + node_2 = started_cluster.instances["instance2"] + table_name = f"test_processed_file_setting_distributed_{processing_threads}" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + for instance in [node, node_2]: + create_table( + started_cluster, + instance, + table_name, + "ordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": processing_threads, + "s3queue_last_processed_path": f"{files_path}/test_5.csv", + "s3queue_total_shards_num": 2, + }, + ) + + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + for instance in [node, node_2]: + create_mv(instance, table_name, dst_table_name) + + def get_count(): + query = f"SELECT count() FROM {dst_table_name}" + return int(node.query(query)) + int(node_2.query(query)) + + expected_rows = 4 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() + + for instance in [node, node_2]: + instance.restart_clickhouse() + + time.sleep(10) + expected_rows = 4 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() diff --git a/tests/performance/group_array_sorted.xml b/tests/performance/group_array_sorted.xml new file mode 100644 index 000000000000..d5887998341c --- /dev/null +++ b/tests/performance/group_array_sorted.xml @@ -0,0 +1,31 @@ + + + 30000000000 + + 
+ + + millions + + 50 + 100 + + + + window + + 10 + 1000 + 10000 + + + + + create table sorted_{millions}m engine MergeTree order by k as select number % 100 k, rand() v from numbers_mt(1000000 * {millions}) + optimize table sorted_{millions}m final + + select k, groupArraySorted({window})(v) from sorted_{millions}m group by k format Null + select k % 10 kk, groupArraySorted({window})(v) from sorted_{millions}m group by kk format Null + + drop table if exists sorted_{millions}m + diff --git a/tests/performance/if.xml b/tests/performance/if.xml index f4d0e8f9773c..0f1dca91ac25 100644 --- a/tests/performance/if.xml +++ b/tests/performance/if.xml @@ -1,12 +1,24 @@ + 42949673, zero + 1, zero + 2)) ]]> + + + + + + + + + with rand32() % 2 as x select if(x, materialize(1.234), materialize(2.456)) from numbers(100000000) format Null + with rand32() % 2 as x, 1.234::Decimal64(3) as a, 2.456::Decimal64(3) as b select if(x, materialize(a), materialize(b)) from numbers(100000000) format Null - 42949673, zero + 1, zero + 2)) ]]> - - - - - - - - + + with rand32() % 2 as x, 1::Int8 as a, -1::Int8 as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Int64 as a, -1::Int64 as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Int32 as a, -1::Int32 as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Decimal32(3) as a, -1::Decimal32(3) as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Decimal64(3) as a, -1::Decimal64(3) as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Decimal128(3) as a, -1::Decimal128(3) as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Decimal256(3) as a, -1::Decimal256(3) as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Int128 as a, -1::Int128 as b select if(x, a, b) from numbers(100000000) format Null + with rand32() % 2 as x, 1::Int256 as a, -1::Int256 as b select if(x, a, b) from numbers(100000000) format Null diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 2bfcf2c91575..1e879607dac7 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -4,8 +4,9 @@ element_type - UInt8 - Int16 + + + Int32 Int64 Float32 @@ -46,7 +47,7 @@ rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) ] AS v FROM system.numbers - LIMIT 8000000 + LIMIT 5000000 ); diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 7dc522dca7ad..39c6854fbf91 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -444,10 +444,10 @@ create view query_logs as create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv') as with ( - -- sumMapState with the list of all keys with '-0.' values. Negative zero is because - -- sumMap removes keys with positive zeros. 
+ -- sumMapState with the list of all keys with nullable '0' values because sumMap removes keys with default values + -- and 0::Nullable != NULL with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names - select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))]) + select arrayReduce('sumMapState', [(all_names, arrayMap(x->0::Nullable(Float64), all_names))]) ) as all_metrics select test, query_index, version, query_id, (finalizeAggregation( @@ -456,17 +456,15 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- all_metrics, arrayReduce('sumMapState', [(mapKeys(ProfileEvents), - arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))] + arrayMap(x->toNullable(toFloat64(x)), mapValues(ProfileEvents)))] ), arrayReduce('sumMapState', [( ['client_time', 'server_time', 'memory_usage'], - arrayMap(x->if(x != 0., x, -0.), [ - toFloat64(query_runs.time), - toFloat64(query_duration_ms / 1000.), - toFloat64(memory_usage)]))]) + [toNullable(toFloat64(query_runs.time)), toNullable(toFloat64(query_duration_ms / 1000.)), toNullable(toFloat64(memory_usage))] + )]) ] )) as metrics_tuple).1 metric_names, - metrics_tuple.2 metric_values + arrayMap(x->if(isNaN(x),0,x), metrics_tuple.2) metric_values from query_logs right join query_runs on query_logs.query_id = query_runs.query_id diff --git a/tests/performance/scripts/entrypoint.sh b/tests/performance/scripts/entrypoint.sh index ec7e4d96dde4..0c3bfa550f4e 100755 --- a/tests/performance/scripts/entrypoint.sh +++ b/tests/performance/scripts/entrypoint.sh @@ -118,8 +118,8 @@ then # far in the future and have unrelated test changes. base=$(git -C right/ch merge-base pr origin/master) git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt - git -C right/ch diff --name-only "$base" pr -- tests/performance | tee changed-test-definitions.txt - git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt + git -C right/ch diff --name-only "$base" pr -- tests/performance/*.xml | tee changed-test-definitions.txt + git -C right/ch diff --name-only "$base" pr -- :!tests/performance/*.xml :!docker/test/performance-comparison | tee other-changed-files.txt fi # Set python output encoding so that we can print queries with non-ASCII letters. 
diff --git a/tests/queries/0_stateless/00273_quantiles.sql b/tests/queries/0_stateless/00273_quantiles.sql index eba5e7729975..791ced6bc5d0 100644 --- a/tests/queries/0_stateless/00273_quantiles.sql +++ b/tests/queries/0_stateless/00273_quantiles.sql @@ -2,13 +2,13 @@ SELECT quantiles(0.5)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001 SELECT quantilesExact(0.5)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesTDigest(0.5)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0.5)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); -SELECT arrayMap(a -> round(a, 2), quantilesDDSketch(0.01, 0.5)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +SELECT arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.5)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantiles(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesExact(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesTDigest(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); -SELECT arrayMap(a -> round(a, 2), quantilesDDSketch(0.01, 0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +SELECT arrayMap(a -> round(a, 2), quantilesDD(0.01, 0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); -- The result slightly differs but it's ok since `quantilesDeterministic` is an approximate function. 
SET max_bytes_before_external_group_by = 0; diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index 308bbf790241..456c9fc56bba 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -57,10 +57,10 @@ q2_01297 local_directory [] [5259492] 0 ['r1_01297','u1_01297'] [] q3_01297 local_directory ['client_key','user_name'] [5259492,15778476] 0 [] [] q4_01297 local_directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits -q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 \N -q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N \N -q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N \N -q4_01297 604800 0 \N \N \N \N \N \N \N \N \N \N +q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 \N \N +q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N \N \N +q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N \N \N +q4_01297 604800 0 \N \N \N \N \N \N \N \N \N \N \N -- query_selects query_inserts CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297 CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297 diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference index 9459d4ba2a0d..6de0a5be0a5e 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference @@ -1 +1,24 @@ 1.1 +SELECT dictGet(\'dictdb_01376.dict_exists\', \'value\', toUInt64(1)) AS val +FROM numbers(2) +GROUP BY toUInt64(1) +QUERY id: 0 + PROJECTION COLUMNS + val Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: dictGet, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 3 + CONSTANT id: 4, constant_value: \'dictdb_01376.dict_exists\', constant_value_type: String + CONSTANT id: 5, constant_value: \'value\', constant_value_type: String + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql index 29ffcb46fbfd..5a070b443aa4 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql @@ -23,7 +23,7 @@ INSERT INTO dictdb_01376.table_for_dict VALUES (1, 1.1); CREATE DICTIONARY IF NOT EXISTS dictdb_01376.dict_exists ( key_column UInt64, - value Float64 DEFAULT 77.77 + value Float64 DEFAULT 77.77 INJECTIVE ) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb_01376')) @@ -32,6 +32,14 @@ LAYOUT(FLAT()); SELECT dictGet('dictdb_01376.dict_exists', 'value', toUInt64(1)) as val FROM numbers(2) GROUP BY val; +EXPLAIN SYNTAX SELECT dictGet('dictdb_01376.dict_exists', 'value', toUInt64(1)) as val FROM numbers(2) GROUP BY val; + +EXPLAIN QUERY TREE +SELECT 
dictGet('dictdb_01376.dict_exists', 'value', number) as val +FROM numbers(2) +GROUP BY val +SETTINGS allow_experimental_analyzer = 1; + DROP DICTIONARY dictdb_01376.dict_exists; DROP TABLE dictdb_01376.table_for_dict; DROP DATABASE dictdb_01376; diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index 48523ccd9a9d..606d4cc37b67 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -5,7 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=600000&async_insert_max_query_number=3&async_insert_deduplicate=1" +# With adaptive timeout enabled, the asynchronous queue can be flushed synchronously, depending on the time elapsed since the last insert. +# This may result in test flakiness. +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=600000&async_insert_max_query_number=3&async_insert_deduplicate=1&async_insert_use_adaptive_busy_timeout=0" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index 437df01d4454..2f7e15f201ac 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -11,7 +11,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" 2,"b" @@ -22,7 +23,8 @@ function insert1() function insert2() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } @@ -30,28 +32,32 @@ function insert2() function insert3() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" done } function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" done } function select2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" done } function truncate1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do sleep 0.1 ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE async_inserts" done @@ -70,14 +76,14 @@ export -f select2 export -f truncate1 for _ in {1..5}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash 
-c select1 & -timeout $TIMEOUT bash -c select2 & -timeout $TIMEOUT bash -c truncate1 & +select1 $TIMEOUT & +select2 $TIMEOUT & +truncate1 $TIMEOUT & wait echo "OK" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index e89d589857e2..1b758f4132b9 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -686,6 +686,9 @@ CREATE TABLE system.projection_parts `rows_where_ttl_info.expression` Array(String), `rows_where_ttl_info.min` Array(DateTime), `rows_where_ttl_info.max` Array(DateTime), + `is_broken` UInt8, + `exception_code` Int32, + `exception` String, `bytes` UInt64 ALIAS bytes_on_disk, `marks_size` UInt64 ALIAS marks_bytes, `part_name` String ALIAS name @@ -762,7 +765,8 @@ CREATE TABLE system.quota_limits `max_read_rows` Nullable(UInt64), `max_read_bytes` Nullable(UInt64), `max_execution_time` Nullable(Float64), - `max_written_bytes` Nullable(UInt64) + `max_written_bytes` Nullable(UInt64), + `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotaLimits COMMENT 'SYSTEM TABLE is built on the fly.' @@ -792,7 +796,9 @@ CREATE TABLE system.quota_usage `execution_time` Nullable(Float64), `max_execution_time` Nullable(Float64), `written_bytes` Nullable(UInt64), - `max_written_bytes` Nullable(UInt64) + `max_written_bytes` Nullable(UInt64), + `failed_sequential_authentications` Nullable(UInt64), + `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotaUsage COMMENT 'SYSTEM TABLE is built on the fly.' @@ -836,7 +842,9 @@ CREATE TABLE system.quotas_usage `execution_time` Nullable(Float64), `max_execution_time` Nullable(Float64), `written_bytes` Nullable(UInt64), - `max_written_bytes` Nullable(UInt64) + `max_written_bytes` Nullable(UInt64), + `failed_sequential_authentications` Nullable(UInt64), + `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotasUsage COMMENT 'SYSTEM TABLE is built on the fly.' diff --git a/tests/queries/0_stateless/02134_async_inserts_formats.sh b/tests/queries/0_stateless/02134_async_inserts_formats.sh index 631809e5dc2e..89705bf64156 100755 --- a/tests/queries/0_stateless/02134_async_inserts_formats.sh +++ b/tests/queries/0_stateless/02134_async_inserts_formats.sh @@ -4,7 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" +# With adaptive timeout enabled, the asynchronous queue can be flushed synchronously, depending on the time elapsed since the last insert. +# This may result in test flakiness. 
+url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_use_adaptive_busy_timeout=0" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql index fbd6040c21c1..fb7f806c46be 100644 --- a/tests/queries/0_stateless/02252_jit_profile_events.sql +++ b/tests/queries/0_stateless/02252_jit_profile_events.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-asan, no-msan, no-cpu-aarch64 +-- Tags: no-fasttest, no-parallel, no-cpu-aarch64, no-msan SET compile_expressions = 1; SET min_count_to_compile_expression = 0; diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.sql b/tests/queries/0_stateless/02294_decimal_second_errors.sql index b6059dc3d48c..52d2279be418 100644 --- a/tests/queries/0_stateless/02294_decimal_second_errors.sql +++ b/tests/queries/0_stateless/02294_decimal_second_errors.sql @@ -4,7 +4,7 @@ SELECT 1 SETTINGS max_execution_time=-Infinity; -- { clientError 72 }; -- Ok values SELECT 1 SETTINGS max_execution_time=-0.5; -SELECT 1 SETTINGS max_execution_time=0.5; +SELECT 1 SETTINGS max_execution_time=5.5; SELECT 1 SETTINGS max_execution_time=-1; SELECT 1 SETTINGS max_execution_time=0.0; SELECT 1 SETTINGS max_execution_time=-0.0; diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference index 53a0df682b2a..9f1c026f8891 100644 --- a/tests/queries/0_stateless/02303_query_kind.reference +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -20,17 +20,17 @@ clickhouse-client --allow_experimental_analyzer=1 --query_kind initial_query -q Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 Expression ((Before GROUP BY + Change column names to column identifiers)) - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 ReadFromStorage (SystemOne) Header: dummy UInt8 clickhouse-local --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 Expression ((Before GROUP BY + Change column names to column identifiers)) - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 ReadFromStorage (SystemOne) Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.reference b/tests/queries/0_stateless/02346_inverted_index_bug47393.reference similarity index 100% rename from tests/queries/0_stateless/02346_inverted_index_mutation.reference rename to tests/queries/0_stateless/02346_inverted_index_bug47393.reference diff --git a/tests/queries/0_stateless/02346_inverted_index_bug47393.sql b/tests/queries/0_stateless/02346_inverted_index_bug47393.sql new file mode 100644 index 000000000000..166e051b1205 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_bug47393.sql @@ -0,0 +1,25 @@ +SET allow_experimental_inverted_index = 1; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab +( + id UInt64, + str String, + INDEX idx str TYPE inverted(3) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; + +INSERT 
INTO tab (str) VALUES ('I am inverted'); + +SELECT data_version FROM system.parts WHERE database = currentDatabase() AND table = 'tab' AND active = 1; + +-- update column synchronously +ALTER TABLE tab UPDATE str = 'I am not inverted' WHERE 1 SETTINGS mutations_sync=1; + +SELECT data_version FROM system.parts WHERE database = currentDatabase() AND table = 'tab' AND active = 1; + +SELECT str FROM tab WHERE str LIKE '%inverted%' SETTINGS force_data_skipping_indices = 'idx'; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_bug52019.reference b/tests/queries/0_stateless/02346_inverted_index_bug52019.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql b/tests/queries/0_stateless/02346_inverted_index_bug52019.sql similarity index 62% rename from tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql rename to tests/queries/0_stateless/02346_inverted_index_bug52019.sql index 7ba122a71555..c61e17d9ceae 100644 --- a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql +++ b/tests/queries/0_stateless/02346_inverted_index_bug52019.sql @@ -1,9 +1,20 @@ --- https://github.com/ClickHouse/ClickHouse/issues/52019 -DROP TABLE IF EXISTS tab; +-- Test for Bug 52019: Undefined behavior + SET allow_experimental_inverted_index=1; -CREATE TABLE tab (`k` UInt64, `s` Map(String, String), INDEX af mapKeys(s) TYPE inverted(2) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + k UInt64, + s Map(String, String), + INDEX idx mapKeys(s) TYPE inverted(2) GRANULARITY 1) +ENGINE = MergeTree +ORDER BY k +SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; + INSERT INTO tab (k) VALUES (0); SELECT * FROM tab PREWHERE (s[NULL]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; SELECT * FROM tab PREWHERE (s[1]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT * FROM tab PREWHERE (s['foo']) = 'Click a03' SETTINGS allow_experimental_analyzer=1; + DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_bug59039.reference b/tests/queries/0_stateless/02346_inverted_index_bug59039.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02346_inverted_index_bug59039.sql b/tests/queries/0_stateless/02346_inverted_index_bug59039.sql new file mode 100644 index 000000000000..0ef0cb0c7337 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_bug59039.sql @@ -0,0 +1,20 @@ +-- This is supposed to test that DROP INDEX removes all index related files. Can't test this directly but at least run the statement and +-- check that no bad things happen. 
+ +SET allow_experimental_inverted_index = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + id UInt64, + doc String, + INDEX text_idx doc TYPE inverted +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi', min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0; + +ALTER TABLE tab DROP INDEX text_idx; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_detach_attach.reference b/tests/queries/0_stateless/02346_inverted_index_detach_attach.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.sql b/tests/queries/0_stateless/02346_inverted_index_detach_attach.sql similarity index 75% rename from tests/queries/0_stateless/02696_inverted_idx_checksums.sql rename to tests/queries/0_stateless/02346_inverted_index_detach_attach.sql index 92ffa7a61969..762d78922fec 100644 --- a/tests/queries/0_stateless/02696_inverted_idx_checksums.sql +++ b/tests/queries/0_stateless/02346_inverted_index_detach_attach.sql @@ -2,8 +2,8 @@ SET allow_experimental_inverted_index = 1; CREATE TABLE t ( - `key` UInt64, - `str` String, + key UInt64, + str String, INDEX inv_idx str TYPE inverted(0) GRANULARITY 1 ) ENGINE = MergeTree @@ -13,4 +13,4 @@ INSERT INTO t VALUES (1, 'Hello World'); ALTER TABLE t DETACH PART 'all_1_1_0'; -ALTER TABLE t ATTACH PART 'all_1_1_0'; \ No newline at end of file +ALTER TABLE t ATTACH PART 'all_1_1_0'; diff --git a/tests/queries/0_stateless/02346_inverted_index_experimental_flag.reference b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02895_forbid_create_inverted_index.sql b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql similarity index 72% rename from tests/queries/0_stateless/02895_forbid_create_inverted_index.sql rename to tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql index dc92d9198fb8..bf89265372ee 100644 --- a/tests/queries/0_stateless/02895_forbid_create_inverted_index.sql +++ b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql @@ -1,4 +1,7 @@ +-- Tests that the inverted index can only be used when allow_experimental_inverted_index = 1. 
+ SET allow_experimental_inverted_index = 0; + DROP TABLE IF EXISTS tab; CREATE TABLE tab ( diff --git a/tests/queries/0_stateless/02951_inverted_index_support_match.reference b/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference similarity index 100% rename from tests/queries/0_stateless/02951_inverted_index_support_match.reference rename to tests/queries/0_stateless/02346_inverted_index_match_predicate.reference diff --git a/tests/queries/0_stateless/02951_inverted_index_support_match.sql b/tests/queries/0_stateless/02346_inverted_index_match_predicate.sql similarity index 97% rename from tests/queries/0_stateless/02951_inverted_index_support_match.sql rename to tests/queries/0_stateless/02346_inverted_index_match_predicate.sql index 9ebf10412d9c..99405c0acf22 100644 --- a/tests/queries/0_stateless/02951_inverted_index_support_match.sql +++ b/tests/queries/0_stateless/02346_inverted_index_match_predicate.sql @@ -1,3 +1,5 @@ +-- Tests that match() utilizes the inverted index + SET allow_experimental_inverted_index = true; DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.sql b/tests/queries/0_stateless/02346_inverted_index_mutation.sql deleted file mode 100644 index 83b73807cd7f..000000000000 --- a/tests/queries/0_stateless/02346_inverted_index_mutation.sql +++ /dev/null @@ -1,25 +0,0 @@ -SET allow_experimental_inverted_index=1; - -DROP TABLE IF EXISTS t; -CREATE TABLE t -( - `timestamp` UInt64, - `s` String, - INDEX idx s TYPE inverted(3) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; - -INSERT INTO t (s) VALUES ('I am inverted'); - -SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; - --- do update column synchronously -ALTER TABLE t UPDATE s='I am not inverted' WHERE 1 SETTINGS mutations_sync=1; - -SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; - -SELECT s FROM t WHERE s LIKE '%inverted%' SETTINGS force_data_skipping_indices='idx'; - -DROP TABLE t; diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_inverted_index_search.reference similarity index 100% rename from tests/queries/0_stateless/02346_full_text_search.reference rename to tests/queries/0_stateless/02346_inverted_index_search.reference diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_inverted_index_search.sql similarity index 100% rename from tests/queries/0_stateless/02346_full_text_search.sql rename to tests/queries/0_stateless/02346_inverted_index_search.sql diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference deleted file mode 100644 index 35136b5ff425..000000000000 --- a/tests/queries/0_stateless/02366_kql_create_table.reference +++ /dev/null @@ -1,4 +0,0 @@ --- test create table -- -Theodore -Diaz -Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql deleted file mode 100644 index b266679b06aa..000000000000 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ /dev/null @@ -1,29 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES 
('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); -Select '-- test create table --' ; -Select * from kql(Customers|project FirstName) limit 1;; -DROP TABLE IF EXISTS kql_table1; -CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); -select LastName from kql_table1 limit 1; -DROP TABLE IF EXISTS kql_table2; -CREATE TABLE kql_table2 -( - FirstName Nullable(String), - LastName String, - Age Nullable(UInt8) -) ENGINE = Memory; -INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); -select * from kql_table2 limit 1; --- select * from kql(Customers | where FirstName !in ("test", "test2")); -DROP TABLE IF EXISTS Customers; -DROP TABLE IF EXISTS kql_table1; -DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference deleted file mode 100644 index fe666f3734c7..000000000000 --- a/tests/queries/0_stateless/02366_kql_datatype.reference +++ /dev/null @@ -1,105 +0,0 @@ --- bool -true -\N --- int -123 -\N --- long -123 -255 --1 -\N -456 --- real -0.01 -\N -nan -inf --inf --- datetime -2015-12-31 23:59:59.900000000 -2015-12-31 00:00:00.000000000 -2014-05-25 08:20:03.123456000 -2014-11-08 15:55:55.000000000 -2014-11-08 15:55:00.000000000 -2014-11-08 00:00:00.000000000 -\N -2014-05-25 08:20:03.123456000 -2014-11-08 15:55:55.123456000 --- time -1216984.12345 -45055.123 -86400 --86400 -6.000000000000001e-9 -6e-7 -172800 -259200 --- guid -\N --- timespan (time) -172800 -1800 -10 -0.1 -0.00001 -1e-7 -1120343 --- null -1 -\N \N \N \N \N --- decimal -\N -123.345 -100000 --- dynamic -\N -1 -86400 -[1,2,3] -[[1],[2],[3]] -['a','b','c'] --- cast functions -true -1 --- tobool("false") -false -1 --- tobool(1) -true -1 --- tobool(123) -true -1 --- tobool("abc") -\N -\N --- todouble() -123.4 -\N --- toreal() -123.4 -\N --- toint() -1 -\N --- tostring() -123 -1 --- todatetime() -1 -\N --- make_timespan() -01:12:00 01:12:30 1.12:30:55 --- totimespan() -1e-7 -60 -\N -1120343 --- tolong() -123 -\N --- todecimal() -123.345 -\N -\N diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql deleted file mode 100644 index ecd295042984..000000000000 --- a/tests/queries/0_stateless/02366_kql_datatype.sql +++ /dev/null @@ -1,117 +0,0 @@ -set dialect = 'kusto'; - -print '-- bool' -print bool(true); -print bool(true); -print bool(null); -print '-- int'; -print int(123); -print int(null); -print int('4'); -- { clientError BAD_ARGUMENTS } -print '-- long'; -print long(123); -print long(0xff); -print long(-1); -print long(null); -print 456; -print '-- real'; -print real(0.01); -print real(null); -print real(nan); -print real(+inf); -print real(-inf); -print double('4.2'); -- { clientError BAD_ARGUMENTS } -print '-- datetime'; -print datetime(2015-12-31 23:59:59.9); -print datetime(2015-12-31); -print datetime('2014-05-25T08:20:03.123456'); -print datetime('2014-11-08 15:55:55'); -print datetime('2014-11-08 15:55'); -print datetime('2014-11-08'); -print datetime(null); -print datetime('2014-05-25T08:20:03.123456Z'); -print datetime('2014-11-08 
15:55:55.123456Z'); -print '-- time'; -print time('14.02:03:04.12345'); -print time('12:30:55.123'); -print time(1d); -print time(-1d); -print time(6nanoseconds); -print time(6tick); -print time(2); -print time(2) + 1d; -print '-- guid' -print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); -print guid(null); -print '-- timespan (time)'; -print timespan(2d); -- 2 days ---print timespan(1.5h); -- 1.5 hour -print timespan(30m); -- 30 minutes -print timespan(10s); -- 10 seconds ---print timespan(0.1s); -- 0.1 second -print timespan(100ms); -- 100 millisecond -print timespan(10microsecond); -- 10 microseconds -print timespan(1tick); -- 100 nanoseconds ---print timespan(1.5h) / timespan(30m); -print timespan('12.23:12:23') / timespan(1s); -print '-- null'; -print isnull(null); -print bool(null), int(null), long(null), real(null), double(null); -print '-- decimal'; -print decimal(null); -print decimal(123.345); -print decimal(1e5); -print '-- dynamic'; -- no support for mixed types and bags for now -print dynamic(null); -print dynamic(1); -print dynamic(timespan(1d)); -print dynamic([1,2,3]); -print dynamic([[1], [2], [3]]); -print dynamic(['a', "b", 'c']); -print '-- cast functions' -print '--tobool("true")'; -- == true -print tobool('true'); -- == true -print tobool('true') == toboolean('true'); -- == true -print '-- tobool("false")'; -- == false -print tobool('false'); -- == false -print tobool('false') == toboolean('false'); -- == false -print '-- tobool(1)'; -- == true -print tobool(1); -- == true -print tobool(1) == toboolean(1); -- == true -print '-- tobool(123)'; -- == true -print tobool(123); -- == true -print tobool(123) == toboolean(123); -- == true -print '-- tobool("abc")'; -- == null -print tobool('abc'); -- == null -print tobool('abc') == toboolean('abc'); -- == null -print '-- todouble()'; -print todouble('123.4'); -print todouble('abc') == null; -print '-- toreal()'; -print toreal("123.4"); -print toreal('abc') == null; -print '-- toint()'; -print toint("123") == int(123); -print toint('abc'); -print '-- tostring()'; -print tostring(123); -print tostring(null) == ''; -print '-- todatetime()'; -print todatetime("2015-12-24") == datetime(2015-12-24); -print todatetime('abc') == null; -print '-- make_timespan()'; -print v1=make_timespan(1,12), v2=make_timespan(1,12,30), v3=make_timespan(1,12,30,55.123); -print '-- totimespan()'; -print totimespan(1tick); -print totimespan('0.00:01:00'); -print totimespan('abc'); -print totimespan('12.23:12:23') / totimespan(1s); --- print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); -> 1120343 -print '-- tolong()'; -print tolong('123'); -print tolong('abc'); -print '-- todecimal()'; -print todecimal(123.345); -print todecimal(null); -print todecimal('abc'); --- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference deleted file mode 100644 index 2100f44f18c9..000000000000 --- a/tests/queries/0_stateless/02366_kql_distinct.reference +++ /dev/null @@ -1,27 +0,0 @@ --- distinct * -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 -\N why Professional Partial College 38 --- distinct one column -- -Skilled Manual -Management abcd defg -Professional --- distinct two column -- -Skilled Manual Bachelors -Management abcd defg Bachelors -Skilled Manual Graduate Degree 
-Professional Graduate Degree -Professional Partial College --- distinct with where -- -Skilled Manual Bachelors -Management abcd defg Bachelors -Skilled Manual Graduate Degree -Professional Graduate Degree -Professional Partial College --- distinct with where, order -- -Skilled Manual Bachelors -Skilled Manual Graduate Degree -Professional Graduate Degree diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql deleted file mode 100644 index 3c997eb4865b..000000000000 --- a/tests/queries/0_stateless/02366_kql_distinct.sql +++ /dev/null @@ -1,28 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); - -set dialect = 'kusto'; - -print '-- distinct * --'; -Customers | distinct *; - -print '-- distinct one column --'; -Customers | distinct Occupation; - -print '-- distinct two column --'; -Customers | distinct Occupation, Education; - -print '-- distinct with where --'; -Customers where Age <30 | distinct Occupation, Education; - -print '-- distinct with where, order --'; -Customers |where Age <30 | order by Age| distinct Occupation, Education; diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference deleted file mode 100644 index 2936c9ea19c1..000000000000 --- a/tests/queries/0_stateless/02366_kql_extend.reference +++ /dev/null @@ -1,32 +0,0 @@ --- extend #1 -- -Aldi Apple 4 2016-09-10 400 -Costco Apple 2 2016-09-11 200 --- extend #2 -- -Apple 200 -Apple 400 --- extend #3 -- -Apple cost 480 on average based on 5 samples. -Snargaluff cost 28080 on average based on 5 samples. 
--- extend #4 -- -1 --- extend #5 -- -Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10 400 -Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11 200 --- extend #6 -- -Aldi Apple 2016-09-10 400 -Costco Apple 2016-09-11 200 -Aldi Apple 2016-09-10 600 -Costco Snargaluff 2016-09-12 10000 -Aldi Apple 2016-09-12 700 -Aldi Snargaluff 2016-09-11 40000 -Costco Snargaluff 2016-09-12 10400 -Aldi Apple 2016-09-12 500 -Aldi Snargaluff 2016-09-11 60000 -Costco Snargaluff 2016-09-10 20000 --- extend #7 -- -5 --- extend #8 -- --- extend #9 -- --- extend #10 -- --- extend #11 -- -5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql deleted file mode 100644 index 0a3c1f3dcd43..000000000000 --- a/tests/queries/0_stateless/02366_kql_extend.sql +++ /dev/null @@ -1,61 +0,0 @@ --- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) --- [ --- 'Aldi','Apple',4,'2016-09-10', --- 'Costco','Apple',2,'2016-09-11', --- 'Aldi','Apple',6,'2016-09-10', --- 'Costco','Snargaluff',100,'2016-09-12', --- 'Aldi','Apple',7,'2016-09-12', --- 'Aldi','Snargaluff',400,'2016-09-11', --- 'Costco','Snargaluff',104,'2016-09-12', --- 'Aldi','Apple',5,'2016-09-12', --- 'Aldi','Snargaluff',600,'2016-09-11', --- 'Costco','Snargaluff',200,'2016-09-10', --- ] - - -DROP TABLE IF EXISTS Ledger; -CREATE TABLE Ledger -( - Supplier Nullable(String), - Fruit String , - Price Float64, - Purchase Date -) ENGINE = Memory; -INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); - --- This test requies sorting after some of aggregations but I don't know KQL, sorry -set max_bytes_before_external_group_by = 0; -set dialect = 'kusto'; - -print '-- extend #1 --'; -Ledger | extend PriceInCents = 100 * Price | take 2; - -print '-- extend #2 --'; -Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 2; - -print '-- extend #3 --'; -Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; - -print '-- extend #4 --'; -Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); - -print '-- extend #5 --'; -Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; - -print '-- extend #6 --'; -Ledger | extend Price = 100 * Price; - -print '-- extend #7 --'; -print a = 4 | extend a = 5; - -print '-- extend #8 --'; --- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) - -print '-- extend #9 --'; -print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } - -print '-- extend #10 --'; -Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend 
Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; -- { clientError SYNTAX_ERROR } - -print '-- extend #11 --'; -- should ideally return this in the future: 5 [2,1] because of the alias ex -print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference deleted file mode 100644 index 6276cd6d8675..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ /dev/null @@ -1,7 +0,0 @@ - -- binary functions -4 7 -1 -1 -1 -7 3 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql deleted file mode 100644 index 824022b564ce..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ /dev/null @@ -1,8 +0,0 @@ -set dialect='kusto'; -print ' -- binary functions'; -print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; -print binary_xor(2, 5), bitset_count_ones(42); -print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference deleted file mode 100644 index 40d8d7e19ac9..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_datetime.reference +++ /dev/null @@ -1,76 +0,0 @@ --- dayofmonth() -31 --- dayofweek() -4.00:00:00 --- dayofyear() -365 --- getmonth() -10 --- getyear() -2015 --- hoursofday() -23 --- startofday() -2017-01-01 00:00:00.000000000 -2016-12-31 00:00:00.000000000 -2017-01-02 00:00:00.000000000 --- endofday() -2017-01-01 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2017-01-02 23:59:59.999999000 --- endofmonth() -2017-01-31 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2017-02-28 23:59:59.999999000 -2022-09-30 23:59:59.999999000 --- startofweek() -2017-01-01 00:00:00.000000000 -2016-12-25 00:00:00.000000000 -2017-01-08 00:00:00.000000000 --- endofweek() -2017-01-07 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2017-01-14 23:59:59.999999000 --- startofyear() -2017-01-01 00:00:00.000000000 -2016-01-01 00:00:00.000000000 -2018-01-01 00:00:00.000000000 --- endofyear() -2017-12-31 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2018-12-31 23:59:59.999999000 --- unixtime_seconds_todatetime() -2019-01-01 00:00:00.000000000 -1970-01-02 00:00:00.000000000 -1969-12-31 00:00:00.000000000 --- unixtime_microseconds_todatetime -2019-01-01 00:00:00.000000 --- unixtime_milliseconds_todatetime() -2019-01-01 00:00:00.000 --- unixtime_nanoseconds_todatetime() -2019-01-01 00:00:00.000000000 --- weekofyear() -52 --- monthofyear() -12 --- weekofyear() -52 --- now() -1 --- make_datetime() -1 -2017-10-01 12:10:00.0000000 -2017-10-01 12:11:00.0000000 --- format_datetime -15-12-14 02:03:04.1234500 -17-01-29 [09:00:05] 2017-01-29 [09:00:05] 17-01-29 [09:00:05 AM] --- format_timespan() -02:03:04.1234500 -29.09:00:05:12 --- ago() --- datetime_diff() -17 2 13 4 29 2 5 10 --- datetime_part() -2017 4 10 44 30 303 01 02 03 --- datetime_add() -2018-01-01 00:00:00.0000000 2017-04-01 00:00:00.0000000 2017-02-01 00:00:00.0000000 2017-01-08 00:00:00.0000000 2017-01-02 00:00:00.0000000 2017-01-01 01:00:00.0000000 2017-01-01 00:01:00.0000000 2017-01-01 00:00:01.0000000 
diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql deleted file mode 100644 index b1fba4166a9e..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_datetime.sql +++ /dev/null @@ -1,86 +0,0 @@ -set dialect = 'kusto'; - -print '-- dayofmonth()'; -print dayofmonth(datetime(2015-12-31)); -print '-- dayofweek()'; -print dayofweek(datetime(2015-12-31)); -print '-- dayofyear()'; -print dayofyear(datetime(2015-12-31)); -print '-- getmonth()'; -print getmonth(datetime(2015-10-12)); -print '-- getyear()'; -print getyear(datetime(2015-10-12)); -print '-- hoursofday()'; -print hourofday(datetime(2015-12-31 23:59:59.9)); -print '-- startofday()'; -print startofday(datetime(2017-01-01 10:10:17)); -print startofday(datetime(2017-01-01 10:10:17), -1); -print startofday(datetime(2017-01-01 10:10:17), 1); -print '-- endofday()'; -print endofday(datetime(2017-01-01 10:10:17)); -print endofday(datetime(2017-01-01 10:10:17), -1); -print endofday(datetime(2017-01-01 10:10:17), 1); -print '-- endofmonth()'; -print endofmonth(datetime(2017-01-01 10:10:17)); -print endofmonth(datetime(2017-01-01 10:10:17), -1); -print endofmonth(datetime(2017-01-01 10:10:17), 1); -print endofmonth(datetime(2022-09-23)); -print '-- startofweek()'; -print startofweek(datetime(2017-01-01 10:10:17)); -print startofweek(datetime(2017-01-01 10:10:17), -1); -print startofweek(datetime(2017-01-01 10:10:17), 1); -print '-- endofweek()'; -print endofweek(datetime(2017-01-01 10:10:17)); -print endofweek(datetime(2017-01-01 10:10:17), -1); -print endofweek(datetime(2017-01-01 10:10:17), 1); -print '-- startofyear()'; -print startofyear(datetime(2017-01-01 10:10:17)); -print startofyear(datetime(2017-01-01 10:10:17), -1); -print startofyear(datetime(2017-01-01 10:10:17), 1); -print '-- endofyear()'; -print endofyear(datetime(2017-01-01 10:10:17)); -print endofyear(datetime(2017-01-01 10:10:17), -1); -print endofyear(datetime(2017-01-01 10:10:17), 1); -print '-- unixtime_seconds_todatetime()'; -print unixtime_seconds_todatetime(1546300800); -print unixtime_seconds_todatetime(1d); -print unixtime_seconds_todatetime(-1d); -print '-- unixtime_microseconds_todatetime'; -print unixtime_microseconds_todatetime(1546300800000000); -print '-- unixtime_milliseconds_todatetime()'; -print unixtime_milliseconds_todatetime(1546300800000); -print '-- unixtime_nanoseconds_todatetime()'; -print unixtime_nanoseconds_todatetime(1546300800000000000); -print '-- weekofyear()'; -print week_of_year(datetime(2000-01-01)); -print '-- monthofyear()'; -print monthofyear(datetime(2015-12-31)); -print '-- weekofyear()'; -print week_of_year(datetime(2000-01-01)); -print '-- now()'; -print getyear(now(-2d))>1900; -print '-- make_datetime()'; -print make_datetime(2017,10,01,12,10) == datetime(2017-10-01 12:10:00); -print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); -print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); -print '-- format_datetime'; -print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); -print v1=format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'), v2=format_datetime(datetime(2017-01-29 09:00:05), 'yyyy-M-dd [H:mm:ss]'), v3=format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); -print '-- format_timespan()'; -print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); -print v1=format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); --- print 
v2=format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); == '029.9:00:05 [1234500]' -print '-- ago()'; --- print ago(1d) - now(); -print '-- datetime_diff()'; -print year = datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); --- millisecond = datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.100900)), --- microsecond = datetime_diff('microsecond',datetime(2017-10-30 23:00:00.1009001),datetime(2017-10-30 23:00:00.1008009)), --- nanosecond = datetime_diff('nanosecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)) -print '-- datetime_part()'; -print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); --- millisecond = datetime_part("millisecond", dt), --- microsecond = datetime_part("microsecond", dt), --- nanosecond = datetime_part("nanosecond", dt) -print '-- datetime_add()'; -print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = datetime_add('week',1,make_datetime(2017,1,1)),day = datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference deleted file mode 100644 index 564f1eebc4bb..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_dynamic.reference +++ /dev/null @@ -1,152 +0,0 @@ --- constant index value -1 c ['A',NULL,'C'] --- array_length() -1 -1 --- array_sum() -1 -1 --- array_index_of() -3 -1 --- array_iif() -[1,5,3] -[1,5,3] -[1,5,NULL] -[NULL,NULL,NULL] --- array_concat() -[1,2,3,4,5,6] --- array_reverse() -[] -[1] -[4,3,2,1] -['example','an','is','this'] --- array_rotate_left() -[] -[] -[] -[3,4,5,1,2] -[1,2,3,4,5] -[3,4,5,1,2] -[4,5,1,2,3] -[1,2,3,4,5] -[4,5,1,2,3] --- array_rotate_right() -[] -[] -[] -[4,5,1,2,3] -[1,2,3,4,5] -[4,5,1,2,3] -[3,4,5,1,2] -[1,2,3,4,5] -[3,4,5,1,2] --- array_shift_left() -[] -[] -[] -[3,4,5,NULL,NULL] -[NULL,NULL,1,2,3] -[3,4,5,-1,-1] -['c','',''] --- array_shift_right() -[] -[] -[] -[3,4,5,NULL,NULL] -[NULL,NULL,1,2,3] 
-[3,4,5,-1,-1] -['c','',''] --- array_slice() -[3,4] --- array_split() -[[1],[2,3],[4,5]] -[[1,2],[3,4,5]] -[[1],[2,3],[4,5]] -[[1,2,3,4],[],[4,5]] --- array_sort_asc() -(['a','c','c','d',NULL]) -([1,2,3,4]) -['a','b','c'] -(['p','q','r'],['hello','clickhouse','world']) -([NULL,'a','c','c','d']) -([NULL,'a','c','c','d']) -([NULL,NULL,NULL]) -[1,2,3,NULL,NULL] -['a','e','b','c','d'] -(['George','John','Paul','Ringo']) -(['blue','green','yellow',NULL,NULL]) -([NULL,NULL,'blue','green','yellow']) --- array_sort_desc() -(['d','c','c','a',NULL]) -([4,3,2,1]) -['c','b','a'] -(['r','q','p'],['world','clickhouse','hello']) -([NULL,'d','c','c','a']) -([NULL,'d','c','c','a']) -([NULL,NULL,NULL]) -[3,2,1,NULL,NULL] -['d','c','b','e','a'] -(['Ringo','Paul','John','George']) -(['yellow','green','blue',NULL,NULL]) -([NULL,NULL,'yellow','green','blue']) --- jaccard_index() -0.75 -0 -0 -nan -0 -0.75 -0.25 --- pack_array() -1 2 4 [1,2,4] -['ab','0.0.0.42','4.2'] --- repeat() -[] -[1,1,1] -['asd','asd','asd'] -[86400,86400,86400] -[true,true,true] -[NULL] -[NULL] --- set_difference() -[] -[] -[] -[] -[4,5,6] -[4] -[1,3] -[1,2,3] -['d','s'] -['Chewbacca','Han Solo'] --- set_has_element() -0 -1 -0 -1 -0 --- set_intersect() -[] -[1,2,3] -[1,2,3] -[] -[5] -[] -['a'] -['Darth Vader'] --- set_union() -[] -[1,2,3] -[1,2,3,4,5,6] -[1,2,3,4] -[1,2,3,4,5] -[1,2,3] -['a','d','f','s'] -['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] --- zip() -[] -[[1,2],[3,4],[5,6]] -[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] -[[1,10],[2,20],[3,NULL]] -[[NULL,1],[NULL,2],[NULL,3]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql deleted file mode 100644 index b0956f032d0c..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_dynamic.sql +++ /dev/null @@ -1,161 +0,0 @@ -DROP TABLE IF EXISTS array_test; -CREATE TABLE array_test (floats Array(Float64), - strings Array(String), - nullable_strings Array(Nullable(String)) - ) ENGINE=Memory; -INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); -set dialect = 'kusto'; -print '-- constant index value'; -array_test | project floats[0], strings[1], nullable_strings; -print '-- array_length()'; -print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])) == 4; -print array_length(dynamic([1, 2, 3])) == 3; -print '-- array_sum()'; -print array_sum(dynamic([2, 5, 3])) == 10; -print array_sum(dynamic([2.5, 5.5, 3])) == 11; -print '-- array_index_of()'; -print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); -print array_index_of(dynamic([1, 2, 3]), 2); -print '-- array_iif()'; -print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); -print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); -print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); -print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); -print '-- array_concat()'; -print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); -print '-- array_reverse()'; -print array_reverse(dynamic([])); -print array_reverse(dynamic([1])); -print array_reverse(dynamic([1,2,3,4])); -print array_reverse(dynamic(["this", "is", "an", "example"])); -print '-- array_rotate_left()'; -print array_rotate_left(dynamic([]), 0); -print array_rotate_left(dynamic([]), 500); -print array_rotate_left(dynamic([]), -500); -print array_rotate_left(dynamic([1,2,3,4,5]), 2); -print array_rotate_left(dynamic([1,2,3,4,5]), 5); -print 
array_rotate_left(dynamic([1,2,3,4,5]), 7); -print array_rotate_left(dynamic([1,2,3,4,5]), -2); -print array_rotate_left(dynamic([1,2,3,4,5]), -5); -print array_rotate_left(dynamic([1,2,3,4,5]), -7); -print '-- array_rotate_right()'; -print array_rotate_right(dynamic([]), 0); -print array_rotate_right(dynamic([]), 500); -print array_rotate_right(dynamic([]), -500); -print array_rotate_right(dynamic([1,2,3,4,5]), 2); -print array_rotate_right(dynamic([1,2,3,4,5]), 5); -print array_rotate_right(dynamic([1,2,3,4,5]), 7); -print array_rotate_right(dynamic([1,2,3,4,5]), -2); -print array_rotate_right(dynamic([1,2,3,4,5]), -5); -print array_rotate_right(dynamic([1,2,3,4,5]), -7); -print '-- array_shift_left()'; -print array_shift_left(dynamic([]), 0); -print array_shift_left(dynamic([]), 555); -print array_shift_left(dynamic([]), -555); -print array_shift_left(dynamic([1,2,3,4,5]), 2); -print array_shift_left(dynamic([1,2,3,4,5]), -2); -print array_shift_left(dynamic([1,2,3,4,5]), 2, -1); -print array_shift_left(dynamic(['a', 'b', 'c']), 2); -print '-- array_shift_right()'; -print array_shift_left(dynamic([]), 0); -print array_shift_left(dynamic([]), 555); -print array_shift_left(dynamic([]), -555); -print array_shift_right(dynamic([1,2,3,4,5]), -2); -print array_shift_right(dynamic([1,2,3,4,5]), 2); -print array_shift_right(dynamic([1,2,3,4,5]), -2, -1); -print array_shift_right(dynamic(['a', 'b', 'c']), -2); -print '-- array_slice()'; ---print array_slice(dynamic([1,2,3]), 1, 2); -- will enable whe analyzer dixed -print array_slice(dynamic([1,2,3,4,5]), -3, -2); -print '-- array_split()'; -print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2])); -print array_split(dynamic([1,2,3,4,5]), 2); -print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])); -print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2])); -print '-- array_sort_asc()'; -print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c'])); -print array_sort_asc(dynamic([4, 1, 3, 2])); -print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; -print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); -print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false); -print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); -print array_sort_asc( dynamic([null, null, null]) , false); -print array_sort_asc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; -print array_sort_asc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; -print array_sort_asc(split("John,Paul,George,Ringo", ",")); -print array_sort_asc(dynamic([null,"blue","yellow","green",null])); -print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false); -print '-- array_sort_desc()'; -print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c'])); -print array_sort_desc(dynamic([4, 1, 3, 2])); -print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; -print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); -print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false); -print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); -print array_sort_desc( dynamic([null, null, null]) , false); -print array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; -print array_sort_desc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; 
-print array_sort_desc(split("John,Paul,George,Ringo", ",")); -print array_sort_desc(dynamic([null,"blue","yellow","green",null])); -print array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); -print '-- jaccard_index()'; -print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); -print jaccard_index(dynamic([1, 2, 3]), dynamic([])); -print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); -print jaccard_index(dynamic([]), dynamic([])); -print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); -print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])); -print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); -print '-- pack_array()'; -print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); -print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); -print '-- repeat()'; -print repeat(1, 0); -print repeat(1, 3); -print repeat("asd", 3); -print repeat(timespan(1d), 3); -print repeat(true, 3); -print repeat(1, -3); -print repeat(6.7,-4); -print '-- set_difference()'; -print set_difference(dynamic([]), dynamic([])); -print set_difference(dynamic([]), dynamic([9])); -print set_difference(dynamic([]), dynamic(["asd"])); -print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); -print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print set_difference(dynamic([4]), dynamic([1, 2, 3])); -print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; -print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; -print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; -print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; -print '-- set_has_element()'; -print set_has_element(dynamic([]), 9); -print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); -print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); -print set_has_element(dynamic([1, 2, 3]), 2); -print set_has_element(dynamic([1, 2, 3, 4.2]), 4); -print '-- set_intersect()'; -print set_intersect(dynamic([]), dynamic([])); -print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print set_intersect(dynamic([4]), dynamic([1, 2, 3])); -print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); -print set_intersect(dynamic([1, 2, 3]), dynamic([])); -print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); -print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); -print '-- set_union()'; -print set_union(dynamic([]), dynamic([])); -print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; -print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; -print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; -print 
array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; -print '-- zip()'; -print zip(dynamic([]), dynamic([])); -print zip(dynamic([1,3,5]), dynamic([2,4,6])); -print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); -print zip(dynamic([1,2,3]), dynamic([10,20])); -print zip(dynamic([]), dynamic([1,2,3])); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference deleted file mode 100644 index 2a0bbf53fff4..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ /dev/null @@ -1,123 +0,0 @@ --- ipv4_is_private(\'127.0.0.1\') -0 --- ipv4_is_private(\'10.1.2.3\') -1 --- ipv4_is_private(\'192.168.1.1/24\') -1 -ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) -1 --- ipv4_is_private(\'abc\') -\N --- ipv4_netmask_suffix(\'192.168.1.1/24\') -24 --- ipv4_netmask_suffix(\'192.168.1.1\') -32 --- ipv4_netmask_suffix(\'127.0.0.1/16\') -16 --- ipv4_netmask_suffix(\'abc\') -\N -ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) -16 --- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') -1 --- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') -1 --- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') -0 --- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') -0 --- ipv4_is_in_range(\'abc\', \'127.0.0.1\') -\N --- parse_ipv6(127.0.0.1) -0000:0000:0000:0000:0000:ffff:7f00:0001 --- parse_ipv6(fe80::85d:e82c:9446:7994) -fe80:0000:0000:0000:085d:e82c:9446:7994 --- parse_ipv4(\'127.0.0.1\') -2130706433 --- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') -1 --- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\')) --- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 -2130706432 --- parse_ipv4_mask(\'abc\', 31) -\N -\N --- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) -3221334018 -3221334018 --- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') -1 --- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') -0 --- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') -1 --- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) -1 --- ipv4_is_match(\'abc\', \'def\', 24) -\N --- ipv4_compare() -0 --1 -1 -0 -0 -0 -0 -0 -0 -0 -0 --- format_ipv4() -192.168.1.0 -192.168.1.1 -192.168.1.0 -192.168.1.0 -1 -1 -127.0.0.0 --- format_ipv4_mask() -192.168.1.0/24 -192.168.1.0/24 -192.168.1.0/24 -192.168.1.1/32 -192.168.1.0/24 -1 -1 -127.0.0.0/24 --- parse_ipv6_mask() -0000:0000:0000:0000:0000:0000:0000:0000 -fe80:0000:0000:0000:085d:e82c:9446:7900 -0000:0000:0000:0000:0000:ffff:c0a8:ff00 -0000:0000:0000:0000:0000:ffff:c0a8:ff00 -0000:0000:0000:0000:0000:ffff:ffff:ffff -fe80:0000:0000:0000:085d:e82c:9446:7994 -fe80:0000:0000:0000:085d:e82c:9446:7900 -0000:0000:0000:0000:0000:ffff:c0a8:ffff -0000:0000:0000:0000:0000:ffff:c0a8:ff00 --- ipv6_is_match() -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql deleted file mode 100644 index c9b335f203a2..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ /dev/null @@ -1,131 +0,0 @@ -set dialect='kusto'; -print '-- ipv4_is_private(\'127.0.0.1\')'; -print ipv4_is_private('127.0.0.1'); -print '-- ipv4_is_private(\'10.1.2.3\')'; -print ipv4_is_private('10.1.2.3'); -print '-- ipv4_is_private(\'192.168.1.1/24\')'; 
-print ipv4_is_private('192.168.1.1/24'); -print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; -print ipv4_is_private(strcat('192.','168.','1.','1','/24')); -print '-- ipv4_is_private(\'abc\')'; -print ipv4_is_private('abc'); -- == null - -print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; -print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 -print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; -print ipv4_netmask_suffix('192.168.1.1'); -- == 32 -print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; -print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 -print '-- ipv4_netmask_suffix(\'abc\')'; -print ipv4_netmask_suffix('abc'); -- == null -print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; -print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 - -print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; -print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true -print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; -print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true -print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; -print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false -print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; -print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false -print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null -print ipv4_is_in_range('abc', '127.0.0.1'); - -print '-- parse_ipv6(127.0.0.1)'; -print parse_ipv6('127.0.0.1'); -print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; -print parse_ipv6('fe80::85d:e82c:9446:7994'); -print '-- parse_ipv4(\'127.0.0.1\')'; -print parse_ipv4('127.0.0.1'); -print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; -print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))'; -print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION } - -print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; -print parse_ipv4_mask('127.0.0.1', 24); -print '-- parse_ipv4_mask(\'abc\', 31)'; -print parse_ipv4_mask('abc', 31) -print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; -print parse_ipv4_mask('192.1.168.2', 1000); -print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; ---print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); // this qual failed in analyzer 3221334018 -print parse_ipv4_mask('192.1.168.2', 31); -print parse_ipv4_mask('192.1.168.3', 31); -print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; -print ipv4_is_match('127.0.0.1', '127.0.0.1'); -print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; -print ipv4_is_match('192.168.1.1', '192.168.1.255'); -print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; -print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); -print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; -print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); -print '-- ipv4_is_match(\'abc\', \'def\', 24)'; -print ipv4_is_match('abc', 'dev', 24); -print '-- ipv4_compare()'; -print ipv4_compare('127.0.0.1', '127.0.0.1'); -print ipv4_compare('192.168.1.1', '192.168.1.255'); -print ipv4_compare('192.168.1.255', '192.168.1.1'); -print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1', '192.168.1.255', 24); -print ipv4_compare('192.168.1.1/24', '192.168.1.255'); -print 
ipv4_compare('192.168.1.1', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1', '192.168.1.0', 31); -print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); -print ipv4_compare('192.168.1.1', '192.168.1.255', 24); -print '-- format_ipv4()'; -print format_ipv4('192.168.1.255', 24); -print format_ipv4('192.168.1.1', 32); -print format_ipv4('192.168.1.1/24', 32); -print format_ipv4(3232236031, 24); -print format_ipv4('192.168.1.1/24', -1) == ''; -print format_ipv4('abc', 24) == ''; -print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); -print '-- format_ipv4_mask()'; -print format_ipv4_mask('192.168.1.255', 24); -print format_ipv4_mask(3232236031, 24); -print format_ipv4_mask('192.168.1.1', 24); -print format_ipv4_mask('192.168.1.1', 32); -print format_ipv4_mask('192.168.1.1/24', 32); -print format_ipv4_mask('192.168.1.1/24', -1) == ''; -print format_ipv4_mask('abc', 24) == ''; -print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); -print '-- parse_ipv6_mask()'; -print parse_ipv6_mask("127.0.0.1", 24); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); -print parse_ipv6_mask("192.168.255.255", 120); -print parse_ipv6_mask("192.168.255.255/24", 124); -print parse_ipv6_mask("255.255.255.255", 128); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); -print parse_ipv6_mask("::192.168.255.255", 128); -print parse_ipv6_mask("::192.168.255.255/24", 128); -print '-- ipv6_is_match()'; -print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; -print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; -print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs -print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs -print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison -print 
ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison -print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference deleted file mode 100644 index 92f283abcb6e..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_math.reference +++ /dev/null @@ -1,4 +0,0 @@ --- isnan -- -1 -0 -0 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql deleted file mode 100644 index 4e83622eb6b8..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_math.sql +++ /dev/null @@ -1,7 +0,0 @@ -set dialect = 'kusto'; -print '-- isnan --'; -print isnan(double(nan)); -print isnan(4.2); -print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print isnan(real(+inf)); -print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference deleted file mode 100644 index b7fa62c5d437..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_scalar.reference +++ /dev/null @@ -1,16 +0,0 @@ --- bin_at() -4.5 --12:0:0 -2017-05-14 12:00:00.000000000 -2017-05-14 00:00:00.000000000 -2018-02-25 15:14:00.000000000 5 -2018-02-24 15:14:00.000000000 3 -2018-02-23 15:14:00.000000000 4 --- bin() -4 -1970-05-11 00:00:00.000000000 -336:0:0 -1970-05-11 13:45:07.345000000 -1970-05-11 13:45:07.345623000 -2022-09-26 10:13:23.987232000 -1970-05-11 13:45:07.456336000 diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql deleted file mode 100644 index d7e94cfd9d15..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_scalar.sql +++ /dev/null @@ -1,26 +0,0 @@ -DROP TABLE IF EXISTS Bin_at_test; -CREATE TABLE Bin_at_test -( - `Date` DateTime('UTC'), - Num Nullable(UInt8) -) ENGINE = Memory; -INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:01',3), ('2018-02-23T16:14:01',4), ('2018-02-26T15:14:01',5); - -set dialect = 'kusto'; -print '-- bin_at()'; -print bin_at(6.5, 2.5, 7); -print bin_at(1h, 1d, 12h); -print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); -print bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); -Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; -print '-- bin()'; -print bin(4.5, 1); -print bin(datetime(1970-05-11 13:45:07), 1d); -print bin(16d, 7d); -print bin(datetime(1970-05-11 13:45:07.345623), 1ms); --- print bin(datetime(2022-09-26 10:13:23.987234), 6ms); -> 2022-09-26 10:13:23.982000000 -print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); -print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); -print 
bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); --- print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); -> 2022-09-26 10:13:23.987234100 --- print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); -> null diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference deleted file mode 100644 index 9bdd38ca5dba..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ /dev/null @@ -1,360 +0,0 @@ --- test String Functions -- --- Customers |where Education contains \'degree\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers |where Education !contains \'degree\' -\N why Professional Partial College 38 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers |where Education contains \'Degree\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers |where Education !contains \'Degree\' -\N why Professional Partial College 38 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where FirstName endswith \'RE\' -Theodore Diaz Skilled Manual Bachelors 28 - --- Customers | where ! FirstName endswith \'RE\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - ---Customers | where FirstName endswith_cs \'re\' -Theodore Diaz Skilled Manual Bachelors 28 - --- Customers | where FirstName !endswith_cs \'re\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation == \'Skilled Manual\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation != \'Skilled Manual\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation has \'skilled\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation !has \'skilled\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation has \'Skilled\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation !has \'Skilled\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation hasprefix_cs \'Ab\' - --- Customers | where Occupation !hasprefix_cs \'Ab\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hasprefix_cs \'ab\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation !hasprefix_cs \'ab\' -\N why 
Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hassuffix \'Ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation !hassuffix \'Ent\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hassuffix \'ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation hassuffix \'ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers |where Education in (\'Bachelors\',\'High School\') -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Education !in (\'Bachelors\',\'High School\') -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where FirstName matches regex \'P.*r\' -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where FirstName startswith \'pet\' -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where FirstName !startswith \'pet\' -Latoya Shen Professional Graduate Degree 25 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where FirstName startswith_cs \'pet\' - --- Customers | where FirstName !startswith_cs \'pet\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where isempty(LastName) -Apple Skilled Manual Bachelors 28 - --- Customers | where isnotempty(LastName) -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -\N why Professional Partial College 38 - --- Customers | where isnotnull(FirstName) -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where isnull(FirstName) -\N why Professional Partial College 38 - --- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 -https://www.test.com/hello word - --- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 -https%3A%2F%2Fwww.test.com%2Fhello%20word - --- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) -\N -Lat en -Pet ra -The az -Ste x -App - --- Customers | project name = strcat(FirstName, \' \', LastName) -\N -Latoya Shen -Peter Nara -Theodore Diaz -Stephanie Cox -Apple - --- Customers | project FirstName, strlen(FirstName) -\N \N -Latoya 6 -Peter 5 -Theodore 8 -Stephanie 9 -Apple 5 - --- Customers | project strrep(FirstName,2,\'_\') -\N -Latoya_Latoya -Peter_Peter -Theodore_Theodore -Stephanie_Stephanie -Apple_Apple - --- Customers | project toupper(FirstName) -\N -LATOYA -PETER -THEODORE -STEPHANIE -APPLE - --- Customers | project tolower(FirstName) -\N -latoya -peter 
-theodore -stephanie -apple - --- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Apple Skilled Manual Bachelors 28 - --- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) -3 -3 -1 - --- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) -PINEAPPLE ice cream is 20 -PINEAPPLE -20 - -20 -\N -\N -\N -\N -\N -45.6 -45.6 - --- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet -[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] - --- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) - - -John -iPhone -\N -26 -26 -26 -26 -\N - --- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) -['aa','bb'] -['bbb'] -[''] -['a','','b'] -['aa','cc'] -['aabbcc'] -['aaa','bbb','ccc'] -[NULL] - --- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. 
-1-2-Ab - --- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet -2 -2 --1 --- base64_encode_fromguid() -8jMxriJurkmwahbmqbIS6w== --- base64_decode_toarray() -[] -[75,117,115,116,111] --- base64_decode_toguid() -10e99626-bc2b-4c75-bb3e-fe606de25700 -1 --- base64_encode_tostring - -S3VzdG8x --- base64_decode_tostring - -Kusto1 --- parse_url() -{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} --- parse_urlquery() -{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} --- strcmp() -0 1 -1 1 --- substring() -CD --- translate() -kusto xxx --- trim() -https://www.ibm.com -Te st1 - asd -asd -sd --- trim_start() -www.ibm.com -Te st1// $ -asdw - -asd --- trim_end() -https -- Te st1 -wasd - -asd --- trim, trim_start, trim_end all at once ---https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com --- replace_regex -Number was: 1 --- has_any_index() -0 1 -1 -1 --- parse_version() -1000000020000000300000040 -1000000020000000000000000 -1000000020000000000000000 -\N -\N -\N -\N -1000000020000000300000004 -1000000020000000000000000 -1000000020000000300000000 -1000000000000000000000000 --- parse_json() -[1,2,3] -[{"a":123.5,"b":"{\\"c\\":456}"}] --- parse_command_line() -[NULL] -[NULL] --- reverse() -321 -43.321 - -dsa -][ -]3,2,1[ -]\'redaV\',\'htraD\'[ -000000000.00:00:21 51-01-7102 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 --- parse_csv() -[''] -['aaa'] -['aa','b','cc'] -['record1','a','b','c'] diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql deleted file mode 100644 index d251b04e08bf..000000000000 --- a/tests/queries/0_stateless/02366_kql_func_string.sql +++ /dev/null @@ -1,313 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); - --- datatable (Version:string) [ --- '1.2.3.4', --- '1.2', --- '1.2.3', --- '1' --- ] - -DROP TABLE IF EXISTS Versions; -CREATE TABLE Versions -( - Version String -) ENGINE = Memory; -INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); - - -set dialect='kusto'; -print '-- test String Functions --'; - -print '-- Customers |where Education contains \'degree\''; -Customers |where Education contains 'degree' | order by LastName; -print ''; -print '-- Customers |where Education !contains \'degree\''; -Customers |where Education !contains 'degree' | order by LastName; -print ''; -print '-- Customers |where Education contains \'Degree\''; -Customers |where Education contains 'Degree' | order by LastName; -print ''; -print '-- Customers |where Education !contains \'Degree\''; -Customers |where Education !contains 'Degree' | order by LastName; -print ''; -print '-- Customers | where FirstName endswith \'RE\''; -Customers | where FirstName endswith 'RE' | order by LastName; -print ''; -print '-- Customers | where ! 
FirstName endswith \'RE\''; -Customers | where FirstName ! endswith 'RE' | order by LastName; -print ''; -print '--Customers | where FirstName endswith_cs \'re\''; -Customers | where FirstName endswith_cs 're' | order by LastName; -print ''; -print '-- Customers | where FirstName !endswith_cs \'re\''; -Customers | where FirstName !endswith_cs 're' | order by LastName; -print ''; -print '-- Customers | where Occupation == \'Skilled Manual\''; -Customers | where Occupation == 'Skilled Manual' | order by LastName; -print ''; -print '-- Customers | where Occupation != \'Skilled Manual\''; -Customers | where Occupation != 'Skilled Manual' | order by LastName; -print ''; -print '-- Customers | where Occupation has \'skilled\''; -Customers | where Occupation has 'skilled' | order by LastName; -print ''; -print '-- Customers | where Occupation !has \'skilled\''; -Customers | where Occupation !has 'skilled' | order by LastName; -print ''; -print '-- Customers | where Occupation has \'Skilled\''; -Customers | where Occupation has 'Skilled'| order by LastName; -print ''; -print '-- Customers | where Occupation !has \'Skilled\''; -Customers | where Occupation !has 'Skilled'| order by LastName; -print ''; -print '-- Customers | where Occupation hasprefix_cs \'Ab\''; -Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; -print ''; -print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; -Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; -print ''; -print '-- Customers | where Occupation hasprefix_cs \'ab\''; -Customers | where Occupation hasprefix_cs 'ab'| order by LastName; -print ''; -print '-- Customers | where Occupation !hasprefix_cs \'ab\''; -Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'Ent\''; -Customers | where Occupation hassuffix 'Ent'| order by LastName; -print ''; -print '-- Customers | where Occupation !hassuffix \'Ent\''; -Customers | where Occupation !hassuffix 'Ent'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'ent\''; -Customers | where Occupation hassuffix 'ent'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'ent\''; -Customers | where Occupation hassuffix 'ent'| order by LastName; -print ''; -print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; -Customers |where Education in ('Bachelors','High School')| order by LastName; -print ''; -print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; -Customers | where Education !in ('Bachelors','High School')| order by LastName; -print ''; -print '-- Customers | where FirstName matches regex \'P.*r\''; -Customers | where FirstName matches regex 'P.*r'| order by LastName; -print ''; -print '-- Customers | where FirstName startswith \'pet\''; -Customers | where FirstName startswith 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName !startswith \'pet\''; -Customers | where FirstName !startswith 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName startswith_cs \'pet\''; -Customers | where FirstName startswith_cs 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName !startswith_cs \'pet\''; -Customers | where FirstName !startswith_cs 'pet'| order by LastName; -print ''; -print '-- Customers | where isempty(LastName)'; -Customers | where isempty(LastName); -print ''; -print '-- Customers | where isnotempty(LastName)'; -Customers | where 
isnotempty(LastName); -print ''; -print '-- Customers | where isnotnull(FirstName)'; -Customers | where isnotnull(FirstName)| order by LastName; -print ''; -print '-- Customers | where isnull(FirstName)'; -Customers | where isnull(FirstName)| order by LastName; -print ''; -print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; -Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; -print ''; -print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; -Customers | project url_encode('https://www.test.com/hello word') | take 1; -print ''; -print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; -Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; -print ''; -print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; -Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; -print ''; -print '-- Customers | project FirstName, strlen(FirstName)'; -Customers | project FirstName, strlen(FirstName)| order by LastName; -print ''; -print '-- Customers | project strrep(FirstName,2,\'_\')'; -Customers | project strrep(FirstName,2,'_')| order by LastName; -print ''; -print '-- Customers | project toupper(FirstName)'; -Customers | project toupper(FirstName)| order by LastName; -print ''; -print '-- Customers | project tolower(FirstName)'; -Customers | project tolower(FirstName)| order by LastName; -print ''; -print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; -Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; --- Customer | where LastName in~ ("diaz", "cox") -print ''; -print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; -Customers | where Occupation has_all ('manual', 'skilled') | order by LastName; -print ''; -print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; -Customers|where Occupation has_any ('Skilled','abcd'); -print ''; -print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; -Customers | project countof('The cat sat on the mat', 'at') | take 1; -Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; -Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; -print ''; -print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(guid)); -print extract("x=([0-9.]+)", 1, "hello 
x=45.6|wo" , typeof(int)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); -print ''; -print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; -Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; -print ''; -print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; -print extract_json('', ''); -- { serverError BAD_ARGUMENTS } -print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } -print extract_json('$.firstName', ''); -print extract_json('$.phoneNumbers[0].type', ''); -print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); -print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); -print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); --- print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); -> true -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); --- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist 
street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary -print ''; -print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; -Customers | project split('aa_bb', '_') | take 1; -Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; -Customers | project split('', '_') | take 1; -Customers | project split('a__b', '_') | take 1; -Customers | project split('aabbcc', 'bb') | take 1; -Customers | project split('aabbcc', '') | take 1; -Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; -Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; -print ''; -print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; -Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; --- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); -print ''; -print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet'; -Customers | project indexof('abcdefg','cde') | take 1; -Customers | project indexof('abcdefg','cde',2) | take 1; -Customers | project indexof('abcdefg','cde',6) | take 1; -print '-- base64_encode_fromguid()'; --- print base64_encode_fromguid(guid(null)); -print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); -print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print '-- base64_decode_toarray()'; -print base64_decode_toarray(''); -print base64_decode_toarray('S3VzdG8='); -print '-- base64_decode_toguid()'; -print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); -print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); -print '-- base64_encode_tostring'; -print base64_encode_tostring(''); -print base64_encode_tostring('Kusto1'); -print '-- base64_decode_tostring'; -print base64_decode_tostring(''); -print base64_decode_tostring('S3VzdG8x'); -print '-- parse_url()'; -print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); -print '-- parse_urlquery()'; -print parse_urlquery('k1=v1&k2=v2&k3=v3'); -print '-- strcmp()'; -print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); -print '-- substring()'; -print substring("ABCD", -2, 2); -print '-- translate()'; -print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); -print '-- trim()'; -print trim("--", "--https://www.ibm.com--"); -print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); -print trim("", " asd "); -print trim("a$", "asd"); -print trim("^a", "asd"); -print '-- trim_start()'; -print trim_start("https://", "https://www.ibm.com"); -print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); -print trim_start("asd$", "asdw"); -print trim_start("asd$", "asd"); -print trim_start("d$", "asd"); -print '-- trim_end()'; -print trim_end("://www.ibm.com", "https://www.ibm.com"); -print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); -print trim_end("^asd", "wasd"); -print trim_end("^asd", "asd"); -print trim_end("^a", 
"asd"); -print '-- trim, trim_start, trim_end all at once'; -print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); -print '-- replace_regex'; -print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); -print '-- has_any_index()'; -print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); -print '-- parse_version()'; -print parse_version(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- print parse_version(''); -> NULL -print parse_version('1.2.3.40'); -print parse_version('1.2'); -print parse_version(strcat('1.', '2')); -print parse_version('1.2.4.5.6'); -print parse_version('moo'); -print parse_version('moo.boo.foo'); -print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); -Versions | project parse_version(Version); -print '-- parse_json()'; -print parse_json(dynamic([1, 2, 3])); -print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); -print '-- parse_command_line()'; -print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -print parse_command_line('', 'windows'); -print parse_command_line(strrep(' ', 6), 'windows'); --- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] --- print parse_command_line("yolo swag 'asd bcd' \"moo moo \"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] --- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] -print '-- reverse()'; -print reverse(123); -print reverse(123.34); -print reverse(''); -print reverse("asd"); -print reverse(dynamic([])); -print reverse(dynamic([1, 2, 3])); -print reverse(dynamic(['Darth', "Vader"])); -print reverse(datetime(2017-10-15 12:00)); --- print reverse(timespan(3h)); -> 00:00:30 -Customers | where Education contains 'degree' | order by reverse(FirstName); -print '-- parse_csv()'; -print parse_csv(''); -print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -print parse_csv('aaa'); -print result=parse_csv('aa,b,cc'); -print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); --- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] --- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.reference b/tests/queries/0_stateless/02366_kql_makeseries.reference deleted file mode 100644 index 8e7fde997bfb..000000000000 --- a/tests/queries/0_stateless/02366_kql_makeseries.reference +++ /dev/null @@ -1,60 +0,0 @@ --- from to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,2,0] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,500,0] -Aldi 
Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- from -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,2] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [2,0] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [500,0] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- without from/to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- without by -['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [70,334,54] --- without aggregation alias -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] --- assign group alias -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] --- 3d step -Costco Snargaluff ['2016-09-10 00:00:00.000000000'] [134.66666666666666] -Costco Apple ['2016-09-10 00:00:00.000000000'] [2] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000'] [5.5] --- numeric column -Costco Snargaluff [10,11,12,13,14] [200,0,102,0,0] -Aldi Snargaluff [10,11,12,13,14] [0,500,0,0,0] -Aldi Apple [10,11,12,13,14] [5,0,6,0,0] -Costco Apple [10,11,12,13,14] [0,2,0,0,0] --- from -Costco Snargaluff [10,11,12] [200,0,102] -Aldi Snargaluff [10,11] [0,500] -Aldi Apple [10,11,12] [5,0,6] -Costco Apple [10,11] [0,2] --- to -Costco Snargaluff [8,12,16] [200,102,0] -Aldi Snargaluff [8,12,16] [500,0,0] -Aldi Apple [8,12,16] [5,6,0] -Costco Apple [8,12,16] [2,0,0] --- without from/to -Costco Snargaluff [10,12] [200,102] -Aldi Snargaluff [10] [500] -Aldi Apple [10,12] [5,6] -Costco Apple [10] [2] --- without by -[10,12] [202,54] -['2017-01-01 00:00:00.000000000','2017-01-02 00:00:00.000000000','2017-01-03 00:00:00.000000000','2017-01-04 00:00:00.000000000','2017-01-05 00:00:00.000000000','2017-01-06 00:00:00.000000000','2017-01-07 00:00:00.000000000','2017-01-08 00:00:00.000000000','2017-01-09 00:00:00.000000000'] [4,3,5,0,10.5,4,3,8,6.5] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql 
b/tests/queries/0_stateless/02366_kql_makeseries.sql deleted file mode 100644 index c9ca91c0be0c..000000000000 --- a/tests/queries/0_stateless/02366_kql_makeseries.sql +++ /dev/null @@ -1,77 +0,0 @@ --- Azure Data Explore Test Data --- let make_series_test_table = datatable (Supplier:string, Fruit:string, Price: real, Purchase:datetime) --- [ --- 'Aldi','Apple',4,'2016-09-10', --- 'Costco','Apple',2,'2016-09-11', --- 'Aldi','Apple',6,'2016-09-10', --- 'Costco','Snargaluff',100,'2016-09-12', --- 'Aldi','Apple',7,'2016-09-12', --- 'Aldi','Snargaluff',400,'2016-09-11', --- 'Costco','Snargaluff',104,'2016-09-12', --- 'Aldi','Apple',5,'2016-09-12', --- 'Aldi','Snargaluff',600,'2016-09-11', --- 'Costco','Snargaluff',200,'2016-09-10', --- ]; -DROP TABLE IF EXISTS make_series_test_table; -CREATE TABLE make_series_test_table -( - Supplier Nullable(String), - Fruit String , - Price Float64, - Purchase Date -) ENGINE = Memory; -INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); -DROP TABLE IF EXISTS make_series_test_table2; -CREATE TABLE make_series_test_table2 -( - Supplier Nullable(String), - Fruit String , - Price Int32, - Purchase Int32 -) ENGINE = Memory; -INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); -DROP TABLE IF EXISTS make_series_test_table3; -CREATE TABLE make_series_test_table3 -( - timestamp datetime, - metric Float64, -) ENGINE = Memory; -INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00', 'UTC'), 50), (parseDateTimeBestEffort('2017-01-01', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-02', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-03', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-03T03:00', 'UTC'), 6), (parseDateTimeBestEffort('2017-01-05', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-05T13:40', 'UTC'), 13), (parseDateTimeBestEffort('2017-01-06', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-07', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-08', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-08T21:00', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-09', 'UTC'), 2), (parseDateTimeBestEffort('2017-01-09T12:00', 'UTC'), 11), (parseDateTimeBestEffort('2017-01-10T05:00', 'UTC'), 5); - --- This test requies sorting after some of aggregations but I don't know KQL, sorry -set max_bytes_before_external_group_by = 0; -set dialect = 'kusto'; - -print '-- from to'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- from'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- to'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- without from/to'; 
-make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- without by'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d; -print '-- without aggregation alias'; -make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit; -print '-- assign group alias'; -make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit; -print '-- 3d step'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit | order by Supplier, Fruit; - -print '-- numeric column' -print '-- from to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; -print '-- from'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; -print '-- to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; -print '-- without from/to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; -print '-- without by'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; - -make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d - --- print '-- summarize --' --- make_series_test_table | summarize count() by format_datetime(bin(Purchase, 1d), 'yy-MM-dd'); diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference deleted file mode 100644 index 25be070eb0b7..000000000000 --- a/tests/queries/0_stateless/02366_kql_mvexpand.reference +++ /dev/null @@ -1,65 +0,0 @@ --- mv-expand -- --- mv_expand_test_table | mv-expand c -- -1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] --- mv_expand_test_table | mv-expand c, d -- -1 ['Salmon','Steak','Chicken'] 1 5 -1 ['Salmon','Steak','Chicken'] 2 6 -1 ['Salmon','Steak','Chicken'] 3 7 -1 ['Salmon','Steak','Chicken'] 4 8 --- mv_expand_test_table | mv-expand b | mv-expand c -- -1 Salmon 1 [5,6,7,8] -1 Salmon 2 [5,6,7,8] -1 Salmon 3 [5,6,7,8] -1 Salmon 4 [5,6,7,8] -1 Steak 1 [5,6,7,8] -1 Steak 2 [5,6,7,8] -1 Steak 3 [5,6,7,8] -1 Steak 4 [5,6,7,8] -1 Chicken 1 [5,6,7,8] -1 Chicken 2 [5,6,7,8] -1 Chicken 3 [5,6,7,8] -1 Chicken 4 [5,6,7,8] --- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- -0 1 Salmon 1 5 -1 1 Steak 2 6 -2 1 Chicken 3 7 -3 1 4 8 --- mv_expand_test_table | mv-expand array_concat(c,d) -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 --- mv_expand_test_table | mv-expand x = c, y = d -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 
3 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 8 --- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 --- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- -1 1 -2 1 --- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) -- -0 1 ['Salmon','Steak','Chicken'] 1 true -1 1 ['Salmon','Steak','Chicken'] 2 true -2 1 ['Salmon','Steak','Chicken'] 3 true -3 1 ['Salmon','Steak','Chicken'] 4 true --- mv_expand_test_table | mv-expand c to typeof(bool) -- -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql deleted file mode 100644 index e77986096463..000000000000 --- a/tests/queries/0_stateless/02366_kql_mvexpand.sql +++ /dev/null @@ -1,35 +0,0 @@ --- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ --- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) --- ] - -DROP TABLE IF EXISTS mv_expand_test_table; -CREATE TABLE mv_expand_test_table -( - a UInt8, - b Array(String), - c Array(Int8), - d Array(Int8) -) ENGINE = Memory; -INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); -set dialect='kusto'; -print '-- mv-expand --'; -print '-- mv_expand_test_table | mv-expand c --'; -mv_expand_test_table | mv-expand c; -print '-- mv_expand_test_table | mv-expand c, d --'; -mv_expand_test_table | mv-expand c, d; -print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; -mv_expand_test_table | mv-expand b | mv-expand c; -print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; -mv_expand_test_table | mv-expand with_itemindex=index b, c, d; -print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; -mv_expand_test_table | mv-expand array_concat(c,d); -print '-- mv_expand_test_table | mv-expand x = c, y = d --'; -mv_expand_test_table | mv-expand x = c, y = d; -print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; -mv_expand_test_table | mv-expand xy = array_concat(c, d); -print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; -mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; -print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) --'; -mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); -print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; -mv_expand_test_table | mv-expand c to typeof(bool); diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.reference b/tests/queries/0_stateless/02366_kql_native_interval_format.reference deleted file mode 100644 index 8a12c6885c4e..000000000000 --- a/tests/queries/0_stateless/02366_kql_native_interval_format.reference +++ /dev/null @@ -1,23 +0,0 @@ -numeric -kusto -00:00:00 -00:00:00.0000001 -00:00:00.0010000 
-00:00:42 -01:06:00 -2.18:00:00 -5.00:00:00 -7.00:00:00 -14.00:00:00 -('00:01:12','21.00:00:00','00:00:00.0000002') -numeric -99 -100 -1 -42 -66 -66 -5 -1 -2 -(72,3,200) diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 b/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 deleted file mode 100644 index 0731687222da..000000000000 --- a/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 +++ /dev/null @@ -1,16 +0,0 @@ -select value from system.settings where name = 'interval_output_format'; - -{% for format in ['kusto', 'numeric'] -%} -select '{{ format }}'; -set interval_output_format = '{{ format }}'; -select toIntervalNanosecond(99); -select toIntervalNanosecond(100); -select toIntervalMillisecond(1); -select toIntervalSecond(42); -select toIntervalMinute(66); -select toIntervalHour(66); -select toIntervalDay(5); -select toIntervalWeek(1); -select toIntervalWeek(2); -select toIntervalSecond(72) + toIntervalWeek(3) + toIntervalNanosecond(200); -{% endfor -%} diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference deleted file mode 100644 index 4e0987aa5c38..000000000000 --- a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference +++ /dev/null @@ -1,60 +0,0 @@ --- #1 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 --- #2 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #3 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #4 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #5 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #6 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #7 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #8 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #9 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #10 -- --- #11 -- --- #12 -- --- #13 -- --- #14 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #15 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql 
deleted file mode 100644 index 0b02faa06807..000000000000 --- a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); -Select '-- #1 --' ; -select * from kql($$Customers | where FirstName !in ('Peter', 'Latoya')$$); -Select '-- #2 --' ; -select * from kql($$Customers | where FirstName !in ("test", "test2")$$); -Select '-- #3 --' ; -select * from kql($$Customers | where FirstName !contains 'Pet'$$); -Select '-- #4 --' ; -select * from kql($$Customers | where FirstName !contains_cs 'Pet'$$); -Select '-- #5 --' ; -select * from kql($$Customers | where FirstName !endswith 'ter'$$); -Select '-- #6 --' ; -select * from kql($$Customers | where FirstName !endswith_cs 'ter'$$); -Select '-- #7 --' ; -select * from kql($$Customers | where FirstName != 'Peter'$$); -Select '-- #8 --' ; -select * from kql($$Customers | where FirstName !has 'Peter'$$); -Select '-- #9 --' ; -select * from kql($$Customers | where FirstName !has_cs 'peter'$$); -Select '-- #10 --' ; --- select * from kql($$Customers | where FirstName !hasprefix 'Peter'$$); -- will enable when analyzer fixed `and` issue -Select '-- #11 --' ; ---select * from kql($$Customers | where FirstName !hasprefix_cs 'Peter'$$); -Select '-- #12 --' ; ---select * from kql($$Customers | where FirstName !hassuffix 'Peter'$$); -Select '-- #13 --' ; ---select * from kql($$Customers | where FirstName !hassuffix_cs 'Peter'$$); -Select '-- #14 --' ; -select * from kql($$Customers | where FirstName !startswith 'Peter'$$); -Select '-- #15 --' ; -select * from kql($$Customers | where FirstName !startswith_cs 'Peter'$$); -DROP TABLE IF EXISTS Customers; diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference deleted file mode 100644 index aeb42feb6bea..000000000000 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ /dev/null @@ -1,92 +0,0 @@ --- test summarize -- -12 25 46 32.416666666666664 389 -Skilled Manual 5 26 36 30.2 151 -Professional 6 25 46 34.166666666666664 205 -Management abcd defg 1 33 33 33 33 -Skilled Manual 0 -Professional 2 -Management abcd defg 0 -Skilled Manual 36 -Professional 38 -Management abcd defg 33 -Skilled Manual 26 -Professional 25 -Management abcd defg 33 -Skilled Manual 30.2 -Professional 29.25 -Management abcd defg 33 -Skilled Manual 151 -Professional 117 -Management abcd defg 33 -4 -2 -40 2 -30 4 -20 6 -Skilled Manual 5 -Professional 6 -Management abcd defg 1 --- make_list() -- -Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] -Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['Bachelors','Graduate Degree'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] --- make_list_if() -- -Skilled Manual ['Edward','Christine'] -Professional ['Dalton','Angel'] -Management abcd defg ['Stephanie'] 
-Skilled Manual ['Edward'] -Professional ['Dalton'] -Management abcd defg ['Stephanie'] --- make_set() -- -Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['Graduate Degree','Bachelors'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] --- make_set_if() -- -Skilled Manual ['Partial College','High School'] -Professional ['Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['High School'] -Professional ['Partial College'] -Management abcd defg ['Bachelors'] --- stdev() -- -6.855102059227432 --- stdevif() -- -7.557189365836421 --- binary_all_and -- -42 --- binary_all_or -- -46 --- binary_all_xor -- -4 -43.8 -25.55 30.5 43.8 -30.5 -35 -[25,35,45] --- Summarize following sort -- -Skilled Manual 5 -Professional 6 -Management abcd defg 1 --- summarize with bin -- -0 1 -245000 2 -0 1 -245 2 -0 1 -245 2 -2015-10-12 00:00:00.000000000 -2016-10-12 00:00:00.000000000 --- make_list_with_nulls -- -['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] -Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] -Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] -Management abcd defg ['Stephanie'] -Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] -Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] -Management abcd defg ['Stephanie'] [33] diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql deleted file mode 100644 index bb12d1f251f5..000000000000 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ /dev/null @@ -1,102 +0,0 @@ --- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ --- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, --- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, --- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, --- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, --- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, --- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, --- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, --- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, --- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, --- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, --- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, --- dynamic(null), 'why', 'Professional', 'Partial College', 38 --- ] - -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial 
College',38); - -drop table if exists EventLog; -create table EventLog -( - LogEntry String, - Created Int64 -) ENGINE = Memory; - -insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); - -drop table if exists Dates; -create table Dates -( - EventTime DateTime, -) ENGINE = Memory; - -Insert into Dates VALUES ('2015-10-12') , ('2016-10-12') -Select '-- test summarize --' ; -set dialect='kusto'; -Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); -Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation | order by Occupation; -Customers | summarize countif(Age>40) by Occupation | order by Occupation; -Customers | summarize MyMax = maxif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MyMin = minif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MySum = sumif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize dcount(Education); -Customers | summarize dcountif(Education, Occupation=='Professional'); -Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; -Customers | summarize job_count = count() by Occupation | where job_count > 0 | order by Occupation; -Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } - -print '-- make_list() --'; -Customers | summarize f_list = make_list(Education) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list(Education, 2) by Occupation | sort by Occupation; -print '-- make_list_if() --'; -Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation | sort by Occupation; -print '-- make_set() --'; -Customers | summarize f_list = make_set(Education) by Occupation | sort by Occupation; -Customers | summarize f_list = make_set(Education, 2) by Occupation | sort by Occupation; -print '-- make_set_if() --'; -Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation | sort by Occupation; -Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation | sort by Occupation; -print '-- stdev() --'; -Customers | project Age | summarize stdev(Age); -print '-- stdevif() --'; -Customers | project Age | summarize stdevif(Age, Age%2==0); -print '-- binary_all_and --'; -Customers | project Age | where Age > 40 | summarize binary_all_and(Age); -print '-- binary_all_or --'; -Customers | project Age | where Age > 40 | summarize binary_all_or(Age); -print '-- binary_all_xor --'; -Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); - -Customers | project Age | summarize percentile(Age, 95); -Customers | project Age | summarize percentiles(Age, 5, 50, 95)|project round(percentiles_Age[0],2),round(percentiles_Age[1],2),round(percentiles_Age[2],2); -Customers | project Age | summarize percentiles(Age, 5, 50, 95)[1]; -Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); -Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); - -print '-- Summarize following sort --'; -Customers | sort by FirstName | summarize count() by Occupation | sort by Occupation; - -print '-- 
summarize with bin --'; -EventLog | summarize count=count() by bin(Created, 1000) | sort by count asc; -EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s) | sort by count asc; -EventLog | summarize count=count() by time_label=bin(Created/1000, 1s) | sort by count asc; -Dates | project bin(datetime(EventTime), 1m); -print '-- make_list_with_nulls --'; -Customers | summarize t = make_list_with_nulls(FirstName); -Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) by Occupation | sort by Occupation; --- TODO: --- arg_max() --- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference deleted file mode 100644 index e70c02ce34fa..000000000000 --- a/tests/queries/0_stateless/02366_kql_tabular.reference +++ /dev/null @@ -1,139 +0,0 @@ --- test Query only has table name: -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Joshua Lee Professional Partial College 26 -Edward Hernandez Skilled Manual High School 36 -Dalton Wood Professional Partial College 42 -Christine Nara Skilled Manual Partial College 33 -Cameron Rodriguez Professional Partial College 28 -Angel Stewart Professional Partial College 46 --- Query has Column Selection -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional -Edward Hernandez Skilled Manual -Dalton Wood Professional -Christine Nara Skilled Manual -Cameron Rodriguez Professional -Angel Stewart Professional --- Query has limit -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional --- Query has second limit with bigger value -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional --- Query has second limit with smaller value -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual --- Query has second Column selection -- -Theodore Diaz -Stephanie Cox -Peter Nara --- Query has second Column selection with extra column -- --- Query with desc sort -- -Theodore -Stephanie -Peter -Latoya -Joshua -Skilled Manual -Skilled Manual -Professional -Professional -Management --- Query with asc sort -- -Management -Professional -Professional -Skilled Manual -Skilled Manual --- Query with sort (without keyword asc desc) -- -Theodore -Stephanie -Peter -Latoya -Joshua -Skilled Manual -Skilled Manual -Professional -Professional -Management --- Query with sort 2 Columns with different direction -- -Stephanie Cox Management -Latoya Shen Professional -Joshua Lee Professional -Peter Nara Skilled Manual -Theodore Diaz Skilled Manual --- Query with second sort -- -Stephanie Cox Management -Latoya Shen Professional -Joshua Lee Professional -Peter Nara Skilled Manual -Theodore Diaz Skilled Manual --- Test String Equals (==) -- -Theodore Diaz Skilled Manual -Peter Nara Skilled Manual -Edward Hernandez Skilled Manual -Christine Nara Skilled Manual --- Test String Not equals (!=) -- -Stephanie Cox Management -Latoya Shen 
Professional -Joshua Lee Professional -Dalton Wood Professional -Cameron Rodriguez Professional -Angel Stewart Professional --- Test Filter using a list (in) -- -Theodore Diaz Skilled Manual Bachelors -Stephanie Cox Management Bachelors -Edward Hernandez Skilled Manual High School --- Test Filter using a list (!in) -- -Peter Nara Skilled Manual Graduate Degree -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Christine Nara Skilled Manual Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (contains_cs) -- -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Christine Nara Skilled Manual Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (startswith_cs) -- -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (endswith_cs) -- -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College --- Test Filter using numerical equal (==) -- -Peter Nara Skilled Manual Graduate Degree 26 -Joshua Lee Professional Partial College 26 --- Test Filter using numerical great and less (> , <) -- -Stephanie Cox Management Bachelors 33 -Edward Hernandez Skilled Manual High School 36 -Christine Nara Skilled Manual Partial College 33 --- Test Filter using multi where -- -Dalton Wood Professional Partial College 42 -Angel Stewart Professional Partial College 46 --- Complex query with unknown function -- --- Missing column in front of startsWith -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql deleted file mode 100644 index f73c4c09ccaa..000000000000 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ /dev/null @@ -1,88 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); - -set dialect='kusto'; -print '-- test Query only has table name: --'; -Customers; - -print '-- Query has Column Selection --'; -Customers | project FirstName,LastName,Occupation; - -print '-- Query has limit --'; -Customers | project FirstName,LastName,Occupation | take 5; -Customers | project FirstName,LastName,Occupation | limit 5; - -print '-- Query has second limit with bigger value --'; -Customers | project FirstName,LastName,Occupation | take 5 | take 7; - -print '-- Query has second limit with smaller value --'; -Customers | project FirstName,LastName,Occupation | take 5 | take 3; - -print '-- Query has second 
Column selection --'; -Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; - -print '-- Query has second Column selection with extra column --'; -Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } - -print '-- Query with desc sort --'; -Customers | project FirstName | take 5 | sort by FirstName desc; -Customers | project Occupation | take 5 | order by Occupation desc; - -print '-- Query with asc sort --'; -Customers | project Occupation | take 5 | sort by Occupation asc; - -print '-- Query with sort (without keyword asc desc) --'; -Customers | project FirstName | take 5 | sort by FirstName; -Customers | project Occupation | take 5 | order by Occupation; - -print '-- Query with sort 2 Columns with different direction --'; -Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; - -print '-- Query with second sort --'; -Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; - -print '-- Test String Equals (==) --'; -Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; - -print '-- Test String Not equals (!=) --'; -Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; - -print '-- Test Filter using a list (in) --'; -Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); - -print '-- Test Filter using a list (!in) --'; -set dialect='kusto'; -Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); - -print '-- Test Filter using common string operations (contains_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; - -print '-- Test Filter using common string operations (startswith_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; - -print '-- Test Filter using common string operations (endswith_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; - -print '-- Test Filter using numerical equal (==) --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; - -print '-- Test Filter using numerical great and less (> , <) --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; - -print '-- Test Filter using multi where --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; - -print '-- Complex query with unknown function --'; -hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } - -print '-- Missing column in front of startsWith --'; -StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference new file mode 100644 index 000000000000..f1d036b08bf2 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference @@ -0,0 +1,467 @@ +-- { 
echoOn } +-- Should be allowed since destination partition expr is monotonically increasing and compatible +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though +-- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +20100302 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +20100302 +-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION 0 FROM 
source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +-- Should be allowed because dst partition exp is monot inc and data is not split +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; +SELECT * FROM source ORDER BY productName; +mop general +rice food +spaghetti food +SELECT * FROM destination ORDER BY productName; +rice food +spaghetti food +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +59532f3c39a412a413f0f014c7750a9d +59532f3c39a412a413f0f014c7750a9d +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; +SELECT * FROM source ORDER BY productName; +mop general +rice food +spaghetti food +SELECT * FROM destination ORDER BY productName; +rice food +spaghetti food +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +59532f3c39a412a413f0f014c7750a9d +59532f3c39a412a413f0f014c7750a9d +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); +CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); +INSERT INTO TABLE source VALUES (1267495261123); +ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; +SELECT * FROM source ORDER BY timestamp; +1267495261123 +SELECT * FROM destination ORDER BY timestamp; +1267495261123 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +2010 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '14670' from source; +SELECT * FROM source ORDER BY timestamp; +1267495261123 +SELECT * FROM destination ORDER BY timestamp; +1267495261123 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +2010 +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); +CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); +ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; +SELECT * 
FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +2011-02-02 02:01:03 1 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +14670 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '2010' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +2011-02-02 02:01:03 1 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +14670 +-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately +-- fall into the same partition. +-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that +-- partition by tuple() is accepted. 
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1-2 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1-2 +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +-- Should be allowed. Special test case, tricky to explain. 
First column of source partition expression is +-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations +-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; +CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); +ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +5 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +5 +-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically +-- increasing in the source partition min max indexes. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +INSERT INTO TABLE source VALUES (6, 12); +ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +3-6 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (6, 12) from source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +3-6 +-- Should be allowed. The same scenario as above, but partition expressions inverted. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +INSERT INTO TABLE source VALUES (6, 12); +ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +6-12 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (3, 6) from source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +6-12 +-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. 
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE + source(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMMDD(timestamp) + ORDER BY tuple(); +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source 
(timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } +-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } +-- Should not be allowed because dst partition exp depends on a different set of columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } +-- Should not be allowed because dst partition exp is not monotonically increasing +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); +CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); +INSERT INTO TABLE source VALUES ('bread'), ('mop'); +INSERT INTO TABLE source VALUES ('broccoli'); +ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } +-- Empty/ non-existent partition, same partition expression. Nothing should happen +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Empty/ non-existent partition, different partition expression. 
Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination REPLACE PARTITION '1' FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +INSERT INTO TABLE destination VALUES (1); +ALTER TABLE destination REPLACE PARTITION '1' FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql new file mode 100644 index 000000000000..9547d6ae249c --- /dev/null +++ b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql @@ -0,0 +1,485 @@ +-- { echoOn } +-- Should be allowed since destination partition expr is monotonically increasing and compatible +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed since destination partition expr is monotonically increasing and compatible. 
Note that even though +-- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); + +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); + +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION 0 FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because dst partition exp is monot inc and data is not split +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; + +SELECT * FROM source ORDER BY productName; +SELECT * FROM destination ORDER BY productName; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; + +SELECT * FROM source ORDER BY productName; +SELECT * FROM destination ORDER BY productName; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source 
(timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); +CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); + +INSERT INTO TABLE source VALUES (1267495261123); + +ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '14670' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); +CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); + +ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '2010' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately +-- fall into the same partition. +-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that +-- partition by tuple() is accepted. 
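For illustration, a minimal sketch of the behaviour these two "unpartitioned destination" cases rely on (assumed example, not part of the patch; the table name is hypothetical): a MergeTree table with an omitted PARTITION BY and one declared with PARTITION BY tuple() both put every part into the single partition with id 'all', so attaching any source partition cannot split its data.
CREATE TABLE unpartitioned_sketch (x Int) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO unpartitioned_sketch VALUES (1);
-- expected result: all
SELECT partition_id FROM system.parts WHERE table = 'unpartitioned_sketch' AND database = currentDatabase() AND active = 1;
DROP TABLE unpartitioned_sketch;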
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); + +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); + +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; + +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); + +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. Special test case, tricky to explain. First column of source partition expression is +-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations +-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. 
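A sketch of the property this comment describes (assumed illustration, not executed by the test): the compatibility check must evaluate the destination partition expression over the min/max of the column it actually depends on (`A` here), not over the first column of the source partition key (`timestamp`). For the single-row source table defined just below, that check amounts to:
SELECT
    min(A) AS min_A,
    max(A) AS max_A,
    -- destination is PARTITION BY A, so min and max must map to the same destination partition
    min_A = max_A AS maps_to_one_destination_partition
FROM source;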
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; +CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); + +ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically +-- increasing in the source partition min max indexes. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); + +INSERT INTO TABLE source VALUES (6, 12); + +ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (6, 12) from source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. The same scenario as above, but partition expressions inverted. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); + +INSERT INTO TABLE source VALUES (6, 12); + +ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (3, 6) from source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. 
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE + source(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMMDD(timestamp) + ORDER BY tuple(); + +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); + +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } + +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); + +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); + +ALTER TABLE destination 
ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } + +-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } + +-- Should not be allowed because dst partition exp depends on a different set of columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } + +-- Should not be allowed because dst partition exp is not monotonically increasing +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); +CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); + +INSERT INTO TABLE source VALUES ('bread'), ('mop'); +INSERT INTO TABLE source VALUES ('broccoli'); + +ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } + +-- Empty/ non-existent partition, same partition expression. Nothing should happen +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Empty/ non-existent partition, different partition expression. 
Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination REPLACE PARTITION '1' FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; + +INSERT INTO TABLE destination VALUES (1); + +ALTER TABLE destination REPLACE PARTITION '1' FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index aa58d27fa50a..f5c48608639e 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -5,6 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# In case of parallel parsing and small block +# (--min_chunk_bytes_for_parallel_parsing) we may have multiple blocks, and +# this will break sorting order, so let's limit number of threads to avoid +# reordering. 
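For context, the same effect can be approximated per query (an assumed illustration, not what the script does; the script instead appends the flags to the client invocation so that every query in the file is covered):
-- single-threaded read with parallel parsing disabled keeps the output row order stable
SELECT * FROM file('02475_data.bsonEachRow')
SETTINGS max_threads = 1, input_format_parallel_parsing = 0;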
+CLICKHOUSE_CLIENT+="--allow_repeated_settings --max_threads 1" + echo "Integers" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as bool, number::Int8 as int8, number::UInt8 as uint8, number::Int16 as int16, number::UInt16 as uint16, number::Int32 as int32, number::UInt32 as uint32, number::Int64 as int64, number::UInt64 as uint64 from numbers(5) settings engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'bool Bool, int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64')" diff --git a/tests/queries/0_stateless/02495_concat_with_separator.reference b/tests/queries/0_stateless/02495_concat_with_separator.reference index 8f0ea917f4be..ebff5deb6aa7 100644 --- a/tests/queries/0_stateless/02495_concat_with_separator.reference +++ b/tests/queries/0_stateless/02495_concat_with_separator.reference @@ -14,6 +14,45 @@ 1 1 1 +1 +1 \N \N \N +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02495_concat_with_separator.sql b/tests/queries/0_stateless/02495_concat_with_separator.sql index 916c4cda1b75..7167d48a1da5 100644 --- a/tests/queries/0_stateless/02495_concat_with_separator.sql +++ b/tests/queries/0_stateless/02495_concat_with_separator.sql @@ -1,27 +1,72 @@ -select concatWithSeparator('|', 'a', 'b') == 'a|b'; -select concatWithSeparator('|', 'a', materialize('b')) == 'a|b'; -select concatWithSeparator('|', materialize('a'), 'b') == 'a|b'; -select concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b'; - -select concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b'; -select concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b'; - -select concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b'; -select concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b'; - -select concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b'; - -select concatWithSeparator(null, 'a', 'b') == null; -select concatWithSeparator('1', null, 'b') == null; -select concatWithSeparator('1', 'a', null) == null; - -select concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError 44 } -select concatWithSeparator(); -- { serverError 42 } -select concatWithSeparator('|', 'a', 100); -- { serverError 43 } +SET allow_suspicious_low_cardinality_types=1; + +-- negative tests +SELECT concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError ILLEGAL_COLUMN } +SELECT concatWithSeparator(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +-- special cases +SELECT concatWithSeparator('|') = ''; +SELECT concatWithSeparator('|', 'a') == 'a'; + +SELECT concatWithSeparator('|', 'a', 'b') == 'a|b'; +SELECT 
concatWithSeparator('|', 'a', materialize('b')) == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), 'b') == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b'; + +SELECT concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b'; + +SELECT concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b'; +SELECT concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b'; + +SELECT concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b'; + +SELECT concatWithSeparator(null, 'a', 'b') == null; +SELECT concatWithSeparator('1', null, 'b') == null; +SELECT concatWithSeparator('1', 'a', null) == null; + +-- Const String + non-const non-String/non-FixedString type' +SELECT concatWithSeparator('|', 'a', materialize(42 :: Int8)) == 'a|42'; +SELECT concatWithSeparator('|', 'a', materialize(43 :: Int16)) == 'a|43'; +SELECT concatWithSeparator('|', 'a', materialize(44 :: Int32)) == 'a|44'; +SELECT concatWithSeparator('|', 'a', materialize(45 :: Int64)) == 'a|45'; +SELECT concatWithSeparator('|', 'a', materialize(46 :: Int128)) == 'a|46'; +SELECT concatWithSeparator('|', 'a', materialize(47 :: Int256)) == 'a|47'; +SELECT concatWithSeparator('|', 'a', materialize(48 :: UInt8)) == 'a|48'; +SELECT concatWithSeparator('|', 'a', materialize(49 :: UInt16)) == 'a|49'; +SELECT concatWithSeparator('|', 'a', materialize(50 :: UInt32)) == 'a|50'; +SELECT concatWithSeparator('|', 'a', materialize(51 :: UInt64)) == 'a|51'; +SELECT concatWithSeparator('|', 'a', materialize(52 :: UInt128)) == 'a|52'; +SELECT concatWithSeparator('|', 'a', materialize(53 :: UInt256)) == 'a|53'; +SELECT concatWithSeparator('|', 'a', materialize(42.42 :: Float32)) == 'a|42.42'; +SELECT concatWithSeparator('|', 'a', materialize(43.43 :: Float64)) == 'a|43.43'; +SELECT concatWithSeparator('|', 'a', materialize(44.44 :: Decimal(2))) == 'a|44'; +SELECT concatWithSeparator('|', 'a', materialize(true :: Bool)) == 'a|true'; +SELECT concatWithSeparator('|', 'a', materialize(false :: Bool)) == 'a|false'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: String)) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: FixedString(3))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: Nullable(String))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: Nullable(FixedString(3)))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(String))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: LowCardinality(FixedString(3)))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(Nullable(String)))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: 
LowCardinality(Nullable(FixedString(3))))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(Nullable(UInt32)))) == 'a|42'; +SELECT concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(UInt32))) == 'a|42'; +SELECT concatWithSeparator('|', 'a', materialize('fae310ca-d52a-4923-9e9b-02bf67f4b009' :: UUID)) == 'a|fae310ca-d52a-4923-9e9b-02bf67f4b009'; +SELECT concatWithSeparator('|', 'a', materialize('2023-11-14' :: Date)) == 'a|2023-11-14'; +SELECT concatWithSeparator('|', 'a', materialize('2123-11-14' :: Date32)) == 'a|2123-11-14'; +SELECT concatWithSeparator('|', 'a', materialize('2023-11-14 05:50:12' :: DateTime('Europe/Amsterdam'))) == 'a|2023-11-14 05:50:12'; +SELECT concatWithSeparator('|', 'a', materialize('hallo' :: Enum('hallo' = 1))) == 'a|hallo'; +SELECT concatWithSeparator('|', 'a', materialize(['foo', 'bar'] :: Array(String))) == 'a|[\'foo\',\'bar\']'; +SELECT concatWithSeparator('|', 'a', materialize((42, 'foo') :: Tuple(Int32, String))) == 'a|(42,\'foo\')'; +SELECT concatWithSeparator('|', 'a', materialize(map(42, 'foo') :: Map(Int32, String))) == 'a|{42:\'foo\'}'; +SELECT concatWithSeparator('|', 'a', materialize('122.233.64.201' :: IPv4)) == 'a|122.233.64.201'; +SELECT concatWithSeparator('|', 'a', materialize('2001:0001:130F:0002:0003:09C0:876A:130B' :: IPv6)) == 'a|2001:0001:130F:0002:0003:09C0:876A:130B'; diff --git a/tests/queries/0_stateless/02661_quantile_approx.reference b/tests/queries/0_stateless/02661_quantile_approx.reference index 8369363aa9b2..0ee846a268bd 100644 --- a/tests/queries/0_stateless/02661_quantile_approx.reference +++ b/tests/queries/0_stateless/02661_quantile_approx.reference @@ -19,6 +19,20 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe [99,199,249,313,776] select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); [100,200,250,314,777] +SELECT quantileGKMerge(100, 0.5)(x) +FROM +( + SELECT quantileGKState(100, 0.5)(number + 1) AS x + FROM numbers(49999) +); +24902 +SELECT quantilesGKMerge(100, 0.5, 0.9, 0.99)(x) +FROM +( + SELECT quantilesGKState(100, 0.5, 0.9, 0.99)(number + 1) AS x + FROM numbers(49999) +); +[24902,44518,49999] select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02661_quantile_approx.sql b/tests/queries/0_stateless/02661_quantile_approx.sql index 52c2979ad444..c0004260fa16 100644 --- a/tests/queries/0_stateless/02661_quantile_approx.sql +++ b/tests/queries/0_stateless/02661_quantile_approx.sql @@ -15,6 +15,19 @@ select quantilesGK(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +SELECT quantileGKMerge(100, 0.5)(x) +FROM +( + SELECT quantileGKState(100, 0.5)(number + 1) AS x + FROM numbers(49999) +); + +SELECT quantilesGKMerge(100, 0.5, 0.9, 0.99)(x) +FROM +( + SELECT quantilesGKState(100, 0.5, 0.9, 0.99)(number + 1) AS x + FROM numbers(49999) +); select medianGK()(number) from numbers(10) SETTINGS 
allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02676_trailing_commas.reference b/tests/queries/0_stateless/02676_trailing_commas.reference index 76d173ca23e6..cfb2ccd6a0f6 100644 --- a/tests/queries/0_stateless/02676_trailing_commas.reference +++ b/tests/queries/0_stateless/02676_trailing_commas.reference @@ -3,3 +3,6 @@ 1 1 2 0 1 +(1,'foo') +(1,'foo') +(1,(2,'foo')) diff --git a/tests/queries/0_stateless/02676_trailing_commas.sql b/tests/queries/0_stateless/02676_trailing_commas.sql index 048405c4d20a..7fb64bb57a35 100644 --- a/tests/queries/0_stateless/02676_trailing_commas.sql +++ b/tests/queries/0_stateless/02676_trailing_commas.sql @@ -3,3 +3,7 @@ SELECT 1, FROM numbers(1); WITH 1 as a SELECT a, FROM numbers(1); WITH 1 as from SELECT from, from + from, from in [0], FROM numbers(1); SELECT n, FROM (SELECT 1 AS n); +SELECT (1, 'foo')::Tuple(a Int, b String,); +SELECT (1, 'foo')::Tuple(a Int, b String,,); -- { clientError SYNTAX_ERROR } +SELECT (1, 'foo')::Tuple(Int, String,); +SELECT (1, (2,'foo'))::Tuple(Int, Tuple(Int, String,),); diff --git a/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference b/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference index dd2c30cc9f8b..d00491fd7e5b 100644 --- a/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference +++ b/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference @@ -1 +1 @@ -2024-01-01 Hello World +1 diff --git a/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql b/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql index 361bd0e0ec74..fcb0bf62859b 100644 --- a/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql +++ b/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql @@ -1,6 +1,6 @@ -CREATE table if not exists table_with_dot_column (date Date, regular_column String, `other_column.2` String) ENGINE = MergeTree() ORDER BY date; -INSERT INTO table_with_dot_column select '2020-01-01', 'Hello', 'World'; -INSERT INTO table_with_dot_column select '2024-01-01', 'Hello', 'World'; +CREATE TABLE IF NOT EXISTS table_with_dot_column (date Date, regular_column String, `other_column.2` String) ENGINE = MergeTree() ORDER BY date; +INSERT INTO table_with_dot_column SELECT '2020-01-01', 'Hello', 'World'; +INSERT INTO table_with_dot_column SELECT toDate(now() + 48*3600), 'Hello', 'World'; CREATE ROW POLICY IF NOT EXISTS row_policy ON table_with_dot_column USING toDate(date) >= today() - 30 TO ALL; -SELECT * FROM table_with_dot_column; +SELECT count(*) FROM table_with_dot_column; DROP TABLE table_with_dot_column; diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql index a9a6d3058b29..88561f9d895d 100644 --- a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql +++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-msan, no-cpu-aarch64 +-- Tags: no-fasttest, no-cpu-aarch64, no-msan drop table if exists dummy; CREATE TABLE dummy ( num1 Int32, num2 Enum8('foo' = 0, 'bar' = 1, 'tar' = 2) ) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 98e78045b85e..1ae24e4f3dad 100644 --- 
a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -6,7 +6,8 @@ CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; SET async_insert = 1; SET wait_for_async_insert = 0; -SET async_insert_busy_timeout_ms = 1000000; +SET async_insert_busy_timeout_min_ms = 1000000; +SET async_insert_busy_timeout_max_ms = 10000000; INSERT INTO t_async_inserts_flush VALUES (1) (2); INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 10} {"a": 20}; diff --git a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh index 804cd894ebc9..57950af89754 100755 --- a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh +++ b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS 02810_async_insert_dedup_collapsing" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE 02810_async_insert_dedup_collapsing (stringvalue String, sign Int8) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/{database}/02810_async_insert_dedup', 'r1', sign) ORDER BY stringvalue" -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=3000&async_insert_deduplicate=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=3000&async_insert_use_adaptive_busy_timeout=0&async_insert_deduplicate=1" # insert value with same key and sign so it's collapsed on insert ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string1', 1)" & @@ -36,4 +36,4 @@ wait ${CLICKHOUSE_CLIENT} -q "SELECT stringvalue FROM 02810_async_insert_dedup_collapsing ORDER BY stringvalue" ${CLICKHOUSE_CLIENT} -q "SELECT '------------'" -${CLICKHOUSE_CLIENT} -q "DROP TABLE 02810_async_insert_dedup_collapsing" \ No newline at end of file +${CLICKHOUSE_CLIENT} -q "DROP TABLE 02810_async_insert_dedup_collapsing" diff --git a/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference b/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference index dc30e7f8371f..28dae705335e 100644 --- a/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference +++ b/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference @@ -1,4 +1,4 @@ -[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0]] -[[4.04452e-8,-1.7846537e-8,-5.9488454e-9,0,0,0,0,0,0,-1.9868216e-8,-9.5297715e-8,2.2540547e-9,3.4229203e-8,8.573613e-8],[1.9999999,2,2,2,2,2,2,2,2,2,2,2,1.9999996,1.9999996],[1.1920929e-7,0,0,0,0,0,0,0,0,0,0,0,3.5762787e-7,2.3841858e-7]] 
-[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0]] -[[53.946846,-4.8119445,43.525013,-23.71359,-42.472305,-51.636955,-50.458298,-51.982674,37.62072,-15.9006605,56.65076,-5.809669,57.143845,-2.0370207,54.050922,-4.897961,43.954018,-23.808758,-42.651337,-51.86827,-50.709732,-52.18156,37.734905,-15.853402,56.91643,-5.8815174,57.253094,-2.012879,54.157806,-4.9817176,44.384747,-23.902956,-42.830154,-52.10025,-50.96271,-52.3829,37.84573,-15.81032,57.177113,-5.958963,57.356136,-1.9952412,54.27533,-5.066312,44.878296,-23.956438,-42.993656,-52.337124,-51.208073,-52.615646,37.91102,-15.8062525,57.49891,-6.056076,57.45604,-1.9797823,54.39525,-5.1483474,45.374573],[88.028534,88.95315,89.87776,90.802376,91.64913,92.49588,93.342636,94.19737,95.0521,95.90684,96.712975,97.51912,98.32526,98.36342,98.40158,98.43974,98.36777,98.29579,98.223816,98.536446,98.849075,99.161705,99.7552,100.348694,100.94219,101.53184,102.12149,102.711136,103.79921,104.88729,105.975365,107.50462,109.033875,110.56313,111.79767,113.032196,114.26673,115.02128,115.775826,116.53037,117.15541,117.78044,118.40548,118.86489,119.3243,119.783714,120.04031,120.29691,120.55351,120.78621,121.01891,121.25161,121.533585,121.81555,122.09753,122.41821,122.7389,123.059586,123.39267],[-2.97538,2.8587952,-23.402771,0.91121674,4.8231735,9.141075,8.115662,10.785301,0.32717896,5.99382,-12.363731,5.29055,0.53089905,-2.3264008,-3.4524994,1.4582214,-2.321785,2.51297,5.4275208,3.3318253,5.8606567,0.019859314,-4.4901123,-12.495293,-5.8586197,-1.650322,-11.374588,4.3017426,4.042984,1.094429,9.639885,3.3983307,-3.20372,-5.462883,-5.834961,-6.649292,-1.1124649,3.7890396,16.047066,-2.5714111,8.488449,-2.785202,2.319191,-0.79857635,13.797401,-5.827278,-6.0466614,-5.9597855,-7.3454437,-3.1705627,6.0700684,3.5546417,1.9675064,-0.7594757,2.446434,0.5615692,0.86585236,-3.9112396,1.2327576]] +[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0],[10.1,20.449999,40.340004,10.100001,20.45,40.34,10.100001,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.100002,20.45,40.34]] +[[4.04452e-8,-1.7846537e-8,-5.9488454e-9,0,0,0,0,0,0,-1.9868216e-8,-9.5297715e-8,2.2540547e-9,3.4229203e-8,8.573613e-8],[1.9999999,2,2,2,2,2,2,2,2,2,2,2,1.9999996,1.9999996],[1.1920929e-7,0,0,0,0,0,0,0,0,0,0,0,3.5762787e-7,2.3841858e-7],[1.9999999,2,2,2,2,2,2,2,2,2,1.9999999,2,1.9999996,1.9999998]] 
+[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0],[10.1,20.449999,40.340004,10.100001,20.45,40.34,10.100001,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.100002,20.45,40.34]] +[[53.946846,-4.8119445,43.525013,-23.71359,-42.472305,-51.636955,-50.458298,-51.982674,37.62072,-15.9006605,56.65076,-5.809669,57.143845,-2.0370207,54.050922,-4.897961,43.954018,-23.808758,-42.651337,-51.86827,-50.709732,-52.18156,37.734905,-15.853402,56.91643,-5.8815174,57.253094,-2.012879,54.157806,-4.9817176,44.384747,-23.902956,-42.830154,-52.10025,-50.96271,-52.3829,37.84573,-15.81032,57.177113,-5.958963,57.356136,-1.9952412,54.27533,-5.066312,44.878296,-23.956438,-42.993656,-52.337124,-51.208073,-52.615646,37.91102,-15.8062525,57.49891,-6.056076,57.45604,-1.9797823,54.39525,-5.1483474,45.374573],[88.028534,88.95315,89.87776,90.802376,91.64913,92.49588,93.342636,94.19737,95.0521,95.90684,96.712975,97.51912,98.32526,98.36342,98.40158,98.43974,98.36777,98.29579,98.223816,98.536446,98.849075,99.161705,99.7552,100.348694,100.94219,101.53184,102.12149,102.711136,103.79921,104.88729,105.975365,107.50462,109.033875,110.56313,111.79767,113.032196,114.26673,115.02128,115.775826,116.53037,117.15541,117.78044,118.40548,118.86489,119.3243,119.783714,120.04031,120.29691,120.55351,120.78621,121.01891,121.25161,121.533585,121.81555,122.09753,122.41821,122.7389,123.059586,123.39267],[-2.97538,2.8587952,-23.402771,0.91121674,4.8231735,9.141075,8.115662,10.785301,0.32717896,5.99382,-12.363731,5.29055,0.53089905,-2.3264008,-3.4524994,1.4582214,-2.321785,2.51297,5.4275208,3.3318253,5.8606567,0.019859314,-4.4901123,-12.495293,-5.8586197,-1.650322,-11.374588,4.3017426,4.042984,1.094429,9.639885,3.3983307,-3.20372,-5.462883,-5.834961,-6.649292,-1.1124649,3.7890396,16.047066,-2.5714111,8.488449,-2.785202,2.319191,-0.79857635,13.797401,-5.827278,-6.0466614,-5.9597855,-7.3454437,-3.1705627,6.0700684,3.5546417,1.9675064,-0.7594757,2.446434,0.5615692,0.86585236,-3.9112396,1.2327576],[141.97537,84.141205,133.40277,67.08878,49.176826,40.858925,42.88434,42.2147,132.67282,80.00618,153.36374,91.70945,155.4691,96.3264,152.4525,93.54178,142.32178,74.48703,55.57248,46.668175,48.139343,46.980145,137.49011,84.49529,157.85863,95.65032,159.37459,100.69826,157.95702,99.90557,150.3601,83.60167,66.20372,58.462883,60.834957,60.649296,152.11246,99.21096,172.95294,110.57141,174.51155,115.7852,172.68082,113.79858,164.2026,95.82728,77.04666,67.95979,69.34544,68.17056,158.92993,105.44536,179.0325,115.759476,179.55356,120.43843,177.13416,117.91124,168.76724]] diff --git a/tests/queries/0_stateless/02841_group_array_sorted.reference b/tests/queries/0_stateless/02841_group_array_sorted.reference new file mode 100644 index 000000000000..1043f949590f --- /dev/null +++ b/tests/queries/0_stateless/02841_group_array_sorted.reference @@ -0,0 +1,12 @@ +[0,1,2,3,4] +[0,1,2,3,4] 
+[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99] +['0','1','10','11','12','13','14','15','16','17','18','19','2','20','21','22','23','24','25','26','27','28','29','3','4','5','6','7','8','9'] +[0,0,1,1,2,2,3,3,4,4] +[[1,2,3,4],[2,3,4,5],[3,4,5,6]] +[(2,1),(15,25),(30,60),(100,200)] +[0.2,2.2,6.6,12.5] +['AAA','Aaa','aaa','abc','bbc'] +1000000 +1000000 +[0,1] diff --git a/tests/queries/0_stateless/02841_group_array_sorted.sql b/tests/queries/0_stateless/02841_group_array_sorted.sql new file mode 100644 index 000000000000..a8cd6791ff3d --- /dev/null +++ b/tests/queries/0_stateless/02841_group_array_sorted.sql @@ -0,0 +1,41 @@ +SELECT groupArraySorted(5)(number) FROM numbers(100); + +SELECT groupArraySorted(10)(number) FROM numbers(5); + +SELECT groupArraySorted(100)(number) FROM numbers(1000); + +SELECT groupArraySorted(30)(str) FROM (SELECT toString(number) as str FROM numbers(30)); + +SELECT groupArraySorted(10)(toInt64(number/2)) FROM numbers(100); + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Array(UInt64)) engine=MergeTree ORDER BY a; +INSERT INTO test VALUES ([3,4,5,6]), ([1,2,3,4]), ([2,3,4,5]); +SELECT groupArraySorted(3)(a) FROM test; +DROP TABLE test; + +CREATE TABLE IF NOT EXISTS test (id Int32, data Tuple(Int32, Int32)) ENGINE = MergeTree() ORDER BY id; +INSERT INTO test (id, data) VALUES (1, (100, 200)), (2, (15, 25)), (3, (2, 1)), (4, (30, 60)); +SELECT groupArraySorted(4)(data) FROM test; +DROP TABLE test; + +CREATE TABLE IF NOT EXISTS test (id Int32, data Decimal32(2)) ENGINE = MergeTree() ORDER BY id; +INSERT INTO test (id, data) VALUES (1, 12.5), (2, 0.2), (3, 6.6), (4, 2.2); +SELECT groupArraySorted(4)(data) FROM test; +DROP TABLE test; + +CREATE TABLE IF NOT EXISTS test (id Int32, data FixedString(3)) ENGINE = MergeTree() ORDER BY id; +INSERT INTO test (id, data) VALUES (1, 'AAA'), (2, 'bbc'), (3, 'abc'), (4, 'aaa'), (5, 'Aaa'); +SELECT groupArraySorted(5)(data) FROM test; +DROP TABLE test; + +CREATE TABLE test (id Decimal(76, 53), str String) ENGINE = MergeTree ORDER BY id; +INSERT INTO test SELECT number, 'test' FROM numbers(1000000); +SELECT count(id) FROM test; +SELECT count(concat(toString(id), 'a')) FROM test; +DROP TABLE test; + +CREATE TABLE test (id UInt64, agg AggregateFunction(groupArraySorted(2), UInt64)) engine=MergeTree ORDER BY id; +INSERT INTO test SELECT 1, groupArraySortedState(2)(number) FROM numbers(10); +SELECT groupArraySortedMerge(2)(agg) FROM test; +DROP TABLE test; diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index fd7f62d28bf2..3aaffe515789 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -9,3 +9,4 @@ $CLICKHOUSE_CLIENT -q "create table t1 (number UInt64) engine = MergeTree order $CLICKHOUSE_CLIENT -q "insert into t1 select number from numbers(10);" $CLICKHOUSE_CLIENT --max_threads=2 --max_result_rows=1 --result_overflow_mode=break -q "with tab as (select min(number) from t1 prewhere number in (select number from view(select number, row_number() OVER (partition by number % 2 ORDER BY number DESC) from numbers_mt(1e4)) where number != 2 order by number)) select number from t1 union all select * from tab;" > /dev/null +$CLICKHOUSE_CLIENT -q "SELECT * 
FROM system.tables WHERE 1 in (SELECT number from numbers(2)) AND database = currentDatabase() format Null" diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh index 82e2bb709f90..7f5830873366 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -n -q " CREATE TABLE t_async_insert_native_1 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " -async_insert_options="--async_insert 1 --wait_for_async_insert 0 --async_insert_busy_timeout_ms 1000000" +async_insert_options="--async_insert 1 --wait_for_async_insert 0 --async_insert_busy_timeout_min_ms 1000000 --async_insert_busy_timeout_max_ms 10000000" echo '{"id": 1, "s": "aaa"} {"id": 2, "s": "bbb"}' | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_1 FORMAT JSONEachRow' $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_1 FORMAT JSONEachRow {"id": 3, "s": "ccc"}' diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh index abe6be9e2bc6..c9d399607d03 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -n -q " CREATE TABLE t_async_insert_native_3 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " -async_insert_options="--async_insert 1 --wait_for_async_insert 0 --async_insert_busy_timeout_ms 1000000" +async_insert_options="--async_insert 1 --wait_for_async_insert 0 --async_insert_busy_timeout_min_ms 1000000 --async_insert_busy_timeout_max_ms 10000000" echo '{"id": 1, "s": "aaa"} {"id": 2, "s": "bbb"}' | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_3 FORMAT JSONEachRow' echo "(3, 'ccc') (4, 'ddd') (5, 'eee')" | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_3 FORMAT Values' diff --git a/tests/queries/0_stateless/02884_async_insert_skip_settings.sql b/tests/queries/0_stateless/02884_async_insert_skip_settings.sql index facd39d10790..9bc689fb4ecb 100644 --- a/tests/queries/0_stateless/02884_async_insert_skip_settings.sql +++ b/tests/queries/0_stateless/02884_async_insert_skip_settings.sql @@ -9,7 +9,8 @@ ORDER BY id; SET async_insert = 1; SET async_insert_deduplicate = 1; SET wait_for_async_insert = 0; -SET async_insert_busy_timeout_ms = 100000; +SET async_insert_busy_timeout_min_ms = 100000; +SET async_insert_busy_timeout_max_ms = 1000000; SET insert_deduplication_token = '1'; SET log_comment = 'async_insert_skip_settings_1'; diff --git a/tests/queries/0_stateless/02884_authentication_quota.reference b/tests/queries/0_stateless/02884_authentication_quota.reference new file mode 100644 index 000000000000..638034bab829 --- /dev/null +++ b/tests/queries/0_stateless/02884_authentication_quota.reference @@ -0,0 +1,54 @@ +> Drop the user, quota, and role if those were created. +> Create the user with quota with the maximum single authentication attempt. +> Check if the quota has been created. +1 +> Try to login to the user account with correct password +> Login to the user account using the wrong password. +password is incorrect +> Quota is exceeded 1 >= 1. Login with correct password should fail. 
+QUOTA_EXCEEDED +> Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +2 1 +> Alter the quota with MAX FAILED SEQUENTIAL AUTHENTICATIONS = 4 +> Try to login to the user account with correct password +> Successful login should reset failed authentications counter. Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +0 4 +> Login to the user account using the wrong password before exceeding the quota. +password is incorrect +password is incorrect +password is incorrect +password is incorrect +QUOTA_EXCEEDED +> Also try to login with correct password. Quota should stay exceeded. +QUOTA_EXCEEDED +> Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +6 4 +> Reset the quota by increasing MAX FAILED SEQUENTIAL AUTHENTICATIONS and successful login +> and check failed_sequential_authentications, max_failed_sequential_authentications. +0 7 + --------------------------------------------------------------------------- +> Create the role with quota with the maximum single authentication attempt. +> Try to login to the user account with correct password +> Login to the user account using the wrong password. +password is incorrect +> Quota is exceeded 1 >= 1. Login with correct password should fail. +QUOTA_EXCEEDED +> Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +2 1 +> Alter the quota with MAX FAILED SEQUENTIAL AUTHENTICATIONS = 4 +> Try to login to the user account with correct password +> Successful login should reset failed authentications counter. Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +0 4 +> Login to the user account using the wrong password before exceeding the quota. +password is incorrect +password is incorrect +password is incorrect +password is incorrect +QUOTA_EXCEEDED +> Also try to login with correct password. Quota should stay exceeded. +QUOTA_EXCEEDED +> Check the failed_sequential_authentications, max_failed_sequential_authentications fields. +6 4 +> Reset the quota by increasing MAX FAILED SEQUENTIAL AUTHENTICATIONS and successful login +> and check failed_sequential_authentications, max_failed_sequential_authentications. +0 7 diff --git a/tests/queries/0_stateless/02884_authentication_quota.sh b/tests/queries/0_stateless/02884_authentication_quota.sh new file mode 100755 index 000000000000..f013bb4d6397 --- /dev/null +++ b/tests/queries/0_stateless/02884_authentication_quota.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +QUOTA="2884_quota_$$" +USER="2884_user_$$" +ROLE="2884_role_$$" + + +function login_test() +{ + echo "> Try to login to the user account with correct password" + ${CLICKHOUSE_CLIENT} --user ${USER} --password "pass" --query "select 1 format Null" + + echo "> Login to the user account using the wrong password." + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'password is incorrect' + + echo "> Quota is exceeded 1 >= 1. Login with correct password should fail." + ${CLICKHOUSE_CLIENT} --user ${USER} --password "pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'QUOTA_EXCEEDED' + + echo "> Check the failed_sequential_authentications, max_failed_sequential_authentications fields."
+ ${CLICKHOUSE_CLIENT} -q "SELECT failed_sequential_authentications, max_failed_sequential_authentications FROM system.quotas_usage WHERE quota_name = '${QUOTA}'" + + echo "> Alter the quota with MAX FAILED SEQUENTIAL AUTHENTICATIONS = 4" + ${CLICKHOUSE_CLIENT} -q "ALTER QUOTA ${QUOTA} FOR INTERVAL 100 YEAR MAX FAILED SEQUENTIAL AUTHENTICATIONS = 4 TO ${USER}" + + echo "> Try to login to the user account with correct password" + ${CLICKHOUSE_CLIENT} --user ${USER} --password "pass" --query "select 1 format Null" + + echo "> Successful login should reset failed authentications counter. Check the failed_sequential_authentications, max_failed_sequential_authentications fields." + ${CLICKHOUSE_CLIENT} -q "SELECT failed_sequential_authentications, max_failed_sequential_authentications FROM system.quotas_usage WHERE quota_name = '${QUOTA}'" + + echo "> Login to the user account using the wrong password before exceeding the quota." + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'password is incorrect' + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'password is incorrect' + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'password is incorrect' + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'password is incorrect' + ${CLICKHOUSE_CLIENT} --user ${USER} --password "wrong_pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'QUOTA_EXCEEDED' + + echo "> Also try to login with correct password. Quota should stay exceeded." + ${CLICKHOUSE_CLIENT} --user ${USER} --password "pass" --query "select 1 format Null" 2>&1 | grep -m1 -o 'QUOTA_EXCEEDED' + + echo "> Check the failed_sequential_authentications, max_failed_sequential_authentications fields." + ${CLICKHOUSE_CLIENT} -q "SELECT failed_sequential_authentications, max_failed_sequential_authentications FROM system.quotas_usage WHERE quota_name = '${QUOTA}'" + + echo "> Reset the quota by increasing MAX FAILED SEQUENTIAL AUTHENTICATIONS and successful login" + echo "> and check failed_sequential_authentications, max_failed_sequential_authentications." + ${CLICKHOUSE_CLIENT} -q "ALTER QUOTA ${QUOTA} FOR INTERVAL 100 YEAR MAX FAILED SEQUENTIAL AUTHENTICATIONS = 7 TO ${USER}" + ${CLICKHOUSE_CLIENT} --user ${USER} --password "pass" --query "select 1 format Null" + ${CLICKHOUSE_CLIENT} -q "SELECT failed_sequential_authentications, max_failed_sequential_authentications FROM system.quotas_usage WHERE quota_name = '${QUOTA}'" +} + +echo "> Drop the user, quota, and role if those were created." +${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS ${USER}" +${CLICKHOUSE_CLIENT} -q "DROP QUOTA IF EXISTS ${QUOTA}" +${CLICKHOUSE_CLIENT} -q "DROP ROLE IF EXISTS ${ROLE}" + +echo "> Create the user with quota with the maximum single authentication attempt." +${CLICKHOUSE_CLIENT} -q "CREATE USER ${USER} IDENTIFIED WITH plaintext_password BY 'pass'" +${CLICKHOUSE_CLIENT} -q "CREATE QUOTA ${QUOTA} FOR INTERVAL 100 YEAR MAX FAILED SEQUENTIAL AUTHENTICATIONS = 1 TO ${USER}" + +echo "> Check if the quota has been created." +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.quotas WHERE name = '${QUOTA}'" + +login_test + +echo " ---------------------------------------------------------------------------" +echo "> Create the role with quota with the maximum single authentication attempt."
+${CLICKHOUSE_CLIENT} -q "CREATE ROLE ${ROLE}" +${CLICKHOUSE_CLIENT} -q "GRANT ALL ON *.* TO ${ROLE}" +${CLICKHOUSE_CLIENT} -q "GRANT ${ROLE} to ${USER}" +${CLICKHOUSE_CLIENT} -q "ALTER QUOTA ${QUOTA} FOR INTERVAL 100 YEAR MAX FAILED SEQUENTIAL AUTHENTICATIONS = 1 TO ${ROLE}" + +login_test + +${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS ${USER}" +${CLICKHOUSE_CLIENT} -q "DROP QUOTA IF EXISTS ${QUOTA}" +${CLICKHOUSE_CLIENT} -q "DROP ROLE IF EXISTS ${ROLE}" diff --git a/tests/queries/0_stateless/02919_ddsketch_quantile.sql b/tests/queries/0_stateless/02919_ddsketch_quantile.sql index 99eace15d2d2..d98978c117ef 100644 --- a/tests/queries/0_stateless/02919_ddsketch_quantile.sql +++ b/tests/queries/0_stateless/02919_ddsketch_quantile.sql @@ -1,23 +1,23 @@ SELECT '1'; -- simple test -SELECT round(quantileDDSketch(0.01, 0.5)(number), 2) FROM numbers(200); -SELECT round(quantileDDSketch(0.0001, 0.69)(number), 2) FROM numbers(500); -SELECT round(quantileDDSketch(0.003, 0.42)(number), 2) FROM numbers(200); -SELECT round(quantileDDSketch(0.02, 0.99)(number), 2) FROM numbers(500); +SELECT round(quantileDD(0.01, 0.5)(number), 2) FROM numbers(200); +SELECT round(quantileDD(0.0001, 0.69)(number), 2) FROM numbers(500); +SELECT round(quantileDD(0.003, 0.42)(number), 2) FROM numbers(200); +SELECT round(quantileDD(0.02, 0.99)(number), 2) FROM numbers(500); SELECT '2'; -- median is close to 0 -SELECT round(quantileDDSketch(0.01, 0.5)(number), 2) +SELECT round(quantileDD(0.01, 0.5)(number), 2) FROM ( SELECT arrayJoin([toInt64(number), number - 10]) AS number FROM numbers(0, 10) ); -SELECT round(quantileDDSketch(0.01, 0.5)(number - 10), 2) FROM numbers(21); +SELECT round(quantileDD(0.01, 0.5)(number - 10), 2) FROM numbers(21); SELECT '3'; -- all values are negative -SELECT round(quantileDDSketch(0.01, 0.99)(-number), 2) FROM numbers(1, 500); +SELECT round(quantileDD(0.01, 0.99)(-number), 2) FROM numbers(1, 500); SELECT '4'; -- min and max values of integer types (-2^63, 2^63-1) -SELECT round(quantileDDSketch(0.01, 0.5)(number), 2) +SELECT round(quantileDD(0.01, 0.5)(number), 2) FROM ( SELECT arrayJoin([toInt64(number), number - 9223372036854775808, toInt64(number + 9223372036854775798)]) AS number @@ -25,7 +25,7 @@ FROM ); SELECT '5'; -- min and max values of floating point types -SELECT round(quantileDDSketch(0.01, 0.42)(number), 2) +SELECT round(quantileDD(0.01, 0.42)(number), 2) FROM ( SELECT arrayJoin([toFloat32(number), number - 3.4028235e+38, toFloat32(number + 3.4028235e+38)]) AS number @@ -33,7 +33,7 @@ FROM ); SELECT '6'; -- denormalized floats -SELECT round(quantileDDSketch(0.01, 0.69)(number), 2) +SELECT round(quantileDD(0.01, 0.69)(number), 2) FROM ( SELECT arrayJoin([toFloat32(number), number - 1.1754944e-38, toFloat32(number + 1.1754944e-38)]) AS number @@ -41,7 +41,7 @@ FROM ); SELECT '7'; -- NaNs -SELECT round(quantileDDSketch(0.01, 0.5)(number), 2) +SELECT round(quantileDD(0.01, 0.5)(number), 2) FROM ( SELECT arrayJoin([toFloat32(number), NaN * number]) AS number @@ -50,7 +50,7 @@ FROM SELECT '8'; -- sparse sketch -SELECT round(quantileDDSketch(0.01, 0.75)(number), 2) +SELECT round(quantileDD(0.01, 0.75)(number), 2) FROM ( SELECT number * 1e7 AS number @@ -63,11 +63,11 @@ DROP TABLE IF EXISTS `02919_ddsketch_quantile`; CREATE TABLE `02919_ddsketch_quantile` ENGINE = Log AS -SELECT quantilesDDSketchState(0.001, 0.9)(number) AS sketch +SELECT quantilesDDState(0.001, 0.9)(number) AS sketch FROM numbers(1000); -INSERT INTO `02919_ddsketch_quantile` SELECT quantilesDDSketchState(0.001, 0.9)(number + 
1000) +INSERT INTO `02919_ddsketch_quantile` SELECT quantilesDDState(0.001, 0.9)(number + 1000) FROM numbers(1000); -SELECT arrayMap(a -> round(a, 2), (quantilesDDSketchMerge(0.001, 0.9)(sketch))) +SELECT arrayMap(a -> round(a, 2), (quantilesDDMerge(0.001, 0.9)(sketch))) FROM `02919_ddsketch_quantile`; diff --git a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference index f9c7b26d245f..feebf7dbf492 100644 --- a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference +++ b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference @@ -1,13 +1,97 @@ +\N +\N +[nan,nan] +\N [nan,nan] 0 \N 0 0 +0 +\N +\N + +\N +[nan,nan] +[nan,nan] + +[nan,nan] +-- nothings: +\N Nullable(Nothing) +0 UInt8 +\N Nullable(Nothing) +0 UInt64 +0 UInt64 +0 UInt64 +\N Nullable(Nothing) +\N Nullable(Nothing) +\N Nullable(Nothing) +-- quantile: +\N +\N +\N +\N +1 +1 +1 +-- quantiles: +[nan,nan] +[nan,nan] +[nan,nan] +[nan,nan] +[1,1] +[1,1] +[1,1] +-- nothing: +\N +\N +\N +\N +0 +0 +0 +-- nothing(UInt64): +0 +0 +0 +0 0 0 +0 +-- nothing(Nullable(Nothing)): +\N +\N +\N +\N +\N +\N +\N +-- sum: +\N +\N +\N +\N +6 +6 +6 +-- count: +0 +0 +0 +0 +6 +6 +6 +0 0 0 \N +0 \N +0 \N + 0 \N 0 0 +\N \N [nan,nan] + +\N \N [nan,nan] diff --git a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql index a064c091df05..5b9343c6e13b 100644 --- a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql +++ b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql @@ -1,7 +1,87 @@ -#!/usr/bin/env -S ${HOME}/clickhouse-client --progress --queries-file +select sum(NULL); +select quantile(0.5)(NULL); +select quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)); +select quantile(0.5)(NULL), quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)), count(NULL), sum(NULL); SELECT count(NULL) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; +SELECT quantile(0.5)(NULL) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; +SELECT quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; + +SELECT '-- nothings:'; +SELECT nothing() as n, toTypeName(n); +SELECT nothing(1) as n, toTypeName(n); +SELECT nothing(NULL) as n, toTypeName(n); +SELECT nothingUInt64() as n, toTypeName(n); +SELECT nothingUInt64(1) as n, toTypeName(n); +SELECT nothingUInt64(NULL) as n, toTypeName(n); +SELECT nothingNull() as n, toTypeName(n); +SELECT nothingNull(1) as n, toTypeName(n); +SELECT nothingNull(NULL) as n, toTypeName(n); + +SELECT '-- quantile:'; +SELECT quantileArray(0.5)([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileArrayIf(0.5)([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileArrayIf(0.5)([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArray(0.5)([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArray(0.5)([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArrayIf(0.5)([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArrayArray(0.5)([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- quantiles:'; +select quantilesArray(0.5, 0.9)([NULL :: Nullable(UInt64), NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT
quantilesArrayIf(0.5, 0.9)([NULL :: Nullable(UInt64)], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesArrayIf(0.5, 0.9)([NULL :: Nullable(UInt64)], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArray(0.5, 0.9)([NULL :: Nullable(UInt64), NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArray(0.5, 0.9)([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArrayIf(0.5, 0.9)([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArrayArray(0.5, 0.9)([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing:'; +SELECT nothingArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing(UInt64):'; +SELECT nothingUInt64Array([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64ArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64ArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing(Nullable(Nothing)):'; +SELECT nothingNullArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- sum:'; +SELECT sumArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- count:'; +SELECT countArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); 
+SELECT countIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + DROP TABLE IF EXISTS t1; CREATE TABLE t1 (`n` UInt64) ENGINE = MergeTree ORDER BY tuple(); @@ -18,7 +98,7 @@ SET SELECT count(NULL) FROM t1 WITH TOTALS; SELECT count(NULL as a), a FROM t1 WITH TOTALS; --- result differs in old and new analyzer: --- SELECT count(NULL as a), sum(a) FROM t1 WITH TOTALS; +SELECT count(NULL as a), sum(a) FROM t1 WITH TOTALS; SELECT uniq(NULL) FROM t1 WITH TOTALS; +SELECT quantile(0.5)(NULL), quantile(0.9)(NULL), quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)) FROM t1 WITH TOTALS; diff --git a/tests/queries/0_stateless/02940_variant_text_deserialization.reference b/tests/queries/0_stateless/02940_variant_text_deserialization.reference new file mode 100644 index 000000000000..8836e6c4e571 --- /dev/null +++ b/tests/queries/0_stateless/02940_variant_text_deserialization.reference @@ -0,0 +1,516 @@ +JSON +String +{"v":null,"variantElement(v, 'String')":null} +{"v":"string","variantElement(v, 'String')":"string"} +{"v":"42","variantElement(v, 'String')":null} +FixedString +{"v":null,"variantElement(v, 'FixedString(4)')":null} +{"v":"string","variantElement(v, 'FixedString(4)')":null} +{"v":"abcd","variantElement(v, 'FixedString(4)')":"abcd"} +Bool +{"v":null,"variantElement(v, 'Bool')":null} +{"v":"string","variantElement(v, 'Bool')":null} +{"v":true,"variantElement(v, 'Bool')":true} +Integers +{"v":null,"variantElement(v, 'Int8')":null} +{"v":"string","variantElement(v, 'Int8')":null} +{"v":-1,"variantElement(v, 'Int8')":-1} +{"v":0,"variantElement(v, 'Int8')":0} +{"v":"10000000000","variantElement(v, 'Int8')":null} +{"v":null,"variantElement(v, 'UInt8')":null} +{"v":"string","variantElement(v, 'UInt8')":null} +{"v":"-1","variantElement(v, 'UInt8')":null} +{"v":0,"variantElement(v, 'UInt8')":0} +{"v":"10000000000","variantElement(v, 'UInt8')":null} +{"v":null,"variantElement(v, 'Int16')":null} +{"v":"string","variantElement(v, 'Int16')":null} +{"v":-1,"variantElement(v, 'Int16')":-1} +{"v":0,"variantElement(v, 'Int16')":0} +{"v":"10000000000","variantElement(v, 'Int16')":null} +{"v":null,"variantElement(v, 'UInt16')":null} +{"v":"string","variantElement(v, 'UInt16')":null} +{"v":"-1","variantElement(v, 'UInt16')":null} +{"v":0,"variantElement(v, 'UInt16')":0} +{"v":"10000000000","variantElement(v, 'UInt16')":null} +{"v":null,"variantElement(v, 'Int32')":null} +{"v":"string","variantElement(v, 'Int32')":null} +{"v":-1,"variantElement(v, 'Int32')":-1} +{"v":0,"variantElement(v, 'Int32')":0} +{"v":"10000000000","variantElement(v, 'Int32')":null} +{"v":null,"variantElement(v, 'UInt32')":null} +{"v":"string","variantElement(v, 'UInt32')":null} +{"v":"-1","variantElement(v, 'UInt32')":null} +{"v":0,"variantElement(v, 'UInt32')":0} +{"v":"10000000000","variantElement(v, 'UInt32')":null} +{"v":null,"variantElement(v, 'Int64')":null} +{"v":"string","variantElement(v, 'Int64')":null} +{"v":"-1","variantElement(v, 'Int64')":"-1"} +{"v":"0","variantElement(v, 'Int64')":"0"} +{"v":"10000000000000000000000","variantElement(v, 'Int64')":null} +{"v":null,"variantElement(v, 'UInt64')":null} +{"v":"string","variantElement(v, 'UInt64')":null} +{"v":"-1","variantElement(v, 'UInt64')":null} +{"v":"0","variantElement(v, 
'UInt64')":"0"} +{"v":"10000000000000000000000","variantElement(v, 'UInt64')":null} +{"v":null,"variantElement(v, 'Int128')":null} +{"v":"string","variantElement(v, 'Int128')":null} +{"v":"-1","variantElement(v, 'Int128')":"-1"} +{"v":"0","variantElement(v, 'Int128')":"0"} +{"v":null,"variantElement(v, 'UInt128')":null} +{"v":"string","variantElement(v, 'UInt128')":null} +{"v":"-1","variantElement(v, 'UInt128')":null} +{"v":"0","variantElement(v, 'UInt128')":"0"} +Floats +{"v":null,"variantElement(v, 'Float32')":null} +{"v":"string","variantElement(v, 'Float32')":null} +{"v":42.42,"variantElement(v, 'Float32')":42.42} +{"v":null,"variantElement(v, 'Float64')":null} +{"v":"string","variantElement(v, 'Float64')":null} +{"v":42.42,"variantElement(v, 'Float64')":42.42} +Decimals +{"v":null,"variantElement(v, 'Decimal32(6)')":null} +{"v":"string","variantElement(v, 'Decimal32(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal32(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal32(6)')":null} +{"v":null,"variantElement(v, 'Decimal64(6)')":null} +{"v":"string","variantElement(v, 'Decimal64(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal64(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal64(6)')":null} +{"v":null,"variantElement(v, 'Decimal128(6)')":null} +{"v":"string","variantElement(v, 'Decimal128(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal128(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal128(6)')":null} +{"v":null,"variantElement(v, 'Decimal256(6)')":null} +{"v":"string","variantElement(v, 'Decimal256(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal256(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal256(6)')":null} +Dates and DateTimes +{"v":null,"variantElement(v, 'Date')":null} +{"v":"string","variantElement(v, 'Date')":null} +{"v":"2020-01-01","variantElement(v, 'Date')":"2020-01-01"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'Date')":null} +{"v":null,"variantElement(v, 'Date32')":null} +{"v":"string","variantElement(v, 'Date32')":null} +{"v":"1900-01-01","variantElement(v, 'Date32')":"1900-01-01"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'Date32')":null} +{"v":null,"variantElement(v, 'DateTime')":null} +{"v":"string","variantElement(v, 'DateTime')":null} +{"v":"2020-01-01 00:00:00","variantElement(v, 'DateTime')":"2020-01-01 00:00:00"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'DateTime')":null} +{"v":null,"variantElement(v, 'DateTime64')":null} +{"v":"string","variantElement(v, 'DateTime64')":null} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'DateTime64')":"2020-01-01 00:00:00.999"} +{"v":"2020-01-01 00:00:00.999999999 ABC","variantElement(v, 'DateTime64')":null} +UUID +{"v":null,"variantElement(v, 'UUID')":null} +{"v":"string","variantElement(v, 'UUID')":null} +{"v":"c8619cca-0caa-445e-ae76-1d4f6e0b3927","variantElement(v, 'UUID')":"c8619cca-0caa-445e-ae76-1d4f6e0b3927"} +IPv4 +{"v":null,"variantElement(v, 'IPv4')":null} +{"v":"string","variantElement(v, 'IPv4')":null} +{"v":"127.0.0.1","variantElement(v, 'IPv4')":"127.0.0.1"} +IPv6 +{"v":null,"variantElement(v, 'IPv6')":null} 
+{"v":"string","variantElement(v, 'IPv6')":null} +{"v":"2001:db8:85a3::8a2e:370:7334","variantElement(v, 'IPv6')":"2001:db8:85a3::8a2e:370:7334"} +Enum +{"v":null,"variantElement(v, 'Enum(\\'a\\' = 1)')":null} +{"v":"string","variantElement(v, 'Enum(\\'a\\' = 1)')":null} +{"v":"a","variantElement(v, 'Enum(\\'a\\' = 1)')":"a"} +{"v":"a","variantElement(v, 'Enum(\\'a\\' = 1)')":"a"} +{"v":2,"variantElement(v, 'Enum(\\'a\\' = 1)')":null} +Map +{"v":null,"variantElement(v, 'Map(String, UInt64)')":{}} +{"v":"string","variantElement(v, 'Map(String, UInt64)')":{}} +{"v":{"a":"42","b":"43","c":"0"},"variantElement(v, 'Map(String, UInt64)')":{"a":"42","b":"43","c":"0"}} +{"v":"{\"c\" : 44, \"d\" : [1,2,3]}","variantElement(v, 'Map(String, UInt64)')":{}} +Tuple +{"v":null,"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"0","b":"0"}} +{"v":"string","variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"0","b":"0"}} +{"v":{"a":"42","b":"0"},"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"42","b":"0"}} +{"v":{"a":"44","b":"0"},"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"44","b":"0"}} +\N (0,0) +string (0,0) +(42,0) (42,0) +{"a" : 44, "d" : 32} (0,0) +Array +{"v":null,"variantElement(v, 'Array(UInt64)')":[]} +{"v":"string","variantElement(v, 'Array(UInt64)')":[]} +{"v":["1","2","3"],"variantElement(v, 'Array(UInt64)')":["1","2","3"]} +{"v":["0","0","0"],"variantElement(v, 'Array(UInt64)')":["0","0","0"]} +{"v":"[1, 2, \"hello\"]","variantElement(v, 'Array(UInt64)')":[]} +LowCardinality +{"v":null,"variantElement(v, 'LowCardinality(String)')":null} +{"v":"string","variantElement(v, 'LowCardinality(String)')":"string"} +{"v":"42","variantElement(v, 'LowCardinality(String)')":null} +{"v":null,"variantElement(v, 'Array(LowCardinality(Nullable(String)))')":[]} +{"v":["string",null],"variantElement(v, 'Array(LowCardinality(Nullable(String)))')":["string",null]} +{"v":"42","variantElement(v, 'Array(LowCardinality(Nullable(String)))')":[]} +Nullable +{"v":null,"variantElement(v, 'Array(Nullable(String))')":[]} +{"v":"string","variantElement(v, 'Array(Nullable(String))')":[]} +{"v":["hello",null,"world"],"variantElement(v, 'Array(Nullable(String))')":["hello",null,"world"]} +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +CSV +String +\N,\N +"string","string" +"string","string" +42,\N +FixedString +\N,\N +"string",\N +"string",\N +"abcd","abcd" +Bool +\N,\N +"Truee",\N +true,true +Integers +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000000000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000000000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +"42d42",\N +Floats +\N,\N +"string",\N +42.42,42.42 +"42.d42",\N +\N,\N +"string",\N +42.42,42.42 +"42.d42",\N +Decimals +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 
+"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +Dates and DateTimes +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01","2020-01-01" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"1900-01-01","1900-01-01" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01 00:00:00","2020-01-01 00:00:00" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01 00:00:00.999","2020-01-01 00:00:00.999" +"2020-01-01 00:00:00.999999999 ABC",\N +UUID +\N,\N +"string",\N +"c8619cca-0caa-445e-ae76-1d4f6e0b3927","c8619cca-0caa-445e-ae76-1d4f6e0b3927" +"c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA",\N +IPv4 +\N,\N +"string",\N +"127.0.0.1","127.0.0.1" +"127.0.0.1AAA",\N +IPv6 +\N,\N +"string",\N +"2001:db8:85a3::8a2e:370:7334","2001:db8:85a3::8a2e:370:7334" +"2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA",\N +Enum +\N,\N +"string",\N +"a","a" +"a","a" +2,\N +"aa",\N +Map +\N,"{}" +"string","{}" +"{'a':42,'b':43,'c':0}","{'a':42,'b':43,'c':0}" +"{'c' : 44, 'd' : [1,2,3]}","{}" +"{'c' : 44","{}" +Array +\N,"[]" +"string","[]" +"[1,2,3]","[1,2,3]" +"[0,0,0]","[0,0,0]" +"[1, 2, 'hello']","[]" +"[1, 2","[]" +LowCardinality +\N,\N +"string","string" +42,\N +\N,"[]" +"['string',NULL]","['string',NULL]" +"['string', nul]","[]" +42,"[]" +Nullable +\N,"[]" +"string","[]" +"['hello',NULL,'world']","['hello',NULL,'world']" +"['hello', nul]","[]" +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +TSV +String +\N \N +string string +42 \N +FixedString +\N \N +string \N +abcd abcd +Bool +\N \N +Truee \N +true true +Integers +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000000000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000000000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +42d42 \N +\N \N +string \N +-1 \N +0 0 +42d42 \N +Floats +\N \N +string \N +42.42 42.42 +42.d42 \N +\N \N +string \N +42.42 42.42 +42.d42 \N +Decimals +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +Dates and DateTimes +\N \N +string \N +2020-01-d1 \N +2020-01-01 2020-01-01 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +1900-01-01 1900-01-01 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +2020-01-01 00:00:00 2020-01-01 00:00:00 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +2020-01-01 00:00:00.999 2020-01-01 00:00:00.999 +2020-01-01 
00:00:00.999999999 ABC \N +UUID +\N \N +string \N +c8619cca-0caa-445e-ae76-1d4f6e0b3927 c8619cca-0caa-445e-ae76-1d4f6e0b3927 +c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA \N +IPv4 +\N \N +string \N +127.0.0.1 127.0.0.1 +127.0.0.1AAA \N +IPv6 +\N \N +string \N +2001:db8:85a3::8a2e:370:7334 2001:db8:85a3::8a2e:370:7334 +2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA \N +Enum +\N \N +string \N +a a +a a +2 \N +aa \N +Map +\N {} +string {} +{'a':42,'b':43,'c':0} {'a':42,'b':43,'c':0} +{\'c\' : 44, \'d\' : [1,2,3]} {} +{\'c\' : 44 {} +Array +\N [] +string [] +[1,2,3] [1,2,3] +[0,0,0] [0,0,0] +[1, 2, \'hello\'] [] +[1, 2 [] +LowCardinality +\N \N +string string +42 \N +\N [] +['string',NULL] ['string',NULL] +[\'string\', nul] [] +42 [] +Nullable +\N [] +string [] +['hello',NULL,'world'] ['hello',NULL,'world'] +[\'hello\', nul] [] +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +Values +String +(NULL,NULL),('string','string'),(42,NULL)FixedString +(NULL,NULL),('string',NULL),('abcd','abcd')Bool +(NULL,NULL),(true,true)Integers +(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000000000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000000000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0)(NULL,NULL),('string',NULL),(-1,NULL),(0,0)Floats +(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)Decimals +(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)Dates and DateTimes +(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01','2020-01-01'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('1900-01-01','1900-01-01'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01 00:00:00','2020-01-01 00:00:00'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01 00:00:00.999','2020-01-01 00:00:00.999'),('2020-01-01 00:00:00.999999999 ABC',NULL)UUID +(NULL,NULL),('string',NULL),('c8619cca-0caa-445e-ae76-1d4f6e0b3927','c8619cca-0caa-445e-ae76-1d4f6e0b3927'),('c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA',NULL)IPv4 +(NULL,NULL),('string',NULL),('127.0.0.1','127.0.0.1'),('127.0.0.1AAA',NULL)IPv6 +(NULL,NULL),('string',NULL),('2001:db8:85a3::8a2e:370:7334','2001:db8:85a3::8a2e:370:7334'),('2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA',NULL)Enum +(NULL,NULL),('string',NULL),('a','a'),(1,NULL),(2,NULL),('aa',NULL)Map +(NULL,{}),('string',{}),({'a':42,'b':43,'c':0},{'a':42,'b':43,'c':0})Array +(NULL,[]),('string',[]),([1,2,3],[1,2,3]),([0,0,0],[0,0,0])LowCardinality +(NULL,NULL),('string','string'),(42,NULL)(NULL,[]),(['string',NULL],['string',NULL]),(42,[])Nullable +(NULL,[]),('string',[]),(['hello',NULL,'world'],['hello',NULL,'world']) diff --git a/tests/queries/0_stateless/02940_variant_text_deserialization.sql b/tests/queries/0_stateless/02940_variant_text_deserialization.sql new file mode 100644 index 000000000000..041d02088efe --- /dev/null +++ b/tests/queries/0_stateless/02940_variant_text_deserialization.sql @@ -0,0 +1,266 @@ +set 
allow_experimental_variant_type = 1; +set session_timezone = 'UTC'; + +select 'JSON'; +select 'String'; +select v, variantElement(v, 'String') from format(JSONEachRow, 'v Variant(String, UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : 42}') format JSONEachRow; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(JSONEachRow, 'v Variant(String, FixedString(4))', '{"v" : null}, {"v" : "string"}, {"v" : "abcd"}') format JSONEachRow; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(JSONEachRow, 'v Variant(String, Bool)', '{"v" : null}, {"v" : "string"}, {"v" : true}') format JSONEachRow; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(JSONEachRow, 'v Variant(String, Int8, UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt8') from format(JSONEachRow, 'v Variant(String, UInt8, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int16') from format(JSONEachRow, 'v Variant(String, Int16, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt16') from format(JSONEachRow, 'v Variant(String, UInt16, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int32') from format(JSONEachRow, 'v Variant(String, Int32, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt32') from format(JSONEachRow, 'v Variant(String, UInt32, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int64') from format(JSONEachRow, 'v Variant(String, Int64, Int128)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000000000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt64') from format(JSONEachRow, 'v Variant(String, UInt64, Int128)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000000000000000}') format JSONEachRow; +select v, variantElement(v, 'Int128') from format(JSONEachRow, 'v Variant(String, Int128, Int256)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}') format JSONEachRow; +select v, variantElement(v, 'UInt128') from format(JSONEachRow, 'v Variant(String, UInt128, Int256)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}') format JSONEachRow; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(JSONEachRow, 'v Variant(String, Float32)', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}') format JSONEachRow; +select v, variantElement(v, 'Float64') from format(JSONEachRow, 'v Variant(String, Float64)', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}') format JSONEachRow; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(JSONEachRow, 'v Variant(String, Decimal32(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal64(6)') from format(JSONEachRow, 'v Variant(String, Decimal64(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 
4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal128(6)') from format(JSONEachRow, 'v Variant(String, Decimal128(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal256(6)') from format(JSONEachRow, 'v Variant(String, Decimal256(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(JSONEachRow, 'v Variant(String, Date, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'Date32') from format(JSONEachRow, 'v Variant(String, Date32, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "1900-01-01"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'DateTime') from format(JSONEachRow, 'v Variant(String, DateTime, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01 00:00:00"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'DateTime64') from format(JSONEachRow, 'v Variant(String, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01 00:00:00.999"}, {"v" : "2020-01-01 00:00:00.999999999 ABC"}') format JSONEachRow; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(JSONEachRow, 'v Variant(String, UUID)', '{"v" : null}, {"v" : "string"}, {"v" : "c8619cca-0caa-445e-ae76-1d4f6e0b3927"}') format JSONEachRow; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(JSONEachRow, 'v Variant(String, IPv4)', '{"v" : null}, {"v" : "string"}, {"v" : "127.0.0.1"}') format JSONEachRow; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(JSONEachRow, 'v Variant(String, IPv6)', '{"v" : null}, {"v" : "string"}, {"v" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}') format JSONEachRow; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(JSONEachRow, 'v Variant(String, UInt32, Enum(''a'' = 1))', '{"v" : null}, {"v" : "string"}, {"v" : "a"}, {"v" : 1}, {"v" : 2}') format JSONEachRow; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(JSONEachRow, 'v Variant(String, Map(String, UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : 43, "c" : null}}, {"v" : {"c" : 44, "d" : [1,2,3]}}') format JSONEachRow; + +select 'Tuple'; +select v, variantElement(v, 'Tuple(a UInt64, b UInt64)') from format(JSONEachRow, 'v Variant(String, Tuple(a UInt64, b UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : null}}, {"v" : {"a" : 44, "d" : 32}}') format JSONEachRow; +select v, variantElement(v, 'Tuple(a UInt64, b UInt64)') from format(JSONEachRow, 'v Variant(String, Tuple(a UInt64, b UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : null}}, {"v" : {"a" : 44, "d" : 32}}') settings input_format_json_defaults_for_missing_elements_in_named_tuple=0; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(JSONEachRow, 'v Variant(String, Array(UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : [1, 2, 3]}, {"v" : [null, null, null]} {"v" : [1, 2, "hello"]}') 
format JSONEachRow; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(JSONEachRow, 'v Variant(LowCardinality(String), UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : 42}') format JSONEachRow; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(JSONEachRow, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64)', '{"v" : null}, {"v" : ["string", null]}, {"v" : 42}') format JSONEachRow; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(JSONEachRow, 'v Variant(String, Array(Nullable(String)))', '{"v" : null}, {"v" : "string"}, {"v" : ["hello", null, "world"]}') format JSONEachRow; + +select repeat('-', 80) format JSONEachRow; + +select 'CSV'; +select 'String'; +select v, variantElement(v, 'String') from format(CSV, 'v Variant(String, UInt64)', '\\N\n"string"\nstring\n42') format CSV; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(CSV, 'v Variant(String, FixedString(4))', '\\N\n"string"\nstring\n"abcd"') format CSV; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(CSV, 'v Variant(String, Bool)', '\\N\nTruee\nTrue') format CSV; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(CSV, 'v Variant(String, Int8, UInt64)', '\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt8') from format(CSV, 'v Variant(String, UInt8, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int16') from format(CSV, 'v Variant(String, Int16, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt16') from format(CSV, 'v Variant(String, UInt16, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int32') from format(CSV, 'v Variant(String, Int32, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt32') from format(CSV, 'v Variant(String, UInt32, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int64') from format(CSV, 'v Variant(String, Int64, Int128)', '\\N\n"string"\n-1\n0\n10000000000000000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt64') from format(CSV, 'v Variant(String, UInt64, Int128)', '\\N\n"string"\n-1\n0\n10000000000000000000000\n42d42') format CSV; +select v, variantElement(v, 'Int128') from format(CSV, 'v Variant(String, Int128, Int256)', '\\N\n"string"\n-1\n0\n42d42') format CSV; +select v, variantElement(v, 'UInt128') from format(CSV, 'v Variant(String, UInt128, Int256)', '\\N\n"string"\n-1\n0\n42d42') format CSV; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(CSV, 'v Variant(String, Float32)', '\\N\n"string"\n42.42\n42.d42') format CSV; +select v, variantElement(v, 'Float64') from format(CSV, 'v Variant(String, Float64)', '\\N\n"string"\n42.42\n42.d42') format CSV; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(CSV, 'v Variant(String, Decimal32(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 'Decimal64(6)') from format(CSV, 'v Variant(String, Decimal64(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 
'Decimal128(6)') from format(CSV, 'v Variant(String, Decimal128(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 'Decimal256(6)') from format(CSV, 'v Variant(String, Decimal256(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(CSV, 'v Variant(String, Date, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'Date32') from format(CSV, 'v Variant(String, Date32, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"1900-01-01"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'DateTime') from format(CSV, 'v Variant(String, DateTime, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01 00:00:00"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'DateTime64') from format(CSV, 'v Variant(String, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01 00:00:00.999"\n"2020-01-01 00:00:00.999999999 ABC"') format CSV; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(CSV, 'v Variant(String, UUID)', '\\N\n"string"\n"c8619cca-0caa-445e-ae76-1d4f6e0b3927"\nc8619cca-0caa-445e-ae76-1d4f6e0b3927AAA') format CSV; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(CSV, 'v Variant(String, IPv4)', '\\N\n"string"\n"127.0.0.1"\n"127.0.0.1AAA"') format CSV; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(CSV, 'v Variant(String, IPv6)', '\\N\n"string"\n"2001:0db8:85a3:0000:0000:8a2e:0370:7334"\n2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA') format CSV; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(CSV, 'v Variant(String, UInt32, Enum(''a'' = 1))', '\\N\n"string"\n"a"\n1\n2\naa') format CSV; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(CSV, 'v Variant(String, Map(String, UInt64))', '\\N\n"string"\n"{''a'' : 42, ''b'' : 43, ''c'' : null}"\n"{''c'' : 44, ''d'' : [1,2,3]}"\n"{''c'' : 44"') format CSV; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(CSV, 'v Variant(String, Array(UInt64))', '\\N\n"string"\n"[1, 2, 3]"\n"[null, null, null]"\n"[1, 2, ''hello'']"\n"[1, 2"') format CSV; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(CSV, 'v Variant(LowCardinality(String), UInt64)', '\\N\n"string"\n42') format CSV; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(CSV, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64, String)', '\\N\n"[''string'', null]"\n"[''string'', nul]"\n42') format CSV; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(CSV, 'v Variant(String, Array(Nullable(String)))', '\\N\n"string"\n"[''hello'', null, ''world'']"\n"[''hello'', nul]"') format CSV; + +select repeat('-', 80) format JSONEachRow; + +select 'TSV'; +select 'String'; +select v, variantElement(v, 'String') from format(TSV, 'v Variant(String, UInt64)', '\\N\nstring\n42') format TSV; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(TSV, 'v Variant(String, FixedString(4))', '\\N\nstring\nabcd') format TSV; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(TSV, 'v Variant(String, 
Bool)', '\\N\nTruee\nTrue') format TSV; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(TSV, 'v Variant(String, Int8, UInt64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt8') from format(TSV, 'v Variant(String, UInt8, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int16') from format(TSV, 'v Variant(String, Int16, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt16') from format(TSV, 'v Variant(String, UInt16, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int32') from format(TSV, 'v Variant(String, Int32, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt32') from format(TSV, 'v Variant(String, UInt32, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int64') from format(TSV, 'v Variant(String, Int64, Int128)', '\\N\nstring\n-1\n0\n10000000000000000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt64') from format(TSV, 'v Variant(String, UInt64, Int128)', '\\N\nstring\n-1\n0\n10000000000000000000000\n42d42') format TSV; +select v, variantElement(v, 'Int128') from format(TSV, 'v Variant(String, Int128, Int256)', '\\N\nstring\n-1\n0\n42d42') format TSV; +select v, variantElement(v, 'UInt128') from format(TSV, 'v Variant(String, UInt128, Int256)', '\\N\nstring\n-1\n0\n42d42') format TSV; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(TSV, 'v Variant(String, Float32)', '\\N\nstring\n42.42\n42.d42') format TSV; +select v, variantElement(v, 'Float64') from format(TSV, 'v Variant(String, Float64)', '\\N\nstring\n42.42\n42.d42') format TSV; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(TSV, 'v Variant(String, Decimal32(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal64(6)') from format(TSV, 'v Variant(String, Decimal64(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal128(6)') from format(TSV, 'v Variant(String, Decimal128(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal256(6)') from format(TSV, 'v Variant(String, Decimal256(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(TSV, 'v Variant(String, Date, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'Date32') from format(TSV, 'v Variant(String, Date32, DateTime64)', '\\N\nstring\n2020-01-d1\n1900-01-01\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'DateTime') from format(TSV, 'v Variant(String, DateTime, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01 00:00:00\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'DateTime64') from format(TSV, 'v Variant(String, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01 00:00:00.999\n2020-01-01 
00:00:00.999999999 ABC') format TSV; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(TSV, 'v Variant(String, UUID)', '\\N\nstring\nc8619cca-0caa-445e-ae76-1d4f6e0b3927\nc8619cca-0caa-445e-ae76-1d4f6e0b3927AAA') format TSV; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(TSV, 'v Variant(String, IPv4)', '\\N\nstring\n127.0.0.1\n127.0.0.1AAA') format TSV; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(TSV, 'v Variant(String, IPv6)', '\\N\nstring\n2001:0db8:85a3:0000:0000:8a2e:0370:7334\n2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA') format TSV; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(TSV, 'v Variant(String, UInt32, Enum(''a'' = 1))', '\\N\nstring\na\n1\n2\naa') format TSV; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(TSV, 'v Variant(String, Map(String, UInt64))', '\\N\nstring\n{''a'' : 42, ''b'' : 43, ''c'' : null}\n{''c'' : 44, ''d'' : [1,2,3]}\n{''c'' : 44') format TSV; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(TSV, 'v Variant(String, Array(UInt64))', '\\N\nstring\n[1, 2, 3]\n[null, null, null]\n[1, 2, ''hello'']\n[1, 2') format TSV; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(TSV, 'v Variant(LowCardinality(String), UInt64)', '\\N\nstring\n42') format TSV; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(TSV, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64, String)', '\\N\n[''string'', null]\n[''string'', nul]\n42') format TSV; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(TSV, 'v Variant(String, Array(Nullable(String)))', '\\N\nstring\n[''hello'', null, ''world'']\n[''hello'', nul]') format TSV; + +select repeat('-', 80) format JSONEachRow; + +select 'Values'; +select 'String'; +select v, variantElement(v, 'String') from format(Values, 'v Variant(String, UInt64)', '(NULL), (''string''), (42)') format Values; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(Values, 'v Variant(String, FixedString(4))', '(NULL), (''string''), (''abcd'')') format Values; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(Values, 'v Variant(String, Bool)', '(NULL), (True)') format Values; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(Values, 'v Variant(String, Int8, UInt64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt8') from format(Values, 'v Variant(String, UInt8, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int16') from format(Values, 'v Variant(String, Int16, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt16') from format(Values, 'v Variant(String, UInt16, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int32') from format(Values, 'v Variant(String, Int32, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt32') from format(Values, 'v Variant(String, UInt32, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int64') from format(Values, 'v Variant(String, Int64, Int128)', '(NULL), (''string''), (-1), (0), (10000000000000000000000)') format Values; +select v, variantElement(v, 'UInt64') from 
format(Values, 'v Variant(String, UInt64, Int128)', '(NULL), (''string''), (-1), (0), (10000000000000000000000)') format Values; +select v, variantElement(v, 'Int128') from format(Values, 'v Variant(String, Int128, Int256)', '(NULL), (''string''), (-1), (0)') format Values; +select v, variantElement(v, 'UInt128') from format(Values, 'v Variant(String, UInt128, Int256)', '(NULL), (''string''), (-1), (0)') format Values; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(Values, 'v Variant(String, Float32)', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Float64') from format(Values, 'v Variant(String, Float64)', '(NULL), (''string''), (42.42)') format Values; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(Values, 'v Variant(String, Decimal32(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal64(6)') from format(Values, 'v Variant(String, Decimal64(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal128(6)') from format(Values, 'v Variant(String, Decimal128(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal256(6)') from format(Values, 'v Variant(String, Decimal256(6))', '(NULL), (''string''), (42.42)') format Values; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(Values, 'v Variant(String, Date, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'Date32') from format(Values, 'v Variant(String, Date32, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''1900-01-01''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'DateTime') from format(Values, 'v Variant(String, DateTime, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01 00:00:00''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'DateTime64') from format(Values, 'v Variant(String, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01 00:00:00.999''), (''2020-01-01 00:00:00.999999999 ABC'')') format Values; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(Values, 'v Variant(String, UUID)', '(NULL), (''string''), (''c8619cca-0caa-445e-ae76-1d4f6e0b3927''), (''c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA'')') format Values; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(Values, 'v Variant(String, IPv4)', '(NULL), (''string''), (''127.0.0.1''), (''127.0.0.1AAA'')') format Values; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(Values, 'v Variant(String, IPv6)', '(NULL), (''string''), (''2001:0db8:85a3:0000:0000:8a2e:0370:7334''), (''2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA'')') format Values; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(Values, 'v Variant(String, UInt32, Enum(''a'' = 1))', '(NULL), (''string''), (''a''), (1), (2), (''aa'')') format Values; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(Values, 'v Variant(String, Map(String, UInt64))', '(NULL), (''string''), ({''a'' : 42, ''b'' : 43, ''c'' : null})') format Values; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(Values, 'v Variant(String, Array(UInt64))', '(NULL), (''string''), ([1, 2, 3]), ([null, null, null])') format Values; + +select 'LowCardinality'; +select v, variantElement(v, 
'LowCardinality(String)') from format(Values, 'v Variant(LowCardinality(String), UInt64)', '(NULL), (''string''), (42)') format Values; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(Values, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64, String)', '(NULL), ([''string'', null]), (42)') format Values; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(Values, 'v Variant(String, Array(Nullable(String)))', '(NULL), (''string''), ([''hello'', null, ''world''])') format Values; + +select ''; \ No newline at end of file diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference new file mode 100644 index 000000000000..8a6e77d4f6df --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -0,0 +1,2472 @@ +Memory +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] 
+[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] 
+[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +MergeTree wide +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N 
+\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) 
+[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh new file mode 100755 index 000000000000..ed365bbd2447 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test1_insert() +{ + echo "test1 insert" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); +insert into test select number + 3, number from numbers(3); +insert into test select number + 6, 'str_' || toString(number) from numbers(3); +insert into test select number + 9, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(3); +insert into test select number + 12, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(3); +insert into test select number + 15, range(number + 1)::Array(UInt64) from numbers(3);" +} + +function test1_select() +{ + echo "test1 select" + $CH_CLIENT -nmq "select v from test order by id; +select v.String from test order by id; +select v.UInt64 from test order by id; +select v.\`LowCardinality(String)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id; +select v.\`Array(UInt64)\` from test order by id; +select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test2_insert() +{ + echo "test2 insert" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); +insert into test select number + 3, number % 2 ? NULL : number from numbers(3); +insert into test select number + 6, number % 2 ? NULL : 'str_' || toString(number) from numbers(3); +insert into test select number + 9, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); +insert into test select number + 12, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); +insert into test select number + 15, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" +} + +function test2_select() +{ + echo "test2 select" + $CH_CLIENT -nmq "select v from test order by id; +select v.String from test order by id; +select v.UInt64 from test order by id; +select v.\`LowCardinality(String)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id; +select v.\`Array(UInt64)\` from test order by id; +select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test3_insert() +{ + echo "test3 insert" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" +} + +function test3_select() +{ + echo "test3 select" + $CH_CLIENT -nmq "select v from test order by id; +select v.String from test order by id; +select v.UInt64 from test order by id; +select v.\`LowCardinality(String)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\` from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id; +select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id; +select v.\`Array(UInt64)\` from test order by id; +select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test1_insert + test1_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test1_select + fi + $CH_CLIENT -q "truncate table test;" + test2_insert + test2_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test2_select + fi + $CH_CLIENT -q "truncate table test;" + test3_insert + test3_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test3_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q 
"drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference new file mode 100644 index 000000000000..4b6d53c52ac7 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -0,0 +1,51 @@ +Memory +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +MergeTree compact +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +MergeTree wide +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh new file mode 100755 index 000000000000..23666a9b4a86 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test4_insert() +{ + echo "test4 insert" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(200000); +insert into test select number + 200000, number from numbers(200000); +insert into test select number + 400000, 'str_' || toString(number) from numbers(200000); +insert into test select number + 600000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(200000); +insert into test select number + 800000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(200000); +insert into test select number + 1000000, range(number % 20 + 1)::Array(UInt64) from numbers(200000);" +} + +function test4_select +{ + echo "test4 select" + $CH_CLIENT -nmq "select v from test format Null; +select count() from test where isNotNull(v); +select v.String from test format Null; +select count() from test where isNotNull(v.String); +select v.UInt64 from test format Null; +select count() from test where isNotNull(v.UInt64); +select v.\`LowCardinality(String)\` from test format Null; +select count() from test where isNotNull(v.\`LowCardinality(String)\`); +select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; +select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; +select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); +select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; +select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); +select v.\`Array(UInt64)\` from test format Null; +select count() from test where not empty(v.\`Array(UInt64)\`); +select v.\`Array(UInt64)\`.size0 from test format Null; +select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +} + +function run() +{ + test4_insert + test4_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test4_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" 
+$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference new file mode 100644 index 000000000000..1ccdb3acdff9 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -0,0 +1,51 @@ +Memory +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +MergeTree compact +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +MergeTree wide +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh new file mode 100755 index 000000000000..d6309e264140 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test5_insert() +{ + echo "test5 insert" + $CH_CLIENT -nmq " +insert into test select number, NULL from numbers(200000); +insert into test select number + 200000, number % 2 ? NULL : number from numbers(200000); +insert into test select number + 400000, number % 2 ? NULL : 'str_' || toString(number) from numbers(200000); +insert into test select number + 600000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); +insert into test select number + 800000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); +insert into test select number + 1000000, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number % 20 + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" +} + +function test5_select() +{ + echo "test5 select" + $CH_CLIENT -nmq " +select v from test format Null; +select count() from test where isNotNull(v); +select v.String from test format Null; +select count() from test where isNotNull(v.String); +select v.UInt64 from test format Null; +select count() from test where isNotNull(v.UInt64); +select v.\`LowCardinality(String)\` from test format Null; +select count() from test where isNotNull(v.\`LowCardinality(String)\`); +select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; +select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; +select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); +select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; +select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); +select v.\`Array(UInt64)\` from test format Null; +select count() from test where not empty(v.\`Array(UInt64)\`); +select v.\`Array(UInt64)\`.size0 from test format Null; +select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +} + +function run() +{ + test5_insert + test5_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test5_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference new file mode 100644 index 000000000000..e13d58203433 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -0,0 +1,56 @@ +Memory +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +MergeTree wide +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +test6 select +1000000 +200000 +200000 +200000 
+200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh new file mode 100755 index 000000000000..5ea04db4bb45 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test6_insert() +{ + echo "test6 insert" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" +} + +function test6_select() +{ + echo "test6 select" + $CH_CLIENT -nmq "select v from test format Null; + select count() from test where isNotNull(v); + select v.String from test format Null; + select count() from test where isNotNull(v.String); + select v.UInt64 from test format Null; + select count() from test where isNotNull(v.UInt64); + select v.\`LowCardinality(String)\` from test format Null; + select count() from test where isNotNull(v.\`LowCardinality(String)\`); + select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; + select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; + select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); + select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; + select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); + select v.\`Array(UInt64)\` from test format Null; + select count() from test where not empty(v.\`Array(UInt64)\`); + select v.\`Array(UInt64)\`.size0 from test format Null; + select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test6_insert + test6_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test6_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git 
a/tests/queries/0_stateless/02941_variant_type_alters.reference b/tests/queries/0_stateless/02941_variant_type_alters.reference new file mode 100644 index 000000000000..52c834e455bf --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_alters.reference @@ -0,0 +1,330 @@ +Memory +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 21 str_21 str_21 \N \N +alter modify column 2 +0 0 \N \N \N \N \N \N +1 1 \N \N \N \N \N \N +2 2 \N \N \N \N \N \N +3 3 \N \N 3 \N 3 \N +4 4 \N \N 4 \N 4 \N +5 5 \N \N 5 \N 5 \N +6 6 \N \N str_6 str_6 \N \N +7 7 \N \N str_7 str_7 \N \N +8 8 \N \N str_8 str_8 \N \N +9 9 \N \N \N \N \N \N +10 10 \N \N \N \N \N \N +11 11 \N \N \N \N \N \N +12 12 \N \N 12 \N 12 \N +13 13 \N \N str_13 str_13 \N \N +14 14 \N \N \N \N \N \N +15 15 \N \N 1970-01-16 \N \N 1970-01-16 +16 16 \N \N 1970-01-17 \N \N 1970-01-17 +17 17 \N \N 1970-01-18 \N \N 1970-01-18 +18 18 \N \N 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N \N \N +20 20 \N \N 20 \N 20 \N +21 21 \N \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 \N \N \N \N \N \N +1 1 \N \N \N \N \N \N +2 2 \N \N \N \N \N \N +3 3 \N \N 3 \N 3 \N +4 4 \N \N 4 \N 4 \N +5 5 \N \N 5 \N 5 \N +6 6 \N \N str_6 str_6 \N \N +7 7 \N \N str_7 str_7 \N \N +8 8 \N \N str_8 str_8 \N \N +9 9 \N \N \N \N \N \N +10 10 \N \N \N \N \N \N +11 11 \N \N \N \N \N \N +12 12 \N \N 12 \N 12 \N +13 13 \N \N str_13 str_13 \N \N +14 14 \N \N \N \N \N \N +15 15 \N \N 1970-01-16 \N \N 1970-01-16 +16 16 \N \N 1970-01-17 \N \N 1970-01-17 +17 17 \N \N 1970-01-18 \N \N 1970-01-18 +18 18 \N \N 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N \N \N +20 20 \N \N 20 \N 20 \N +21 21 \N \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after 
alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 21 str_21 str_21 \N \N +alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 21 str_21 str_21 \N \N +alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 
17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N diff --git a/tests/queries/0_stateless/02941_variant_type_alters.sh b/tests/queries/0_stateless/02941_variant_type_alters.sh new file mode 100755 index 000000000000..7c151d1fe9e1 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_alters.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column v Variant(UInt64, String) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64 from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64 from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column v Variant(UInt64, String, Date) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64, v.Date from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, toDate(number) from numbers(15, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(18, 4)" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64, v.Date from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column y Variant(UInt64, String) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, v, v.String, v.UInt64, v.Date from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(22, 3)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, v, v.String, v.UInt64, v.Date from test order by x" +} + +$CH_CLIENT -q "drop 
table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02942_variant_cast.reference b/tests/queries/0_stateless/02942_variant_cast.reference new file mode 100644 index 000000000000..f3fd7a9ba33b --- /dev/null +++ b/tests/queries/0_stateless/02942_variant_cast.reference @@ -0,0 +1,25 @@ +\N +42 +0 +\N +2 +\N +Hello +Hello +NULL +Hello +Hello +\N +Hello +\N +0 +\N +42 +\N +Hello +2 +\N +Hello +5 +0 +1 diff --git a/tests/queries/0_stateless/02942_variant_cast.sql b/tests/queries/0_stateless/02942_variant_cast.sql new file mode 100644 index 000000000000..fc2d1d636576 --- /dev/null +++ b/tests/queries/0_stateless/02942_variant_cast.sql @@ -0,0 +1,24 @@ +set allow_experimental_variant_type=1; +set allow_experimental_analyzer=0; -- It's currently doesn't work with analyzer because of the way it works with constants, but it will be refactored and fixed in future + +select NULL::Variant(String, UInt64); +select 42::UInt64::Variant(String, UInt64); +select 42::UInt32::Variant(String, UInt64); -- {serverError CANNOT_CONVERT_TYPE} +select now()::Variant(String, UInt64); -- {serverError CANNOT_CONVERT_TYPE} +select CAST(number % 2 ? NULL : number, 'Variant(String, UInt64)') from numbers(4); +select 'Hello'::LowCardinality(String)::Variant(LowCardinality(String), UInt64); +select 'Hello'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select 'NULL'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select 'Hello'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select CAST(CAST(number % 2 ? 
NULL : 'Hello', 'LowCardinality(Nullable(String))'), 'Variant(LowCardinality(String), UInt64)') from numbers(4); + +select NULL::Variant(String, UInt64)::UInt64; +select NULL::Variant(String, UInt64)::Nullable(UInt64); +select '42'::Variant(String, UInt64)::UInt64; +select 'str'::Variant(String, UInt64)::UInt64; -- {serverError CANNOT_PARSE_TEXT} +select CAST(multiIf(number % 3 == 0, NULL::Variant(String, UInt64), number % 3 == 1, 'Hello'::Variant(String, UInt64), number::Variant(String, UInt64)), 'Nullable(String)') from numbers(6); +select CAST(multiIf(number == 1, NULL::Variant(String, UInt64), number == 2, 'Hello'::Variant(String, UInt64), number::Variant(String, UInt64)), 'UInt64') from numbers(6); -- {serverError CANNOT_PARSE_TEXT} + + +select number::Variant(UInt64)::Variant(String, UInt64)::Variant(Array(String), String, UInt64) from numbers(2); +select 'str'::Variant(String, UInt64)::Variant(String, Array(UInt64)); -- {serverError CANNOT_CONVERT_TYPE} diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 48d828b69246..6eed33cc68dc 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -82,3 +82,12 @@ B 3 10 D 1 20 A 2 30 C \N 40 +-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause +A 2 30 +B 3 10 +C \N 40 +D 1 20 +A 2 30 +B 3 10 +C \N 40 +D 1 20 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 0756563946c3..0960d75ad965 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -87,3 +87,23 @@ SET allow_experimental_analyzer = 1; SELECT a, b, all FROM order_by_all ORDER BY all, a; DROP TABLE order_by_all; + +SELECT '-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause'; + +CREATE TABLE order_by_all +( + a String, + b Nullable(Int32), + c UInt64, +) + ENGINE = Memory; + +INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); + +SET allow_experimental_analyzer = 0; +SELECT * FROM order_by_all ORDER BY ALL; + +SET allow_experimental_analyzer = 1; +SELECT * FROM order_by_all ORDER BY ALL; + +DROP TABLE order_by_all; diff --git a/tests/queries/0_stateless/02943_variant_element.reference b/tests/queries/0_stateless/02943_variant_element.reference new file mode 100644 index 000000000000..ab8aaa8fdefd --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_element.reference @@ -0,0 +1,44 @@ +\N +\N +\N +\N +0 +1 +2 +3 +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +[] +[[0]] +[[NULL]] +[[2]] +[[NULL]] diff --git a/tests/queries/0_stateless/02943_variant_element.sql b/tests/queries/0_stateless/02943_variant_element.sql new file mode 100644 index 000000000000..556c0147e565 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_element.sql @@ -0,0 +1,16 @@ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select variantElement(NULL::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement(number::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement(number::Variant(String, UInt64), 'String') from numbers(4); +select variantElement((number % 2 ? NULL : number)::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement((number % 2 ? 
NULL : number)::Variant(String, UInt64), 'String') from numbers(4); +select variantElement((number % 2 ? NULL : 'str_' || toString(number))::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64), 'LowCardinality(String)') from numbers(4); +select variantElement(NULL::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64), 'LowCardinality(String)') from numbers(4); +select variantElement((number % 2 ? NULL : number)::Variant(Array(UInt64), UInt64), 'Array(UInt64)') from numbers(4); +select variantElement(NULL::Variant(Array(UInt64), UInt64), 'Array(UInt64)') from numbers(4); +select variantElement(number % 2 ? NULL : range(number + 1), 'Array(UInt64)') from numbers(4); + +select variantElement([[(number % 2 ? NULL : number)::Variant(String, UInt64)]], 'UInt64') from numbers(4); + diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.reference b/tests/queries/0_stateless/02943_variant_read_subcolumns.reference new file mode 100644 index 000000000000..4b93782cddf5 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.reference @@ -0,0 +1,6 @@ +Memory +test +MergeTree compact +test +MergeTree wide +test diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh new file mode 100755 index 000000000000..88be09c20364 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64, v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64 from test order by id format Null" + $CH_CLIENT -q "select v.UInt64, v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64 from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64, v.\`Array(Variant(String, UInt64))\`.String from test order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git 
a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference new file mode 100644 index 000000000000..1736a307c429 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference @@ -0,0 +1,244 @@ +Memory +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +MergeTree wide +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 
+\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh new file mode 100755 index 000000000000..d089ed3cb2fd --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + + +function test1_insert() +{ + echo "test1 insert" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(10, 10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(20, 10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number < 35, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(30, 10) settings max_block_size=3" +} + +function test1_select() +{ + echo "test1 select" + $CH_CLIENT -q "select v, v.String, v.UInt64 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test2_insert() +{ + echo "test2 insert" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(1000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(2000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(3000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" +} + +function test2_select() +{ + echo "test2 select" + $CH_CLIENT -q "select v, 
v.String, v.UInt64 from test format Null;" + $CH_CLIENT -q "select v from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v);" + $CH_CLIENT -q "select v.String from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.String);" + $CH_CLIENT -q "select v.UInt64 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.UInt64);" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test1_insert + test1_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test1_select + fi + $CH_CLIENT -q "truncate table test;" + test2_insert + test2_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test2_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.reference b/tests/queries/0_stateless/02944_variant_as_common_type.reference new file mode 100644 index 000000000000..0425a8cfa30e --- /dev/null +++ b/tests/queries/0_stateless/02944_variant_as_common_type.reference @@ -0,0 +1,103 @@ +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +String str_0 +String str_1 +String str_2 +String str_3 +Nullable(String) str_0 +Nullable(String) str_1 +Nullable(String) str_2 +Nullable(String) str_3 +Array(UInt64) [0] +Array(UInt64) [0,1] +Array(UInt64) [0,1,2] +Array(UInt64) [0,1,2,3] +Array(UInt64) [0] +Array(UInt64) [0,1] +Array(UInt64) [0,1,2] +Array(UInt64) [0,1,2,3] +String str_0 +String str_1 +String str_2 +String str_3 +Nullable(String) str_0 +Nullable(String) str_1 +Nullable(String) str_2 +Nullable(String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) str_1 +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) str_1 +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), 
String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Array(Variant(String, UInt8)) [1,'str_1',2,'str_2'] +Array(Variant(Array(String), Array(UInt8))) [[1,2,3],['str_1','str_2','str_3']] +Array(Variant(Array(UInt8), Array(Variant(Array(String), Array(UInt8))))) [[[1,2,3],['str_1','str_2','str_3']],[1,2,3]] +Array(Variant(Array(Array(UInt8)), Array(UInt8))) [[1,2,3],[[1,2,3]]] +Map(String, Variant(String, UInt8)) {'a':1,'b':'str_1'} +Map(String, Variant(Map(String, Variant(String, UInt8)), UInt8)) {'a':1,'b':{'c':2,'d':'str_1'}} +Map(String, Variant(Array(Array(UInt8)), Array(UInt8), UInt8)) {'a':1,'b':[1,2,3],'c':[[4,5,6]]} diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql new file mode 100644 index 000000000000..e985cf365dd9 --- /dev/null +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -0,0 +1,76 @@ +set allow_experimental_analyzer=0; -- The result type for the if function with a constant is different with the analyzer. It will be fixed after refactoring around constants in the analyzer.
+ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select toTypeName(res), if(1, [1,2,3], 'str_1') as res; +select toTypeName(res), if(1, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(0, [1,2,3], 'str_1') as res; +select toTypeName(res), if(0, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(NULL, [1,2,3], 'str_1') as res; +select toTypeName(res), if(NULL, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], 'str_1') as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(1, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(1, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(0, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(0, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(NULL, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(NULL, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(1, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(1, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(0, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(0, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(NULL, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(NULL, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], materialize('str_1')::Nullable(String)) as res; + + +select toTypeName(res), if(0, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(0, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(1, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(1, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(NULL, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(NULL, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(number % 2, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || toString(number))::LowCardinality(String)) as res from numbers(4); +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || 
toString(number))::LowCardinality(Nullable(String))) as res from numbers(4); + + +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, 'str_' || toString(number)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::Nullable(String)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::LowCardinality(String)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::LowCardinality(Nullable(String))) as res from numbers(6); + + +select toTypeName(res), array(1, 'str_1', 2, 'str_2') as res; +select toTypeName(res), array([1, 2, 3], ['str_1', 'str_2', 'str_3']) as res; +select toTypeName(res), array(array([1, 2, 3], ['str_1', 'str_2', 'str_3']), [1, 2, 3]) as res; +select toTypeName(res), array([1, 2, 3], [[1, 2, 3]]) as res; + +select toTypeName(res), map('a', 1, 'b', 'str_1') as res; +select toTypeName(res), map('a', 1, 'b', map('c', 2, 'd', 'str_1')) as res; +select toTypeName(res), map('a', 1, 'b', [1, 2, 3], 'c', [[4, 5, 6]]) as res; + diff --git a/tests/queries/0_stateless/02967_index_hint_crash.reference b/tests/queries/0_stateless/02967_index_hint_crash.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02967_index_hint_crash.sql b/tests/queries/0_stateless/02967_index_hint_crash.sql new file mode 100644 index 000000000000..e33a4992c6c3 --- /dev/null +++ b/tests/queries/0_stateless/02967_index_hint_crash.sql @@ -0,0 +1,16 @@ +CREATE TABLE tab +( + `foo` Array(LowCardinality(String)), + INDEX idx foo TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +PRIMARY KEY tuple(); + +INSERT INTO tab SELECT if(number % 2, ['value'], []) +FROM system.numbers +LIMIT 10000; + +SELECT * +FROM tab +PREWHERE indexHint() +FORMAT Null; diff --git a/tests/queries/0_stateless/02967_mysql_settings_override.reference b/tests/queries/0_stateless/02967_mysql_settings_override.reference new file mode 100644 index 000000000000..96cf7ecc403b --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.reference @@ -0,0 +1,23 @@ +-- Init +s +a +b +c +d +-- Uppercase setting name +s +a +b +name value +send_timeout 22 +name value +receive_timeout 33 +-- Lowercase setting name +s +a +b +c +name value +send_timeout 55 +name value +receive_timeout 66 diff --git a/tests/queries/0_stateless/02967_mysql_settings_override.sh b/tests/queries/0_stateless/02967_mysql_settings_override.sh new file mode 100755 index 000000000000..59a2099190a8 --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires mysql client + +# Tests that certain MySQL-proprietary settings are mapped to ClickHouse-native settings. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +CHANGED_SETTINGS_QUERY="SELECT name, value FROM system.settings WHERE name IN ('send_timeout', 'receive_timeout') AND changed;" + +TEST_TABLE="mysql_settings_override_test" + +DROP_TABLE="DROP TABLE IF EXISTS $TEST_TABLE;" +CREATE_TABLE="CREATE TABLE $TEST_TABLE (s String) ENGINE MergeTree ORDER BY s;" +INSERT_STMT="INSERT INTO $TEST_TABLE VALUES ('a'), ('b'), ('c'), ('d');" +SELECT_STMT="SELECT * FROM $TEST_TABLE ORDER BY s;" + +echo "-- Init" +${MYSQL_CLIENT} --execute "$DROP_TABLE $CREATE_TABLE $INSERT_STMT $SELECT_STMT" # should fetch all 4 records + +echo "-- Uppercase setting name" +${MYSQL_CLIENT} --execute "SET SQL_SELECT_LIMIT = 2; $SELECT_STMT" # should fetch 2 records out of 4 +${MYSQL_CLIENT} --execute "SET NET_WRITE_TIMEOUT = 22; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "SET NET_READ_TIMEOUT = 33; $CHANGED_SETTINGS_QUERY" + +echo "-- Lowercase setting name" +${MYSQL_CLIENT} --execute "set sql_select_limit=3; $SELECT_STMT" # should fetch 3 records out of 4 +${MYSQL_CLIENT} --execute "set net_write_timeout=55; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "set net_read_timeout=66; $CHANGED_SETTINGS_QUERY" + +${MYSQL_CLIENT} --execute "$DROP_TABLE" diff --git a/tests/queries/0_stateless/02968_adaptive_async_insert_timeout.reference b/tests/queries/0_stateless/02968_adaptive_async_insert_timeout.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02968_adaptive_async_insert_timeout.sql b/tests/queries/0_stateless/02968_adaptive_async_insert_timeout.sql new file mode 100644 index 000000000000..f9606cace6e6 --- /dev/null +++ b/tests/queries/0_stateless/02968_adaptive_async_insert_timeout.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS async_insert_mt_test; +CREATE TABLE async_insert_mt_test (a UInt64, b Array(UInt64)) ENGINE=MergeTree() ORDER BY a; + +SET async_insert_use_adaptive_busy_timeout = 1; + +INSERT INTO async_insert_mt_test + SETTINGS + async_insert=1, + wait_for_async_insert=1, + async_insert_busy_timeout_min_ms=10, + async_insert_busy_timeout_max_ms=500, + async_insert_busy_timeout_increase_rate=1.0, + async_insert_busy_timeout_decrease_rate=1.0 + VALUES (3, []), (1, [1, 3]), (2, [7, 8]), (4, [5, 9]), (5, [2, 6]); + + +INSERT INTO async_insert_mt_test + SETTINGS + async_insert=1, + wait_for_async_insert=1, + async_insert_busy_timeout_ms=500, + async_insert_busy_timeout_min_ms=500 + VALUES (3, []), (1, [1, 3]), (2, [7, 8]), (4, [5, 9]), (5, [2, 6]); + + +INSERT INTO async_insert_mt_test + SETTINGS + async_insert=1, + wait_for_async_insert=1, + async_insert_busy_timeout_ms=100, + async_insert_busy_timeout_min_ms=500 + VALUES (3, []), (1, [1, 3]), (2, [7, 8]), (4, [5, 9]), (5, [2, 6]); + + +INSERT INTO async_insert_mt_test + SETTINGS + async_insert=1, + wait_for_async_insert=1, + async_insert_busy_timeout_increase_rate=-1.0 + VALUES (3, []), (1, [1, 3]), (2, [7, 8]), (4, [5, 9]), (5, [2, 6]); -- { serverError INVALID_SETTING_VALUE } + + +INSERT INTO async_insert_mt_test + SETTINGS + async_insert=1, + wait_for_async_insert=1, + async_insert_busy_timeout_decrease_rate=-1.0 + VALUES (3, []), (1, [1, 3]), (2, [7, 8]), (4, [5, 9]), (5, [2, 6]); -- { serverError INVALID_SETTING_VALUE } + + +DROP TABLE IF EXISTS async_insert_mt_test; diff --git a/tests/queries/0_stateless/02968_sumMap_with_nan.reference b/tests/queries/0_stateless/02968_sumMap_with_nan.reference new file mode 100644 index 000000000000..83a2d98375fa --- /dev/null +++ 
b/tests/queries/0_stateless/02968_sumMap_with_nan.reference @@ -0,0 +1,2 @@ +([6.7],[3]) +([1,4,5,6.7,nan],[2.3,5,1,3,inf]) diff --git a/tests/queries/0_stateless/02968_sumMap_with_nan.sql b/tests/queries/0_stateless/02968_sumMap_with_nan.sql new file mode 100644 index 000000000000..330da94cfea3 --- /dev/null +++ b/tests/queries/0_stateless/02968_sumMap_with_nan.sql @@ -0,0 +1,4 @@ +SELECT sumMapFiltered([6.7])([x], [y]) +FROM values('x Float64, y Float64', (0, 1), (1, 2.3), (nan, inf), (6.7, 3), (4, 4), (5, 1)); + +SELECT sumMap([x],[y]) FROM values('x Float64, y Float64', (4, 1), (1, 2.3), (nan,inf), (6.7,3), (4,4), (5, 1)); diff --git a/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference new file mode 100644 index 000000000000..72d83e5cf6a4 --- /dev/null +++ b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference @@ -0,0 +1,142 @@ +QUERY id: 0 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +1 1 +2 1 +QUERY id: 0 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, 
table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +CHECK WITH TOTALS +QUERY id: 0, is_group_by_with_totals: 1 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +1 1 +2 1 + +0 2 diff --git a/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql new file mode 100644 index 000000000000..15f2550a63e1 --- /dev/null +++ b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql @@ -0,0 +1,31 @@ +set allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val +ORDER BY val; + +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY ALL +ORDER BY val; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 
1)) as val, count() +FROM numbers(2) +GROUP BY ALL +ORDER BY val; + +SELECT 'CHECK WITH TOTALS'; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val WITH TOTALS +ORDER BY val; + +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val WITH TOTALS +ORDER BY val; diff --git a/tests/queries/0_stateless/02971_analyzer_remote_id.reference b/tests/queries/0_stateless/02971_analyzer_remote_id.reference new file mode 100644 index 000000000000..b8626c4cff28 --- /dev/null +++ b/tests/queries/0_stateless/02971_analyzer_remote_id.reference @@ -0,0 +1 @@ +4 diff --git a/tests/queries/0_stateless/02971_analyzer_remote_id.sh b/tests/queries/0_stateless/02971_analyzer_remote_id.sh new file mode 100755 index 000000000000..463e4cc1f0c5 --- /dev/null +++ b/tests/queries/0_stateless/02971_analyzer_remote_id.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" +${CLICKHOUSE_CLIENT} --query="CREATE DATABASE test_02971" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test_02971.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" +${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', 'test_02971.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ + | grep -av "ASan doesn't fully support makecontext/swapcontext functions" + +${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" diff --git a/tests/queries/0_stateless/02971_limit_by_distributed.reference b/tests/queries/0_stateless/02971_limit_by_distributed.reference new file mode 100644 index 000000000000..69c6437d04d4 --- /dev/null +++ b/tests/queries/0_stateless/02971_limit_by_distributed.reference @@ -0,0 +1,16 @@ +-- { echoOn } +-- with limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k +LIMIT 100; +0 +1 +-- w/o limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k; +0 +1 diff --git a/tests/queries/0_stateless/02971_limit_by_distributed.sql b/tests/queries/0_stateless/02971_limit_by_distributed.sql new file mode 100644 index 000000000000..66a85137f32b --- /dev/null +++ b/tests/queries/0_stateless/02971_limit_by_distributed.sql @@ -0,0 +1,25 @@ +-- Tags: shard + +drop table if exists tlb; +create table tlb (k UInt64) engine MergeTree order by k; + +INSERT INTO tlb (k) SELECT 0 FROM numbers(100); +INSERT INTO tlb (k) SELECT 1; + +-- { echoOn } +-- with limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k +LIMIT 100; + +-- w/o limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k; + +-- { echoOff } + +DROP TABLE tlb; diff --git a/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference new file mode 100644 index 000000000000..71c9053d644a --- /dev/null +++ b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference @@ -0,0 +1,9 @@ +0 +ds_1_1 all_1_1_0 0 +ds_1_2 all_1_1_0 0 +ds_2_1 all_1_1_0 0 +ds_2_1 all_2_2_0 0 +ds_3_1 all_1_1_0 0 +ds_3_1 all_2_2_0 0 +landing all_1_1_0 0 +10 diff --git a/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql 
b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql new file mode 100644 index 000000000000..242133e9122d --- /dev/null +++ b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql @@ -0,0 +1,103 @@ +SET insert_deduplicate = 1; +SET deduplicate_blocks_in_dependent_materialized_views = 1; +SET update_insert_deduplication_token_in_dependent_materialized_views = 1; +SET insert_deduplication_token = 'test'; + +DROP TABLE IF EXISTS landing; +CREATE TABLE landing +( + timestamp UInt64, + value UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP TABLE IF EXISTS ds_1_1; +CREATE TABLE ds_1_1 +( + t UInt64, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_1_1; +CREATE MATERIALIZED VIEW mv_1_1 TO ds_1_1 as +SELECT + timestamp t, sum(value) v +FROM landing +GROUP BY t; + +DROP TABLE IF EXISTS ds_1_2; +CREATE TABLE ds_1_2 +( + t UInt64, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_1_2; +CREATE MATERIALIZED VIEW mv_1_2 TO ds_1_2 as +SELECT + timestamp t, sum(value) v +FROM landing +GROUP BY t; + +DROP TABLE IF EXISTS ds_2_1; +CREATE TABLE ds_2_1 +( + l String, + t DateTime, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_2_1; +CREATE MATERIALIZED VIEW mv_2_1 TO ds_2_1 as +SELECT '2_1' l, t, v +FROM ds_1_1; + +DROP VIEW IF EXISTS mv_2_2; +CREATE MATERIALIZED VIEW mv_2_2 TO ds_2_1 as +SELECT '2_2' l, t, v +FROM ds_1_2; + +DROP TABLE IF EXISTS ds_3_1; +CREATE TABLE ds_3_1 +( + l String, + t DateTime, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_3_1; +CREATE MATERIALIZED VIEW mv_3_1 TO ds_3_1 as +SELECT '3_1' l, t, v +FROM ds_2_1; + +INSERT INTO landing SELECT 1 as timestamp, 1 AS value FROM numbers(10); + +SELECT sleep(3); + +INSERT INTO landing SELECT 1 as timestamp, 1 AS value FROM numbers(10); + +SYSTEM FLUSH LOGS; +SELECT table, name, error FROM system.part_log +WHERE database = currentDatabase() +ORDER BY table, name; + +SELECT count() FROM landing; + +DROP TABLE landing; + +DROP TABLE ds_1_1; +DROP VIEW mv_1_1; + +DROP TABLE ds_1_2; +DROP VIEW mv_1_2; + +DROP TABLE ds_2_1; +DROP VIEW mv_2_1; +DROP VIEW mv_2_2; + +DROP TABLE ds_3_1; +DROP VIEW mv_3_1; diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.reference b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference new file mode 100644 index 000000000000..449fe3d34e33 --- /dev/null +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.reference @@ -0,0 +1,3 @@ +990000 +990000 +10 diff --git a/tests/queries/0_stateless/02972_parallel_replicas_cte.sql b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql new file mode 100644 index 000000000000..c39ad172a277 --- /dev/null +++ b/tests/queries/0_stateless/02972_parallel_replicas_cte.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS pr_1; +DROP TABLE IF EXISTS pr_2; + +CREATE TABLE pr_1 (`a` UInt32) ENGINE = MergeTree ORDER BY a PARTITION BY a % 10 AS +SELECT 10 * intDiv(number, 10) + 1 FROM numbers(1_000_000); + +CREATE TABLE pr_2 (`a` UInt32) ENGINE = MergeTree ORDER BY a AS +SELECT * FROM numbers(1_000_000); + +WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) +SELECT count() FROM pr_2 INNER JOIN filtered_groups ON 
pr_2.a = filtered_groups.a; + +WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) +SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_two_shards', max_parallel_replicas = 3; + +-- Testing that it is disabled for allow_experimental_analyzer=0. With analyzer it will be supported (with correct result) +WITH filtered_groups AS (SELECT a FROM pr_1 WHERE a >= 10000) +SELECT count() FROM pr_2 INNER JOIN filtered_groups ON pr_2.a = filtered_groups.a +SETTINGS allow_experimental_analyzer = 0, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_two_shards', max_parallel_replicas = 3; -- { serverError SUPPORT_IS_DISABLED } + +-- Sanitizer +SELECT count() FROM pr_2 JOIN numbers(10) as pr_1 ON pr_2.a = pr_1.number +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_two_shards', max_parallel_replicas = 3; + +DROP TABLE IF EXISTS pr_1; +DROP TABLE IF EXISTS pr_2; diff --git a/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference b/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference new file mode 100644 index 000000000000..6c362c0207ec --- /dev/null +++ b/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference @@ -0,0 +1,3 @@ +2022-01-01 11:13:14 +2022-01-01 11:13:14 +2022-01-01 11:13:14 diff --git a/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql b/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql new file mode 100644 index 000000000000..d8cff4f3c007 --- /dev/null +++ b/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql @@ -0,0 +1,4 @@ +SET session_timezone='Europe/Amsterdam'; +SELECT toString(toDateTime('2022-01-01 12:13:14'), CAST('UTC', 'Nullable(String)')); +SELECT toString(toDateTime('2022-01-01 12:13:14'), materialize(CAST('UTC', 'Nullable(String)'))); +SELECT toString(CAST(toDateTime('2022-01-01 12:13:14'), 'Nullable(DateTime)'), materialize(CAST('UTC', 'Nullable(String)'))); diff --git a/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference new file mode 100644 index 000000000000..00479541d226 --- /dev/null +++ b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference @@ -0,0 +1,2 @@ +0 +1000000 Hello, world Hello, world diff --git a/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh new file mode 100755 index 000000000000..b212e42061fa --- /dev/null +++ b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest +# Because we are creating a backup with fixed path. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery " +DROP TABLE IF EXISTS test; +CREATE TABLE test (x String) ENGINE = Memory SETTINGS compress = 1; +INSERT INTO test SELECT 'Hello, world' FROM numbers(1000000); +" + +$CLICKHOUSE_CLIENT --multiquery " +BACKUP TABLE test TO File('test.zip'); +" --format Null + +$CLICKHOUSE_CLIENT --multiquery " +TRUNCATE TABLE test; +SELECT count() FROM test; +" + +$CLICKHOUSE_CLIENT --multiquery " +RESTORE TABLE test FROM File('test.zip'); +" --format Null + +$CLICKHOUSE_CLIENT --multiquery " +SELECT count(), min(x), max(x) FROM test; +DROP TABLE test; +" diff --git a/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.reference b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql new file mode 100644 index 000000000000..7a1de2897fbf --- /dev/null +++ b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql @@ -0,0 +1,39 @@ +-- Tags: zookeeper + +-- we need exact block-numbers +SET insert_keeper_fault_injection_probability=0; + +DROP TABLE IF EXISTS table_with_some_columns; + +CREATE TABLE table_with_some_columns( + key UInt64, + value0 UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/table_with_some_columns', '1') +ORDER BY key +SETTINGS allow_experimental_block_number_column=1, +ratio_of_defaults_for_sparse_serialization=0.0001, +min_bytes_for_wide_part = 0, +replace_long_file_name_to_hash=0; -- simpler to debug + +INSERT INTO table_with_some_columns SELECT rand(), number + 10 from numbers(100000); + +INSERT INTO table_with_some_columns SELECT rand(), number + 10 from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +INSERT INTO table_with_some_columns SELECT rand(), number+222222222 from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +set alter_sync = 2; + +ALTER TABLE table_with_some_columns DROP COLUMN value0; + +INSERT INTO table_with_some_columns SELECT rand() from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +SELECT *, _block_number FROM table_with_some_columns where not ignore(*) Format Null; + +DROP TABLE IF EXISTS table_with_some_columns; diff --git a/tests/queries/0_stateless/02973_dictionary_table_exception_fix.reference b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql new file mode 100644 index 000000000000..f8061b426708 --- /dev/null +++ b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql @@ -0,0 +1,6 @@ +CREATE TABLE test_table (i Int64) engine=MergeTree order by i; +CREATE DICTIONARY test_dict (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) LAYOUT(DIRECT()); +CREATE TABLE test_dict (y Int64) engine=MergeTree order by y; -- { serverError DICTIONARY_ALREADY_EXISTS } +CREATE DICTIONARY test_table (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) LAYOUT(DIRECT()); -- { serverError TABLE_ALREADY_EXISTS } +CREATE DICTIONARY test_dict (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) 
LAYOUT(DIRECT()); -- { serverError DICTIONARY_ALREADY_EXISTS } +CREATE TABLE test_table (y Int64) engine=MergeTree order by y; -- { serverError TABLE_ALREADY_EXISTS } diff --git a/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.reference b/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.sh b/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.sh new file mode 100755 index 000000000000..a4984583637e --- /dev/null +++ b/tests/queries/0_stateless/02973_s3_compressed_file_in_error_message.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=none -q "select * from s3('http://localhost:11111/test/a.tsv', TSV, 'x String', 'gzip')" 2>&1 | grep -c "(in file/uri.*a\.tsv)" diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.reference b/tests/queries/0_stateless/02974_backup_query_format_null.reference new file mode 100644 index 000000000000..67bfe658c1f0 --- /dev/null +++ b/tests/queries/0_stateless/02974_backup_query_format_null.reference @@ -0,0 +1,3 @@ +2 +80 +-12345 diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.sh b/tests/queries/0_stateless/02974_backup_query_format_null.sh new file mode 100755 index 000000000000..ddba2f6de163 --- /dev/null +++ b/tests/queries/0_stateless/02974_backup_query_format_null.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO tbl VALUES (2), (80), (-12345); +" + +backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" + +${CLICKHOUSE_CLIENT} --query "BACKUP TABLE tbl TO ${backup_name} FORMAT Null" + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE tbl; +RESTORE ALL FROM ${backup_name} FORMAT Null +" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl" diff --git a/tests/queries/0_stateless/02975_system_zookeeper_retries.reference b/tests/queries/0_stateless/02975_system_zookeeper_retries.reference new file mode 100644 index 000000000000..9a636ba56d04 --- /dev/null +++ b/tests/queries/0_stateless/02975_system_zookeeper_retries.reference @@ -0,0 +1,3 @@ +/keeper api_version +/keeper feature_flags +1 diff --git a/tests/queries/0_stateless/02975_system_zookeeper_retries.sql b/tests/queries/0_stateless/02975_system_zookeeper_retries.sql new file mode 100644 index 000000000000..8b402ec6d65c --- /dev/null +++ b/tests/queries/0_stateless/02975_system_zookeeper_retries.sql @@ -0,0 +1,22 @@ +-- Tags: zookeeper, no-parallel, no-fasttest + +SELECT path, name +FROM system.zookeeper +WHERE path = '/keeper' +ORDER BY path, name +SETTINGS + insert_keeper_retry_initial_backoff_ms = 1, + insert_keeper_retry_max_backoff_ms = 20, + insert_keeper_fault_injection_probability=0.3, + insert_keeper_fault_injection_seed=4, + log_comment='02975_system_zookeeper_retries'; + + +SYSTEM FLUSH LOGS; + +-- Check that there where zk session failures +SELECT ProfileEvents['ZooKeeperHardwareExceptions'] > 0 +FROM system.query_log +WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND log_comment='02975_system_zookeeper_retries' +ORDER BY event_time_microseconds DESC +LIMIT 1; diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference new file mode 100644 index 000000000000..531163e1d84e --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -0,0 +1,30 @@ +data after INSERT 1 +data after ATTACH 1 +Files before DETACH TABLE +all_1_1_0 + +backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + +Files after DETACH TABLE +all_1_1_0 + +backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh new file mode 100755 index 000000000000..386c29704b66 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings +# Tag no-fasttest: requires S3 +# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test too exocit anyway + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# config for clickhouse-disks (to check leftovers) +config="${BASH_SOURCE[0]/.sh/.yml}" + +# only in Atomic ATTACH from s3_plain works +new_database="ordinary_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary" +CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database} +CLICKHOUSE_DATABASE="$new_database" + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by key; + insert into data values (1); + select 'data after INSERT', count() from data; +" + +# suppress output +$CLICKHOUSE_CLIENT -q "backup table data to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null + +$CLICKHOUSE_CLIENT -nm -q " + drop table data; + attach table data (key Int) engine=MergeTree() order by key + settings + max_suspicious_broken_parts=0, + disk=disk(type=s3_plain, + endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', + access_key_id='test', + secret_access_key='testtest'); + select 'data after ATTACH', count() from data; + + insert into data values (1); -- { serverError TABLE_IS_READ_ONLY } + optimize table data final; -- { serverError TABLE_IS_READ_ONLY } +" + +path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data'") +# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters." +path=${path%/} + +echo "Files before DETACH TABLE" +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 + +$CLICKHOUSE_CLIENT -q "detach table data" +echo "Files after DETACH TABLE" +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 + +# metadata file is left +$CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml new file mode 100644 index 000000000000..ca5036736d83 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml @@ -0,0 +1,7 @@ +storage_configuration: + disks: + s3_plain_disk: + type: s3_plain + endpoint: http://localhost:11111/test/s3_plain/ + access_key_id: clickhouse + secret_access_key: clickhouse diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference new file mode 100644 index 000000000000..1e191b719a5a --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -0,0 +1,30 @@ +data after INSERT 1 +data after ATTACH 1 +Files before DETACH TABLE +all_X_X_X + +backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + +Files after DETACH TABLE +all_X_X_X + +backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + diff --git 
a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh new file mode 100755 index 000000000000..bf20247c7aa1 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings +# Tag no-fasttest: requires S3 +# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test is too exotic anyway + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +config="${BASH_SOURCE[0]/.sh/.yml}" + +# only in Atomic ATTACH from s3_plain works +new_database="ordinary_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary" +CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database} +CLICKHOUSE_DATABASE="$new_database" + +$CLICKHOUSE_CLIENT -nm -q " +    drop table if exists data_read; +    drop table if exists data_write; + +    create table data_write (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'write') order by key; +    create table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key; + +    insert into data_write values (1); +    system sync replica data_read; +    select 'data after INSERT', count() from data_read; +" + +# suppress output +$CLICKHOUSE_CLIENT -q "backup table data_read to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null + +$CLICKHOUSE_CLIENT -nm -q " +    drop table data_read; +    attach table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key +    settings +        max_suspicious_broken_parts=0, +        disk=disk(type=s3_plain, +            endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', +            access_key_id='test', +            secret_access_key='testtest'); +    select 'data after ATTACH', count() from data_read; + +    insert into data_read values (1); -- { serverError TABLE_IS_READ_ONLY } +    optimize table data_read final; -- { serverError TABLE_IS_READ_ONLY } +    system sync replica data_read; -- { serverError TABLE_IS_READ_ONLY } +" + +path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data_read'") +# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters."
diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml
new file mode 100644
index 000000000000..ca5036736d83
--- /dev/null
+++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml
@@ -0,0 +1,7 @@
+storage_configuration:
+    disks:
+        s3_plain_disk:
+            type: s3_plain
+            endpoint: http://localhost:11111/test/s3_plain/
+            access_key_id: clickhouse
+            secret_access_key: clickhouse
diff --git a/tests/queries/0_stateless/02981_translate_fixedstring.reference b/tests/queries/0_stateless/02981_translate_fixedstring.reference
new file mode 100644
index 000000000000..e506d4a22f7d
--- /dev/null
+++ b/tests/queries/0_stateless/02981_translate_fixedstring.reference
@@ -0,0 +1,5 @@
+AAA\0\0\0\0\0\0\0
+A
+1
+2
+3
diff --git a/tests/queries/0_stateless/02981_translate_fixedstring.sql b/tests/queries/0_stateless/02981_translate_fixedstring.sql
new file mode 100644
index 000000000000..209efa4ba4a6
--- /dev/null
+++ b/tests/queries/0_stateless/02981_translate_fixedstring.sql
@@ -0,0 +1,2 @@
+SELECT translate('aaa'::FixedString(10), 'a','A');
+SELECT translate(number::String::FixedString(1), '0','A') from numbers(4);
diff --git a/tests/queries/0_stateless/02981_variant_type_function.reference b/tests/queries/0_stateless/02981_variant_type_function.reference
new file mode 100644
index 000000000000..4fae89810ef8
--- /dev/null
+++ b/tests/queries/0_stateless/02981_variant_type_function.reference
@@ -0,0 +1,10 @@
+None
+UInt64
+String
+Array(UInt64)
+Enum8(\'None\' = -1, \'Array(UInt64)\' = 0, \'String\' = 1, \'UInt64\' = 2)
+None
+UInt64
+String
+Array(UInt64)
+Enum8(\'None\' = -1, \'Array(UInt64)\' = 0, \'Date\' = 1, \'String\' = 2, \'UInt64\' = 3)
diff --git a/tests/queries/0_stateless/02981_variant_type_function.sql b/tests/queries/0_stateless/02981_variant_type_function.sql
new file mode 100644
index 000000000000..cba653d7374d
--- /dev/null
+++ b/tests/queries/0_stateless/02981_variant_type_function.sql
@@ -0,0 +1,13 @@
+SET allow_experimental_variant_type = 1;
+CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT variantType(v) as type FROM test;
+SELECT toTypeName(variantType(v)) from test limit 1;
+
+SELECT variantType() FROM test; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
+SELECT variantType(v, v) FROM test; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
+SELECT variantType(v.String) FROM test; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
+
+SELECT variantType(v::Variant(UInt64, String, Array(UInt64), Date)) as type FROM test;
+SELECT toTypeName(variantType(v::Variant(UInt64, String, Array(UInt64), Date))) from test limit 1;
+
diff --git
a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference new file mode 100644 index 000000000000..60c254e152bc --- /dev/null +++ b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference @@ -0,0 +1 @@ +Vertical OK diff --git a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql new file mode 100644 index 000000000000..1305f02c0444 --- /dev/null +++ b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql @@ -0,0 +1,35 @@ +-- Tags: long + +DROP TABLE IF EXISTS t_vertical_merge_memory; + +CREATE TABLE t_vertical_merge_memory (id UInt64, arr Array(String)) +ENGINE = MergeTree ORDER BY id +SETTINGS + min_bytes_for_wide_part = 0, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + index_granularity = 8192, + index_granularity_bytes = '10M', + merge_max_block_size = 8192, + merge_max_block_size_bytes = '10M'; + +INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(30000); +INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(30000); + +OPTIMIZE TABLE t_vertical_merge_memory FINAL; + +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + peak_memory_usage < 500 * 1024 * 1024 + ? 'OK' + : format('FAIL: memory usage: {}', formatReadableSize(peak_memory_usage)) +FROM system.part_log +WHERE + database = currentDatabase() + AND table = 't_vertical_merge_memory' + AND event_type = 'MergeParts' + AND length(merged_from) = 2; + +DROP TABLE IF EXISTS t_vertical_merge_memory; diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference index fa084170f537..62baba2af8b2 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference @@ -68,73 +68,3 @@ Simple functions with non compilable function without key 4611686725751467379 9223371678237104442 3626326766789368100 61384643584599682996279588 408650940859.2896 104735.01095549858 8873898 9223372036854775807 4611686018427387904 3818489297630359920 Simple functions if combinator without key 4611687533683519016 9223371678237104442 4124667747700004330 930178817930.5122 321189.2280948817 4434274 9223372036854775806 4611686018427387904 2265422677606390266 -Aggregation without JIT compilation -Simple functions -1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 -732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 -598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 -792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 
3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 -716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 -59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 -800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 -170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 -63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 -Simple functions with non compilable function -1704509 4611700827100483880 9223360787015464643 10441337359398154812 3620921835565807284859452 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 -732797 4611701940806302259 9223355550934604746 977192643464016658 3289442827160604417733394 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 -598875 4611701407242345792 9223362250391155632 9312163881623734456 2330921446573746856380600 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 -792887 4611699550286611812 9223290551912005343 6930300520201292824 1745179600137886041476120 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 1356295121550317411019929 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 -25703952 
4611709443519524003 9223353913449113943 9946868158853570839 1018731388338768841564439 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 -716829 4611852156092872082 9223361623076951140 15381015774917924786 623810478612337115371442 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 -59183 4611730685242027332 9223354909338698162 8078812522502896568 589916507545680254024632 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 538517864195994778911814 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 -800784 4611752907938305166 9223340418389788041 18082918611792817587 535545510122473785781683 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 506405014842860050255126 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 476547495537329753708996 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 467236365548464278670014 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 444126268697527941770060 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 417407443977973675608140 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 403462269796593691082374 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 -170282 4611833225706935900 9223371583739401906 8076893424988479310 394417911933408911581006 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 361995300393829962204226 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 -63469 4611695097019173921 9223353530156141191 6296784708578574520 360843057610541117735096 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 330534668598011678200421 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 -Simple functions if combinator -1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 9223372036854775806 4611686018427387904 4518874482384062894 -732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 9223372036854775806 4611686018427387904 3641900047478154650 -598875 4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 9223372036854775806 4611686018427387904 1688477495230210408 -792887 4611699550286611812 9223164887726235740 
7088177025760385824 56461952267903.89 92835869.96920013 125539 9223372036854775806 4611686018427387904 4850868151095058072 -3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 9223372036854775806 4611686018427387904 6845214684357194564 -25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 9223372036854775806 4611686018427387904 1384302533387727316 -716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 9223372036854775806 4611686018427387904 6344483471397203854 -59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 9223372036854775806 4611686018427387904 5909305558020042898 -33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 9223372036854775806 4611686018427387904 4672855013852508626 -800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 9223372036854775806 4611686018427387904 7801864489649220514 -20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 9223372036854775806 4611686018427387904 5941995311893397960 -25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 9223372036854775806 4611686018427387904 6700111718676827412 -23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 9223372036854775806 4611686018427387904 2325654077031843898 -14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 9223372036854775806 4611686018427387904 2114922310535979832 -32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 9223372036854775806 4611686018427387904 4399934528735249092 -22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 9223372036854775806 4611686018427387904 8763910740678180498 -170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 2515144222.953728 28587 9223372036854775806 4611686018427387904 8217388408377809010 -11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 9223372036854775806 4611686018427387904 4689180182672571856 -63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 9223372036854775806 4611686018427387904 2067736879306995526 -29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 9223372036854775806 4611686018427387904 8318055464870862444 -Simple functions without key -4611686725751467379 9223371678237104442 3626326766789368100 408650940859.2896 104735.01095549858 8873898 9223372036854775807 4611686018427387904 3818489297630359920 -Simple functions with non compilable function without key -4611686725751467379 9223371678237104442 3626326766789368100 61384643584599682996279588 408650940859.2896 104735.01095549858 8873898 9223372036854775807 4611686018427387904 3818489297630359920 -Simple functions if combinator without key -4611687533683519016 9223371678237104442 4124667747700004330 930178817930.5122 321189.2280948817 4434274 9223372036854775806 4611686018427387904 2265422677606390266 diff --git 
a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql index 6017fc57c525..03d296018041 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql @@ -1,4 +1,3 @@ -SET compile_aggregate_expressions = 1; SET min_count_to_compile_aggregate_expression = 0; -- The test uses many aggregations. A low max_bytes_before_external_group_by value will lead to high disk usage -- which in CI leads to timeouts @@ -103,104 +102,3 @@ SELECT FROM test.hits ORDER BY min_watch_id DESC LIMIT 20; - -SET compile_aggregate_expressions = 0; - -SELECT 'Aggregation without JIT compilation'; - -SELECT 'Simple functions'; - -SELECT - CounterID, - min(WatchID), - max(WatchID), - sum(WatchID), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions with non compilable function'; -SELECT - CounterID, - min(WatchID), - max(WatchID), - sum(WatchID), - sum(toUInt128(WatchID)), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions if combinator'; - -WITH (WatchID % 2 == 0) AS predicate -SELECT - CounterID, - minIf(WatchID,predicate), - maxIf(WatchID, predicate), - sumIf(WatchID, predicate), - avgIf(WatchID, predicate), - avgWeightedIf(WatchID, CounterID, predicate), - countIf(WatchID, predicate), - groupBitOrIf(WatchID, predicate), - groupBitAndIf(WatchID, predicate), - groupBitXorIf(WatchID, predicate) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions without key'; - -SELECT - min(WatchID) AS min_watch_id, - max(WatchID), - sum(WatchID), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -ORDER BY min_watch_id DESC LIMIT 20; - -SELECT 'Simple functions with non compilable function without key'; - -SELECT - min(WatchID) AS min_watch_id, - max(WatchID), - sum(WatchID), - sum(toUInt128(WatchID)), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -ORDER BY min_watch_id DESC LIMIT 20; - -SELECT 'Simple functions if combinator without key'; - -WITH (WatchID % 2 == 0) AS predicate -SELECT - minIf(WatchID, predicate) as min_watch_id, - maxIf(WatchID, predicate), - sumIf(WatchID, predicate), - avgIf(WatchID, predicate), - avgWeightedIf(WatchID, CounterID, predicate), - countIf(WatchID, predicate), - groupBitOrIf(WatchID, predicate), - groupBitAndIf(WatchID, predicate), - groupBitXorIf(WatchID, predicate) -FROM test.hits -ORDER BY min_watch_id -DESC LIMIT 20; diff --git a/tests/queries/1_stateful/00178_quantile_ddsketch.sql b/tests/queries/1_stateful/00178_quantile_ddsketch.sql index 6844dc05cf92..c1ef4b9f4f23 100644 --- a/tests/queries/1_stateful/00178_quantile_ddsketch.sql +++ b/tests/queries/1_stateful/00178_quantile_ddsketch.sql @@ -1,5 +1,5 @@ -SELECT CounterID AS k, round(quantileDDSketch(0.01, 0.5)(ResolutionWidth), 2) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; -SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDDSketch(0.01, 0.1, 0.5, 0.9, 0.99, 
0.999)(ResolutionWidth)) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, round(quantileDD(0.01, 0.5)(ResolutionWidth), 2) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth)) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; -SELECT CounterID AS k, round(quantileDDSketch(0.01, 0.5)(ResolutionWidth), 2) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; -SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDDSketch(0.01, 0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth)) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, round(quantileDD(0.01, 0.5)(ResolutionWidth), 2) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth)) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; diff --git a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference new file mode 100644 index 000000000000..d05b1f927f4b --- /dev/null +++ b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh new file mode 100755 index 000000000000..2e1b807c4967 --- /dev/null +++ b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Tags: no-parallel, no-random-settings, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# Test assumes that the whole table is residing in the cache, but `hits_s3` has only 128Mi of cache. +# So we need to create a smaller table. 
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS hits_s3_sampled" +$CLICKHOUSE_CLIENT -q "CREATE TABLE hits_s3_sampled AS test.hits_s3" +$CLICKHOUSE_CLIENT -q "INSERT INTO hits_s3_sampled SELECT * FROM test.hits_s3 SAMPLE 0.01" +$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE hits_s3_sampled FINAL" + +$CLICKHOUSE_CLIENT -q "SYSTEM DROP FILESYSTEM CACHE" + +# Warm up the cache +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" + +query_id=02906_read_from_cache_$RANDOM +$CLICKHOUSE_CLIENT --query_id ${query_id} -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" + +$CLICKHOUSE_CLIENT -nq " + SYSTEM FLUSH LOGS; + + -- AsynchronousReaderIgnoredBytes = 0: no seek-avoiding happened + -- CachedReadBufferReadFromSourceBytes = 0: sanity check to ensure we read only from cache + SELECT ProfileEvents['AsynchronousReaderIgnoredBytes'], ProfileEvents['CachedReadBufferReadFromSourceBytes'] + FROM system.query_log + WHERE query_id = '$query_id' AND type = 'QueryFinish' AND event_date >= yesterday() AND current_database = currentDatabase() +" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS hits_s3_sampled" diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f4be6ebcf093..cbd53e5dfceb 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2657 +personal_ws-1.1 en 2724 AArch ACLs ALTERs @@ -12,6 +12,7 @@ ARMv ASLR ASOF ASan +AWND AWST Actian ActionsMenu @@ -214,7 +215,7 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL -DDSketch +DD Decrypted Deduplicate Deduplication @@ -238,6 +239,7 @@ DistributedSend DockerHub DoubleDelta Doxygen +Durre ECMA Ecto EdgeAngle @@ -289,6 +291,7 @@ ForEach FreeBSD Fuzzer Fuzzers +GHCN GTID GTest Gb @@ -444,6 +447,7 @@ Khanna KittenHouse Klickhouse Kolmogorov +Korzeniewski Kubernetes LDAP LGPL @@ -503,6 +507,7 @@ MaxMind MaxPartCountForPartition MaxPushedDDLEntryID Mbps +McNeal Memcheck MemoryCode MemoryDataAndStack @@ -512,6 +517,7 @@ MemorySanitizer MemoryShared MemoryTracking MemoryVirtual +Menne MergeJoin MergeState MergeTree @@ -556,6 +562,7 @@ NEWDATE NEWDECIMAL NFKC NFKD +NOAA NULLIF NVME NVMe @@ -576,6 +583,7 @@ NetworkSendBytes NetworkSendDrop NetworkSendErrors NetworkSendPackets +Noaa NodeJs NuRaft NumHexagons @@ -656,8 +664,10 @@ OrZero OvercommitTracker PAAMAYIM PCRE +PRCP PREWHERE PROCESSLIST +PSUN PagerDuty ParallelFormattingOutputFormatThreads ParallelFormattingOutputFormatThreadsActive @@ -802,6 +812,7 @@ SIMD SLES SLRU SMALLINT +SNWD SPNEGO SQEs SQLAlchemy @@ -874,11 +885,14 @@ SupersetDocker SystemReplicasThreads SystemReplicasThreadsActive TABLUM +TAVG TCPConnection TCPThreads TDigest TINYINT TLSv +TMAX +TMIN TPCH TSDB TSVRaw @@ -980,7 +994,9 @@ VersionedCollapsingMergeTree VideoContainer ViewAllLink VirtualBox +Vose WALs +WSFG Welch's Werror Wether @@ -999,6 +1015,7 @@ Xeon YAML YAMLRegExpTree YYYY +YYYYMMDD YYYYMMDDToDate YYYYMMDDhhmmssToDateTime Yandex @@ -1570,6 +1587,7 @@ getSetting getSizeOfEnumType getblockinfo getevents +ghcnd github glibc globalIn @@ -1593,6 +1611,7 @@ groupArrayLast groupArrayMovingAvg groupArrayMovingSum groupArraySample +groupArraySorted groupBitAnd groupBitOr groupBitXor @@ -1607,6 +1626,7 @@ grouparraylast grouparraymovingavg 
grouparraymovingsum grouparraysample +grouparraysorted groupbitand groupbitmap groupbitmapand @@ -1952,6 +1972,7 @@ ngramSimHashCaseInsensitiveUTF ngramSimHashUTF ngrambf ngrams +noaa nonNegativeDerivative noop normalizeQuery @@ -2060,7 +2081,6 @@ prebuild prebuilt preemptable preferServerCiphers -prefertch prefetch prefetchsize preloaded @@ -2137,7 +2157,7 @@ quantiletdigestweighted quantiletiming quantiletimingweighted quantileddsketch -quantileDDSketch +quantileDD quartile queryID queryString @@ -2209,6 +2229,7 @@ reinterpretAsString reinterpretAsUInt reinterpretAsUUID remoteSecure +repivot replaceAll replaceOne replaceRegexpAll @@ -2637,6 +2658,8 @@ uuid varPop varSamp variadic +variantElement +variantType varint varpop varsamp @@ -2703,3 +2726,6 @@ znode znodes zookeeperSessionUptime zstd +iTerm +shortkeys +Shortkeys diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index b2983033e448..36b1db583a8d 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,9 @@ +v24.1.2.5-stable 2024-02-02 +v24.1.1.2048-stable 2024-01-30 +v23.12.3.40-stable 2024-02-02 v23.12.2.59-stable 2024-01-05 v23.12.1.1368-stable 2023-12-28 +v23.11.5.29-stable 2024-02-02 v23.11.4.24-stable 2024-01-05 v23.11.3.23-stable 2023-12-21 v23.11.2.11-stable 2023-12-13