From d3b1bcc9595c1da6e9cd03b8a96c0d5e1c2850cd Mon Sep 17 00:00:00 2001 From: liujiwen-up Date: Fri, 20 Dec 2024 16:56:41 +0800 Subject: [PATCH 1/8] feat:add catalog for apache doris --- superset/db_engine_specs/doris.py | 49 ++++++++++++++---- .../unit_tests/db_engine_specs/test_doris.py | 50 +++++++++++++++++++ 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index e502f5bda2be7..40962f58569b6 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -22,11 +22,13 @@ from flask_babel import gettext as __ from sqlalchemy import Float, Integer, Numeric, String, TEXT, types +from sqlalchemy.engine.reflection import Inspector from sqlalchemy.engine.url import URL from sqlalchemy.sql.type_api import TypeEngine from superset.db_engine_specs.mysql import MySQLEngineSpec from superset.errors import SupersetErrorType +from superset.models.core import Database from superset.utils.core import GenericDataType # Regular expressions to catch custom errors @@ -111,6 +113,7 @@ class DorisEngineSpec(MySQLEngineSpec): ) encryption_parameters = {"ssl": "0"} supports_dynamic_schema = True + supports_catalog = supports_dynamic_catalog = True column_type_mappings = ( # type: ignore ( @@ -245,17 +248,45 @@ def adjust_engine_params( catalog: Optional[str] = None, schema: Optional[str] = None, ) -> tuple[URL, dict[str, Any]]: - database = uri.database - if schema and database: - schema = parse.quote(schema, safe="") - if "." in database: - database = database.split(".")[0] + "." + schema - else: - database = "internal." + schema - uri = uri.set(database=database) - + if uri.database and "." in uri.database: + current_catalog, _ = uri.database.split(".", 1) + else: + current_catalog = uri.database + + # In Apache Doris, each catalog has an information_schema for BI tool + # compatibility. See: https://github.com/apache/doris/pull/28919 + adjusted_database = ".".join( + [catalog or current_catalog or "", "information_schema"] + ).rstrip(".") + + uri = uri.set(database=adjusted_database) return uri, connect_args + @classmethod + def get_default_catalog(cls, database: Database) -> Optional[str]: + """ + Return the default catalog. + """ + if database.url_object.database is None: + return None + + return database.url_object.database.split(".")[0] + + @classmethod + def get_catalog_names( + cls, + database: Database, + inspector: Inspector, + ) -> set[str]: + """ + Get all catalogs. + For Doris, the SHOW CATALOGS command returns multiple columns: + CatalogId, CatalogName, Type, IsCurrent, CreateTime, LastUpdateTime, Comment + We need to extract just the CatalogName column. + """ + result = inspector.bind.execute("SHOW CATALOGS") + return {row.CatalogName for row in result} + @classmethod def get_schema_from_engine_params( cls, diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py index ced1a6862b83e..5dbcb228401be 100644 --- a/tests/unit_tests/db_engine_specs/test_doris.py +++ b/tests/unit_tests/db_engine_specs/test_doris.py @@ -16,6 +16,7 @@ # under the License. from typing import Any, Optional +from unittest.mock import Mock import pytest from sqlalchemy import JSON, types @@ -145,3 +146,52 @@ def test_get_schema_from_engine_params() -> None: ) is None ) + + +def test_get_default_catalog() -> None: + """ + Test the ``get_default_catalog`` method. + """ + from superset.db_engine_specs.doris import DorisEngineSpec + from superset.models.core import Database + + database = Mock(spec=Database) + + # Test with catalog.schema format + database.url_object.database = "catalog1.schema1" + assert DorisEngineSpec.get_default_catalog(database) == "catalog1" + + # Test with only catalog format + database.url_object.database = "catalog1" + assert DorisEngineSpec.get_default_catalog(database) == "catalog1" + + # Test with None + database.url_object.database = None + assert DorisEngineSpec.get_default_catalog(database) is None + + +def test_get_catalog_names() -> None: + """ + Test the ``get_catalog_names`` method. + """ + from superset.db_engine_specs.doris import DorisEngineSpec + from superset.models.core import Database + + database = Mock(spec=Database) + inspector = Mock() + + # Mock the execute result + mock_result = [ + Mock(CatalogName="catalog1"), + Mock(CatalogName="catalog2"), + Mock(CatalogName="catalog3"), + ] + inspector.bind.execute.return_value = mock_result + + catalogs = DorisEngineSpec.get_catalog_names(database, inspector) + + # Verify the SQL query + inspector.bind.execute.assert_called_once_with("SHOW CATALOGS") + + # Verify the returned catalog names + assert catalogs == {"catalog1", "catalog2", "catalog3"} From 34403e4e5c32d178124b71453e08b060f801eed7 Mon Sep 17 00:00:00 2001 From: liujiwen-up Date: Fri, 20 Dec 2024 17:43:10 +0800 Subject: [PATCH 2/8] feat:add catalog for apache doris --- superset/db_engine_specs/doris.py | 2 +- tests/unit_tests/db_engine_specs/test_doris.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index 40962f58569b6..fb0f3b74f613d 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -251,7 +251,7 @@ def adjust_engine_params( if uri.database and "." in uri.database: current_catalog, _ = uri.database.split(".", 1) else: - current_catalog = uri.database + current_catalog = "internal" # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py index 5dbcb228401be..573f561d8928e 100644 --- a/tests/unit_tests/db_engine_specs/test_doris.py +++ b/tests/unit_tests/db_engine_specs/test_doris.py @@ -86,25 +86,25 @@ def test_get_column_spec( ( "doris://user:password@host/db1", {"param1": "some_value"}, - "db1", + "internal.information_schema", {"param1": "some_value"}, ), ( "pydoris://user:password@host/db1", {"param1": "some_value"}, - "db1", + "internal.information_schema", {"param1": "some_value"}, ), ( "doris://user:password@host/catalog1.db1", {"param1": "some_value"}, - "catalog1.db1", + "catalog1.information_schema", {"param1": "some_value"}, ), ( "pydoris://user:password@host/catalog1.db1", {"param1": "some_value"}, - "catalog1.db1", + "catalog1.information_schema", {"param1": "some_value"}, ), ], @@ -121,6 +121,11 @@ def test_adjust_engine_params( returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params( url, connect_args ) + # # Update expected schema to include .information_schema + # if return_schema not in ".": + # expected_schema = f"{return_schema}.information_schema" + # else: + # expected_schema = return_schema assert returned_url.database == return_schema assert returned_connect_args == return_connect_args From 2e50b17b0c17cea6751cce7d710be90b019c7d50 Mon Sep 17 00:00:00 2001 From: liujiwen-up Date: Mon, 23 Dec 2024 15:00:59 +0800 Subject: [PATCH 3/8] fix test_doris case --- .../unit_tests/db_engine_specs/test_doris.py | 90 +++++++++++-------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py index 573f561d8928e..d79bc7dcbbd24 100644 --- a/tests/unit_tests/db_engine_specs/test_doris.py +++ b/tests/unit_tests/db_engine_specs/test_doris.py @@ -121,16 +121,21 @@ def test_adjust_engine_params( returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params( url, connect_args ) - # # Update expected schema to include .information_schema - # if return_schema not in ".": - # expected_schema = f"{return_schema}.information_schema" - # else: - # expected_schema = return_schema + assert returned_url.database == return_schema assert returned_connect_args == return_connect_args -def test_get_schema_from_engine_params() -> None: +@pytest.mark.parametrize( + "url,expected_schema", + [ + ("doris://localhost:9030/hive.test", "test"), + ("doris://localhost:9030/hive", None), + ], +) +def test_get_schema_from_engine_params( + url: str, expected_schema: Optional[str] +) -> None: """ Test the ``get_schema_from_engine_params`` method. """ @@ -138,22 +143,24 @@ def test_get_schema_from_engine_params() -> None: assert ( DorisEngineSpec.get_schema_from_engine_params( - make_url("doris://localhost:9030/hive.test"), - {}, - ) - == "test" - ) - - assert ( - DorisEngineSpec.get_schema_from_engine_params( - make_url("doris://localhost:9030/hive"), + make_url(url), {}, ) - is None + == expected_schema ) -def test_get_default_catalog() -> None: +@pytest.mark.parametrize( + "database_value,expected_catalog", + [ + ("catalog1.schema1", "catalog1"), + ("catalog1", "catalog1"), + (None, None), + ], +) +def test_get_default_catalog( + database_value: Optional[str], expected_catalog: Optional[str] +) -> None: """ Test the ``get_default_catalog`` method. """ @@ -161,21 +168,35 @@ def test_get_default_catalog() -> None: from superset.models.core import Database database = Mock(spec=Database) + database.url_object.database = database_value - # Test with catalog.schema format - database.url_object.database = "catalog1.schema1" - assert DorisEngineSpec.get_default_catalog(database) == "catalog1" - - # Test with only catalog format - database.url_object.database = "catalog1" - assert DorisEngineSpec.get_default_catalog(database) == "catalog1" + assert DorisEngineSpec.get_default_catalog(database) == expected_catalog - # Test with None - database.url_object.database = None - assert DorisEngineSpec.get_default_catalog(database) is None - -def test_get_catalog_names() -> None: +@pytest.mark.parametrize( + "mock_catalogs,expected_result", + [ + ( + [ + Mock(CatalogName="catalog1"), + Mock(CatalogName="catalog2"), + Mock(CatalogName="catalog3"), + ], + {"catalog1", "catalog2", "catalog3"}, + ), + ( + [Mock(CatalogName="single_catalog")], + {"single_catalog"}, + ), + ( + [], + set(), + ), + ], +) +def test_get_catalog_names( + mock_catalogs: list[Mock], expected_result: set[str] +) -> None: """ Test the ``get_catalog_names`` method. """ @@ -184,14 +205,7 @@ def test_get_catalog_names() -> None: database = Mock(spec=Database) inspector = Mock() - - # Mock the execute result - mock_result = [ - Mock(CatalogName="catalog1"), - Mock(CatalogName="catalog2"), - Mock(CatalogName="catalog3"), - ] - inspector.bind.execute.return_value = mock_result + inspector.bind.execute.return_value = mock_catalogs catalogs = DorisEngineSpec.get_catalog_names(database, inspector) @@ -199,4 +213,4 @@ def test_get_catalog_names() -> None: inspector.bind.execute.assert_called_once_with("SHOW CATALOGS") # Verify the returned catalog names - assert catalogs == {"catalog1", "catalog2", "catalog3"} + assert catalogs == expected_result From 8e3382819c1af62c6afdb8e32dde18fa09caa4d7 Mon Sep 17 00:00:00 2001 From: liujiwen-up Date: Tue, 24 Dec 2024 11:34:35 +0800 Subject: [PATCH 4/8] Optimizing the code --- superset/db_engine_specs/doris.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index fb0f3b74f613d..6357bcf8f460b 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -248,17 +248,16 @@ def adjust_engine_params( catalog: Optional[str] = None, schema: Optional[str] = None, ) -> tuple[URL, dict[str, Any]]: - if uri.database and "." in uri.database: - current_catalog, _ = uri.database.split(".", 1) + if catalog: + pass + elif uri.database and "." in uri.database: + catalog, _ = uri.database.split(".", 1) else: - current_catalog = "internal" + catalog = "internal" # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 - adjusted_database = ".".join( - [catalog or current_catalog or "", "information_schema"] - ).rstrip(".") - + adjusted_database = ".".join([catalog or "", "information_schema"]) uri = uri.set(database=adjusted_database) return uri, connect_args From 05957e4ae6a03f480e504497003128cac47d2a7b Mon Sep 17 00:00:00 2001 From: liujiwen-up Date: Tue, 24 Dec 2024 18:50:14 +0800 Subject: [PATCH 5/8] fix schema --- superset/db_engine_specs/doris.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index 6357bcf8f460b..4cc1d9ce554be 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -257,7 +257,10 @@ def adjust_engine_params( # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 - adjusted_database = ".".join([catalog or "", "information_schema"]) + if schema: + adjusted_database = ".".join([catalog or "", schema]) + else: + adjusted_database = ".".join([catalog or "", "information_schema"]) uri = uri.set(database=adjusted_database) return uri, connect_args From 1d7483b58e326d1389ad121676c8a8128c572aa8 Mon Sep 17 00:00:00 2001 From: Jiwen liu <61498169+liujiwen-up@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:07:24 +0800 Subject: [PATCH 6/8] Update superset/db_engine_specs/doris.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- superset/db_engine_specs/doris.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index 4cc1d9ce554be..d14d21661b558 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -257,11 +257,9 @@ def adjust_engine_params( # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 - if schema: - adjusted_database = ".".join([catalog or "", schema]) - else: - adjusted_database = ".".join([catalog or "", "information_schema"]) - uri = uri.set(database=adjusted_database) + schema = schema or "information_schema" + database = ".".join([catalog or "", schema]) + uri = uri.set(database=database) return uri, connect_args @classmethod From 56d26609ff0c21a8bbdf4ac3998b75ede7814503 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:30:31 -0800 Subject: [PATCH 7/8] Update superset/db_engine_specs/doris.py Co-authored-by: Beto Dealmeida --- superset/db_engine_specs/doris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index d14d21661b558..36ecbe5bce489 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -258,7 +258,7 @@ def adjust_engine_params( # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 schema = schema or "information_schema" - database = ".".join([catalog or "", schema]) + database = ".".join([catalog, schema]) uri = uri.set(database=database) return uri, connect_args From a11fea599ef306fa716a84e8d10d4460df0b9d95 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:47:06 -0800 Subject: [PATCH 8/8] Update superset/db_engine_specs/doris.py --- superset/db_engine_specs/doris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py index 36ecbe5bce489..d14d21661b558 100644 --- a/superset/db_engine_specs/doris.py +++ b/superset/db_engine_specs/doris.py @@ -258,7 +258,7 @@ def adjust_engine_params( # In Apache Doris, each catalog has an information_schema for BI tool # compatibility. See: https://github.com/apache/doris/pull/28919 schema = schema or "information_schema" - database = ".".join([catalog, schema]) + database = ".".join([catalog or "", schema]) uri = uri.set(database=database) return uri, connect_args