Skip to content

Commit

Permalink
Folder parser meta fields and docs (#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
gouline authored Aug 23, 2022
1 parent 7f4c1fe commit a78e863
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 94 deletions.
33 changes: 13 additions & 20 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,9 @@ There are two approaches provided by this library to read your dbt project:
1. Artifacts
^^^^^^^^^^^^

The recommended approach is to instruct dbt-metabase to read your ``manifest.json``, a
`dbt artifact`_ containing the full representation of your dbt project's resources. If
your dbt project uses multiple schemas, multiple databases or model aliases, you must use
this approach.
You can instruct dbt-metabase to read your ``manifest.json``, a `dbt artifact`_ containing
the full representation of your dbt project's resources. If your dbt project uses multiple schemas,
multiple databases or model aliases, you must use this approach.

Note that you have to run ``dbt compile --target prod`` or any of the other dbt commands
listed in the dbt documentation above to get a fresh copy of your ``manifest.json``. Remember
Expand All @@ -203,10 +202,9 @@ project).
2. Direct parsing
^^^^^^^^^^^^^^^^^

The second alternative is to provide the path to your dbt project root folder
using the argument ``--dbt_path``. dbt-metabase will then look for all .yml files
and parse your documentation and tests directly from there. It will not support
dbt projects with custom schemas.
Alternatively, you can provide the path to your dbt project root folder using the argument
``--dbt_path``. dbt-metabase will then look for all .yml files and parse your documentation
and tests directly from there. It does not support dbt projects with custom schemas.

Semantic Types
--------------
Expand Down Expand Up @@ -254,15 +252,10 @@ See `documentation`_ for a more complete list.
Foreign Keys
------------

By default, dbt-metabase parses the relationship tests to figure out PK-FK
relationships between two tables. Alternatively, you can also use the meta
fields ``fk_target_table`` and ``fk_target_field`` to set the relationships
just like semantic types. You can set the ``semantic_type`` as ``type/FK``
without setting those two fields, but you cannot set those two fields
without the ``semantic_type`` set to ``type/FK``. If both, meta fields
and relationship test, are set for a field, meta fields take precedence.

Here is an example of how you could do this:
Built-in relationship tests are the recommended way of defining foreign keys,
however you can alternatively use ``fk_target_table`` and ``fk_target_field``
meta fields (``semantic_type`` is optional and will be inferred). If both are
set for a column, meta fields take precedence.

.. code-block:: yaml
Expand All @@ -273,9 +266,9 @@ Here is an example of how you could to this:
metabase.fk_target_table: analytics_dims.dim_countries
metabase.fk_target_field: id
Importantly, the ``fk_target_table`` needs to be in the format
``schema_name.table_name``. If the model has an alias, use the alias, not
the original model name here.
You can provide ``fk_target_table`` in the format ``schema_name.table_name`` or
just ``table_name`` to use the current schema. If your model has an alias, provide
that alias (rather than the original name).

Visibility Types
----------------
Expand Down
16 changes: 9 additions & 7 deletions dbtmetabase/models/metabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
from typing import Sequence, Optional, MutableMapping

# Allowed metabase.* fields
# Should be covered by attributes in the MetabaseColumn class
METABASE_META_FIELDS = [
"special_type",
"semantic_type",
"visibility_type",
# Must be covered by MetabaseModel attributes
METABASE_MODEL_META_FIELDS = [
"display_name",
"fk_target_table",
"fk_target_field",
"visibility_type",
"points_of_interest",
"caveats",
]
# Must be covered by MetabaseColumn attributes
METABASE_COLUMN_META_FIELDS = METABASE_MODEL_META_FIELDS + [
"semantic_type",
]


Expand Down
54 changes: 51 additions & 3 deletions dbtmetabase/parsers/dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from os.path import expanduser
from typing import Optional, Mapping, MutableMapping, Iterable, Tuple, List

from ..models.metabase import METABASE_META_FIELDS, MetabaseModel, NullValue
from ..logger.logging import logger
from ..models.metabase import MetabaseModel, MetabaseColumn, NullValue


class DbtReader(metaclass=ABCMeta):
Expand Down Expand Up @@ -45,20 +46,67 @@ def read_models(
) -> Tuple[List[MetabaseModel], MutableMapping]:
pass

def set_column_foreign_key(
    self,
    column: Mapping,
    metabase_column: MetabaseColumn,
    table: Optional[str],
    field: Optional[str],
    schema: Optional[str],
):
    """Sets foreign key target on a column.

    Values found under the ``metabase.fk_target_table`` and
    ``metabase.fk_target_field`` keys of the column's ``meta`` mapping
    override the ``table`` and ``field`` arguments. When both a table and a
    field are available, ``metabase_column`` is mutated in place: its
    ``semantic_type`` becomes ``"type/FK"`` and its ``fk_target_table`` /
    ``fk_target_field`` are set to the upper-cased, unquoted targets. When
    only one of the two is available a warning is logged and nothing is set;
    when neither is available the method returns silently.

    Args:
        column (Mapping): Schema column definition.
        metabase_column (MetabaseColumn): Metabase column definition.
        table (Optional[str]): Foreign key target table, either
            ``schema.table`` or just ``table``.
        field (Optional[str]): Foreign key target field.
        schema (Optional[str]): Current schema name, prepended to the target
            table when the table has no schema component.
    """
    # Meta fields take precedence. "or {}" also covers a "meta" key that is
    # present but null, which would otherwise break the lookups below.
    meta = column.get("meta") or {}
    table = meta.get("metabase.fk_target_table", table)
    field = meta.get("metabase.fk_target_field", field)

    if not table or not field:
        # Exactly one half of the pair was provided: tell the user.
        if table or field:
            logger().warning(
                "Foreign key requires table and field for column %s",
                metabase_column.name,
            )
        return

    # Coerce to str so that non-string scalars do not raise on split/strip.
    table_path = str(table).split(".")
    if len(table_path) == 1 and schema:
        # Unqualified table name: assume it lives in the current schema.
        table_path.insert(0, schema)

    metabase_column.semantic_type = "type/FK"
    metabase_column.fk_target_table = ".".join(
        part.strip('"').upper() for part in table_path
    )
    metabase_column.fk_target_field = str(field).strip('"').upper()
    logger().debug(
        "Relation from %s to %s.%s",
        metabase_column.name,
        metabase_column.fk_target_table,
        metabase_column.fk_target_field,
    )

@staticmethod
def read_meta_fields(obj: Mapping) -> Mapping:
def read_meta_fields(obj: Mapping, fields: List) -> Mapping:
"""Reads meta fields from a schem object.
Args:
obj (Mapping): Schema object.
fields (List): List of fields to read.
Returns:
Mapping: Field values.
"""

vals = {}
meta = obj.get("meta", [])
for field in METABASE_META_FIELDS:
for field in fields:
if f"metabase.{field}" in meta:
value = meta[f"metabase.{field}"]
vals[field] = value if value is not None else NullValue
Expand Down
73 changes: 28 additions & 45 deletions dbtmetabase/parsers/dbt_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@
from pathlib import Path
from typing import List, Mapping, MutableMapping, Optional, Tuple

from ..models.metabase import MetabaseModel, MetabaseColumn, ModelType
from ..logger.logging import logger
from ..models.metabase import (
MetabaseModel,
MetabaseColumn,
ModelType,
METABASE_MODEL_META_FIELDS,
METABASE_COLUMN_META_FIELDS,
)
from .dbt import DbtReader


Expand Down Expand Up @@ -130,10 +136,6 @@ def _read_model(
metabase_columns.append(self._read_column(column, schema))

description = model.get("description", "")
meta = model.get("meta", {})
points_of_interest = meta.get("metabase.points_of_interest")
caveats = meta.get("metabase.caveats")

if include_tags:
tags = model.get("tags", [])
if tags:
Expand All @@ -144,7 +146,6 @@ def _read_model(

# Resolved name is what the name will be in the database
resolved_name = model.get("alias", model.get("identifier"))
display_name = meta.get("metabase.display_name")
dbt_name = None
if not resolved_name:
resolved_name = model["name"]
Expand All @@ -153,15 +154,13 @@ def _read_model(

return MetabaseModel(
name=resolved_name,
display_name=display_name,
schema=schema,
description=description,
points_of_interest=points_of_interest,
caveats=caveats,
columns=metabase_columns,
model_type=model_type,
source=source,
dbt_name=dbt_name,
**self.read_meta_fields(model, METABASE_MODEL_META_FIELDS),
)

def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn:
Expand All @@ -178,51 +177,35 @@ def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn:
column_name = column.get("name", "").upper().strip('"')
column_description = column.get("description")

meta = column.get("meta", {})
display_name = meta.get("metabase.display_name")

# Set explicitly (relationships override this)
fk_to = meta.get("metabase.foreign_key_to")
fk_field = meta.get("metabase.foreign_key_field")

metabase_column = MetabaseColumn(
name=column_name,
description=column_description,
display_name=display_name,
**self.read_meta_fields(column, METABASE_COLUMN_META_FIELDS),
)

fk_target_table = None
fk_target_field = None

for test in column.get("tests") or []:
if isinstance(test, dict):
if "relationships" in test:
relationships = test["relationships"]
fk_to = relationships["to"]
fk_field = relationships["field"]

if fk_to and fk_field:
fk_table = self.parse_ref(fk_to)
if fk_table:
metabase_column.semantic_type = "type/FK"
metabase_column.fk_target_table = f"{schema}.{fk_table}".upper()
metabase_column.fk_target_field = str(fk_field).upper().strip('"')
logger().debug(
"Relation from %s to %s.%s",
column.get("name", "").upper().strip('"'),
metabase_column.fk_target_table,
metabase_column.fk_target_field,
)
else:
logger().warning(
"Could not resolve foreign key target table for column %s",
metabase_column.name,
)
elif fk_to or fk_field:
logger().warning(
"Foreign key 'to' and 'field' must be provided for column %s",
metabase_column.name,
)

for field, value in DbtReader.read_meta_fields(column).items():
setattr(metabase_column, field, value)
fk_target_table = self.parse_ref(relationships["to"])
if not fk_target_table:
logger().warning(
"Could not resolve foreign key target table for column %s",
metabase_column.name,
)
continue
fk_target_field = relationships["field"]

self.set_column_foreign_key(
column=column,
metabase_column=metabase_column,
table=fk_target_table,
field=fk_target_field,
schema=schema,
)

return metabase_column

Expand Down
36 changes: 17 additions & 19 deletions dbtmetabase/parsers/dbt_manifest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import json
from typing import List, Tuple, Mapping, Optional, MutableMapping

from ..models.metabase import MetabaseModel, MetabaseColumn, ModelType
from ..logger.logging import logger
from ..models.metabase import (
MetabaseModel,
MetabaseColumn,
ModelType,
METABASE_MODEL_META_FIELDS,
METABASE_COLUMN_META_FIELDS,
)
from .dbt import DbtReader


Expand Down Expand Up @@ -260,9 +266,6 @@ def _read_model(
)

description = model.get("description", "")
meta = model.get("meta", {})
points_of_interest = meta.get("metabase.points_of_interest")
caveats = meta.get("metabase.caveats")

if include_tags:
tags = model.get("tags", [])
Expand All @@ -289,18 +292,16 @@ def _read_model(
name=resolved_name,
schema=model["schema"].upper(),
description=description,
points_of_interest=points_of_interest,
caveats=caveats,
columns=metabase_column,
model_type=model_type,
unique_id=unique_id,
source=source,
dbt_name=dbt_name,
**DbtReader.read_meta_fields(model),
**self.read_meta_fields(model, METABASE_MODEL_META_FIELDS),
)

@staticmethod
def _read_column(
self,
column: Mapping,
relationship: Optional[Mapping],
) -> MetabaseColumn:
Expand All @@ -319,18 +320,15 @@ def _read_column(
metabase_column = MetabaseColumn(
name=column_name,
description=column_description,
**DbtReader.read_meta_fields(column),
**self.read_meta_fields(column, METABASE_COLUMN_META_FIELDS),
)

if relationship:
metabase_column.semantic_type = "type/FK"
metabase_column.fk_target_table = relationship["fk_target_table"].upper()
metabase_column.fk_target_field = relationship["fk_target_field"].upper()
logger().debug(
"Relation from %s to %s.%s",
column.get("name", "").upper().strip('"'),
metabase_column.fk_target_table,
metabase_column.fk_target_field,
)
self.set_column_foreign_key(
column=column,
metabase_column=metabase_column,
table=relationship["fk_target_table"] if relationship else None,
field=relationship["fk_target_field"] if relationship else None,
schema=self.schema,
)

return metabase_column

0 comments on commit a78e863

Please sign in to comment.