Skip to content

Commit

Permalink
Include dbt files in distribution.
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik Oosterop committed Sep 24, 2023
1 parent 7611f60 commit 4de1001
Show file tree
Hide file tree
Showing 12 changed files with 352 additions and 3 deletions.
16 changes: 15 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
<!-- insertion marker -->
## [Unreleased]

## [0.0.1] 2023-09-13
## [0.0.2] 2023-09-23

### Fixed
- dbt transform files are now included with the distribution package

## [0.0.1] 2023-09-23

### Added
- Initial version. Loads data from [GIE REST API](https://agsi.gie.eu/) into [motherduck](https://motherduck.com/).
- API key and motherduck token need to be set in environment variables, `ENV_GIE_XKEY` and `DESTINATION__MOTHERDUCK__CREDENTIALS` respectively.
- Published on pypi as [ternyxmimosa](https://pypi.org/project/ternyxmimosa/): `pip install ternyxmimosa`
- Within Python code, use (for example): `import mimosa.cli as mimosa`

## [working_notes]

### Added
- Setup core dependencies:
Expand Down Expand Up @@ -54,3 +67,4 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Removed
- None

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ disallow_incomplete_defs = true

[tool.poetry]
name = "ternyxmimosa"
version = "0.0.1"
version = "0.0.2"
description = "A minimal modern data stack with working data pipelines in a single Docker container."
authors = ["Erik Oosterop <[email protected]>"]
license = "MIT"
Expand Down
35 changes: 35 additions & 0 deletions src/mimosa/dbt/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# dbt project definition for the GIE European gas storage transformations.
name: 'gie_european_gas_storage'
version: '0.1'
config-version: 2

# Name of the connection profile dbt looks up in profiles.yml.
profile: 'gie'

require-dbt-version: [">=1.0.0", "<2.0.0"]

model-paths: ["models"]
seed-paths: ["seeds"]
test-paths: ["tests"]
analysis-paths: ["analysis"]
macro-paths: ["macros"]
log-path: "logs"
packages-install-path: "dbt_packages"
target-path: "target"
# Directories removed by `dbt clean`. The package directory listed here has to
# be the one named in packages-install-path above, otherwise installed
# packages are never cleaned up.
clean-targets:
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# Keys directly below `models:` are either configs (prefixed with `+`) or
# project names. The nested block therefore has to be keyed by this project's
# `name` for its settings to reach the models. Individual model files can still
# override these settings with the `{{ config(...) }}` macro.
models:
  # Default for every model, including those from installed packages.
  +materialized: table
  gie_european_gas_storage:
    # Applies to all files under models/.
    +materialized: table
    +tags:
      - gie
    # Applies to files under models/example/ only.
    example:
      +materialized: view
47 changes: 47 additions & 0 deletions src/mimosa/dbt/models/country.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Country-level reporting model: one row per storage__children record,
-- enriched with its parent region name and the start time of the dlt load
-- that delivered it.
with

-- Root records of the landed storage data (used below for the region name).
region_rows as (
    select *
    from {{ source('gie_stage', 'storage') }} as r
),

-- First-level children of the root records, with typed measures.
country_rows as (
    select
        cast(t.gas_day_start as date) as gas_day_start,
        t.code,
        t.name,
        -- try_cast yields NULL instead of failing on values that do not parse
        try_cast(t.consumption as double) as consumption,
        try_cast(t.consumption_full as double) as consumption_full,
        t._dlt_id as _country_dlt_id,
        t._dlt_root_id
    from {{ source('gie_stage', 'storage__children') }} as t
),

-- Every load id paired with the start time of its load info record; load ids
-- without a matching record are kept with a NULL start time.
load_times as (
    select
        info.started_at,
        load_ids.value as _dlt_load_id_root
    from {{ source('gie_stage', '_load_info__loads_ids') }} as load_ids
    left join {{ source('gie_stage', '_load_info') }} as info
        on info._dlt_id = load_ids._dlt_parent_id
)

select
    load_times.started_at as _sdc_extracted_at,
    transaction_timestamp() as _sdc_batched_at,
    region_rows.name as region,
    country_rows.*,
    cast(year(country_rows.gas_day_start) as integer) as reporting_year,
    -- Month and day of the gas day projected onto the fixed year 2000
    -- (presumably so the same calendar day can be compared across years).
    make_date(
        2000,
        month(country_rows.gas_day_start),
        day(country_rows.gas_day_start)
    ) as reporting_day,
    region_rows._dlt_load_id as _root_dlt_load_id
from region_rows
inner join country_rows
    on region_rows._dlt_id = country_rows._dlt_root_id
left join load_times
    on region_rows._dlt_load_id = load_times._dlt_load_id_root
23 changes: 23 additions & 0 deletions src/mimosa/dbt/models/country.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
version: 2

# Properties and tests for the `country` model (models/country.sql).
models:
  - name: country
    description: Reporting table containing daily natural gas related data at country (SSO) level.
    # In a properties file, tags are set through `config:`; the `+tags`
    # spelling is only understood in dbt_project.yml.
    config:
      tags:
        - gie

    tests:
      # One row per country code and gas day. The model exposes the country
      # identifier as `code`; it has no `facility_eic` column.
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns:
            - code
            - gas_day_start

    columns:
      - name: _sdc_batched_at
        description: Timestamp when the data was captured in the database.

      - name: _sdc_extracted_at
        description: Timestamp when the data was retrieved from the REST API.

      - name: consumption
        description: Annual natural gas consumption in TWh.
10 changes: 10 additions & 0 deletions src/mimosa/dbt/models/gas_trace.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Pairs every dlt load id with the start time of its load info record.
-- Load ids without a matching record are kept with a NULL started_at.
select
    info.started_at,
    load_ids.value as _dlt_load_id_root
from {{ source('gie_stage', '_load_info__loads_ids') }} as load_ids
left join {{ source('gie_stage', '_load_info') }} as info
    on info._dlt_id = load_ids._dlt_parent_id

5 changes: 5 additions & 0 deletions src/mimosa/dbt/models/overview.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{% docs __overview__ %}

# Placeholder for GIE European Gas Storage model

{% enddocs %}
26 changes: 26 additions & 0 deletions src/mimosa/dbt/models/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: 2

# Declares the raw tables landed by the dlt pipeline so that models can refer
# to them with source('gie_stage', '<table>').
sources:
  - name: gie_stage
    # database: main
    # schema: stage_gas
    schema: landing
    loader: dlt pipeline.
    description: "Raw gas storage and in/outflow data for EU countries provided by GIE (https://agsi.gie.eu/)."
    # NOTE(review): freshness is configured while loaded_at_field is commented
    # out below; confirm that `dbt source freshness` evaluates these tables.
    freshness:
      warn_after: {count: 12, period: hour}
      # error_after: {count: 24, period: hour}
    # loaded_at_field: _sdc_extracted_at
    tables:
      - name: storage
        description: Source data at EU/non-EU level.
      - name: storage__children
        description: Source data at country level.
      - name: storage__children__children
        description: Source data at company level.
      - name: storage__children__children__children
        description: Source data at SSO/storage facility level.
      - name: _load_info
        description: Load info for dlt jobs
      - name: _load_info__loads_ids
        description: Additional load info for dlt jobs
63 changes: 63 additions & 0 deletions src/mimosa/dbt/models/storage_lvl_facility.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- Facility-level reporting model: one row per
-- storage__children__children__children record, enriched with its parent
-- company, its region name and the start time of the dlt load that
-- delivered it.
with

-- Root records of the landed storage data (used below for the region name).
gas_region as (
    select
        *
    from
        {{ source('gie_stage', 'storage') }} r
),

-- Facility records joined upwards to their company and country parents.
gas_storage as (
    select
        t.gas_day_start::DATE gas_day_start,
        split_part(t.url, '/', 2) as country,
        c.code as company_eic,
        c.name as company_name,
        -- Surrogate key: hash of the facility EIC and the gas day. The `||`
        -- operator propagates NULL, so a missing facility code or gas day
        -- produces a NULL key instead of a misleading hash.
        md5(t.code || '|' || cast(t.gas_day_start::DATE as varchar)) as key_hash,
        t.code as facility_eic,
        t.name as facility_name,
        t.status,
        -- TRY_CAST yields NULL instead of failing on values that do not parse
        TRY_CAST(t.full AS DOUBLE) as facility_fill_ratio,
        TRY_CAST(t.gas_in_storage AS DOUBLE) as gas_in_storage,
        TRY_CAST(t.working_gas_volume AS DOUBLE) as working_gas_volume,
        TRY_CAST(t.injection as DOUBLE) as injection,
        TRY_CAST(t.withdrawal AS DOUBLE) as withdrawal,
        t.url,
        t._dlt_id as _fac_dlt_id,
        t_country._dlt_id as _country_dlt_id,
        t._dlt_root_id
    from
        {{ source('gie_stage', 'storage__children__children__children') }} as t
    left join
        {{ source('gie_stage', 'storage__children__children') }} as c
        on t._dlt_parent_id = c._dlt_id
    left join
        {{ source('gie_stage', 'storage__children') }} as t_country
        on c._dlt_parent_id = t_country._dlt_id
),

-- Every load id paired with the start time of its load info record; the
-- right join keeps load ids that have no matching record.
gas_loading as (
    select
        a.started_at,
        b.value as _dlt_load_id_root
    from
        {{ source('gie_stage', '_load_info') }} as a
    right join
        {{ source('gie_stage', '_load_info__loads_ids') }} as b
        on
        a._dlt_id = b._dlt_parent_id
)

select
    gas_loading.started_at as _sdc_extracted_at,
    transaction_timestamp() as _sdc_batched_at,
    gas_region.name as region,
    gas_storage.*,
    year(gas_storage.gas_day_start)::INTEGER as reporting_year,
    -- Month and day of the gas day projected onto the fixed year 2000
    -- (presumably so the same calendar day can be compared across years).
    make_date(2000, month(gas_storage.gas_day_start), day(gas_storage.gas_day_start)) as reporting_day,
    gas_region._dlt_load_id as _root_dlt_load_id
from
    gas_region join gas_storage on gas_region._dlt_id = gas_storage._dlt_root_id
left join
    gas_loading on gas_region._dlt_load_id = gas_loading._dlt_load_id_root
79 changes: 79 additions & 0 deletions src/mimosa/dbt/models/storage_lvl_facility.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
version: 2

# Properties and tests for the facility-level storage model. dbt attaches this
# entry to the model whose file name matches `name`, i.e.
# models/storage_lvl_facility.sql.
models:
  - name: storage_lvl_facility
    description: Reporting table containing daily gas inventory and flow data at storage facility (SSO) level.
    # In a properties file, tags are set through `config:`; the `+tags`
    # spelling is only understood in dbt_project.yml.
    config:
      tags:
        - gie

    tests:
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns:
            - facility_eic
            - gas_day_start

    columns:
      - name: _sdc_batched_at
        description: Timestamp when the data was captured in the database.

      - name: _sdc_extracted_at
        description: Timestamp when the data was retrieved from the REST API.

      - name: region
        description: Code reflecting the region where the storage facility is located.

      - name: key_hash
        description: Hash of sso_eic and gasdaystart.
        # NOTE(review): kept at warn severity because the model may emit a NULL
        # key_hash; raise to error once key_hash is confirmed to be populated.
        tests:
          - unique:
              config:
                severity: warn
          - not_null:
              config:
                severity: warn

      - name: gas_day_start
        description: Date of the observation. Ex. the injection field refers to the injection on this date. gasinstorage as per end of the gasdaystart.
        tests:
          - not_null

      - name: country
        description: Code reflecting the country where the storage facility is located.

      - name: company_eic
        description: Unique ID for the company that is the primary owner of the storage facility.
        tests:
          - not_null

      - name: company_name
        description: Name of the company.

      - name: facility_eic
        description: Unique ID for SSO/storage facility.
        tests:
          - not_null

      - name: facility_name
        quote: true
        description: Name of SSO/storage facility.
        tests:
          - not_null

      - name: status
        description: E (estimated) C (confirmed) N (no data)
        tests:
          - accepted_values:
              values: ["E", "C", "N"]
              config:
                severity: warn

      - name: facility_fill_ratio
        description: Gas in storage as a percentage of working gas volume. At SSO/facility level.

      - name: gas_in_storage
        description: Total amount of gas in storage at the facility, status at end of gas day (4 digits accuracy). In TWh.

      - name: working_gas_volume
        description: Maximum amount that can be stored at the facility (technical capacity) (4 digits accuracy). In TWh.

      - name: injection
        description: Injection during the day, in GWh/d.

      - name: withdrawal
        description: Withdrawal during the day, in GWh/d.
27 changes: 27 additions & 0 deletions src/mimosa/dbt/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles
config:
  send_anonymous_usage_stats: false
  use_colors: true

# The profile key has to equal the `profile:` value in dbt_project.yml ('gie').
gie:
  # target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}"
  target: "dev"
  outputs:
    dev:
      type: duckdb
      # path: "{{ env_var('DBT_DUCKDB_PATH') }}"
      path: "/workspaces/mimosa/gas_storage.duckdb"
      schema: gas_storage
      database: main
      threads: 2
      # threads: 1
      # schema: main
    test:
      type: duckdb
      # path: "{{ env_var('DBT_DUCKDB_PATH') }}"
      path: "/project/data/test/data.duckdb"
      threads: 4
    prod:
      type: duckdb
      # path: "{{ env_var('DBT_DUCKDB_PATH') }}"
      path: "/workspaces/mimosa/gas_storage.duckdb"
      threads: 4
Loading

0 comments on commit 4de1001

Please sign in to comment.