Skip to content

Commit

Permalink
Merge pull request #7 from EJOOSTEROP/wip_investigate_speed
Browse files Browse the repository at this point in the history
Correct missing dates functionality
  • Loading branch information
EJOOSTEROP authored Mar 2, 2024
2 parents f67cc10 + 7f8db98 commit afbc1fc
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 144 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
<!-- insertion marker -->
## [Unreleased]

## [0.4.3] 2024-03-01
### Fixed
- Correct missing-dates detection with flexible start and end dates (add a `WHERE` clause to the query)

## [0.4.2] 2024-02-23
### Fixed
- Date conversion to integer and back
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ disallow_incomplete_defs = true

[tool.poetry]
name = "ternyxmimosa"
version = "0.4.2"
version = "0.4.3"
description = "A minimal modern data stack with working data pipelines in a single Docker container."
authors = ["Erik Oosterop <[email protected]>"]
license = "MIT"
Expand Down
24 changes: 15 additions & 9 deletions src/mimosa/dateswithoutdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,16 @@ def get_existing_dates_as_integer(start_dt=date(2018, 1, 1), end_dt=None):
Returns the list of integer interpretations of dates for which data exists,
plus the day before start date and the day after end date.
"""
if not end_dt:
end_dt = datetime.datetime.now(tz=datetime.timezone.utc).date()

# connect to MotherDuck; the connection string is slightly different than for dlt, hence the replace
conn_str = os.environ["DESTINATION__MOTHERDUCK__CREDENTIALS"].replace("/", "")
con = duckdb.connect(conn_str)
query = "select distinct gas_day_start from landing.storage order by 1 asc"
result = con.execute(query).fetchall()
query = "select distinct gas_day_start from landing.storage where gas_day_start >= ? and gas_day_start <= ? order by 1 asc"
result = con.execute(
query, [start_dt.strftime("%Y-%m-%d"), end_dt.strftime("%Y-%m-%d")]
).fetchall()

# convert results to list of integers. Only consider first column.
dates_list = [
Expand All @@ -69,14 +74,15 @@ def get_existing_dates_as_integer(start_dt=date(2018, 1, 1), end_dt=None):
for date in result
]

min_date = date_to_integer(start_dt)
if dates_list[0] > min_date:
dates_list.insert(0, min_date - 1)
if len(dates_list) == 0:
dates_list.insert(0, date_to_integer(start_dt) - 1)
dates_list.append(date_to_integer(end_dt) + 1)
else:
min_date = date_to_integer(start_dt)
if dates_list[0] > min_date:
dates_list.insert(0, min_date - 1)

if not end_dt:
max_date = date_to_integer(
datetime.datetime.now(tz=datetime.timezone.utc).date()
)
max_date = date_to_integer(end_dt)
if dates_list[-1] < max_date:
dates_list.append(max_date + 1)

Expand Down
140 changes: 7 additions & 133 deletions src/mimosa/schemas/export/gas_storage.schema.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
version: 16
version_hash: up2lgxqJST5vZxfprWaNmeOvIj0cV97uBpdWbMQTyqI=
version: 29
version_hash: QWZ0isizlDRzp1d4uuuDocQB2yGgP8ceQI20u+XnTB8=
engine_version: 8
name: gas_storage
tables:
Expand Down Expand Up @@ -611,7 +611,7 @@ tables:
data_type: bool
nullable: true
parent: _load_info__load_packages__tables
_load_info__metrics___1708734696_877426:
_load_info__metrics___1709340056_3433225:
columns:
started_at:
data_type: timestamp
Expand All @@ -631,7 +631,7 @@ tables:
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739392_789778:
_load_info__metrics___1709340283_6312084:
columns:
started_at:
data_type: timestamp
Expand All @@ -651,127 +651,7 @@ tables:
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739419_239704:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739448_7387874:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739476_5063293:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739502_7937295:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708739534_017471:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708746455_4630005:
columns:
started_at:
data_type: timestamp
nullable: true
finished_at:
data_type: timestamp
nullable: true
_dlt_parent_id:
data_type: text
nullable: false
foreign_key: true
_dlt_list_idx:
data_type: bigint
nullable: false
_dlt_id:
data_type: text
nullable: false
unique: true
parent: _load_info
_load_info__metrics___1708746486_9415627:
_load_info__metrics___1709340314_1349206:
columns:
started_at:
data_type: timestamp
Expand Down Expand Up @@ -817,14 +697,8 @@ normalizers:
storage:
_dlt_id: _dlt_root_id
previous_hashes:
- gyQeK13R6kx0TYoqbjD624/aaRwICbJlEKVa7Ra2NRA=
- cHg5JFW1o1My2VMCTTn1p1IO8ndcvBscQ1buzcEHgWQ=
- hfEnEVGhLErH3XfS3DmIeFNrZnKGyC3GedfJg/SOoaA=
- 50ezOTWMSH/694XnvMa22u4j6D+m6T0ofgoKf5pFE8A=
- +PKcCyg8cL/XXhT+RV+mpOeVqQL1I15Th+VGPMRarmw=
- c1rUo5Au5MI03BthCEWomQOW8iK5WYsj/rlAgPZp0V4=
- +m8M4k2rsONnQj91vwYhvesufa0nUDJiDTMnu/dwNmE=
- vMe/lW70AfmPzPU5xrieydZWvzSnOFNCSY/YdsxRA1k=
- M5YA/vqYt72ValPvsIryBPpB4NsobBzlKaOqS3rOUx0=
- OgaYRoSWU+EIKXUbVzmkjRfjCfpt/3TwkUEpb3nj73I=
- FYwkDb/AIJAQNsYG273Odt2ZE6XWKNqDKGvx60t+qso=
- zDYyBWKq9Ahg2vP82LN0x3veOwMZ2KM3Ue+6T13d8Bc=
- KIWO4Ei4vYQeAxDYxo7GbLDj7jCzeyz+mGtkhVKYDCk=
Expand Down
6 changes: 5 additions & 1 deletion src/mimosa/wip.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Loads data incrementally into DuckDB.
"""

from datetime import date # F401

from mimosa.pipelines import GEI

Expand All @@ -15,6 +14,7 @@

run_this = True
if run_this:
"""
pipeline.run_landing_pipeline(
gas_date=date(2024, 2, 15), # still from 2019-01-01 to 2019-09-01
to_gas_date=date(2024, 2, 16),
Expand All @@ -25,6 +25,10 @@
reporting_update=reporting_update,
)
"""
pipeline.run_landing_pipeline(
reporting_update=reporting_update,
)
"""
else:
if destination == "motherduck":
pipeline.run_landing_pipeline(reporting_update=reporting_update)
Expand Down

0 comments on commit afbc1fc

Please sign in to comment.