From dad120bdfa1652ed5240d576b660b341e2f22caa Mon Sep 17 00:00:00 2001 From: Erik Oosterop Date: Sat, 2 Mar 2024 01:39:56 +0000 Subject: [PATCH 1/2] Correct missing dates (add where clause to query). --- src/mimosa/dateswithoutdata.py | 24 +-- .../schemas/export/gas_storage.schema.yaml | 140 +----------------- src/mimosa/wip.py | 6 +- 3 files changed, 27 insertions(+), 143 deletions(-) diff --git a/src/mimosa/dateswithoutdata.py b/src/mimosa/dateswithoutdata.py index 9a733f5..4dffe23 100644 --- a/src/mimosa/dateswithoutdata.py +++ b/src/mimosa/dateswithoutdata.py @@ -55,11 +55,16 @@ def get_existing_dates_as_integer(start_dt=date(2018, 1, 1), end_dt=None): Returns the list of integer interpretation of dates for which data exists, plus the day before start date and the day after end date. """ + if not end_dt: + end_dt = datetime.datetime.now(tz=datetime.timezone.utc).date() + # connect to MotherDuck, string slighly different than for dlt hence the replace conn_str = os.environ["DESTINATION__MOTHERDUCK__CREDENTIALS"].replace("/", "") con = duckdb.connect(conn_str) - query = "select distinct gas_day_start from landing.storage order by 1 asc" - result = con.execute(query).fetchall() + query = "select distinct gas_day_start from landing.storage where gas_day_start >= ? and gas_day_start <= ? order by 1 asc" + result = con.execute( + query, [start_dt.strftime("%Y-%m-%d"), end_dt.strftime("%Y-%m-%d")] + ).fetchall() # convert results to list of integers. Only consider first column. dates_list = [ @@ -69,14 +74,15 @@ def get_existing_dates_as_integer(start_dt=date(2018, 1, 1), end_dt=None): for date in result ] - min_date = date_to_integer(start_dt) - if dates_list[0] > min_date: - dates_list.insert(0, min_date - 1) + if len(dates_list) == 0: + dates_list.insert(0, date_to_integer(start_dt) - 1) + dates_list.append(date_to_integer(end_dt) + 1) + else: + min_date = date_to_integer(start_dt) + if dates_list[0] > min_date: + dates_list.insert(0, min_date - 1) - if not end_dt: - max_date = date_to_integer( - datetime.datetime.now(tz=datetime.timezone.utc).date() - ) + max_date = date_to_integer(end_dt) if dates_list[-1] < max_date: dates_list.append(max_date + 1) diff --git a/src/mimosa/schemas/export/gas_storage.schema.yaml b/src/mimosa/schemas/export/gas_storage.schema.yaml index f950726..a4d8b61 100644 --- a/src/mimosa/schemas/export/gas_storage.schema.yaml +++ b/src/mimosa/schemas/export/gas_storage.schema.yaml @@ -1,5 +1,5 @@ -version: 16 -version_hash: up2lgxqJST5vZxfprWaNmeOvIj0cV97uBpdWbMQTyqI= +version: 29 +version_hash: QWZ0isizlDRzp1d4uuuDocQB2yGgP8ceQI20u+XnTB8= engine_version: 8 name: gas_storage tables: @@ -611,7 +611,7 @@ tables: data_type: bool nullable: true parent: _load_info__load_packages__tables - _load_info__metrics___1708734696_877426: + _load_info__metrics___1709340056_3433225: columns: started_at: data_type: timestamp @@ -631,7 +631,7 @@ tables: nullable: false unique: true parent: _load_info - _load_info__metrics___1708739392_789778: + _load_info__metrics___1709340283_6312084: columns: started_at: data_type: timestamp @@ -651,127 +651,7 @@ tables: nullable: false unique: true parent: _load_info - _load_info__metrics___1708739419_239704: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708739448_7387874: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708739476_5063293: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708739502_7937295: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708739534_017471: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708746455_4630005: - columns: - started_at: - data_type: timestamp - nullable: true - finished_at: - data_type: timestamp - nullable: true - _dlt_parent_id: - data_type: text - nullable: false - foreign_key: true - _dlt_list_idx: - data_type: bigint - nullable: false - _dlt_id: - data_type: text - nullable: false - unique: true - parent: _load_info - _load_info__metrics___1708746486_9415627: + _load_info__metrics___1709340314_1349206: columns: started_at: data_type: timestamp @@ -817,14 +697,8 @@ normalizers: storage: _dlt_id: _dlt_root_id previous_hashes: -- gyQeK13R6kx0TYoqbjD624/aaRwICbJlEKVa7Ra2NRA= -- cHg5JFW1o1My2VMCTTn1p1IO8ndcvBscQ1buzcEHgWQ= -- hfEnEVGhLErH3XfS3DmIeFNrZnKGyC3GedfJg/SOoaA= -- 50ezOTWMSH/694XnvMa22u4j6D+m6T0ofgoKf5pFE8A= -- +PKcCyg8cL/XXhT+RV+mpOeVqQL1I15Th+VGPMRarmw= -- c1rUo5Au5MI03BthCEWomQOW8iK5WYsj/rlAgPZp0V4= -- +m8M4k2rsONnQj91vwYhvesufa0nUDJiDTMnu/dwNmE= -- vMe/lW70AfmPzPU5xrieydZWvzSnOFNCSY/YdsxRA1k= +- M5YA/vqYt72ValPvsIryBPpB4NsobBzlKaOqS3rOUx0= +- OgaYRoSWU+EIKXUbVzmkjRfjCfpt/3TwkUEpb3nj73I= - FYwkDb/AIJAQNsYG273Odt2ZE6XWKNqDKGvx60t+qso= - zDYyBWKq9Ahg2vP82LN0x3veOwMZ2KM3Ue+6T13d8Bc= - KIWO4Ei4vYQeAxDYxo7GbLDj7jCzeyz+mGtkhVKYDCk= diff --git a/src/mimosa/wip.py b/src/mimosa/wip.py index e4de462..0258d69 100755 --- a/src/mimosa/wip.py +++ b/src/mimosa/wip.py @@ -5,7 +5,6 @@ Loads data incrementally into DuckDB. """ -from datetime import date # F401 from mimosa.pipelines import GEI @@ -15,6 +14,7 @@ run_this = True if run_this: + """ pipeline.run_landing_pipeline( gas_date=date(2024, 2, 15), # still from 2019-01-01 to 2019-09-01 to_gas_date=date(2024, 2, 16), @@ -25,6 +25,10 @@ reporting_update=reporting_update, ) """ + pipeline.run_landing_pipeline( + reporting_update=reporting_update, + ) + """ else: if destination == "motherduck": pipeline.run_landing_pipeline(reporting_update=reporting_update) From 7f8db988e6df67434f32a6d8d2c1ce9cbeadccd2 Mon Sep 17 00:00:00 2001 From: Erik Oosterop Date: Sat, 2 Mar 2024 01:42:47 +0000 Subject: [PATCH 2/2] Version update --- CHANGELOG.md | 4 ++++ pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f37b047..2345a43 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [0.4.3] 2024-03-01 +### Fixed +- Correct missing dates with flexible start and end dates (add where clause to query) + ## [0.4.2] 2024-02-23 ### Fixed - Date conversion to integer and back diff --git a/pyproject.toml b/pyproject.toml index 9e971e2..8b61765 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ disallow_incomplete_defs = true [tool.poetry] name = "ternyxmimosa" -version = "0.4.2" +version = "0.4.3" description = "A minimal modern data stack with working data pipelines in a single Docker container." authors = ["Erik Oosterop "] license = "MIT"