From dcf505a0d2a9e8f18f580afc3ef7c9c779e357f4 Mon Sep 17 00:00:00 2001 From: "W.F. Stoel" Date: Wed, 27 Mar 2024 10:15:45 +0100 Subject: [PATCH 1/5] Improve readme Signed-off-by: W.F. Stoel --- README.md | 63 ++++++++++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index eb21c957c..466f4b9d3 100644 --- a/README.md +++ b/README.md @@ -4,45 +4,32 @@ SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project - -[![Python Build](https://github.com/openstef/openstef/actions/workflows/python-build.yaml/badge.svg)](https://github.com/openstef/openstef/actions/workflows/python-build.yaml) -[![REUSE Compliance Check](https://github.com/openstef/openstef/actions/workflows/reuse-compliance.yaml/badge.svg)](https://github.com/openstef/openstef/actions/workflows/reuse-compliance.yaml) - -[![Bugs](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=bugs)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Code Smells](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=code_smells)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=coverage)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Duplicated Lines (%)](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=duplicated_lines_density)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Maintainability Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=sqale_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=reliability_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=security_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Technical Debt](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=sqale_index)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=vulnerabilities)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef) -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5585/badge)](https://bestpractices.coreinfrastructure.org/projects/5585) +# OpenSTEF + [![Downloads](https://static.pepy.tech/badge/openstef)](https://pepy.tech/project/openstef) [![Downloads](https://static.pepy.tech/badge/openstef/month)](https://pepy.tech/project/openstef) +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5585/badge)](https://bestpractices.coreinfrastructure.org/projects/5585) -# OpenSTEF OpenSTEF is a Python package designed for generating short-term forecasts in the energy sector. The repository includes all the essential components required for machine learning pipelines that facilitate the forecasting process. To utilize the package, users are required to furnish their own data storage and retrieval interface. 
# Table of contents - [External information sources](#external-information-sources) -- [Installation](install) -- [Usage](usage) -- [Reference Implementation](reference-implementation) -- [Database connector for OpenSTEF](Openstef-dbc-Database-connector-for-openstef) +- [Installation](#installation) +- [Usage](#usage) + - [Example notebooks](#example-notebooks) + - [Reference Implementation](#reference-implementation) + - [Database connector for OpenSTEF](#database-connector-for-openstef) -- [License](license) +- [License](#license) -- [Licences third-party libraries](licenses-third-party-libraries) -- [Contributing](contributing) -- [Contact](contact) +- [Contributing](#contributing) +- [Contact](#contact) # External information sources - [Documentation website](https://openstef.github.io/openstef/index.html); - [Python package](https://pypi.org/project/openstef/); -- [Project website](https://www.lfenergy.org/projects/openstef/); +- [Linux Foundation project page](https://www.lfenergy.org/projects/openstef/); - [Documentation on dashboard](https://raw.githack.com/OpenSTEF/.github/main/profile/html/openstef_dashboard_doc.html); -- [Linux Foundation project page](https://openstef.github.io/openstef/index.html) - [Video about OpenSTEF](https://www.lfenergy.org/forecasting-to-create-a-more-resilient-optimized-grid/); -- [Teams channel](https://teams.microsoft.com/l/team/19%3ac08a513650524fc988afb296cd0358cc%40thread.tacv2/conversations?groupId=bfcb763a-3a97-4938-81d7-b14512aa537d&tenantId=697f104b-d7cb-48c8-ac9f-bd87105bafdc) +- [Teams channel](https://teams.microsoft.com/l/team/19%3ac08a513650524fc988afb296cd0358cc%40thread.tacv2/conversations?groupId=bfcb763a-3a97-4938-81d7-b14512aa537d&tenantId=697f104b-d7cb-48c8-ac9f-bd87105bafdc) # Installation @@ -66,33 +53,33 @@ For more information on this issue see the [readme of pywin32](https://github.co # Usage -To run a task use: - -```shell -python -m openstef task -``` +## Example notebooks +To help you get started, a set of fundamental example notebooks has been created. You can access these offline examples [here](https://github.com/OpenSTEF/openstef-offline-example). ## Reference Implementation A complete implementation including databases, user interface, example data, etc. is available at: https://github.com/OpenSTEF/openstef-reference ![screenshot](https://user-images.githubusercontent.com/60883372/146760483-29af3ac7-62af-4f13-98c7-982a79c517d1.jpg) Screenshot of the operational dashboard showing the key functionality of OpenSTEF. -Dashboard documentation can be found [here](https://github.com/OpenSTEF/.github/blob/main/profile/README.md). +Dashboard documentation can be found [here](https://raw.githack.com/OpenSTEF/.github/main/profile/html/openstef_dashboard_doc.html). -## Openstef-dbc - Database connector for openstef -This repository provides an interface to OpenSTEF (reference) databases. The repository can be found [here](https://github.com/OpenSTEF/openstef-dbc). -## Example notebooks -To help you get started, a set of fundamental example notebooks has been created. You can access these offline examples [here](https://github.com/OpenSTEF/openstef-offline-example). +To run a task use: + +```shell +python -m openstef task +``` + +## Database connector for OpenSTEF +This repository provides an interface to OpenSTEF (reference) databases. The repository can be found [here](https://github.com/OpenSTEF/openstef-dbc). -## License +# License This project is licensed under the Mozilla Public License, version 2.0 - see LICENSE for details.
## Licenses third-party libraries This project includes third-party libraries, which are licensed under their own respective Open-Source licenses. SPDX-License-Identifier headers are used to show which license is applicable. The concerning license files can be found in the LICENSES directory. -## Contributing +# Contributing Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/CODE_OF_CONDUCT.md), [CONTRIBUTING.md](https://github.com/OpenSTEF/.github/blob/main/CONTRIBUTING.md) and [PROJECT_GOVERNANCE.md](https://github.com/OpenSTEF/.github/blob/main/PROJECT_GOVERNANCE.md) for details on the process for submitting pull requests to us. -## Contact +# Contact Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project From a567159c7f00c0bd5f6897e4d988d5500e96e278 Mon Sep 17 00:00:00 2001 From: "W.F. Stoel" Date: Wed, 27 Mar 2024 16:35:11 +0100 Subject: [PATCH 2/5] Improve overall code quality Signed-off-by: W.F. Stoel --- openstef/enums.py | 2 -- .../feature_engineering/weather_features.py | 1 - openstef/metrics/metrics.py | 8 +++--- .../model/confidence_interval_applicator.py | 2 +- openstef/model/fallback.py | 4 +-- .../metamodels/missing_values_handler.py | 4 +-- openstef/model/regressors/custom_regressor.py | 2 +- openstef/model/serializer.py | 5 ++-- .../model/standard_deviation_generator.py | 2 +- openstef/model_selection/model_selection.py | 2 +- openstef/postprocessing/postprocessing.py | 5 ---- openstef/tasks/calculate_kpi.py | 11 ++++---- openstef/tasks/create_basecase_forecast.py | 8 +++--- openstef/tasks/create_components_forecast.py | 12 ++++----- openstef/tasks/create_forecast.py | 6 ++--- openstef/tasks/create_solar_forecast.py | 5 ++-- openstef/tasks/optimize_hyperparameters.py | 6 ++--- openstef/tasks/split_forecast.py | 7 +++-- openstef/tasks/train_model.py | 10 +++---- openstef/validation/validation.py | 12 ++++----- test/component/test_component.py | 6 ++--- .../feature_engineering/test_feature_adder.py | 10 +++---- .../test_feature_applicator.py | 2 +- .../test_feature_free_days.py | 8 +++--- test/unit/model/regressors/test_arima.py | 8 +++--- test/unit/model/regressors/test_linear.py | 2 +- .../model/regressors/test_xgb_quantile.py | 2 +- test/unit/model/test_basecase.py | 6 ++--- .../test_confidence_interval_applicator.py | 4 +-- test/unit/model/test_custom_models.py | 2 +- test/unit/model/test_serializer.py | 26 +++++++++---------- test/unit/pipeline/test_create_basecase.py | 4 +-- .../test_create_component_forecast.py | 6 ++--- .../pipeline/test_pipeline_train_model.py | 24 ++++++++--------- test/unit/pipeline/test_train.py | 6 ++--- .../pipeline/test_train_predict_backtest.py | 18 ++++++------- .../tasks/test_create_basecase_forecast.py | 6 ++--- ...alidation_detect_ongoing_zero_flatliner.py | 4 +-- 38 files changed, 123 insertions(+), 135 deletions(-) diff --git a/openstef/enums.py b/openstef/enums.py index 4340ca19a..df8f69b98 100644 --- a/openstef/enums.py +++ b/openstef/enums.py @@ -3,8 +3,6 @@ # SPDX-License-Identifier: MPL-2.0 from enum import Enum - -# TODO replace this with ModelType (MLModelType == Machine Learning model type) class MLModelType(Enum): XGB = "xgb" XGB_QUANTILE = "xgb_quantile" diff --git a/openstef/feature_engineering/weather_features.py b/openstef/feature_engineering/weather_features.py index 74ee3d0a4..e008c6b47 100644 --- a/openstef/feature_engineering/weather_features.py +++ b/openstef/feature_engineering/weather_features.py 
@@ -390,7 +390,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series solar_zenith = solpos.apparent_zenith # convert radiation (ghi) to right unit (J/m^2 to kWh/m^2) - # TODO: check whether unit conversion is necessary ghi_forecasted = radiation / 3600 # convert ghi to dni dni_converted = pvlib.irradiance.dni( diff --git a/openstef/metrics/metrics.py b/openstef/metrics/metrics.py index df661278f..107194fd8 100644 --- a/openstef/metrics/metrics.py +++ b/openstef/metrics/metrics.py @@ -9,7 +9,7 @@ # # SPDX-License-Identifier: MIT """This module contains all metrics to assess forecast quality.""" -from typing import Callable +from typing import Callable, Optional, Tuple import numpy as np import pandas as pd @@ -291,12 +291,12 @@ def skill_score_positive_peaks( def franks_skill_score( - realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0 + realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: Optional[float] = 1.0 ) -> float: """Calculate Franks skill score.""" # Combine series in one DataFrame combined = pd.concat([realised, forecast], axis=1) - if range_ == 1.0: + if not range_: range_ = ( combined["load"].max() - combined["load"].min() if (combined["load"].max() - combined["load"].min()) != 0 @@ -352,7 +352,7 @@ def franks_skill_score_peaks( def xgb_quantile_eval( preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2 -) -> str: +) -> Tuple: """Customized evaluational metric that equals to quantile regression loss (also known as pinball loss). Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features. diff --git a/openstef/model/confidence_interval_applicator.py b/openstef/model/confidence_interval_applicator.py index 284301da1..0159f73f8 100644 --- a/openstef/model/confidence_interval_applicator.py +++ b/openstef/model/confidence_interval_applicator.py @@ -112,7 +112,7 @@ def _add_standard_deviation_to_forecast( # Determine now, rounded on 15 minutes, # Rounding helps to prevent fractional t_aheads now = ( - pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo)) + pd.Series(datetime.now(tz=forecast_copy.index.tzinfo)) .min() .round(f"{minimal_resolution}T") .to_pydatetime() diff --git a/openstef/model/fallback.py b/openstef/model/fallback.py index 924e091f1..64d460fb0 100644 --- a/openstef/model/fallback.py +++ b/openstef/model/fallback.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime +from datetime import datetime, UTC import pandas as pd @@ -43,7 +43,7 @@ def generate_fallback( # Find most extreme historic day (do not count today as it is incomplete) day_with_highest_load_date = ( - load[load.index.tz_localize(None).date != datetime.utcnow().date()] + load[load.index < datetime.now(tz=UTC)] .idxmax() .load.date() ) diff --git a/openstef/model/metamodels/missing_values_handler.py b/openstef/model/metamodels/missing_values_handler.py index 99886c5ea..fbe4cc36a 100644 --- a/openstef/model/metamodels/missing_values_handler.py +++ b/openstef/model/metamodels/missing_values_handler.py @@ -90,7 +90,7 @@ def _get_tags(self): def fit(self, x, y): """Fit model.""" _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True) - if type(x) != pd.DataFrame: + if not isinstance(x, pd.DataFrame): x = pd.DataFrame(np.asarray(x)) self.feature_in_names_ = list(x.columns) self.n_features_in_ = x.shape[1] 
@@ -133,6 +133,6 @@ def predict(self, x): x, force_all_finite="allow-nan", ) - if type(x) != pd.DataFrame: + if not isinstance(x, pd.DataFrame): x = pd.DataFrame(np.array(x)) return self.pipeline_.predict(x[self.non_null_columns_]) diff --git a/openstef/model/regressors/custom_regressor.py b/openstef/model/regressors/custom_regressor.py index 33939cedd..196d16216 100644 --- a/openstef/model/regressors/custom_regressor.py +++ b/openstef/model/regressors/custom_regressor.py @@ -28,7 +28,7 @@ def valid_kwargs() -> list[str]: @classmethod @abstractmethod - def objective(self) -> Type[RegressorObjective]: + def objective() -> Type[RegressorObjective]: ... diff --git a/openstef/model/serializer.py b/openstef/model/serializer.py index a1d0f933d..36b953e90 100644 --- a/openstef/model/serializer.py +++ b/openstef/model/serializer.py @@ -4,7 +4,7 @@ import json import os import shutil -from datetime import datetime +from datetime import datetime, UTC from json import JSONDecodeError from typing import Optional, Union from urllib.parse import unquote, urlparse @@ -273,8 +273,7 @@ def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, flo """Determines how many days ago a model is trained from the mlflow run.""" try: model_datetime = run.end_time.to_pydatetime() - model_datetime = model_datetime.replace(tzinfo=None) - model_age_days = (datetime.utcnow() - model_datetime).days + model_age_days = (datetime.now(tz=UTC) - model_datetime).days except Exception as e: self.logger.warning( "Could not get model age. Returning infinite age!", exception=str(e) diff --git a/openstef/model/standard_deviation_generator.py b/openstef/model/standard_deviation_generator.py index f268b4b0e..f462121ca 100644 --- a/openstef/model/standard_deviation_generator.py +++ b/openstef/model/standard_deviation_generator.py @@ -69,7 +69,7 @@ def _calculate_standard_deviation( # Calculate the error for each predicted point error = realised - predicted error.index = error.index.hour # Hour only, remove the rest - # For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP) + # For the time starts with 00, 01, 02, etc. for hour in range(24): hour_error = error[error.index == hour] diff --git a/openstef/model_selection/model_selection.py b/openstef/model_selection/model_selection.py index 0b18e3e19..560cdff66 100644 --- a/openstef/model_selection/model_selection.py +++ b/openstef/model_selection/model_selection.py @@ -227,7 +227,7 @@ def split_data_train_validation_test( for date_set in [max_dates, min_dates, other_dates]: n_days_val = max(1, int(validation_fraction * len(date_set))) val_dates += list( - np.random.choice(list(date_set), n_days_val, replace=False) + np.random.default_rng().choice(list(date_set), n_days_val, replace=False) ) train_dates += [x for x in date_set if x not in val_dates] diff --git a/openstef/postprocessing/postprocessing.py b/openstef/postprocessing/postprocessing.py index 0d9ff040f..a1c205b29 100644 --- a/openstef/postprocessing/postprocessing.py +++ b/openstef/postprocessing/postprocessing.py @@ -232,11 +232,6 @@ def add_prediction_job_properties_to_forecast( if forecast_quality is not None: forecast["quality"] = forecast_quality - # TODO rename prediction job typ to type - # TODO algtype = model_file_path, perhaps we can find a more logical name - # TODO perhaps better to make a forecast its own class! - # TODO double check and sync this with make_basecase_forecast (other fields are added) - # !!!!! 
TODO fix the requirement for customer forecast["pid"] = pj["id"] forecast["customer"] = pj["name"] forecast["description"] = pj["description"] diff --git a/openstef/tasks/calculate_kpi.py b/openstef/tasks/calculate_kpi.py index 3fa1e2a8f..3ba0c3a78 100644 --- a/openstef/tasks/calculate_kpi.py +++ b/openstef/tasks/calculate_kpi.py @@ -19,7 +19,7 @@ """ # Import builtins -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import numpy as np @@ -53,8 +53,9 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None: with TaskContext(taskname, config, database) as context: # Set start and end time - start_time = datetime.utcnow() - timedelta(days=1) - end_time = datetime.utcnow() + end_time = datetime.now(tz=UTC) + start_time = end_time - timedelta(days=1) + PredictionJobLoop(context, model_type=model_type).map( check_kpi_task, @@ -72,9 +73,9 @@ def check_kpi_task( ) -> None: # Apply default parameters if none are provided if start_time is None: - start_time = datetime.utcnow() - timedelta(days=1) + start_time = datetime.now(tz=UTC) - timedelta(days=1) if end_time is None: - end_time = datetime.utcnow() + end_time = datetime.now(tz=UTC) # Get realised load data realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T") diff --git a/openstef/tasks/create_basecase_forecast.py b/openstef/tasks/create_basecase_forecast.py index a7c539f73..76cd443c6 100644 --- a/openstef/tasks/create_basecase_forecast.py +++ b/openstef/tasks/create_basecase_forecast.py @@ -16,7 +16,7 @@ $ python create_basecase_forecast.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import pandas as pd @@ -63,8 +63,8 @@ def create_basecase_forecast_task( return # Define datetime range for input data - datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS) - datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS) + datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS) + datetime_end = datetime.now(tz=UTC) + timedelta(days=T_AHEAD_DAYS) # Retrieve input data input_data = context.database.get_model_input( @@ -81,7 +81,7 @@ def create_basecase_forecast_task( # Those should be updated by regular forecast process. 
basecase_forecast = basecase_forecast.loc[ basecase_forecast.index - > (pd.to_datetime(datetime.utcnow(), utc=True) + timedelta(hours=48)), + > (pd.to_datetime(datetime.now(tz=UTC), utc=True) + timedelta(hours=48)), :, ] diff --git a/openstef/tasks/create_components_forecast.py b/openstef/tasks/create_components_forecast.py index 419c61acd..869841366 100644 --- a/openstef/tasks/create_components_forecast.py +++ b/openstef/tasks/create_components_forecast.py @@ -21,7 +21,7 @@ $ python create_components_forecast.py """ -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, UTC from pathlib import Path import structlog @@ -60,8 +60,8 @@ def create_components_forecast_task( return # Define datetime range for input data - datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS) - datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS) + datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS) + datetime_end = datetime.now(tz=UTC) + timedelta(days=T_AHEAD_DAYS) logger.info( "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end @@ -73,7 +73,7 @@ def create_components_forecast_task( ) # Check if input_data is not empty if len(input_data) == 0: - logger.warning(f"No forecast found. Skipping pid", pid=pj["id"]) + logger.warning("No forecast found. Skipping pid", pid=pj["id"]) return logger.info("retrieving weather data") @@ -104,9 +104,7 @@ def create_components_forecast_task( logger.debug("Written forecast to database") # Check if forecast was complete enough, otherwise raise exception - if forecasts.index.max() < datetime.utcnow().replace( - tzinfo=timezone.utc - ) + timedelta(hours=30): + if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30): # Check which input data is missing the most. 
# Do this by counting the NANs for (load)forecast, radiation and windspeed max_index = forecasts.index.max() diff --git a/openstef/tasks/create_forecast.py b/openstef/tasks/create_forecast.py index 65c6f2244..caf3832c1 100644 --- a/openstef/tasks/create_forecast.py +++ b/openstef/tasks/create_forecast.py @@ -20,7 +20,7 @@ $ python create_forecast.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass @@ -70,8 +70,8 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> No mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri # Define datetime range for input data -    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS) -    datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60) +    datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS) +    datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60) # Retrieve input data input_data = context.database.get_model_input( diff --git a/openstef/tasks/create_solar_forecast.py b/openstef/tasks/create_solar_forecast.py index ded5e6905..f62d9b2b3 100644 --- a/openstef/tasks/create_solar_forecast.py +++ b/openstef/tasks/create_solar_forecast.py @@ -12,7 +12,7 @@ $ python create_solar_forecast """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import numpy as np @@ -23,7 +23,6 @@ from openstef.tasks.utils.predictionjobloop import PredictionJobLoop from openstef.tasks.utils.taskcontext import TaskContext -# TODO move to config PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv" @@ -231,7 +230,7 @@ def main(config=None, database=None): num_prediction_jobs = len(prediction_jobs) # only make customer = Provincie once an hour -    utc_now_minute = datetime.utcnow().minute +    utc_now_minute = datetime.now(tz=UTC).minute if utc_now_minute >= 15: prediction_jobs = [ pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie") ] diff --git a/openstef/tasks/optimize_hyperparameters.py b/openstef/tasks/optimize_hyperparameters.py index 6d3cfa6fb..35c6364d2 100644 --- a/openstef/tasks/optimize_hyperparameters.py +++ b/openstef/tasks/optimize_hyperparameters.py @@ -16,7 +16,7 @@ $ python optimize_hyperparameters.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass @@ -88,8 +88,8 @@ def optimize_hyperparameters_task( ) return -    datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS) -    datetime_end = datetime.utcnow() +    datetime_start = datetime.now(tz=UTC) - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS) +    datetime_end = datetime.now(tz=UTC) input_data = context.database.get_model_input( pid=pj["id"], diff --git a/openstef/tasks/split_forecast.py b/openstef/tasks/split_forecast.py index b686f85f6..e0f461cec 100644 --- a/openstef/tasks/split_forecast.py +++ b/openstef/tasks/split_forecast.py @@ -22,7 +22,7 @@ $ python split_forecast.py """ -from datetime import datetime +from datetime import datetime, UTC from pathlib import Path import numpy as np @@ -86,7 +86,6 @@ def split_forecast_task( components, coefdict = find_components(input_split_function) # Calculate mean absolute error (MAE) -    # TODO: use a standard metric function for this error = components[["load",
"Inschatting"]].diff(axis=1).iloc[:, 1] mae = error.abs().mean() coefdict.update({"MAE": mae}) @@ -176,7 +175,7 @@ def convert_coefdict_to_coefsdf( pj["id"], input_split_function.index.min().date(), input_split_function.index.max().date(), - datetime.utcnow(), + datetime.now(tz=UTC), ] coefsdf = pd.DataFrame( {"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())} @@ -230,7 +229,7 @@ def weighted_sum(x, *args): # Carry out fitting # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa - coefs, cov = scipy.optimize.curve_fit( + coefs, _ = scipy.optimize.curve_fit( weighted_sum, xdata=df.iloc[:, 1:].values.T, ydata=load.values, diff --git a/openstef/tasks/train_model.py b/openstef/tasks/train_model.py index e13d619ee..0fd508d1d 100644 --- a/openstef/tasks/train_model.py +++ b/openstef/tasks/train_model.py @@ -19,7 +19,7 @@ $ python model_train.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass @@ -113,7 +113,7 @@ def train_model_task( # Define start and end of the training input data if datetime_end is None: - datetime_end = datetime.utcnow() + datetime_end = datetime.now(tz=UTC) if datetime_start is None: datetime_start = datetime_end - timedelta(days=TRAINING_PERIOD_DAYS) @@ -151,9 +151,9 @@ def train_model_task( "'save_train_forecasts option was activated.'" ) context.database.write_train_forecasts(pj, data_sets) - context.logger.debug(f"Saved Forecasts from trained model on datasets") + context.logger.debug("Saved Forecasts from trained model on datasets") except SkipSaveTrainingForecasts: - context.logger.debug(f"Skip saving forecasts") + context.logger.debug("Skip saving forecasts") except InputDataOngoingZeroFlatlinerError: if ( context.config.known_zero_flatliners @@ -180,7 +180,7 @@ def main(model_type=None, config=None, database=None): model_type = [ml.value for ml in MLModelType] taskname = Path(__file__).name.replace(".py", "") - datetime_now = datetime.utcnow() + datetime_now = datetime.now(tz=UTC) with TaskContext(taskname, config, database) as context: PredictionJobLoop(context, model_type=model_type).map( train_model_task, context, datetime_end=datetime_now diff --git a/openstef/validation/validation.py b/openstef/validation/validation.py index 2ace5faee..be9476a21 100644 --- a/openstef/validation/validation.py +++ b/openstef/validation/validation.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from typing import Union import math @@ -183,7 +183,7 @@ def calc_completeness_features( df_copy.drop("horizon", inplace=True, axis=1) if weights is None: - weights = np.array([1] * ((len(df_copy.columns)))) + weights = np.array([1] * (len(df_copy.columns))) length_weights = len(weights) length_features = len(df_copy.columns) @@ -223,7 +223,7 @@ def detect_ongoing_zero_flatliner( """ # remove all timestamps in the future - load = load[load.index.tz_localize(None) <= datetime.utcnow()] + load = load[load.index<= datetime.now(tz=UTC)] latest_measurement_time = load.index.max() latest_measurements = load[ latest_measurement_time - timedelta(minutes=duration_threshold_minutes) : @@ -272,9 +272,9 @@ def calc_completeness_dataframe( # timecols: {delay:number of points expected to be missing} # number of points 
expected to be missing = numberOfPointsUpToTwoDaysAhead - numberOfPointsAvailable timecols = { - x: len(df) - eval(x[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25 - for x in df.columns - if x[:2] == "T-" + column: len(df) - eval(column[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25 + for column in df.columns + if column.startswith("T-") } non_na_count = df.count() diff --git a/test/component/test_component.py b/test/component/test_component.py index 9b2f109a1..21682ea18 100644 --- a/test/component/test_component.py +++ b/test/component/test_component.py @@ -78,6 +78,6 @@ def test_component_training_prediction_happyflow(self): forecast["horizon"] = forecast_data.iloc[:, -1] # Verify forecast works correctly - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) diff --git a/test/unit/feature_engineering/test_feature_adder.py b/test/unit/feature_engineering/test_feature_adder.py index b802ec880..9d137747c 100644 --- a/test/unit/feature_engineering/test_feature_adder.py +++ b/test/unit/feature_engineering/test_feature_adder.py @@ -75,8 +75,8 @@ def test_dispatch_features(self): feat_disp = FeatureDispatcher([DummyFeature()]) df_out = feat_disp.apply_features(self.input_data, feature_names) # Test if the features have been correctly added - self.assertTrue( - set(feature_names + list(self.input_data.columns)) == set(df_out.columns) + self.assertEqual( + set(feature_names + list(self.input_data.columns)), set(df_out.columns) ) self.assertTrue((df_out["dummy_0"] == 0).all()) self.assertTrue((df_out["dummy_-1"] == -1).all()) @@ -88,6 +88,6 @@ def test_load_modules(self): ["test.unit.feature_engineering.test_feature_adder"] ) adders_type = [type(adder) for adder in adders] - self.assertTrue(len(adders) == 2) - self.assertTrue(DummyFeature in adders_type) - self.assertTrue(DummyIntFeature in adders_type) + self.assertEqual(len(adders), 2) + self.assertIn(DummyFeature, adders_type) + self.assertIn(DummyIntFeature, adders_type) diff --git a/test/unit/feature_engineering/test_feature_applicator.py b/test/unit/feature_engineering/test_feature_applicator.py index a04406afb..2bbaffbe1 100644 --- a/test/unit/feature_engineering/test_feature_applicator.py +++ b/test/unit/feature_engineering/test_feature_applicator.py @@ -57,7 +57,7 @@ def test_operational_feature_applicator_correct_order(self): horizons=[0.25] ).add_features(self.input_data[["load"]]) self.assertEqual(data_with_features.columns.to_list()[0], "load") - self.assertTrue("horizon" not in data_with_features.columns.to_list()) + self.assertNotIn("horizon", data_with_features.columns.to_list()) def test_operational_feature_applicator_one_horizon(self): # Test for expected column order of the output diff --git a/test/unit/feature_engineering/test_feature_free_days.py b/test/unit/feature_engineering/test_feature_free_days.py index efef94966..257f9123e 100644 --- a/test/unit/feature_engineering/test_feature_free_days.py +++ b/test/unit/feature_engineering/test_feature_free_days.py @@ -46,11 +46,11 @@ def test_create_holiday_functions(self): ) # Assert for every holiday a function is available and no extra functions are generated - self.assertEqual( - all([key in holiday_functions.keys() for key in expected_keys]), True + self.assertTrue( + all([key in holiday_functions.keys() for key in expected_keys]) ) 
- self.assertEqual( - all([key in expected_keys for key in holiday_functions.keys()]), True + self.assertTrue( + all([key in expected_keys for key in holiday_functions.keys()]) ) diff --git a/test/unit/model/regressors/test_arima.py b/test/unit/model/regressors/test_arima.py index b1b9cde86..021b27cc6 100644 --- a/test/unit/model/regressors/test_arima.py +++ b/test/unit/model/regressors/test_arima.py @@ -26,7 +26,7 @@ def test_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_update_historic_data(self): """Test happy flow of the update of historic data""" @@ -40,7 +40,7 @@ def test_update_historic_data(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_predict_wrong_historic(self): """Test the prediction with the wrong historic (missing data)""" @@ -100,5 +100,5 @@ def test_score_backtest(self): score_r2 = model.score( self.train_input.iloc[:150, 1:], self.train_input.iloc[:150, 0] ) - self.assertTrue(score_r2 <= 1.0) - self.assertTrue(score_r2 >= 0.5) + self.assertLessEqual(score_r2, 1.0) + self.assertGreaterEqual(score_r2, 0.5) diff --git a/test/unit/model/regressors/test_linear.py b/test/unit/model/regressors/test_linear.py index 302fdab68..b701c2c75 100644 --- a/test/unit/model/regressors/test_linear.py +++ b/test/unit/model/regressors/test_linear.py @@ -32,7 +32,7 @@ def test_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_non_null_columns_retrieval(self): n_sample = self.train_input.shape[0] diff --git a/test/unit/model/regressors/test_xgb_quantile.py b/test/unit/model/regressors/test_xgb_quantile.py index fc589db51..3eed87fac 100644 --- a/test/unit/model/regressors/test_xgb_quantile.py +++ b/test/unit/model/regressors/test_xgb_quantile.py @@ -65,7 +65,7 @@ def test_quantile_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_value_error_raised(self): # Check if Value Error is raised when 0.5 is not in the requested quantiles list diff --git a/test/unit/model/test_basecase.py b/test/unit/model/test_basecase.py index c96a8c0f0..47c766398 100644 --- a/test/unit/model/test_basecase.py +++ b/test/unit/model/test_basecase.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 import unittest -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -20,7 +20,7 @@ def test_basecase_raises_value_error_too_early_start(self): # Shift example data to match current time interval as code expects data # available relative to the current time. 
utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() @@ -39,7 +39,7 @@ def test_basecase_raises_value_error_missing_features(self): # Shift example data to match current time interval as code expects data # available relative to the current time. utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() diff --git a/test/unit/model/test_confidence_interval_applicator.py b/test/unit/model/test_confidence_interval_applicator.py index edf03a0bf..6e8047700 100644 --- a/test/unit/model/test_confidence_interval_applicator.py +++ b/test/unit/model/test_confidence_interval_applicator.py @@ -96,7 +96,7 @@ def test_add_standard_deviation_to_forecast(self): actual_stdev_forecast = ConfidenceIntervalApplicator( MockModel(), self.stdev_forecast )._add_standard_deviation_to_forecast(forecast) - self.assertTrue("stdev" in actual_stdev_forecast.columns) + self.assertIn("stdev", actual_stdev_forecast.columns) self.assertEqual(actual_stdev_forecast["stdev"][0], 2.9) self.assertEqual(actual_stdev_forecast["stdev"][1], 2.9) self.assertEqual(actual_stdev_forecast["stdev"][2], 1.6) @@ -164,7 +164,7 @@ def test_add_standard_deviation_to_forecast_in_past(self): actual_stdev_forecast = ConfidenceIntervalApplicator( MockModelMultiHorizonStdev(), self.stdev_forecast )._add_standard_deviation_to_forecast(forecast) - self.assertTrue("stdev" in actual_stdev_forecast.columns) + self.assertIn("stdev", actual_stdev_forecast.columns) self.assertGreaterEqual( actual_stdev_forecast["stdev"].min(), 0.1 ) # => MockModel.standard_deviation.stdev.min() diff --git a/test/unit/model/test_custom_models.py b/test/unit/model/test_custom_models.py index 3f51829c3..57c3c6c21 100644 --- a/test/unit/model/test_custom_models.py +++ b/test/unit/model/test_custom_models.py @@ -26,7 +26,7 @@ def valid_kwargs() -> list[str]: return [] @classmethod - def objective(self) -> Type[DummyObjective]: + def objective() -> Type[DummyObjective]: return DummyObjective @property diff --git a/test/unit/model/test_serializer.py b/test/unit/model/test_serializer.py index 5ca11fac2..45975a05e 100644 --- a/test/unit/model/test_serializer.py +++ b/test/unit/model/test_serializer.py @@ -6,7 +6,7 @@ import tempfile import yaml from pathlib import Path -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from distutils.dir_util import copy_tree from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -71,8 +71,8 @@ def test_serializer_load_model_feature_names_keyerror( "run_id": [1, 2], "artifact_uri": ["path1", "path2"], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -97,8 +97,8 @@ def test_serializer_load_model_feature_names_attributeerror( # give wrong feature_name type, something else than a str of a list or dict "tags.feature_names": [1, 2], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -123,8 +123,8 @@ def test_serializer_load_model_feature_names_jsonerror( # give wrong feature_name type, something else than a str of a list or dict "tags.feature_names": ["feature1", "feature1"], "end_time": [ - datetime.utcnow() - 
timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -150,8 +150,8 @@ def test_serializer_load_model_feature_modules_attributeerror( # give wrong feature_module type, something else than a str of a list or dict "tags.feature_modules": [1, 2], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -176,8 +176,8 @@ def test_serializer_load_model_feature_modules_jsonerror( # give wrong feature_module type, something else than a str of a list or dict "tags.feature_modules": ["feature_module1", "feature_module1"], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -239,7 +239,7 @@ def test_serializer_get_model_age_no_hyperparameter_optimization( data={ "run_id": [1], "artifact_uri": ["path1"], - "end_time": [datetime.utcnow() - timedelta(days=2)], + "end_time": [datetime..now(tz=UTC) - timedelta(days=2)], } ) mock_find_models.return_value = models_df @@ -256,7 +256,7 @@ def test_serializer_get_model_age_hyperparameter_optimization( data={ "run_id": [1, 2], "artifact_uri": ["path1", "path2"], - "end_time": [datetime.utcnow() - timedelta(days=8), datetime.utcnow()], + "end_time": [datetime.now(tz=UTC) - timedelta(days=8), datetime.now(tz=UTC)], } ) mock_find_models.return_value = models_df diff --git a/test/unit/pipeline/test_create_basecase.py b/test/unit/pipeline/test_create_basecase.py index fdd3b0999..7c316218d 100644 --- a/test/unit/pipeline/test_create_basecase.py +++ b/test/unit/pipeline/test_create_basecase.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -24,7 +24,7 @@ def setUp(self) -> None: ] = np.nan # Shift so the input matches 'now' offset_seconds = ( - pd.to_datetime(datetime.utcnow(), utc=True) + pd.to_datetime(datetime.now(tz=UTC)) - (forecast_input.index.max() - timedelta(days=7)) ).total_seconds() forecast_input = forecast_input.shift( diff --git a/test/unit/pipeline/test_create_component_forecast.py b/test/unit/pipeline/test_create_component_forecast.py index 23666311b..b0d1ca789 100644 --- a/test/unit/pipeline/test_create_component_forecast.py +++ b/test/unit/pipeline/test_create_component_forecast.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData import joblib @@ -75,7 +75,7 @@ def test_component_forecast_pipeline_happy_flow(self): # Shift example data to match current time interval as code expects data # available relative to the current time. 
utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() @@ -117,7 +117,7 @@ def test_component_forecast_pipeline_not_all_weather_data_available(self): # Shift example data to match current time interval as code expects data # available relative to the current time. utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py index 78427a95a..42b821c7f 100644 --- a/test/unit/pipeline/test_pipeline_train_model.py +++ b/test/unit/pipeline/test_pipeline_train_model.py @@ -5,7 +5,7 @@ import glob import os import unittest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData from unittest.mock import MagicMock, patch @@ -49,7 +49,7 @@ def valid_kwargs(): return [] @property - def objective(self): + def objective(): return DummyObjective @property @@ -89,8 +89,8 @@ def setUp(self) -> None: self.pj, self.model_specs = TestData.get_prediction_job_and_modelspecs(pid=307) # Set n_estimators to a small number to speed up training self.model_specs.hyper_params["n_estimators"] = 3 - datetime_start = datetime.utcnow() - timedelta(days=90) - datetime_end = datetime.utcnow() + datetime_start = datetime.now(tz=UTC) - timedelta(days=90) + datetime_end = datetime.now(tz=UTC) self.data_table = TestData.load("input_data_train.csv").head(8641) self.data = pd.DataFrame( index=pd.date_range(datetime_start, datetime_end, freq="15T") @@ -168,10 +168,10 @@ def test_train_model_pipeline_core_happy_flow(self): self.assertIsNotNone(model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + self.assertIsInstance(report, Report) # Validate and clean data validated_data = validation.drop_target_na( @@ -238,10 +238,10 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self): self.assertIsNotNone(model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + self.assertIsInstance(report, Report) # Validate and clean data validated_data = validation.drop_target_na( @@ -281,7 +281,7 @@ def test_train_model_pipeline_with_featureAdders(self): pj.default_modelspecs = model_specs train_input = self.train_input.iloc[::50, :] - model, report, modelspecs, _ = train_model_pipeline_core( + model, report, _, _ = train_model_pipeline_core( pj=pj, model_specs=model_specs, input_data=train_input ) @@ -290,13 +290,13 @@ def test_train_model_pipeline_with_featureAdders(self): # check if the model has a feature_names property self.assertIsNotNone(model.feature_names) - self.assertTrue(dummy_feature in model.feature_names) + self.assertIn(dummy_feature, model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + 
self.assertIsInstance(report, Report) @patch("openstef.pipeline.train_model.MLflowSerializer") def test_train_model_pipeline_with_default_modelspecs(self, mock_serializer): diff --git a/test/unit/pipeline/test_train.py b/test/unit/pipeline/test_train.py index 09f0a01bd..a7bf7485f 100644 --- a/test/unit/pipeline/test_train.py +++ b/test/unit/pipeline/test_train.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 import unittest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -43,8 +43,8 @@ class TestTrain(BaseTestCase): def setUp(self) -> None: super().setUp() self.pj = TestData.get_prediction_job(pid=307) - datetime_start = datetime.utcnow() - timedelta(days=90) - datetime_end = datetime.utcnow() + datetime_start = datetime.now(tz=UTC) - timedelta(days=90) + datetime_end = datetime.now(tz=UTC) self.data_table = TestData.load("input_data_train.csv").head(8641) self.data = pd.DataFrame( index=pd.date_range(datetime_start, datetime_end, freq="15T") diff --git a/test/unit/pipeline/test_train_predict_backtest.py b/test/unit/pipeline/test_train_predict_backtest.py index aea3de19c..1eeea4b2c 100644 --- a/test/unit/pipeline/test_train_predict_backtest.py +++ b/test/unit/pipeline/test_train_predict_backtest.py @@ -50,9 +50,9 @@ def test_train_model_pipeline_core_happy_flow(self): training_horizons=[0.25, 24.0], ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(set(forecast.horizon.unique()), {0.25, 24.0}) def test_train_model_pipeline_core_happy_flow_nfold(self): @@ -72,9 +72,9 @@ def test_train_model_pipeline_core_happy_flow_nfold(self): n_folds=4, ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(sorted(list(forecast.horizon.unique())), [0.25, 24.0]) # check if forecast is indeed of the entire range of the input data @@ -148,9 +148,9 @@ def test_train_model_pipeline_core_custom_split(self): n_folds=4, ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(sorted(list(forecast.horizon.unique())), [0.25, 24.0]) # check if forecast is indeed of the entire range of the input data diff --git a/test/unit/tasks/test_create_basecase_forecast.py b/test/unit/tasks/test_create_basecase_forecast.py index 2989e08a0..c46e72eae 100644 --- a/test/unit/tasks/test_create_basecase_forecast.py +++ b/test/unit/tasks/test_create_basecase_forecast.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.data import TestData from unittest import TestCase from unittest.mock import MagicMock, patch @@ -15,11 +15,11 @@ # Make sure this has a datetime of 
at least NOW+48hours, # since this is filtered in the task FORECAST_MOCK = pd.DataFrame( - index=pd.to_datetime([datetime.utcnow()], utc=True) + timedelta(days=3), + index=pd.to_datetime([datetime..now(tz=UTC)]) + timedelta(days=3), data=dict(forecast=[10.0]), ) FORECAST_NEAR_FUTURE_MOCK = pd.DataFrame( - index=pd.to_datetime([datetime.utcnow()], utc=True) + timedelta(days=1), + index=pd.to_datetime([datetime.now(tz=UTC)]) + timedelta(days=1), data=dict(forecast=[10.0]), ) diff --git a/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py b/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py index 42f9a5dae..56d0252b4 100644 --- a/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py +++ b/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from freezegun import freeze_time from test.unit.utils.base import BaseTestCase import numpy as np @@ -15,7 +15,7 @@ class TestDetectOngoingZeroFlatliners(BaseTestCase): def setUp(self) -> None: super().setUp() - now = datetime.utcnow() + now = datetime.now(tz=UTC) self.three_hour_range = pd.date_range( start=now - timedelta(minutes=180), end=now, freq="0.25H" ) From d3233d3a90b211602d3fde57e5d9eb58854e1f50 Mon Sep 17 00:00:00 2001 From: Willem Frederik Stoel Date: Thu, 28 Mar 2024 13:49:50 +0100 Subject: [PATCH 3/5] Comments Signed-off-by: Willem Frederik Stoel --- openstef/metrics/metrics.py | 5 ++++- openstef/model/regressors/custom_regressor.py | 2 +- test/unit/model/test_custom_models.py | 2 +- test/unit/pipeline/test_pipeline_train_model.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/openstef/metrics/metrics.py b/openstef/metrics/metrics.py index 107194fd8..b11fca685 100644 --- a/openstef/metrics/metrics.py +++ b/openstef/metrics/metrics.py @@ -291,7 +291,10 @@ def skill_score_positive_peaks( def franks_skill_score( - realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: Optional[float] = 1.0 + realised: pd.Series, + forecast: pd.Series, + basecase: pd.Series, + range_: Optional[float] = None ) -> float: """Calculate Franks skill score.""" # Combine series in one DataFrame diff --git a/openstef/model/regressors/custom_regressor.py b/openstef/model/regressors/custom_regressor.py index 196d16216..11decd621 100644 --- a/openstef/model/regressors/custom_regressor.py +++ b/openstef/model/regressors/custom_regressor.py @@ -26,7 +26,7 @@ class CustomOpenstfRegressor(OpenstfRegressor): def valid_kwargs() -> list[str]: ... - @classmethod + @staticmethod @abstractmethod def objective() -> Type[RegressorObjective]: ... 
diff --git a/test/unit/model/test_custom_models.py b/test/unit/model/test_custom_models.py index 57c3c6c21..128843add 100644 --- a/test/unit/model/test_custom_models.py +++ b/test/unit/model/test_custom_models.py @@ -25,7 +25,7 @@ class DummyRegressor(CustomOpenstfRegressor): def valid_kwargs() -> list[str]: return [] - @classmethod + @staticmethod def objective() -> Type[DummyObjective]: return DummyObjective diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py index 42b821c7f..221d27af9 100644 --- a/test/unit/pipeline/test_pipeline_train_model.py +++ b/test/unit/pipeline/test_pipeline_train_model.py @@ -48,7 +48,7 @@ class DummyRegressor(CustomOpenstfRegressor): def valid_kwargs(): return [] - @property + @staticmethod def objective(): return DummyObjective From a8aa6d23bcbbcf9a6248c2fa6a19f7b3bbabad82 Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 13 Jan 2025 13:42:39 +0100 Subject: [PATCH 4/5] fix errors --- test/unit/model/test_serializer.py | 7 +++++-- test/unit/tasks/test_create_basecase_forecast.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/unit/model/test_serializer.py b/test/unit/model/test_serializer.py index 138b52124..17f5231f3 100644 --- a/test/unit/model/test_serializer.py +++ b/test/unit/model/test_serializer.py @@ -239,7 +239,7 @@ def test_serializer_get_model_age_no_hyperparameter_optimization( data={ "run_id": [1], "artifact_uri": ["path1"], - "end_time": [datetime..now(tz=UTC) - timedelta(days=2)], + "end_time": [datetime.now(tz=UTC) - timedelta(days=2)], } ) mock_find_models.return_value = models_df @@ -256,7 +256,10 @@ def test_serializer_get_model_age_hyperparameter_optimization( data={ "run_id": [1, 2], "artifact_uri": ["path1", "path2"], - "end_time": [datetime.now(tz=UTC) - timedelta(days=8), datetime.now(tz=UTC)], + "end_time": [ + datetime.now(tz=UTC) - timedelta(days=8), + datetime.now(tz=UTC), + ], } ) mock_find_models.return_value = models_df diff --git a/test/unit/tasks/test_create_basecase_forecast.py b/test/unit/tasks/test_create_basecase_forecast.py index c46e72eae..c4a7f11a1 100644 --- a/test/unit/tasks/test_create_basecase_forecast.py +++ b/test/unit/tasks/test_create_basecase_forecast.py @@ -15,7 +15,7 @@ # Make sure this has a datetime of at least NOW+48hours, # since this is filtered in the task FORECAST_MOCK = pd.DataFrame( - index=pd.to_datetime([datetime..now(tz=UTC)]) + timedelta(days=3), + index=pd.to_datetime([datetime.now(tz=UTC)]) + timedelta(days=3), data=dict(forecast=[10.0]), ) FORECAST_NEAR_FUTURE_MOCK = pd.DataFrame( From 75c5788217d4948c7f20f10fcda1ab9cc946175c Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 13 Jan 2025 13:56:11 +0100 Subject: [PATCH 5/5] fix datetime comparison --- openstef/validation/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstef/validation/validation.py b/openstef/validation/validation.py index eba06fdfb..79cebd4af 100644 --- a/openstef/validation/validation.py +++ b/openstef/validation/validation.py @@ -243,7 +243,7 @@ def detect_ongoing_zero_flatliner( """ # remove all timestamps in the future - load = load[load.index.tz_localize(None) <= datetime.now(tz=UTC)] + load = load[load.index <= datetime.now(tz=UTC)] latest_measurement_time = load.dropna().index.max() latest_measurements = load[ latest_measurement_time - timedelta(minutes=duration_threshold_minutes) :