OpenSTEF · wfstoel · Mar 27, 2024 · Mar 27, 2024 · Mar 27, 2024 · Mar 28, 2024
diff --git a/README.md b/README.md
@@ -4,50 +4,34 @@ SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.te
 SPDX-License-Identifier: MPL-2.0
 -->
 
-<!-- Github Actions badges -->
-[![Python Build](https://github.com/openstef/openstef/actions/workflows/python-build.yaml/badge.svg)](https://github.com/openstef/openstef/actions/workflows/python-build.yaml)
-[![REUSE Compliance Check](https://github.com/openstef/openstef/actions/workflows/reuse-compliance.yaml/badge.svg)](https://github.com/openstef/openstef/actions/workflows/reuse-compliance.yaml)
-<!-- SonarCloud badges -->
-[![Bugs](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=bugs)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Code Smells](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=code_smells)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=coverage)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Duplicated Lines (%)](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=duplicated_lines_density)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Maintainability Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=sqale_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=reliability_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=security_rating)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Technical Debt](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=sqale_index)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=OpenSTEF_openstef&metric=vulnerabilities)](https://sonarcloud.io/dashboard?id=OpenSTEF_openstef)
-[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5585/badge)](https://bestpractices.coreinfrastructure.org/projects/5585)
+# OpenSTEF
+<!-- Badges -->
 [![Downloads](https://static.pepy.tech/badge/openstef)](https://pepy.tech/project/openstef)
 [![Downloads](https://static.pepy.tech/badge/openstef/month)](https://pepy.tech/project/openstef)
+[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5585/badge)](https://bestpractices.coreinfrastructure.org/projects/5585)
 
-# OpenSTEF
 OpenSTEF is a Python package designed for generating short-term forecasts in the energy sector. The repository includes all the essential components required for machine learning pipelines that facilitate the forecasting process. To utilize the package, users are required to furnish their own data storage and retrieval interface.
 
 # Table of contents
 - [OpenSTEF](#openstef)
 - [Table of contents](#table-of-contents)
 - [External information sources](#external-information-sources)
-- [Installation](#installation)
-  - [Install the openstef package](#install-the-openstef-package)
-    - [Remark regarding installation within a **conda environment on Windows**:](#remark-regarding-installation-within-a-conda-environment-on-windows)
+- [Installation](#installation)
 - [Usage](#usage)
-  - [Reference Implementation](#reference-implementation)
-  - [Openstef-dbc - Database connector for openstef](#openstef-dbc---database-connector-for-openstef)
-  - [Example notebooks](#example-notebooks)
-  - [License](#license)
-  - [Licenses third-party libraries](#licenses-third-party-libraries)
-  - [Contributing](#contributing)
-  - [Contact](#contact)
+    - [Example notebooks](#example-notebooks)
+    - [Reference Implementation](#reference-implementation)
+    - [Database connector for OpenSTEF](#database-connector-for-openstef)
+- [License](#license)
+- [Contributing](#contributing)
+- [Contact](#contact)
 
 # External information sources
 - [Documentation website](https://openstef.github.io/openstef/index.html);
 - [Python package](https://pypi.org/project/openstef/);
-- [Project website](https://www.lfenergy.org/projects/openstef/);
+- [Linux Foundation project page](https://www.lfenergy.org/projects/openstef/);
 - [Documentation on dashboard](https://raw.githack.com/OpenSTEF/.github/main/profile/html/openstef_dashboard_doc.html);
-- [Linux Foundation project page](https://openstef.github.io/openstef/index.html)
 - [Video about OpenSTEF](https://www.lfenergy.org/forecasting-to-create-a-more-resilient-optimized-grid/);
-- [Teams channel](https://teams.microsoft.com/l/team/19%3ac08a513650524fc988afb296cd0358cc%40thread.tacv2/conversations?groupId=bfcb763a-3a97-4938-81d7-b14512aa537d&tenantId=697f104b-d7cb-48c8-ac9f-bd87105bafdc) 
+- [Teams channel](https://teams.microsoft.com/l/team/19%3ac08a513650524fc988afb296cd0358cc%40thread.tacv2/conversations?groupId=bfcb763a-3a97-4938-81d7-b14512aa537d&tenantId=697f104b-d7cb-48c8-ac9f-bd87105bafdc)
 
 # Installation
 
@@ -67,33 +51,33 @@ For more information on this issue see the [readme of pywin32](https://github.co
 
 # Usage
 
-To run a task use:
-
-```shell
-python -m openstef task <task_name>
-```
+## Example notebooks
+To help you get started, a set of fundamental example notebooks has been created. You can access these offline examples [here](https://github.com/OpenSTEF/openstef-offline-example).
 
 ## Reference Implementation
 A complete implementation including databases, user interface, example data, etc. is available at: https://github.com/OpenSTEF/openstef-reference
 
 ![screenshot](https://user-images.githubusercontent.com/60883372/146760483-29af3ac7-62af-4f13-98c7-982a79c517d1.jpg)
 Screenshot of the operational dashboard showing the key functionality of OpenSTEF.
-Dashboard documentation can be found [here](https://github.com/OpenSTEF/.github/blob/main/profile/README.md).
+Dashboard documentation can be found [here](https://raw.githack.com/OpenSTEF/.github/main/profile/html/openstef_dashboard_doc.html).
 
-## Openstef-dbc - Database connector for openstef
-This repository provides an interface to OpenSTEF (reference) databases. The repository can be found [here](https://github.com/OpenSTEF/openstef-dbc).
+To run a task use:
 
-## Example notebooks 
-To help you get started, a set of fundamental example notebooks has been created. You can access these offline examples [here](https://github.com/OpenSTEF/openstef-offline-example).
+```shell
+python -m openstef task <task_name>
+```
+
+## Database connector for OpenSTEF
+This repository provides an interface to OpenSTEF (reference) databases. The repository can be found [here](https://github.com/OpenSTEF/openstef-dbc).
 
-## License
+# License
 This project is licensed under the Mozilla Public License, version 2.0 - see LICENSE for details.
 
 ## Licenses third-party libraries
 This project includes third-party libraries, which are licensed under their own respective Open-Source licenses. SPDX-License-Identifier headers are used to show which license is applicable. The concerning license files can be found in the LICENSES directory.
 
-## Contributing
+# Contributing
 Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/CODE_OF_CONDUCT.md), [CONTRIBUTING.md](https://github.com/OpenSTEF/.github/blob/main/CONTRIBUTING.md) and [PROJECT_GOVERNANCE.md](https://github.com/OpenSTEF/.github/blob/main/PROJECT_GOVERNANCE.md) for details on the process for submitting pull requests to us.
 
-## Contact
+# Contact
 Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
diff --git a/openstef/enums.py b/openstef/enums.py
@@ -3,8 +3,6 @@
 # SPDX-License-Identifier: MPL-2.0
 from enum import Enum
 
-
-# TODO replace this with ModelType (MLModelType == Machine Learning model type)
 class MLModelType(Enum):
     XGB = "xgb"
     XGB_QUANTILE = "xgb_quantile"

diff --git a/openstef/feature_engineering/weather_features.py b/openstef/feature_engineering/weather_features.py
@@ -390,7 +390,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series
     solar_zenith = solpos.apparent_zenith
 
     # convert radiation (ghi) to right unit (J/m^2 to kWh/m^2)
-    # TODO: check whether unit conversion is necessary
     ghi_forecasted = radiation / 3600
     # convert ghi to dni
     dni_converted = pvlib.irradiance.dni(

diff --git a/openstef/metrics/metrics.py b/openstef/metrics/metrics.py
@@ -9,7 +9,7 @@
 #
 # SPDX-License-Identifier: MIT
 """This module contains all metrics to assess forecast quality."""
-from typing import Callable
+from typing import Callable, Optional, Tuple
 
 import numpy as np
 import pandas as pd
@@ -291,12 +291,15 @@ def skill_score_positive_peaks(
 
 
 def franks_skill_score(
-    realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0
+    realised: pd.Series,
+    forecast: pd.Series,
+    basecase: pd.Series,
+    range_: Optional[float] = None
 ) -> float:
     """Calculate Franks skill score."""
     # Combine series in one DataFrame
     combined = pd.concat([realised, forecast], axis=1)
-    if range_ == 1.0:
+    if not range_:
         range_ = (
             combined["load"].max() - combined["load"].min()
             if (combined["load"].max() - combined["load"].min()) != 0
@@ -352,7 +355,7 @@ def franks_skill_score_peaks(
 
 def xgb_quantile_eval(
     preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2
-) -> str:
+) -> Tuple:
     """Customized evaluational metric that equals to quantile regression loss (also known as pinball loss).
 
     Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features.

diff --git a/openstef/model/confidence_interval_applicator.py b/openstef/model/confidence_interval_applicator.py
@@ -112,7 +112,7 @@ def _add_standard_deviation_to_forecast(
             # Determine now, rounded on 15 minutes,
             # Rounding helps to prevent fractional t_aheads
             now = (
-                pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo))
+                pd.Series(datetime.now(tz=forecast_copy.index.tzinfo))
                 .min()
                 .round(f"{minimal_resolution}T")
                 .to_pydatetime()

diff --git a/openstef/model/fallback.py b/openstef/model/fallback.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <[email protected]> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
-from datetime import datetime
+from datetime import datetime, UTC
 
 import pandas as pd
 
@@ -43,7 +43,7 @@ def generate_fallback(
 
         # Find most extreme historic day (do not count today as it is incomplete)
         day_with_highest_load_date = (
-            load[load.index.tz_localize(None).date != datetime.utcnow().date()]
+            load[load.index < datetime.now(tz=UTC)]
             .idxmax()
             .load.date()
-            load[load.index < datetime.now(tz=UTC)]
-            .idxmax()
-            .load.date()
+            load[load.index < datetime.now(tz=UTC)].idxmax().load.date()
-            load[load.index < datetime.now(tz=UTC)]
-            .idxmax()
-            .load.date()
+            load[load.index < datetime.now(tz=UTC)].idxmax().load.date()
         )

diff --git a/openstef/model/metamodels/missing_values_handler.py b/openstef/model/metamodels/missing_values_handler.py
@@ -90,7 +90,7 @@ def _get_tags(self):
     def fit(self, x, y):
         """Fit model."""
         _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
-        if type(x) != pd.DataFrame:
+        if not isinstance(x, pd.DataFrame):
             x = pd.DataFrame(np.asarray(x))
         self.feature_in_names_ = list(x.columns)
         self.n_features_in_ = x.shape[1]
@@ -133,6 +133,6 @@ def predict(self, x):
             x,
             force_all_finite="allow-nan",
         )
-        if type(x) != pd.DataFrame:
+        if not isinstance(x, pd.DataFrame):
             x = pd.DataFrame(np.array(x))
         return self.pipeline_.predict(x[self.non_null_columns_])
diff --git a/openstef/model/regressors/custom_regressor.py b/openstef/model/regressors/custom_regressor.py
@@ -26,9 +26,9 @@ class CustomOpenstfRegressor(OpenstfRegressor):
     def valid_kwargs() -> list[str]:
         ...
 
-    @classmethod
+    @staticmethod
     @abstractmethod
-    def objective(self) -> Type[RegressorObjective]:
+    def objective() -> Type[RegressorObjective]:
         ...
 
 

diff --git a/openstef/model/serializer.py b/openstef/model/serializer.py
@@ -4,7 +4,7 @@
 import json
 import os
 import shutil
-from datetime import datetime
+from datetime import datetime, UTC
 from json import JSONDecodeError
 from typing import Optional, Union
 from urllib.parse import unquote, urlparse
@@ -273,8 +273,7 @@ def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, flo
         """Determines how many days ago a model is trained from the mlflow run."""
         try:
             model_datetime = run.end_time.to_pydatetime()
-            model_datetime = model_datetime.replace(tzinfo=None)
-            model_age_days = (datetime.utcnow() - model_datetime).days
+            model_age_days = (datetime.now(tz=UTC) - model_datetime).days
         except Exception as e:
             self.logger.warning(
                 "Could not get model age. Returning infinite age!", exception=str(e)

diff --git a/openstef/model/standard_deviation_generator.py b/openstef/model/standard_deviation_generator.py
@@ -69,7 +69,7 @@ def _calculate_standard_deviation(
         # Calculate the error for each predicted point
         error = realised - predicted
         error.index = error.index.hour  # Hour only, remove the rest
-        # For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP)
+        # For the time starts with 00, 01, 02, etc.
         for hour in range(24):
             hour_error = error[error.index == hour]
 

diff --git a/openstef/model_selection/model_selection.py b/openstef/model_selection/model_selection.py
@@ -227,7 +227,7 @@ def split_data_train_validation_test(
         for date_set in [max_dates, min_dates, other_dates]:
             n_days_val = max(1, int(validation_fraction * len(date_set)))
             val_dates += list(
-                np.random.choice(list(date_set), n_days_val, replace=False)
+                np.random.default_rng().choice(list(date_set), n_days_val, replace=False)
             )
             train_dates += [x for x in date_set if x not in val_dates]
 

diff --git a/openstef/postprocessing/postprocessing.py b/openstef/postprocessing/postprocessing.py
@@ -232,11 +232,6 @@ def add_prediction_job_properties_to_forecast(
     if forecast_quality is not None:
         forecast["quality"] = forecast_quality
 
-    # TODO rename prediction job typ to type
-    # TODO algtype = model_file_path, perhaps we can find a more logical name
-    # TODO perhaps better to make a forecast its own class!
-    # TODO double check and sync this with make_basecase_forecast (other fields are added)
-    # !!!!! TODO fix the requirement for customer
     forecast["pid"] = pj["id"]
     forecast["customer"] = pj["name"]
     forecast["description"] = pj["description"]

diff --git a/openstef/tasks/calculate_kpi.py b/openstef/tasks/calculate_kpi.py
@@ -19,7 +19,7 @@
 
 """
 # Import builtins
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 
 import numpy as np
@@ -53,8 +53,9 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
 
     with TaskContext(taskname, config, database) as context:
         # Set start and end time
-        start_time = datetime.utcnow() - timedelta(days=1)
-        end_time = datetime.utcnow()
+        end_time = datetime.now(tz=UTC)
+        start_time = end_time - timedelta(days=1)
+
 
         PredictionJobLoop(context, model_type=model_type).map(
             check_kpi_task,
@@ -72,9 +73,9 @@ def check_kpi_task(
 ) -> None:
     # Apply default parameters if none are provided
     if start_time is None:
-        start_time = datetime.utcnow() - timedelta(days=1)
+        start_time = datetime.now(tz=UTC) - timedelta(days=1)
     if end_time is None:
-        end_time = datetime.utcnow()
+        end_time = datetime.now(tz=UTC)
 
     # Get realised load data
     realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T")

diff --git a/openstef/tasks/create_basecase_forecast.py b/openstef/tasks/create_basecase_forecast.py
@@ -16,7 +16,7 @@
         $ python create_basecase_forecast.py
 
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 
 import pandas as pd
@@ -63,8 +63,8 @@ def create_basecase_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS)
+    datetime_end = datetime.now(tz=UTC) + timedelta(days=T_AHEAD_DAYS)
 
     # Retrieve input data
     input_data = context.database.get_model_input(
@@ -81,7 +81,7 @@ def create_basecase_forecast_task(
     # Those should be updated by regular forecast process.
     basecase_forecast = basecase_forecast.loc[
         basecase_forecast.index
-        > (pd.to_datetime(datetime.utcnow(), utc=True) + timedelta(hours=48)),
+        > (pd.to_datetime(datetime.now(tz=UTC), utc=True) + timedelta(hours=48)),
         :,
     ]
 

diff --git a/openstef/tasks/create_components_forecast.py b/openstef/tasks/create_components_forecast.py
@@ -21,7 +21,7 @@
         $ python create_components_forecast.py
 
 """
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 
 import structlog
@@ -60,8 +60,8 @@ def create_components_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS)
+    datetime_end = datetime.now(tz=UTC) + timedelta(days=T_AHEAD_DAYS)
 
     logger.info(
         "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -73,7 +73,7 @@ def create_components_forecast_task(
     )
     # Check if input_data is not empty
     if len(input_data) == 0:
-        logger.warning(f"No forecast found. Skipping pid", pid=pj["id"])
+        logger.warning("No forecast found. Skipping pid", pid=pj["id"])
         return
 
     logger.info("retrieving weather data")
@@ -104,9 +104,7 @@ def create_components_forecast_task(
     logger.debug("Written forecast to database")
 
     # Check if forecast was complete enough, otherwise raise exception
-    if forecasts.index.max() < datetime.utcnow().replace(
-        tzinfo=timezone.utc
-    ) + timedelta(hours=30):
+    if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30):
         # Check which input data is missing the most.
         # Do this by counting the NANs for (load)forecast, radiation and windspeed
         max_index = forecasts.index.max()

diff --git a/openstef/tasks/create_forecast.py b/openstef/tasks/create_forecast.py
@@ -20,7 +20,7 @@
         $ python create_forecast.py
 
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -70,8 +70,8 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> No
     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=T_BEHIND_DAYS)
+    datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60)
 
     # Retrieve input data
     input_data = context.database.get_model_input(