General code improvements #527

Open · wants to merge 8 commits into base: main
1 change: 0 additions & 1 deletion openstef/feature_engineering/weather_features.py
@@ -397,7 +397,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series
solar_zenith = solpos.apparent_zenith

# convert radiation (ghi) to right unit (J/m^2 to kWh/m^2)
# TODO: check whether unit conversion is necessary
ghi_forecasted = radiation / 3600
# convert ghi to dni
dni_converted = pvlib.irradiance.dni(
11 changes: 7 additions & 4 deletions openstef/metrics/metrics.py
@@ -9,7 +9,7 @@
#
# SPDX-License-Identifier: MIT
"""This module contains all metrics to assess forecast quality."""
from typing import Callable
from typing import Callable, Optional, Tuple

import numpy as np
import pandas as pd
@@ -297,12 +297,15 @@ def skill_score_positive_peaks(


def franks_skill_score(
realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0
realised: pd.Series,
forecast: pd.Series,
basecase: pd.Series,
range_: Optional[float] = None,
) -> float:
"""Calculate Franks skill score."""
# Combine series in one DataFrame
combined = pd.concat([realised, forecast], axis=1)
if range_ == 1.0:
if not range_:
range_ = (
combined["load"].max() - combined["load"].min()
if (combined["load"].max() - combined["load"].min()) != 0
@@ -358,7 +361,7 @@ def franks_skill_score_peaks(

def xgb_quantile_eval(
preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2
) -> str:
) -> Tuple:
"""Customized evaluational metric that equals to quantile regression loss (also known as pinball loss).

Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features.
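Note: the return annotation change from str to Tuple reflects XGBoost's convention that a custom evaluation metric returns a (name, value) pair rather than a bare string. For reference, a minimal standalone sketch of the pinball loss the docstring describes (hypothetical helper, not part of this diff):

    import numpy as np

    def pinball_loss(y_true: np.ndarray, y_pred: np.ndarray, quantile: float = 0.2) -> float:
        # Asymmetric penalty: under-prediction weighted by quantile, over-prediction by (1 - quantile)
        diff = y_true - y_pred
        return float(np.mean(np.maximum(quantile * diff, (quantile - 1) * diff)))

    # Example: pinball_loss(np.array([10.0, 12.0]), np.array([9.0, 13.0]), 0.2) == 0.5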
2 changes: 1 addition & 1 deletion openstef/model/confidence_interval_applicator.py
@@ -137,7 +137,7 @@ def _add_standard_deviation_to_forecast(
# Determine now, rounded on 15 minutes,
# Rounding helps to prevent fractional t_aheads
now = (
pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo))
pd.Series(datetime.now(tz=forecast_copy.index.tzinfo))
.min()
.round(f"{minimal_resolution}T")
.to_pydatetime()
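Note: this is the first of several places in this PR where datetime.utcnow() is replaced with datetime.now(tz=...). utcnow() returns a naive datetime and is deprecated as of Python 3.12, while now(tz=...) returns a timezone-aware value, so arithmetic and comparisons against other aware timestamps (such as a tz-aware forecast index) work directly. A small illustrative sketch, not taken from the diff (datetime.UTC requires Python 3.11+):

    from datetime import datetime, timedelta, UTC

    aware = datetime.now(tz=UTC)      # timezone-aware, preferred
    naive = datetime.utcnow()         # naive, deprecated since Python 3.12

    # aware - naive would raise:
    # TypeError: can't subtract offset-naive and offset-aware datetimes
    age = aware - (aware - timedelta(days=3))  # aware minus aware is fine
    assert age.days == 3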
4 changes: 2 additions & 2 deletions openstef/model/fallback.py
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <[email protected]> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
from datetime import datetime
from datetime import datetime, UTC

import pandas as pd

@@ -43,7 +43,7 @@ def generate_fallback(

# Find most extreme historic day (do not count today as it is incomplete)
day_with_highest_load_date = (
load[load.index.tz_localize(None).date != datetime.utcnow().date()]
load[load.index < datetime.now(tz=UTC)]
.idxmax()
.load.date()
Comment on lines +46 to 48
[blackfmt] reported by reviewdog 🐶

Suggested change:
- load[load.index < datetime.now(tz=UTC)]
- .idxmax()
- .load.date()
+ load[load.index < datetime.now(tz=UTC)].idxmax().load.date()

)
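Note on the new filter: the comment above still says today should be excluded because it is incomplete, but load.index < datetime.now(tz=UTC) keeps today's earlier rows. If that exclusion matters, one hedged, timezone-aware alternative (assuming load.index is tz-aware, as in the surrounding function):

    from datetime import datetime, UTC

    today_utc = datetime.now(tz=UTC).date()
    historic = load[load.index.tz_convert("UTC").date != today_utc]
    day_with_highest_load_date = historic.idxmax().load.date()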
4 changes: 2 additions & 2 deletions openstef/model/metamodels/missing_values_handler.py
@@ -90,7 +90,7 @@ def _get_tags(self):
def fit(self, x, y):
"""Fit model."""
_, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
if type(x) != pd.DataFrame:
if not isinstance(x, pd.DataFrame):
x = pd.DataFrame(np.asarray(x))
self.feature_in_names_ = list(x.columns)
self.n_features_in_ = x.shape[1]
@@ -133,6 +133,6 @@ def predict(self, x):
x,
force_all_finite="allow-nan",
)
if type(x) != pd.DataFrame:
if not isinstance(x, pd.DataFrame):
x = pd.DataFrame(np.array(x))
return self.pipeline_.predict(x[self.non_null_columns_])
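Note: isinstance is the idiomatic check here — it also accepts subclasses of DataFrame, which the strict type(x) != pd.DataFrame comparison would needlessly re-wrap. A small illustration (hypothetical subclass, not from the codebase):

    import pandas as pd

    class TaggedFrame(pd.DataFrame):
        pass  # hypothetical DataFrame subclass

    x = TaggedFrame({"a": [1, 2]})
    assert isinstance(x, pd.DataFrame)   # subclass accepted as-is
    assert type(x) != pd.DataFrame       # the old check would have re-wrapped it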
4 changes: 2 additions & 2 deletions openstef/model/regressors/custom_regressor.py
@@ -26,9 +26,9 @@ class CustomOpenstfRegressor(OpenstfRegressor):
def valid_kwargs() -> list[str]:
...

@classmethod
@staticmethod
@abstractmethod
def objective(self) -> Type[RegressorObjective]:
def objective() -> Type[RegressorObjective]:
...


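Note: stacking @staticmethod on top of @abstractmethod replaces the previous @classmethod variant and drops the unused self parameter — objective does not touch instance or class state, and subclasses are still forced to implement it. A hedged sketch of the pattern with hypothetical names (the real method returns Type[RegressorObjective]):

    from abc import ABC, abstractmethod

    class BaseRegressor(ABC):
        @staticmethod
        @abstractmethod
        def objective() -> str:
            ...

    class MyRegressor(BaseRegressor):
        @staticmethod
        def objective() -> str:
            return "my_objective"

    assert MyRegressor.objective() == "my_objective"  # callable without an instance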
5 changes: 2 additions & 3 deletions openstef/model/serializer.py
@@ -5,7 +5,7 @@
import logging
import os
import shutil
from datetime import datetime
from datetime import datetime, UTC
from json import JSONDecodeError
from typing import Optional, Union
from urllib.parse import unquote, urlparse
@@ -283,8 +283,7 @@ def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, flo
"""Determines how many days ago a model is trained from the mlflow run."""
try:
model_datetime = run.end_time.to_pydatetime()
model_datetime = model_datetime.replace(tzinfo=None)
model_age_days = (datetime.utcnow() - model_datetime).days
model_age_days = (datetime.now(tz=UTC) - model_datetime).days
except Exception as e:
self.logger.warning(
"Could not get model age. Returning infinite age!", exception=str(e)
2 changes: 1 addition & 1 deletion openstef/model/standard_deviation_generator.py
@@ -69,7 +69,7 @@ def _calculate_standard_deviation(
# Calculate the error for each predicted point
error = realised - predicted
error.index = error.index.hour # Hour only, remove the rest
# For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP)
# For the time starts with 00, 01, 02, etc.
for hour in range(24):
hour_error = error[error.index == hour]

4 changes: 3 additions & 1 deletion openstef/model_selection/model_selection.py
@@ -230,7 +230,9 @@ def split_data_train_validation_test(
for date_set in [max_dates, min_dates, other_dates]:
n_days_val = max(1, int(validation_fraction * len(date_set)))
val_dates += list(
np.random.choice(list(date_set), n_days_val, replace=False)
np.random.default_rng().choice(
list(date_set), n_days_val, replace=False
)
)
train_dates += [x for x in date_set if x not in val_dates]

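Note: np.random.default_rng() is NumPy's recommended Generator API and, unlike the legacy np.random.choice, does not rely on the module-level global seed state. If reproducible validation splits are wanted, a seed (or a shared Generator) can be passed in — a sketch of that pattern, not part of this diff:

    import numpy as np

    rng = np.random.default_rng(seed=42)  # one seeded generator, reused per split
    dates = ["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04"]
    val_dates = list(rng.choice(dates, size=2, replace=False))
    # The same seed yields the same validation dates on every run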
5 changes: 0 additions & 5 deletions openstef/postprocessing/postprocessing.py
@@ -239,11 +239,6 @@ def add_prediction_job_properties_to_forecast(
if forecast_quality is not None:
forecast["quality"] = forecast_quality

# TODO rename prediction job typ to type
# TODO algtype = model_file_path, perhaps we can find a more logical name
# TODO perhaps better to make a forecast its own class!
# TODO double check and sync this with make_basecase_forecast (other fields are added)
# !!!!! TODO fix the requirement for customer
forecast["pid"] = pj["id"]
forecast["customer"] = pj["name"]
forecast["description"] = pj["description"]
10 changes: 5 additions & 5 deletions openstef/tasks/calculate_kpi.py
@@ -21,7 +21,7 @@
import logging

# Import builtins
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import numpy as np
@@ -56,8 +56,8 @@ def main(model_type: ModelType = None, config=None, database=None) -> None:

with TaskContext(taskname, config, database) as context:
# Set start and end time
start_time = datetime.utcnow() - timedelta(days=1)
end_time = datetime.utcnow()
end_time = datetime.now(tz=UTC)
start_time = end_time - timedelta(days=1)

PredictionJobLoop(context, model_type=model_type).map(
check_kpi_task,
@@ -77,9 +77,9 @@ def check_kpi_task(
) -> None:
# Apply default parameters if none are provided
if start_time is None:
start_time = datetime.utcnow() - timedelta(days=1)
start_time = datetime.now(tz=UTC) - timedelta(days=1)
if end_time is None:
end_time = datetime.utcnow()
end_time = datetime.now(tz=UTC)

# Get realised load data
realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T")
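Note: computing end_time once and deriving start_time from it guarantees an exact one-day window; two separate clock reads (as before) could differ slightly. The pattern in isolation:

    from datetime import datetime, timedelta, UTC

    end_time = datetime.now(tz=UTC)
    start_time = end_time - timedelta(days=1)
    assert end_time - start_time == timedelta(days=1)  # exact by construction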
8 changes: 4 additions & 4 deletions openstef/tasks/create_basecase_forecast.py
@@ -16,7 +16,7 @@
$ python create_basecase_forecast.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -68,8 +68,8 @@ def create_basecase_forecast_task(
return

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)

# Retrieve input data
input_data = context.database.get_model_input(
@@ -87,7 +87,7 @@ def create_basecase_forecast_task(
basecase_forecast = basecase_forecast.loc[
basecase_forecast.index
> (
pd.to_datetime(datetime.utcnow(), utc=True)
pd.to_datetime(datetime.now(tz=UTC), utc=True)
+ timedelta(minutes=pj.horizon_minutes)
),
:,
10 changes: 4 additions & 6 deletions openstef/tasks/create_components_forecast.py
@@ -22,7 +22,7 @@

"""
import logging
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -76,8 +76,8 @@ def create_components_forecast_task(
return

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)

logger.info(
"Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -120,9 +120,7 @@ def create_components_forecast_task(
logger.debug("Written forecast to database")

# Check if forecast was complete enough, otherwise raise exception
if forecasts.index.max() < datetime.utcnow().replace(
tzinfo=timezone.utc
) + timedelta(hours=30):
if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30):
# Check which input data is missing the most.
# Do this by counting the NANs for (load)forecast, radiation and windspeed
max_index = forecasts.index.max()
6 changes: 3 additions & 3 deletions openstef/tasks/create_forecast.py
@@ -20,7 +20,7 @@
$ python create_forecast.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -73,8 +73,8 @@ def create_forecast_task(
mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60)

# Retrieve input data
input_data = context.database.get_model_input(
5 changes: 2 additions & 3 deletions openstef/tasks/create_solar_forecast.py
@@ -12,7 +12,7 @@
$ python create_solar_forecast

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import numpy as np
@@ -23,7 +23,6 @@
from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
from openstef.tasks.utils.taskcontext import TaskContext

# TODO move to config
PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv"


@@ -231,7 +230,7 @@ def main(config=None, database=None, **kwargs):
num_prediction_jobs = len(prediction_jobs)

# only make customer = Provincie once an hour
utc_now_minute = datetime.utcnow().minute
utc_now_minute = datetime.now(tz=UTC).minute
if utc_now_minute >= 15:
prediction_jobs = [
pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie")
6 changes: 3 additions & 3 deletions openstef/tasks/optimize_hyperparameters.py
@@ -16,7 +16,7 @@
$ python optimize_hyperparameters.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -88,8 +88,8 @@ def optimize_hyperparameters_task(
)
return

datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
datetime_end = datetime.utcnow()
datetime_start = datetime.now(tz=UTC) - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
datetime_end = datetime.now(tz=UTC)

input_data = context.database.get_model_input(
pid=pj["id"],
7 changes: 3 additions & 4 deletions openstef/tasks/split_forecast.py
@@ -23,7 +23,7 @@

"""
import logging
from datetime import datetime
from datetime import datetime, UTC
from pathlib import Path

import numpy as np
@@ -93,7 +93,6 @@ def split_forecast_task(
components, coefdict = find_components(input_split_function)

# Calculate mean absolute error (MAE)
# TODO: use a standard metric function for this
error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1]
mae = error.abs().mean()
coefdict.update({"MAE": mae})
@@ -183,7 +182,7 @@ def convert_coefdict_to_coefsdf(
pj["id"],
input_split_function.index.min().date(),
input_split_function.index.max().date(),
datetime.utcnow(),
datetime.now(tz=UTC),
]
coefsdf = pd.DataFrame(
{"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())}
@@ -237,7 +236,7 @@ def weighted_sum(x, *args):

# Carry out fitting
# See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa
coefs, cov = scipy.optimize.curve_fit(
coefs, _ = scipy.optimize.curve_fit(
weighted_sum,
xdata=df.iloc[:, 1:].values.T,
ydata=load.values,
10 changes: 5 additions & 5 deletions openstef/tasks/train_model.py
@@ -19,7 +19,7 @@
$ python model_train.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -123,7 +123,7 @@ def train_model_task(
)

if datetime_end is None:
datetime_end = datetime.utcnow()
datetime_end = datetime.now(tz=UTC)
if datetime_start is None:
datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch)

@@ -184,9 +184,9 @@ def train_model_task(
"'save_train_forecasts option was activated.'"
)
context.database.write_train_forecasts(pj, data_sets)
context.logger.debug(f"Saved Forecasts from trained model on datasets")
context.logger.debug("Saved Forecasts from trained model on datasets")
except SkipSaveTrainingForecasts:
context.logger.debug(f"Skip saving forecasts")
context.logger.debug("Skip saving forecasts")
except InputDataOngoingZeroFlatlinerError:
if (
context.config.known_zero_flatliners
@@ -213,7 +213,7 @@ def main(model_type=None, config=None, database=None):
model_type = [ml.value for ml in ModelType]

taskname = Path(__file__).name.replace(".py", "")
datetime_now = datetime.utcnow()
datetime_now = datetime.now(tz=UTC)
with TaskContext(taskname, config, database) as context:
PredictionJobLoop(context, model_type=model_type).map(
train_model_task, context, datetime_end=datetime_now