diff --git a/openstef/feature_engineering/weather_features.py b/openstef/feature_engineering/weather_features.py index 170bc1b5d..036d42607 100644 --- a/openstef/feature_engineering/weather_features.py +++ b/openstef/feature_engineering/weather_features.py @@ -397,7 +397,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series solar_zenith = solpos.apparent_zenith # convert radiation (ghi) to right unit (J/m^2 to kWh/m^2) - # TODO: check whether unit conversion is necessary ghi_forecasted = radiation / 3600 # convert ghi to dni dni_converted = pvlib.irradiance.dni( diff --git a/openstef/metrics/metrics.py b/openstef/metrics/metrics.py index 29b9be51c..0d090419b 100644 --- a/openstef/metrics/metrics.py +++ b/openstef/metrics/metrics.py @@ -9,7 +9,7 @@ # # SPDX-License-Identifier: MIT """This module contains all metrics to assess forecast quality.""" -from typing import Callable +from typing import Callable, Optional, Tuple import numpy as np import pandas as pd @@ -297,12 +297,15 @@ def skill_score_positive_peaks( def franks_skill_score( - realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0 + realised: pd.Series, + forecast: pd.Series, + basecase: pd.Series, + range_: Optional[float] = None, ) -> float: """Calculate Franks skill score.""" # Combine series in one DataFrame combined = pd.concat([realised, forecast], axis=1) - if range_ == 1.0: + if not range_: range_ = ( combined["load"].max() - combined["load"].min() if (combined["load"].max() - combined["load"].min()) != 0 @@ -358,7 +361,7 @@ def franks_skill_score_peaks( def xgb_quantile_eval( preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2 -) -> str: +) -> Tuple[str, float]: """Customized evaluational metric that equals to quantile regression loss (also known as pinball loss). Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features.
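For context on the `xgb_quantile_eval` annotation fix above: XGBoost custom evaluation metrics return a `(name, value)` pair rather than a string, which is why the old `-> str` annotation was wrong and `Tuple[str, float]` is accurate. A minimal sketch of the expected shape (illustrative only; the pinball-loss arithmetic here is a simplified stand-in, not the function's actual body):

    import numpy as np
    import xgboost

    def xgb_quantile_eval_sketch(
        preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2
    ) -> tuple[str, float]:
        # Pinball (quantile) loss: penalises under- and over-prediction asymmetrically
        errors = dmatrix.get_label() - preds
        loss = np.mean(np.maximum(quantile * errors, (quantile - 1) * errors))
        # XGBoost consumes custom eval metrics as a (metric_name, value) tuple
        return f"q{quantile}_pinball_loss", float(loss)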
diff --git a/openstef/model/confidence_interval_applicator.py b/openstef/model/confidence_interval_applicator.py index 3a31bcbc1..73321046f 100644 --- a/openstef/model/confidence_interval_applicator.py +++ b/openstef/model/confidence_interval_applicator.py @@ -137,7 +137,7 @@ def _add_standard_deviation_to_forecast( # Determine now, rounded on 15 minutes, # Rounding helps to prevent fractional t_aheads now = ( - pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo)) + pd.Series(datetime.now(tz=forecast_copy.index.tzinfo)) .min() .round(f"{minimal_resolution}T") .to_pydatetime() diff --git a/openstef/model/fallback.py b/openstef/model/fallback.py index 924e091f1..64d460fb0 100644 --- a/openstef/model/fallback.py +++ b/openstef/model/fallback.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime +from datetime import datetime, UTC import pandas as pd @@ -43,7 +43,7 @@ def generate_fallback( # Find most extreme historic day (do not count today as it is incomplete) day_with_highest_load_date = ( - load[load.index.tz_localize(None).date != datetime.utcnow().date()] + load[load.index.date != datetime.now(tz=UTC).date()] .idxmax() .load.date() ) diff --git a/openstef/model/metamodels/missing_values_handler.py b/openstef/model/metamodels/missing_values_handler.py index 99886c5ea..fbe4cc36a 100644 --- a/openstef/model/metamodels/missing_values_handler.py +++ b/openstef/model/metamodels/missing_values_handler.py @@ -90,7 +90,7 @@ def _get_tags(self): def fit(self, x, y): """Fit model.""" _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True) - if type(x) != pd.DataFrame: + if not isinstance(x, pd.DataFrame): x = pd.DataFrame(np.asarray(x)) self.feature_in_names_ = list(x.columns) self.n_features_in_ = x.shape[1] @@ -133,6 +133,6 @@ def predict(self, x): x, force_all_finite="allow-nan", ) - if type(x) != pd.DataFrame: + if not isinstance(x, pd.DataFrame): x = pd.DataFrame(np.array(x)) return self.pipeline_.predict(x[self.non_null_columns_]) diff --git a/openstef/model/regressors/custom_regressor.py b/openstef/model/regressors/custom_regressor.py index 33939cedd..11decd621 100644 --- a/openstef/model/regressors/custom_regressor.py +++ b/openstef/model/regressors/custom_regressor.py @@ -26,9 +26,9 @@ class CustomOpenstfRegressor(OpenstfRegressor): def valid_kwargs() -> list[str]: ... - @classmethod + @staticmethod @abstractmethod - def objective(self) -> Type[RegressorObjective]: + def objective() -> Type[RegressorObjective]: ... diff --git a/openstef/model/serializer.py b/openstef/model/serializer.py index 6020f4587..919cd1a24 100644 --- a/openstef/model/serializer.py +++ b/openstef/model/serializer.py @@ -5,7 +5,7 @@ import logging import os import shutil -from datetime import datetime +from datetime import datetime, UTC from json import JSONDecodeError from typing import Optional, Union from urllib.parse import unquote, urlparse @@ -283,8 +283,7 @@ def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, flo """Determines how many days ago a model is trained from the mlflow run.""" try: model_datetime = run.end_time.to_pydatetime() - model_datetime = model_datetime.replace(tzinfo=None) - model_age_days = (datetime.utcnow() - model_datetime).days + model_age_days = (datetime.now(tz=UTC) - model_datetime).days except Exception as e: self.logger.warning( "Could not get model age.
Returning infinite age!", exception=str(e) diff --git a/openstef/model/standard_deviation_generator.py b/openstef/model/standard_deviation_generator.py index f268b4b0e..f462121ca 100644 --- a/openstef/model/standard_deviation_generator.py +++ b/openstef/model/standard_deviation_generator.py @@ -69,7 +69,7 @@ def _calculate_standard_deviation( # Calculate the error for each predicted point error = realised - predicted error.index = error.index.hour # Hour only, remove the rest - # For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP) + # Calculate the standard deviation for each hour of the day (00, 01, 02, ...) for hour in range(24): hour_error = error[error.index == hour] diff --git a/openstef/model_selection/model_selection.py b/openstef/model_selection/model_selection.py index d8ec40824..43df430ed 100644 --- a/openstef/model_selection/model_selection.py +++ b/openstef/model_selection/model_selection.py @@ -230,7 +230,9 @@ def split_data_train_validation_test( for date_set in [max_dates, min_dates, other_dates]: n_days_val = max(1, int(validation_fraction * len(date_set))) val_dates += list( - np.random.choice(list(date_set), n_days_val, replace=False) + np.random.default_rng().choice( + list(date_set), n_days_val, replace=False + ) ) train_dates += [x for x in date_set if x not in val_dates] diff --git a/openstef/postprocessing/postprocessing.py b/openstef/postprocessing/postprocessing.py index b07a62aab..569a54235 100644 --- a/openstef/postprocessing/postprocessing.py +++ b/openstef/postprocessing/postprocessing.py @@ -239,11 +239,6 @@ def add_prediction_job_properties_to_forecast( if forecast_quality is not None: forecast["quality"] = forecast_quality - # TODO rename prediction job typ to type - # TODO algtype = model_file_path, perhaps we can find a more logical name - # TODO perhaps better to make a forecast its own class! - # TODO double check and sync this with make_basecase_forecast (other fields are added) - # !!!!!
TODO fix the requirement for customer forecast["pid"] = pj["id"] forecast["customer"] = pj["name"] forecast["description"] = pj["description"] diff --git a/openstef/tasks/calculate_kpi.py b/openstef/tasks/calculate_kpi.py index ea2c10255..d7fc07162 100644 --- a/openstef/tasks/calculate_kpi.py +++ b/openstef/tasks/calculate_kpi.py @@ -21,7 +21,7 @@ import logging # Import builtins -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import numpy as np @@ -56,8 +56,8 @@ def main(model_type: ModelType = None, config=None, database=None) -> None: with TaskContext(taskname, config, database) as context: # Set start and end time - start_time = datetime.utcnow() - timedelta(days=1) - end_time = datetime.utcnow() + end_time = datetime.now(tz=UTC) + start_time = end_time - timedelta(days=1) PredictionJobLoop(context, model_type=model_type).map( check_kpi_task, @@ -77,9 +77,9 @@ def check_kpi_task( ) -> None: # Apply default parameters if none are provided if start_time is None: - start_time = datetime.utcnow() - timedelta(days=1) + start_time = datetime.now(tz=UTC) - timedelta(days=1) if end_time is None: - end_time = datetime.utcnow() + end_time = datetime.now(tz=UTC) # Get realised load data realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T") diff --git a/openstef/tasks/create_basecase_forecast.py b/openstef/tasks/create_basecase_forecast.py index 9b349f0a9..5d712f0cb 100644 --- a/openstef/tasks/create_basecase_forecast.py +++ b/openstef/tasks/create_basecase_forecast.py @@ -16,7 +16,7 @@ $ python create_basecase_forecast.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import pandas as pd @@ -68,8 +68,8 @@ def create_basecase_forecast_task( return # Define datetime range for input data - datetime_start = datetime.utcnow() - timedelta(days=t_behind_days) - datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days) + datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days) + datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days) # Retrieve input data input_data = context.database.get_model_input( @@ -87,7 +87,7 @@ def create_basecase_forecast_task( basecase_forecast = basecase_forecast.loc[ basecase_forecast.index > ( - pd.to_datetime(datetime.utcnow(), utc=True) + pd.to_datetime(datetime.now(tz=UTC), utc=True) + timedelta(minutes=pj.horizon_minutes) ), :, diff --git a/openstef/tasks/create_components_forecast.py b/openstef/tasks/create_components_forecast.py index ee2b43dc4..7e56b14ca 100644 --- a/openstef/tasks/create_components_forecast.py +++ b/openstef/tasks/create_components_forecast.py @@ -22,7 +22,7 @@ """ import logging -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, UTC from pathlib import Path import pandas as pd @@ -76,8 +76,8 @@ def create_components_forecast_task( return # Define datetime range for input data - datetime_start = datetime.utcnow() - timedelta(days=t_behind_days) - datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days) + datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days) + datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days) logger.info( "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end @@ -120,9 +120,7 @@ def create_components_forecast_task( logger.debug("Written forecast to database") # Check if forecast was complete enough, otherwise raise exception - if 
forecasts.index.max() < datetime.utcnow().replace( - tzinfo=timezone.utc - ) + timedelta(hours=30): + if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30): # Check which input data is missing the most. # Do this by counting the NANs for (load)forecast, radiation and windspeed max_index = forecasts.index.max() diff --git a/openstef/tasks/create_forecast.py b/openstef/tasks/create_forecast.py index da2f72623..3e4b496c0 100644 --- a/openstef/tasks/create_forecast.py +++ b/openstef/tasks/create_forecast.py @@ -20,7 +20,7 @@ $ python create_forecast.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass @@ -73,8 +73,8 @@ def create_forecast_task( mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri # Define datetime range for input data - datetime_start = datetime.utcnow() - timedelta(days=t_behind_days) - datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60) + datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days) + datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60) # Retrieve input data input_data = context.database.get_model_input( diff --git a/openstef/tasks/create_solar_forecast.py b/openstef/tasks/create_solar_forecast.py index 9dc162744..826bb4ea9 100644 --- a/openstef/tasks/create_solar_forecast.py +++ b/openstef/tasks/create_solar_forecast.py @@ -12,7 +12,7 @@ $ python create_solar_forecast """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import numpy as np @@ -23,7 +23,6 @@ from openstef.tasks.utils.predictionjobloop import PredictionJobLoop from openstef.tasks.utils.taskcontext import TaskContext -# TODO move to config PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv" @@ -231,7 +230,7 @@ def main(config=None, database=None, **kwargs): num_prediction_jobs = len(prediction_jobs) # only make customer = Provincie once an hour - utc_now_minute = datetime.utcnow().minute + utc_now_minute = datetime.now(tz=UTC).minute if utc_now_minute >= 15: prediction_jobs = [ pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie") ] diff --git a/openstef/tasks/optimize_hyperparameters.py b/openstef/tasks/optimize_hyperparameters.py index 304547204..fa9f4ff7c 100644 --- a/openstef/tasks/optimize_hyperparameters.py +++ b/openstef/tasks/optimize_hyperparameters.py @@ -16,7 +16,7 @@ $ python optimize_hyperparameters.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path from openstef.data_classes.prediction_job import PredictionJobDataClass @@ -88,8 +88,8 @@ def optimize_hyperparameters_task( ) return - datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS) - datetime_end = datetime.utcnow() + datetime_start = datetime.now(tz=UTC) - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS) + datetime_end = datetime.now(tz=UTC) input_data = context.database.get_model_input( pid=pj["id"], diff --git a/openstef/tasks/split_forecast.py b/openstef/tasks/split_forecast.py index be1f83d0d..911c5550e 100644 --- a/openstef/tasks/split_forecast.py +++ b/openstef/tasks/split_forecast.py @@ -23,7 +23,7 @@ """ import logging -from datetime import datetime +from datetime import datetime, UTC from pathlib import Path import numpy as np @@ -93,7 +93,6 @@ def split_forecast_task( components, coefdict =
find_components(input_split_function) # Calculate mean absolute error (MAE) - # TODO: use a standard metric function for this error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1] mae = error.abs().mean() coefdict.update({"MAE": mae}) @@ -183,7 +182,7 @@ def convert_coefdict_to_coefsdf( pj["id"], input_split_function.index.min().date(), input_split_function.index.max().date(), - datetime.utcnow(), + datetime.now(tz=UTC), ] coefsdf = pd.DataFrame( {"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())} @@ -237,7 +236,7 @@ def weighted_sum(x, *args): # Carry out fitting # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa - coefs, cov = scipy.optimize.curve_fit( + coefs, _ = scipy.optimize.curve_fit( weighted_sum, xdata=df.iloc[:, 1:].values.T, ydata=load.values, diff --git a/openstef/tasks/train_model.py b/openstef/tasks/train_model.py index 40b7c475e..110d9a10c 100644 --- a/openstef/tasks/train_model.py +++ b/openstef/tasks/train_model.py @@ -19,7 +19,7 @@ $ python model_train.py """ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from pathlib import Path import pandas as pd @@ -123,7 +123,7 @@ def train_model_task( ) if datetime_end is None: - datetime_end = datetime.utcnow() + datetime_end = datetime.now(tz=UTC) if datetime_start is None: datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch) @@ -184,9 +184,9 @@ def train_model_task( "'save_train_forecasts option was activated.'" ) context.database.write_train_forecasts(pj, data_sets) - context.logger.debug(f"Saved Forecasts from trained model on datasets") + context.logger.debug("Saved Forecasts from trained model on datasets") except SkipSaveTrainingForecasts: - context.logger.debug(f"Skip saving forecasts") + context.logger.debug("Skip saving forecasts") except InputDataOngoingZeroFlatlinerError: if ( context.config.known_zero_flatliners @@ -213,7 +213,7 @@ def main(model_type=None, config=None, database=None): model_type = [ml.value for ml in ModelType] taskname = Path(__file__).name.replace(".py", "") - datetime_now = datetime.utcnow() + datetime_now = datetime.now(tz=UTC) with TaskContext(taskname, config, database) as context: PredictionJobLoop(context, model_type=model_type).map( train_model_task, context, datetime_end=datetime_now diff --git a/openstef/validation/validation.py b/openstef/validation/validation.py index 69e73c394..79cebd4af 100644 --- a/openstef/validation/validation.py +++ b/openstef/validation/validation.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: MPL-2.0 import logging import math -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from typing import Union import numpy as np @@ -203,7 +203,7 @@ def calc_completeness_features( df_copy.drop("horizon", inplace=True, axis=1) if weights is None: - weights = np.array([1] * ((len(df_copy.columns)))) + weights = np.array([1] * (len(df_copy.columns))) length_weights = len(weights) length_features = len(df_copy.columns) @@ -243,7 +243,7 @@ def detect_ongoing_zero_flatliner( """ # remove all timestamps in the future - load = load[load.index.tz_localize(None) <= datetime.utcnow()] + load = load[load.index <= datetime.now(tz=UTC)] latest_measurement_time = load.dropna().index.max() latest_measurements = load[ latest_measurement_time - timedelta(minutes=duration_threshold_minutes) : @@ -297,9 +297,10 @@ def calc_completeness_dataframe( # timecols: {delay:number of points expected to be missing} # 
number of points expected to be missing = numberOfPointsUpToTwoDaysAhead - numberOfPointsAvailable timecols = { - x: len(df) - eval(x[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25 - for x in df.columns - if x[:2] == "T-" + column: len(df) + - eval(column[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25 + for column in df.columns + if column.startswith("T-") } non_na_count = df.count() diff --git a/test/component/test_component.py b/test/component/test_component.py index 9b2f109a1..21682ea18 100644 --- a/test/component/test_component.py +++ b/test/component/test_component.py @@ -78,6 +78,6 @@ def test_component_training_prediction_happyflow(self): forecast["horizon"] = forecast_data.iloc[:, -1] # Verify forecast works correctly - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) diff --git a/test/unit/feature_engineering/test_feature_adder.py b/test/unit/feature_engineering/test_feature_adder.py index b802ec880..9d137747c 100644 --- a/test/unit/feature_engineering/test_feature_adder.py +++ b/test/unit/feature_engineering/test_feature_adder.py @@ -75,8 +75,8 @@ def test_dispatch_features(self): feat_disp = FeatureDispatcher([DummyFeature()]) df_out = feat_disp.apply_features(self.input_data, feature_names) # Test if the features have been correctly added - self.assertTrue( - set(feature_names + list(self.input_data.columns)) == set(df_out.columns) + self.assertEqual( + set(feature_names + list(self.input_data.columns)), set(df_out.columns) ) self.assertTrue((df_out["dummy_0"] == 0).all()) self.assertTrue((df_out["dummy_-1"] == -1).all()) @@ -88,6 +88,6 @@ def test_load_modules(self): ["test.unit.feature_engineering.test_feature_adder"] ) adders_type = [type(adder) for adder in adders] - self.assertTrue(len(adders) == 2) - self.assertTrue(DummyFeature in adders_type) - self.assertTrue(DummyIntFeature in adders_type) + self.assertEqual(len(adders), 2) + self.assertIn(DummyFeature, adders_type) + self.assertIn(DummyIntFeature, adders_type) diff --git a/test/unit/feature_engineering/test_feature_applicator.py b/test/unit/feature_engineering/test_feature_applicator.py index a04406afb..2bbaffbe1 100644 --- a/test/unit/feature_engineering/test_feature_applicator.py +++ b/test/unit/feature_engineering/test_feature_applicator.py @@ -57,7 +57,7 @@ def test_operational_feature_applicator_correct_order(self): horizons=[0.25] ).add_features(self.input_data[["load"]]) self.assertEqual(data_with_features.columns.to_list()[0], "load") - self.assertTrue("horizon" not in data_with_features.columns.to_list()) + self.assertNotIn("horizon", data_with_features.columns.to_list()) def test_operational_feature_applicator_one_horizon(self): # Test for expected column order of the output diff --git a/test/unit/feature_engineering/test_feature_free_days.py b/test/unit/feature_engineering/test_feature_free_days.py index 4f5c52c9d..7083630a9 100644 --- a/test/unit/feature_engineering/test_feature_free_days.py +++ b/test/unit/feature_engineering/test_feature_free_days.py @@ -46,12 +46,8 @@ def test_create_holiday_functions(self): ) # Assert for every holiday a function is available and no extra functions are generated - self.assertEqual( - all([key in holiday_functions.keys() for key in expected_keys]), True - ) - self.assertEqual( - all([key in expected_keys for key in 
holiday_functions.keys()]), True - ) + self.assertTrue(all([key in holiday_functions.keys() for key in expected_keys])) + self.assertTrue(all([key in expected_keys for key in holiday_functions.keys()])) if __name__ == "__main__": diff --git a/test/unit/model/regressors/test_arima.py b/test/unit/model/regressors/test_arima.py index b1b9cde86..021b27cc6 100644 --- a/test/unit/model/regressors/test_arima.py +++ b/test/unit/model/regressors/test_arima.py @@ -26,7 +26,7 @@ def test_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_update_historic_data(self): """Test happy flow of the update of historic data""" @@ -40,7 +40,7 @@ def test_update_historic_data(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_predict_wrong_historic(self): """Test the prediction with the wrong historic (missing data)""" @@ -100,5 +100,5 @@ def test_score_backtest(self): score_r2 = model.score( self.train_input.iloc[:150, 1:], self.train_input.iloc[:150, 0] ) - self.assertTrue(score_r2 <= 1.0) - self.assertTrue(score_r2 >= 0.5) + self.assertLessEqual(score_r2, 1.0) + self.assertGreaterEqual(score_r2, 0.5) diff --git a/test/unit/model/regressors/test_linear.py b/test/unit/model/regressors/test_linear.py index 302fdab68..b701c2c75 100644 --- a/test/unit/model/regressors/test_linear.py +++ b/test/unit/model/regressors/test_linear.py @@ -32,7 +32,7 @@ def test_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_non_null_columns_retrieval(self): n_sample = self.train_input.shape[0] diff --git a/test/unit/model/regressors/test_xgb_quantile.py b/test/unit/model/regressors/test_xgb_quantile.py index fc589db51..3eed87fac 100644 --- a/test/unit/model/regressors/test_xgb_quantile.py +++ b/test/unit/model/regressors/test_xgb_quantile.py @@ -65,7 +65,7 @@ def test_quantile_fit(self): self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) def test_value_error_raised(self): # Check if Value Error is raised when 0.5 is not in the requested quantiles list diff --git a/test/unit/model/test_basecase.py b/test/unit/model/test_basecase.py index c96a8c0f0..47c766398 100644 --- a/test/unit/model/test_basecase.py +++ b/test/unit/model/test_basecase.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 import unittest -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -20,7 +20,7 @@ def test_basecase_raises_value_error_too_early_start(self): # Shift example data to match current time interval as code expects data # available relative to the current time. 
utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() @@ -39,7 +39,7 @@ def test_basecase_raises_value_error_missing_features(self): # Shift example data to match current time interval as code expects data # available relative to the current time. utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) + pd.Series(datetime.now(tz=UTC)) .min() .round("15T") .to_pydatetime() diff --git a/test/unit/model/test_confidence_interval_applicator.py b/test/unit/model/test_confidence_interval_applicator.py index c48547491..35276aea7 100644 --- a/test/unit/model/test_confidence_interval_applicator.py +++ b/test/unit/model/test_confidence_interval_applicator.py @@ -112,7 +112,7 @@ def test_add_standard_deviation_to_forecast(self): actual_stdev_forecast = ConfidenceIntervalApplicator( MockModel(), self.stdev_forecast )._add_standard_deviation_to_forecast(forecast) - self.assertTrue("stdev" in actual_stdev_forecast.columns) + self.assertIn("stdev", actual_stdev_forecast.columns) self.assertEqual(actual_stdev_forecast["stdev"][0], 2.9) self.assertEqual(actual_stdev_forecast["stdev"][1], 2.9) self.assertEqual(actual_stdev_forecast["stdev"][2], 1.6) @@ -180,7 +180,7 @@ def test_add_standard_deviation_to_forecast_in_past(self): actual_stdev_forecast = ConfidenceIntervalApplicator( MockModelMultiHorizonStdev(), self.stdev_forecast )._add_standard_deviation_to_forecast(forecast) - self.assertTrue("stdev" in actual_stdev_forecast.columns) + self.assertIn("stdev", actual_stdev_forecast.columns) self.assertGreaterEqual( actual_stdev_forecast["stdev"].min(), 0.1 ) # => MockModel.standard_deviation.stdev.min() diff --git a/test/unit/model/test_custom_models.py b/test/unit/model/test_custom_models.py index 3f51829c3..128843add 100644 --- a/test/unit/model/test_custom_models.py +++ b/test/unit/model/test_custom_models.py @@ -25,8 +25,8 @@ class DummyRegressor(CustomOpenstfRegressor): def valid_kwargs() -> list[str]: return [] - @classmethod - def objective(self) -> Type[DummyObjective]: + @staticmethod + def objective() -> Type[DummyObjective]: return DummyObjective @property diff --git a/test/unit/model/test_serializer.py b/test/unit/model/test_serializer.py index e5d1e37fb..17f5231f3 100644 --- a/test/unit/model/test_serializer.py +++ b/test/unit/model/test_serializer.py @@ -4,7 +4,7 @@ import glob import tempfile -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from distutils.dir_util import copy_tree from pathlib import Path from test.unit.utils.base import BaseTestCase @@ -71,8 +71,8 @@ def test_serializer_load_model_feature_names_keyerror( "run_id": [1, 2], "artifact_uri": ["path1", "path2"], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -97,8 +97,8 @@ def test_serializer_load_model_feature_names_attributeerror( # give wrong feature_name type, something else than a str of a list or dict "tags.feature_names": [1, 2], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -123,8 +123,8 @@ def test_serializer_load_model_feature_names_jsonerror( # give wrong feature_name type, something else than a str of a list or dict "tags.feature_names": ["feature1", "feature1"], "end_time": [ - 
datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -150,8 +150,8 @@ def test_serializer_load_model_feature_modules_attributeerror( # give wrong feature_module type, something else than a str of a list or dict "tags.feature_modules": [1, 2], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -176,8 +176,8 @@ def test_serializer_load_model_feature_modules_jsonerror( # give wrong feature_module type, something else than a str of a list or dict "tags.feature_modules": ["feature_module1", "feature_module1"], "end_time": [ - datetime.utcnow() - timedelta(days=2), - datetime.utcnow() - timedelta(days=3), + datetime.now(tz=UTC) - timedelta(days=2), + datetime.now(tz=UTC) - timedelta(days=3), ], } ) @@ -239,7 +239,7 @@ def test_serializer_get_model_age_no_hyperparameter_optimization( data={ "run_id": [1], "artifact_uri": ["path1"], - "end_time": [datetime.utcnow() - timedelta(days=2)], + "end_time": [datetime.now(tz=UTC) - timedelta(days=2)], } ) mock_find_models.return_value = models_df @@ -256,7 +256,10 @@ def test_serializer_get_model_age_hyperparameter_optimization( data={ "run_id": [1, 2], "artifact_uri": ["path1", "path2"], - "end_time": [datetime.utcnow() - timedelta(days=8), datetime.utcnow()], + "end_time": [ + datetime.now(tz=UTC) - timedelta(days=8), + datetime.now(tz=UTC), + ], } ) mock_find_models.return_value = models_df diff --git a/test/unit/pipeline/test_create_basecase.py b/test/unit/pipeline/test_create_basecase.py index fdd3b0999..7c316218d 100644 --- a/test/unit/pipeline/test_create_basecase.py +++ b/test/unit/pipeline/test_create_basecase.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -24,7 +24,7 @@ def setUp(self) -> None: ] = np.nan # Shift so the input matches 'now' offset_seconds = ( - pd.to_datetime(datetime.utcnow(), utc=True) + pd.to_datetime(datetime.now(tz=UTC)) - (forecast_input.index.max() - timedelta(days=7)) ).total_seconds() forecast_input = forecast_input.shift( diff --git a/test/unit/pipeline/test_create_component_forecast.py b/test/unit/pipeline/test_create_component_forecast.py index 7e2c609cd..65b7e2644 100644 --- a/test/unit/pipeline/test_create_component_forecast.py +++ b/test/unit/pipeline/test_create_component_forecast.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone, UTC from pathlib import Path from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -47,12 +47,7 @@ def test_component_forecast_pipeline_happy_flow(self): # Shift example data to match current time interval as code expects data # available relative to the current time. 
- utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) - .min() - .round("15T") - .to_pydatetime() - ) + utc_now = pd.Series(datetime.now(tz=UTC)).min().round("15T").to_pydatetime() most_recent_date = forecast_input.index.max().ceil("15T").to_pydatetime() delta = utc_now - most_recent_date + timedelta(3) @@ -144,12 +139,7 @@ def test_component_forecast_pipeline_not_all_weather_data_available(self): # Shift example data to match current time interval as code expects data # available relative to the current time. - utc_now = ( - pd.Series(datetime.utcnow().replace(tzinfo=timezone.utc)) - .min() - .round("15T") - .to_pydatetime() - ) + utc_now = pd.Series(datetime.now(tz=UTC)).min().round("15T").to_pydatetime() most_recent_date = forecast_input.index.max().ceil("15T").to_pydatetime() delta = utc_now - most_recent_date + timedelta(3) diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py index 67009cb5a..edb0568e5 100644 --- a/test/unit/pipeline/test_pipeline_train_model.py +++ b/test/unit/pipeline/test_pipeline_train_model.py @@ -5,7 +5,7 @@ import glob import os import unittest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData from unittest.mock import MagicMock, patch @@ -39,8 +39,7 @@ from openstef.validation import validation -class DummyObjective(RegressorObjective): - ... +class DummyObjective(RegressorObjective): ... class DummyRegressor(CustomOpenstfRegressor): @@ -48,8 +47,8 @@ class DummyRegressor(CustomOpenstfRegressor): def valid_kwargs(): return [] - @property - def objective(self): + @staticmethod + def objective(): return DummyObjective @property @@ -89,8 +88,8 @@ def setUp(self) -> None: self.pj, self.model_specs = TestData.get_prediction_job_and_modelspecs(pid=307) # Set n_estimators to a small number to speed up training self.model_specs.hyper_params["n_estimators"] = 3 - datetime_start = datetime.utcnow() - timedelta(days=90) - datetime_end = datetime.utcnow() + datetime_start = datetime.now(tz=UTC) - timedelta(days=90) + datetime_end = datetime.now(tz=UTC) self.data_table = TestData.load("input_data_train.csv").head(8641) self.data = pd.DataFrame( index=pd.date_range(datetime_start, datetime_end, freq="15T") @@ -167,10 +166,10 @@ def test_train_model_pipeline_core_happy_flow(self): self.assertIsNotNone(model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + self.assertIsInstance(report, Report) # Validate and clean data validated_data = validation.drop_target_na( @@ -237,10 +236,10 @@ def test_train_model_pipeline_core_happy_flow_with_legacy_data_prep(self): self.assertIsNotNone(model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + self.assertIsInstance(report, Report) # Validate and clean data validated_data = validation.drop_target_na( @@ -280,7 +279,7 @@ def test_train_model_pipeline_with_featureAdders(self): pj.default_modelspecs = model_specs train_input = self.train_input.iloc[::50, :] - model, report, modelspecs, _ = train_model_pipeline_core( + model, report, 
_, _ = train_model_pipeline_core( pj=pj, model_specs=model_specs, input_data=train_input ) @@ -289,13 +288,13 @@ def test_train_model_pipeline_with_featureAdders(self): # check if the model has a feature_names property self.assertIsNotNone(model.feature_names) - self.assertTrue(dummy_feature in model.feature_names) + self.assertIn(dummy_feature, model.feature_names) # check if model is sklearn compatible - self.assertTrue(isinstance(model, sklearn.base.BaseEstimator)) + self.assertIsInstance(model, sklearn.base.BaseEstimator) # check if report is a Report - self.assertTrue(isinstance(report, Report)) + self.assertIsInstance(report, Report) @patch("openstef.pipeline.train_model.MLflowSerializer") def test_train_model_pipeline_with_default_modelspecs(self, mock_serializer): diff --git a/test/unit/pipeline/test_train.py b/test/unit/pipeline/test_train.py index 09f0a01bd..a7bf7485f 100644 --- a/test/unit/pipeline/test_train.py +++ b/test/unit/pipeline/test_train.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 import unittest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase from test.unit.utils.data import TestData @@ -43,8 +43,8 @@ class TestTrain(BaseTestCase): def setUp(self) -> None: super().setUp() self.pj = TestData.get_prediction_job(pid=307) - datetime_start = datetime.utcnow() - timedelta(days=90) - datetime_end = datetime.utcnow() + datetime_start = datetime.now(tz=UTC) - timedelta(days=90) + datetime_end = datetime.now(tz=UTC) self.data_table = TestData.load("input_data_train.csv").head(8641) self.data = pd.DataFrame( index=pd.date_range(datetime_start, datetime_end, freq="15T") diff --git a/test/unit/pipeline/test_train_predict_backtest.py b/test/unit/pipeline/test_train_predict_backtest.py index 19e36eb9c..e2b275626 100644 --- a/test/unit/pipeline/test_train_predict_backtest.py +++ b/test/unit/pipeline/test_train_predict_backtest.py @@ -50,9 +50,9 @@ def test_train_model_pipeline_core_happy_flow(self): training_horizons=[0.25, 24.0], ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(set(forecast.horizon.unique()), {0.25, 24.0}) def test_train_model_pipeline_core_happy_flow_nfold(self): @@ -72,9 +72,9 @@ def test_train_model_pipeline_core_happy_flow_nfold(self): n_folds=4, ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(sorted(list(forecast.horizon.unique())), [0.25, 24.0]) # check if forecast is indeed of the entire range of the input data @@ -148,9 +148,9 @@ def test_train_model_pipeline_core_custom_split(self): n_folds=4, ) - self.assertTrue("forecast" in forecast.columns) - self.assertTrue("realised" in forecast.columns) - self.assertTrue("horizon" in forecast.columns) + self.assertIn("forecast", forecast.columns) + self.assertIn("realised", forecast.columns) + self.assertIn("horizon", forecast.columns) self.assertEqual(sorted(list(forecast.horizon.unique())), [0.25, 24.0]) # check if forecast is indeed of the entire range of the input data diff --git 
a/test/unit/tasks/test_create_basecase_forecast.py b/test/unit/tasks/test_create_basecase_forecast.py index 2989e08a0..c4a7f11a1 100644 --- a/test/unit/tasks/test_create_basecase_forecast.py +++ b/test/unit/tasks/test_create_basecase_forecast.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project # noqa E501> # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.data import TestData from unittest import TestCase from unittest.mock import MagicMock, patch @@ -15,11 +15,11 @@ # Make sure this has a datetime of at least NOW+48hours, # since this is filtered in the task FORECAST_MOCK = pd.DataFrame( - index=pd.to_datetime([datetime.utcnow()], utc=True) + timedelta(days=3), + index=pd.to_datetime([datetime.now(tz=UTC)]) + timedelta(days=3), data=dict(forecast=[10.0]), ) FORECAST_NEAR_FUTURE_MOCK = pd.DataFrame( - index=pd.to_datetime([datetime.utcnow()], utc=True) + timedelta(days=1), + index=pd.to_datetime([datetime.now(tz=UTC)]) + timedelta(days=1), data=dict(forecast=[10.0]), ) diff --git a/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py b/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py index e997e3bda..941a88336 100644 --- a/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py +++ b/test/unit/validation/test_validation_detect_ongoing_zero_flatliner.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC from test.unit.utils.base import BaseTestCase import numpy as np @@ -16,7 +16,7 @@ class TestDetectOngoingZeroFlatliners(BaseTestCase): def setUp(self) -> None: super().setUp() - now = datetime.utcnow() + now = datetime.now(tz=UTC) self.three_hour_range = pd.date_range( start=now - timedelta(minutes=180), end=now, freq="0.25H" )
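A note on the recurring `datetime.utcnow()` → `datetime.now(tz=UTC)` replacement throughout this patch: `datetime.utcnow()` returns a naive datetime and is deprecated as of Python 3.12, while `datetime.now(tz=UTC)` (using the `datetime.UTC` alias added in Python 3.11) returns a timezone-aware one. This is also why several `tz_localize(None)` calls could be dropped: aware values compare directly against tz-aware pandas indices. A minimal sketch, assuming a UTC-indexed load series like the ones handled in `validation.py`:

    from datetime import datetime, timedelta, UTC  # UTC alias: Python 3.11+

    import pandas as pd

    now = datetime.now(tz=UTC)  # tz-aware; replaces the deprecated datetime.utcnow()

    # A tz-aware series, similar to the load series in validation.py
    index = pd.date_range(now - timedelta(hours=2), now + timedelta(hours=1), freq="15T")
    load = pd.Series(range(len(index)), index=index)

    historic = load[load.index <= now]  # aware vs aware: no tz_localize(None) needed
    # load[load.index <= datetime.utcnow()]  # would raise TypeError: naive vs aware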