General code improvements #527

Open · wants to merge 8 commits into base: main
1 change: 0 additions & 1 deletion openstef/feature_engineering/weather_features.py
@@ -397,7 +397,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series
solar_zenith = solpos.apparent_zenith

# convert radiation (ghi) to right unit (J/m^2 to kWh/m^2)
# TODO: check whether unit conversion is necessary
ghi_forecasted = radiation / 3600
# convert ghi to dni
dni_converted = pvlib.irradiance.dni(
11 changes: 7 additions & 4 deletions openstef/metrics/metrics.py
@@ -9,7 +9,7 @@
#
# SPDX-License-Identifier: MIT
"""This module contains all metrics to assess forecast quality."""
from typing import Callable
from typing import Callable, Optional, Tuple

import numpy as np
import pandas as pd
@@ -297,12 +297,15 @@ def skill_score_positive_peaks(


def franks_skill_score(
realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0
realised: pd.Series,
forecast: pd.Series,
basecase: pd.Series,
range_: Optional[float] = None,
) -> float:
"""Calculate Franks skill score."""
# Combine series in one DataFrame
combined = pd.concat([realised, forecast], axis=1)
if range_ == 1.0:
if not range_:
range_ = (
combined["load"].max() - combined["load"].min()
if (combined["load"].max() - combined["load"].min()) != 0
@@ -358,7 +361,7 @@ def franks_skill_score_peaks(

def xgb_quantile_eval(
preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2
) -> str:
) -> Tuple:
"""Customized evaluational metric that equals to quantile regression loss (also known as pinball loss).

Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features.
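Note: the return annotation change from str to Tuple reflects XGBoost's convention that a custom evaluation metric returns a (name, value) pair rather than a bare string. For reference, a minimal standalone sketch of the pinball loss the docstring describes (hypothetical helper, not part of this diff):

    import numpy as np

    def pinball_loss(y_true: np.ndarray, y_pred: np.ndarray, quantile: float = 0.2) -> float:
        # Asymmetric penalty: under-prediction weighted by quantile, over-prediction by (1 - quantile)
        diff = y_true - y_pred
        return float(np.mean(np.maximum(quantile * diff, (quantile - 1) * diff)))

    # Example: pinball_loss(np.array([10.0, 12.0]), np.array([9.0, 13.0]), 0.2) == 0.5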
2 changes: 1 addition & 1 deletion openstef/model/confidence_interval_applicator.py
@@ -137,7 +137,7 @@ def _add_standard_deviation_to_forecast(
# Determine now, rounded on 15 minutes,
# Rounding helps to prevent fractional t_aheads
now = (
pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo))
pd.Series(datetime.now(tz=forecast_copy.index.tzinfo))
.min()
.round(f"{minimal_resolution}T")
.to_pydatetime()
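Note: this is the first of several places in this PR where datetime.utcnow() is replaced with datetime.now(tz=...). utcnow() returns a naive datetime and is deprecated as of Python 3.12, while now(tz=...) returns a timezone-aware value, so arithmetic and comparisons against other aware timestamps (such as a tz-aware forecast index) work directly. A small illustrative sketch, not taken from the diff (datetime.UTC requires Python 3.11+):

    from datetime import datetime, timedelta, UTC

    aware = datetime.now(tz=UTC)      # timezone-aware, preferred
    naive = datetime.utcnow()         # naive, deprecated since Python 3.12

    # aware - naive would raise:
    # TypeError: can't subtract offset-naive and offset-aware datetimes
    age = aware - (aware - timedelta(days=3))  # aware minus aware is fine
    assert age.days == 3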
4 changes: 2 additions & 2 deletions openstef/model/fallback.py
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <[email protected]> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
from datetime import datetime
from datetime import datetime, UTC

import pandas as pd

@@ -43,7 +43,7 @@ def generate_fallback(

# Find most extreme historic day (do not count today as it is incomplete)
day_with_highest_load_date = (
load[load.index.tz_localize(None).date != datetime.utcnow().date()]
load[load.index < datetime.now(tz=UTC)]
.idxmax()
.load.date()
Comment on lines +46 to 48
[blackfmt] reported by reviewdog 🐶

Suggested change:
- load[load.index < datetime.now(tz=UTC)]
- .idxmax()
- .load.date()
+ load[load.index < datetime.now(tz=UTC)].idxmax().load.date()

)
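Note on the new filter: the comment above still says today should be excluded because it is incomplete, but load.index < datetime.now(tz=UTC) keeps today's earlier rows. If that exclusion matters, one hedged, timezone-aware alternative (assuming load.index is tz-aware, as in the surrounding function):

    from datetime import datetime, UTC

    today_utc = datetime.now(tz=UTC).date()
    historic = load[load.index.tz_convert("UTC").date != today_utc]
    day_with_highest_load_date = historic.idxmax().load.date()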
4 changes: 2 additions & 2 deletions openstef/model/metamodels/missing_values_handler.py
@@ -90,7 +90,7 @@ def _get_tags(self):
def fit(self, x, y):
"""Fit model."""
_, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
if type(x) != pd.DataFrame:
if not isinstance(x, pd.DataFrame):
x = pd.DataFrame(np.asarray(x))
self.feature_in_names_ = list(x.columns)
self.n_features_in_ = x.shape[1]
@@ -133,6 +133,6 @@ def predict(self, x):
x,
force_all_finite="allow-nan",
)
if type(x) != pd.DataFrame:
if not isinstance(x, pd.DataFrame):
x = pd.DataFrame(np.array(x))
return self.pipeline_.predict(x[self.non_null_columns_])
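Note: isinstance is the idiomatic check here — it also accepts subclasses of DataFrame, which the strict type(x) != pd.DataFrame comparison would needlessly re-wrap. A small illustration (hypothetical subclass, not from the codebase):

    import pandas as pd

    class TaggedFrame(pd.DataFrame):
        pass  # hypothetical DataFrame subclass

    x = TaggedFrame({"a": [1, 2]})
    assert isinstance(x, pd.DataFrame)   # subclass accepted as-is
    assert type(x) != pd.DataFrame       # the old check would have re-wrapped it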
4 changes: 2 additions & 2 deletions openstef/model/regressors/custom_regressor.py
@@ -26,9 +26,9 @@ class CustomOpenstfRegressor(OpenstfRegressor):
def valid_kwargs() -> list[str]:
...

@classmethod
@staticmethod
@abstractmethod
def objective(self) -> Type[RegressorObjective]:
def objective() -> Type[RegressorObjective]:
...


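Note: stacking @staticmethod on top of @abstractmethod replaces the previous @classmethod variant and drops the unused self parameter — objective does not touch instance or class state, and subclasses are still forced to implement it. A hedged sketch of the pattern with hypothetical names (the real method returns Type[RegressorObjective]):

    from abc import ABC, abstractmethod

    class BaseRegressor(ABC):
        @staticmethod
        @abstractmethod
        def objective() -> str:
            ...

    class MyRegressor(BaseRegressor):
        @staticmethod
        def objective() -> str:
            return "my_objective"

    assert MyRegressor.objective() == "my_objective"  # callable without an instance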
5 changes: 2 additions & 3 deletions openstef/model/serializer.py
@@ -5,7 +5,7 @@
import logging
import os
import shutil
from datetime import datetime
from datetime import datetime, UTC
from json import JSONDecodeError
from typing import Optional, Union
from urllib.parse import unquote, urlparse
@@ -283,8 +283,7 @@ def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, flo
"""Determines how many days ago a model is trained from the mlflow run."""
try:
model_datetime = run.end_time.to_pydatetime()
model_datetime = model_datetime.replace(tzinfo=None)
model_age_days = (datetime.utcnow() - model_datetime).days
model_age_days = (datetime.now(tz=UTC) - model_datetime).days
except Exception as e:
self.logger.warning(
"Could not get model age. Returning infinite age!", exception=str(e)
2 changes: 1 addition & 1 deletion openstef/model/standard_deviation_generator.py
@@ -69,7 +69,7 @@ def _calculate_standard_deviation(
# Calculate the error for each predicted point
error = realised - predicted
error.index = error.index.hour # Hour only, remove the rest
# For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP)
# For the time starts with 00, 01, 02, etc.
for hour in range(24):
hour_error = error[error.index == hour]

4 changes: 3 additions & 1 deletion openstef/model_selection/model_selection.py
@@ -230,7 +230,9 @@ def split_data_train_validation_test(
for date_set in [max_dates, min_dates, other_dates]:
n_days_val = max(1, int(validation_fraction * len(date_set)))
val_dates += list(
np.random.choice(list(date_set), n_days_val, replace=False)
np.random.default_rng().choice(
list(date_set), n_days_val, replace=False
)
)
train_dates += [x for x in date_set if x not in val_dates]

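Note: np.random.default_rng() is NumPy's recommended Generator API and, unlike the legacy np.random.choice, does not rely on the module-level global seed state. If reproducible validation splits are wanted, a seed (or a shared Generator) can be passed in — a sketch of that pattern, not part of this diff:

    import numpy as np

    rng = np.random.default_rng(seed=42)  # one seeded generator, reused per split
    dates = ["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04"]
    val_dates = list(rng.choice(dates, size=2, replace=False))
    # The same seed yields the same validation dates on every run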
5 changes: 0 additions & 5 deletions openstef/postprocessing/postprocessing.py
@@ -239,11 +239,6 @@ def add_prediction_job_properties_to_forecast(
if forecast_quality is not None:
forecast["quality"] = forecast_quality

# TODO rename prediction job typ to type
# TODO algtype = model_file_path, perhaps we can find a more logical name
# TODO perhaps better to make a forecast its own class!
# TODO double check and sync this with make_basecase_forecast (other fields are added)
# !!!!! TODO fix the requirement for customer
forecast["pid"] = pj["id"]
forecast["customer"] = pj["name"]
forecast["description"] = pj["description"]
10 changes: 5 additions & 5 deletions openstef/tasks/calculate_kpi.py
@@ -21,7 +21,7 @@
import logging

# Import builtins
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import numpy as np
@@ -56,8 +56,8 @@ def main(model_type: ModelType = None, config=None, database=None) -> None:

with TaskContext(taskname, config, database) as context:
# Set start and end time
start_time = datetime.utcnow() - timedelta(days=1)
end_time = datetime.utcnow()
end_time = datetime.now(tz=UTC)
start_time = end_time - timedelta(days=1)

PredictionJobLoop(context, model_type=model_type).map(
check_kpi_task,
@@ -77,9 +77,9 @@ def check_kpi_task(
) -> None:
# Apply default parameters if none are provided
if start_time is None:
start_time = datetime.utcnow() - timedelta(days=1)
start_time = datetime.now(tz=UTC) - timedelta(days=1)
if end_time is None:
end_time = datetime.utcnow()
end_time = datetime.now(tz=UTC)

# Get realised load data
realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T")
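Note: computing end_time once and deriving start_time from it guarantees an exact one-day window; two separate clock reads (as before) could differ slightly. The pattern in isolation:

    from datetime import datetime, timedelta, UTC

    end_time = datetime.now(tz=UTC)
    start_time = end_time - timedelta(days=1)
    assert end_time - start_time == timedelta(days=1)  # exact by construction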
8 changes: 4 additions & 4 deletions openstef/tasks/create_basecase_forecast.py
@@ -16,7 +16,7 @@
$ python create_basecase_forecast.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -68,8 +68,8 @@ def create_basecase_forecast_task(
return

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)

# Retrieve input data
input_data = context.database.get_model_input(
@@ -87,7 +87,7 @@ def create_basecase_forecast_task(
basecase_forecast = basecase_forecast.loc[
basecase_forecast.index
> (
pd.to_datetime(datetime.utcnow(), utc=True)
pd.to_datetime(datetime.now(tz=UTC), utc=True)
+ timedelta(minutes=pj.horizon_minutes)
),
:,
10 changes: 4 additions & 6 deletions openstef/tasks/create_components_forecast.py
@@ -22,7 +22,7 @@

"""
import logging
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -76,8 +76,8 @@ def create_components_forecast_task(
return

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)

logger.info(
"Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -120,9 +120,7 @@ def create_components_forecast_task(
logger.debug("Written forecast to database")

# Check if forecast was complete enough, otherwise raise exception
if forecasts.index.max() < datetime.utcnow().replace(
tzinfo=timezone.utc
) + timedelta(hours=30):
if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30):
# Check which input data is missing the most.
# Do this by counting the NANs for (load)forecast, radiation and windspeed
max_index = forecasts.index.max()
6 changes: 3 additions & 3 deletions openstef/tasks/create_forecast.py
@@ -20,7 +20,7 @@
$ python create_forecast.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -73,8 +73,8 @@ def create_forecast_task(
mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri

# Define datetime range for input data
datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60)

# Retrieve input data
input_data = context.database.get_model_input(
5 changes: 2 additions & 3 deletions openstef/tasks/create_solar_forecast.py
@@ -12,7 +12,7 @@
$ python create_solar_forecast

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import numpy as np
@@ -23,7 +23,6 @@
from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
from openstef.tasks.utils.taskcontext import TaskContext

# TODO move to config
PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv"


@@ -231,7 +230,7 @@ def main(config=None, database=None, **kwargs):
num_prediction_jobs = len(prediction_jobs)

# only make customer = Provincie once an hour
utc_now_minute = datetime.utcnow().minute
utc_now_minute = datetime.now(tz=UTC).minute
if utc_now_minute >= 15:
prediction_jobs = [
pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie")
6 changes: 3 additions & 3 deletions openstef/tasks/optimize_hyperparameters.py
@@ -16,7 +16,7 @@
$ python optimize_hyperparameters.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -88,8 +88,8 @@ def optimize_hyperparameters_task(
)
return

datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
datetime_end = datetime.utcnow()
datetime_start = datetime.now(tz=UTC) - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
datetime_end = datetime.now(tz=UTC)

input_data = context.database.get_model_input(
pid=pj["id"],
7 changes: 3 additions & 4 deletions openstef/tasks/split_forecast.py
@@ -23,7 +23,7 @@

"""
import logging
from datetime import datetime
from datetime import datetime, UTC
from pathlib import Path

import numpy as np
@@ -93,7 +93,6 @@ def split_forecast_task(
components, coefdict = find_components(input_split_function)

# Calculate mean absolute error (MAE)
# TODO: use a standard metric function for this
error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1]
mae = error.abs().mean()
coefdict.update({"MAE": mae})
@@ -183,7 +182,7 @@ def convert_coefdict_to_coefsdf(
pj["id"],
input_split_function.index.min().date(),
input_split_function.index.max().date(),
datetime.utcnow(),
datetime.now(tz=UTC),
]
coefsdf = pd.DataFrame(
{"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())}
@@ -237,7 +236,7 @@ def weighted_sum(x, *args):

# Carry out fitting
# See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa
coefs, cov = scipy.optimize.curve_fit(
coefs, _ = scipy.optimize.curve_fit(
weighted_sum,
xdata=df.iloc[:, 1:].values.T,
ydata=load.values,
10 changes: 5 additions & 5 deletions openstef/tasks/train_model.py
@@ -19,7 +19,7 @@
$ python model_train.py

"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, UTC
from pathlib import Path

import pandas as pd
@@ -123,7 +123,7 @@ def train_model_task(
)

if datetime_end is None:
datetime_end = datetime.utcnow()
datetime_end = datetime.now(tz=UTC)
if datetime_start is None:
datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch)

@@ -184,9 +184,9 @@ def train_model_task(
"'save_train_forecasts option was activated.'"
)
context.database.write_train_forecasts(pj, data_sets)
context.logger.debug(f"Saved Forecasts from trained model on datasets")
context.logger.debug("Saved Forecasts from trained model on datasets")
except SkipSaveTrainingForecasts:
context.logger.debug(f"Skip saving forecasts")
context.logger.debug("Skip saving forecasts")
except InputDataOngoingZeroFlatlinerError:
if (
context.config.known_zero_flatliners
@@ -213,7 +213,7 @@ def main(model_type=None, config=None, database=None):
model_type = [ml.value for ml in ModelType]

taskname = Path(__file__).name.replace(".py", "")
datetime_now = datetime.utcnow()
datetime_now = datetime.now(tz=UTC)
with TaskContext(taskname, config, database) as context:
PredictionJobLoop(context, model_type=model_type).map(
train_model_task, context, datetime_end=datetime_now