Skip to content

Commit

Permalink
add rolling aggregate features + tests + field to pj
Browse files Browse the repository at this point in the history
  • Loading branch information
lschilders committed Feb 4, 2025
1 parent f080aa5 commit fe666a9
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 1 deletion.
3 changes: 2 additions & 1 deletion openstef/data_classes/prediction_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class PredictionJobDataClass(BaseModel):
- ``"lgb"``
- ``"linear"``
- ``"linear_quantile"``
- ``"gblinear_quantile"``
- ``"xgb_multioutput_quantile"``
- ``"flatliner"``
Expand Down Expand Up @@ -83,6 +82,8 @@ class PredictionJobDataClass(BaseModel):
data_balancing_ratio: Optional[float] = None
"""If data balancing is enabled, the data will be balanced with data from 1 year
ago in the future."""
use_rolling_aggregate_features: bool = False
"""If True, rolling aggregate of load will be used as features in the model."""
depends_on: Optional[list[Union[int, str]]]
"""Link to another prediction job on which this prediction job might depend."""
sid: Optional[str]
Expand Down
4 changes: 4 additions & 0 deletions openstef/feature_engineering/apply_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from openstef.feature_engineering.bidding_zone_to_country_mapping import (
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING,
)
from openstef.feature_engineering.rolling_features import add_rolling_aggregate_features
from openstef.feature_engineering.weather_features import (
add_additional_solar_features,
add_additional_wind_features,
Expand Down Expand Up @@ -130,5 +131,8 @@ def apply_features(
# Adds daylight terrestrial feature
data = add_daylight_terrestrial_feature(data)

if pj.use_rolling_aggregate_features:
data = add_rolling_aggregate_features(data)

# Return dataframe including all requested features
return data
32 changes: 32 additions & 0 deletions openstef/feature_engineering/rolling_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pandas as pd


def add_rolling_aggregate_features(
    data: pd.DataFrame, rolling_window: str = "24h"
) -> pd.DataFrame:
    """Append rolling median, max and min of the ``load`` column to ``data``.

    Three columns are written, named ``rolling_<statistic>_load_<rolling_window>``
    for the statistics ``median``, ``max`` and ``min`` (in that order). The
    columns are assigned on the dataframe that was passed in, so the caller's
    object is modified and the same object is returned.

    Args:
        data: Dataframe with a ``DatetimeIndex`` and a ``load`` column.
        rolling_window: Size of the rolling window as a pandas offset string, see
            https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

    Returns:
        The input dataframe, extended with the three rolling feature columns.

    Raises:
        ValueError: If the index is not a ``DatetimeIndex`` or if the ``load``
            column is missing.
    """
    # The window is handed to pandas as an offset string, so a time-based
    # index is required up front.
    index_is_datetime = isinstance(data.index, pd.DatetimeIndex)
    if not index_is_datetime:
        raise ValueError("The DataFrame index must be a DatetimeIndex.")

    if "load" not in data.columns:
        raise ValueError("The DataFrame must contain a 'load' column.")

    # Build the rolling view once and reuse it for every statistic.
    load_windows = data["load"].rolling(window=rolling_window)
    for statistic in ("median", "max", "min"):
        column_name = f"rolling_{statistic}_load_{rolling_window}"
        data[column_name] = getattr(load_windows, statistic)()

    return data
88 changes: 88 additions & 0 deletions test/unit/feature_engineering/test_rolling_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import numpy as np
import pandas as pd
import pytest

from openstef.feature_engineering.rolling_features import add_rolling_aggregate_features


def test_add_rolling_aggregate_features():
    """Check the rolling median, max and min against closed-form expectations.

    The load is a ramp (0, 1, 2, ...) sampled every 15 minutes. The expected
    values are derived from that shape instead of by repeating the pandas
    rolling calls the implementation uses, so a mistake in the implementation
    cannot be mirrored in the expectation.
    """
    # Generate 2 days of data at 15-minute intervals
    points_per_day = 24 * 60 // 15
    num_points = 2 * points_per_day
    data = pd.DataFrame(
        index=pd.date_range(
            start="2023-01-01 00:00:00", freq="15min", periods=num_points
        )
    )
    data["load"] = list(range(num_points))

    # Apply the function
    output_data = add_rolling_aggregate_features(data)

    # Verify the columns are created
    rolling_window = "24h"
    median_column = f"rolling_median_load_{rolling_window}"
    max_column = f"rolling_max_load_{rolling_window}"
    min_column = f"rolling_min_load_{rolling_window}"
    assert median_column in output_data.columns
    assert max_column in output_data.columns
    assert min_column in output_data.columns

    # Validate the rolling features.
    # With pandas' default right-closed offset window, a 24h window on
    # 15-minute data holds the current sample plus the 95 before it (fewer at
    # the start of the series). On a ramp the maximum is therefore the current
    # value, the minimum is the oldest sample in the window, and the median of
    # the evenly spaced values is the midpoint of the two.
    positions = np.arange(num_points)
    rolling_max_expected = positions
    rolling_min_expected = np.maximum(positions - (points_per_day - 1), 0)
    rolling_median_expected = (rolling_min_expected + rolling_max_expected) / 2

    assert np.allclose(output_data[median_column], rolling_median_expected)
    assert np.allclose(output_data[max_column], rolling_max_expected)
    assert np.allclose(output_data[min_column], rolling_min_expected)


def test_add_rolling_aggregate_features_flatline():
    """A constant load must give constant rolling median, max and min."""
    # Two days of 15-minute samples, all with the same load value.
    num_points = int(24 * 60 / 15 * 2)
    timestamps = pd.date_range(
        start="2023-01-01 00:00:00", freq="15min", periods=num_points
    )
    all_ones = [1.0] * num_points
    data = pd.DataFrame(index=timestamps)
    data["load"] = all_ones

    output_data = add_rolling_aggregate_features(data)

    # Every rolling feature column must exist ...
    assert "rolling_median_load_24h" in output_data.columns
    assert "rolling_max_load_24h" in output_data.columns
    assert "rolling_min_load_24h" in output_data.columns

    # ... and must equal the flat input at every timestamp.
    rolling_window = "24h"
    for statistic in ("median", "max", "min"):
        feature = output_data[f"rolling_{statistic}_load_{rolling_window}"]
        assert np.all(feature == all_ones)


def test_add_rolling_aggregate_features_non_datetime_index():
    """A dataframe without a DatetimeIndex must be rejected with a ValueError."""
    # A plain integer range index stands in for any non-datetime index.
    frame_with_range_index = pd.DataFrame(index=range(10))
    expected_message = "The DataFrame index must be a DatetimeIndex."

    with pytest.raises(ValueError, match=expected_message):
        add_rolling_aggregate_features(frame_with_range_index)


def test_add_rolling_aggregate_features_no_load_column():
    """A dataframe without a 'load' column must be rejected with a ValueError."""
    # Valid DatetimeIndex, so only the missing column can trigger the error.
    timestamps = pd.date_range(start="2023-01-01 00:00:00", freq="15min", periods=10)
    frame_without_load = pd.DataFrame(index=timestamps, columns=["not_load"])
    expected_message = "The DataFrame must contain a 'load' column."

    with pytest.raises(ValueError, match=expected_message):
        add_rolling_aggregate_features(frame_without_load)

0 comments on commit fe666a9

Please sign in to comment.