Skip to content

Commit

Permalink
Add timedelta conversion to ISO 8601 format (#589)
Browse files Browse the repository at this point in the history
* Add timedelta conversion to ISO 8601 format

* fix test_apply_features

Signed-off-by: lschilders <[email protected]>

---------

Signed-off-by: lschilders <[email protected]>
  • Loading branch information
lschilders authored Feb 14, 2025
1 parent 1da9026 commit f1f3fa8
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 39 deletions.
17 changes: 16 additions & 1 deletion openstef/feature_engineering/rolling_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@
import pandas as pd

from openstef.data_classes.prediction_job import PredictionJobDataClass
from pydantic import TypeAdapter


# Lazily-built pydantic adapter shared by all calls. Constructing a
# TypeAdapter rebuilds its validator/serializer, so it is created once
# on first use instead of on every conversion.
_TIMEDELTA_ADAPTER = None


def convert_timedelta_to_isoformat(td: timedelta) -> str:
    """Convert a timedelta to an ISO 8601 formatted period string.

    The conversion is delegated to pydantic's JSON-mode serialization of
    ``timedelta`` (e.g. ``timedelta(days=1)`` -> ``"P1D"``,
    ``timedelta(minutes=15)`` -> ``"PT15M"``).

    Args:
        td: timedelta object to convert.

    Returns:
        ISO 8601 formatted period string.

    """
    global _TIMEDELTA_ADAPTER
    if _TIMEDELTA_ADAPTER is None:
        # A benign race here only results in building the adapter twice.
        _TIMEDELTA_ADAPTER = TypeAdapter(timedelta)
    return _TIMEDELTA_ADAPTER.dump_python(td, mode="json")


def add_rolling_aggregate_features(
Expand Down Expand Up @@ -38,6 +53,6 @@ def add_rolling_aggregate_features(

for aggregate_func in pj["rolling_aggregate_features"]:
data[
f"rolling_{aggregate_func.value}_load_{rolling_window}"
f"rolling_{aggregate_func.value}_load_{convert_timedelta_to_isoformat(rolling_window)}"
] = rolling_window_load.aggregate(aggregate_func.value)
return data
12 changes: 3 additions & 9 deletions test/unit/feature_engineering/test_apply_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,9 @@ def test_add_rolling_aggregate_features(self):
pj=pj,
)

self.assertIn(
"rolling_mean_load_1 day, 0:00:00", input_data_with_features.columns
)
self.assertIn(
"rolling_max_load_1 day, 0:00:00", input_data_with_features.columns
)
self.assertIn(
"rolling_min_load_1 day, 0:00:00", input_data_with_features.columns
)
self.assertIn("rolling_mean_load_P1D", input_data_with_features.columns)
self.assertIn("rolling_max_load_P1D", input_data_with_features.columns)
self.assertIn("rolling_min_load_P1D", input_data_with_features.columns)

def test_add_rolling_aggregate_features_when_none(self):
pj = {
Expand Down
64 changes: 35 additions & 29 deletions test/unit/feature_engineering/test_rolling_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,23 @@
import pytest

from openstef.enums import AggregateFunction
from openstef.feature_engineering.rolling_features import add_rolling_aggregate_features
from openstef.feature_engineering.rolling_features import (
add_rolling_aggregate_features,
convert_timedelta_to_isoformat,
)


# (input timedelta, expected ISO 8601 period string) pairs; 24 hours and
# 1 day are the same duration and must map to the same string.
_TIMEDELTA_ISO_CASES = [
    (timedelta(days=1), "P1D"),
    (timedelta(hours=24), "P1D"),
    (timedelta(hours=1), "PT1H"),
    (timedelta(minutes=15), "PT15M"),
]


@pytest.mark.parametrize(("td", "expected_str"), _TIMEDELTA_ISO_CASES)
def test_convert_timedelta_to_isoformat(td, expected_str):
    """Each timedelta is rendered as its expected ISO 8601 period string."""
    actual_str = convert_timedelta_to_isoformat(td)
    assert actual_str == expected_str


@pytest.mark.parametrize("rolling_window", [timedelta(days=1), timedelta(hours=24)])
Expand Down Expand Up @@ -36,25 +52,19 @@ def test_add_rolling_aggregate_features(rolling_window):
)

# Verify the columns are created
assert f"rolling_median_load_{rolling_window}" in output_data.columns
assert f"rolling_max_load_{rolling_window}" in output_data.columns
assert f"rolling_min_load_{rolling_window}" in output_data.columns
assert "rolling_median_load_P1D" in output_data.columns
assert "rolling_max_load_P1D" in output_data.columns
assert "rolling_min_load_P1D" in output_data.columns

# Validate the rolling features
rolling_window_load = data["load"].rolling(window=rolling_window)
rolling_median_expected = rolling_window_load.median()
rolling_max_expected = rolling_window_load.max()
rolling_min_expected = rolling_window_load.min()

assert np.allclose(
output_data[f"rolling_median_load_{rolling_window}"], rolling_median_expected
)
assert np.allclose(
output_data[f"rolling_max_load_{rolling_window}"], rolling_max_expected
)
assert np.allclose(
output_data[f"rolling_min_load_{rolling_window}"], rolling_min_expected
)
assert np.allclose(output_data["rolling_median_load_P1D"], rolling_median_expected)
assert np.allclose(output_data["rolling_max_load_P1D"], rolling_max_expected)
assert np.allclose(output_data["rolling_min_load_P1D"], rolling_min_expected)


def test_add_rolling_aggregate_features_flatline():
Expand All @@ -79,14 +89,14 @@ def test_add_rolling_aggregate_features_flatline():
output_data = add_rolling_aggregate_features(data, pj=pj)

# Verify the columns are created
assert "rolling_median_load_1 day, 0:00:00" in output_data.columns
assert "rolling_max_load_1 day, 0:00:00" in output_data.columns
assert "rolling_min_load_1 day, 0:00:00" in output_data.columns
assert "rolling_median_load_P1D" in output_data.columns
assert "rolling_max_load_P1D" in output_data.columns
assert "rolling_min_load_P1D" in output_data.columns

# Validate the rolling features
assert np.all(output_data[f"rolling_median_load_1 day, 0:00:00"] == all_ones)
assert np.all(output_data[f"rolling_max_load_1 day, 0:00:00"] == all_ones)
assert np.all(output_data[f"rolling_min_load_1 day, 0:00:00"] == all_ones)
assert np.all(output_data[f"rolling_median_load_P1D"] == all_ones)
assert np.all(output_data[f"rolling_max_load_P1D"] == all_ones)
assert np.all(output_data[f"rolling_min_load_P1D"] == all_ones)


def test_add_rolling_aggregate_features_nans():
Expand Down Expand Up @@ -115,20 +125,16 @@ def test_add_rolling_aggregate_features_nans():
)

# Verify the columns are created
assert "rolling_median_load_1:00:00" in output_data.columns
assert "rolling_max_load_1:00:00" in output_data.columns
assert "rolling_min_load_1:00:00" in output_data.columns
assert "rolling_median_load_PT1H" in output_data.columns
assert "rolling_max_load_PT1H" in output_data.columns
assert "rolling_min_load_PT1H" in output_data.columns

# Validate the rolling features
assert np.allclose(
output_data["rolling_median_load_1:00:00"], [1, 1.5, 1.5, 2, 4, 5, 5.5, 6.5]
)
assert np.allclose(
output_data["rolling_max_load_1:00:00"], [1, 2, 2, 4, 5, 6, 7, 8]
)
assert np.allclose(
output_data["rolling_min_load_1:00:00"], [1, 1, 1, 1, 2, 4, 4, 5]
output_data["rolling_median_load_PT1H"], [1, 1.5, 1.5, 2, 4, 5, 5.5, 6.5]
)
assert np.allclose(output_data["rolling_max_load_PT1H"], [1, 2, 2, 4, 5, 6, 7, 8])
assert np.allclose(output_data["rolling_min_load_PT1H"], [1, 1, 1, 1, 2, 4, 4, 5])


def test_add_rolling_aggregate_features_non_datetime_index():
Expand Down

0 comments on commit f1f3fa8

Please sign in to comment.