From f1f3fa82ed38bd2b01d94773d9363bafbaebe675 Mon Sep 17 00:00:00 2001
From: Lars Schilders <123180911+lschilders@users.noreply.github.com>
Date: Fri, 14 Feb 2025 16:36:50 +0100
Subject: [PATCH] add timedelta conversion to iso 8601 format (#589)

* add timedelta conversion to iso 8601 format

* fix test_apply_features

Signed-off-by: lschilders

---------

Signed-off-by: lschilders
---
 .../feature_engineering/rolling_features.py | 17 ++++-
 .../test_apply_features.py                  | 12 +---
 .../test_rolling_features.py                | 64 ++++++++++---------
 3 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/openstef/feature_engineering/rolling_features.py b/openstef/feature_engineering/rolling_features.py
index 6054b801..62158f9a 100644
--- a/openstef/feature_engineering/rolling_features.py
+++ b/openstef/feature_engineering/rolling_features.py
@@ -6,6 +6,21 @@
 import pandas as pd
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
+from pydantic import TypeAdapter
+
+
+def convert_timedelta_to_isoformat(td: timedelta) -> str:
+    """
+    Converts a timedelta to an ISO 8601 formatted period string.
+
+    Args:
+        td: timedelta object to convert.
+
+    Returns:
+        ISO 8601 formatted period string.
+    """
+    timedelta_adapter = TypeAdapter(timedelta)
+    return timedelta_adapter.dump_python(td, mode="json")
 
 
 def add_rolling_aggregate_features(
@@ -38,6 +53,6 @@ def add_rolling_aggregate_features(
 
     for aggregate_func in pj["rolling_aggregate_features"]:
         data[
-            f"rolling_{aggregate_func.value}_load_{rolling_window}"
+            f"rolling_{aggregate_func.value}_load_{convert_timedelta_to_isoformat(rolling_window)}"
         ] = rolling_window_load.aggregate(aggregate_func.value)
     return data
diff --git a/test/unit/feature_engineering/test_apply_features.py b/test/unit/feature_engineering/test_apply_features.py
index e3c9b741..cd703134 100644
--- a/test/unit/feature_engineering/test_apply_features.py
+++ b/test/unit/feature_engineering/test_apply_features.py
@@ -323,15 +323,9 @@ def test_add_rolling_aggregate_features(self):
             pj=pj,
         )
 
-        self.assertIn(
-            "rolling_mean_load_1 day, 0:00:00", input_data_with_features.columns
-        )
-        self.assertIn(
-            "rolling_max_load_1 day, 0:00:00", input_data_with_features.columns
-        )
-        self.assertIn(
-            "rolling_min_load_1 day, 0:00:00", input_data_with_features.columns
-        )
+        self.assertIn("rolling_mean_load_P1D", input_data_with_features.columns)
+        self.assertIn("rolling_max_load_P1D", input_data_with_features.columns)
+        self.assertIn("rolling_min_load_P1D", input_data_with_features.columns)
 
     def test_add_rolling_aggregate_features_when_none(self):
         pj = {
diff --git a/test/unit/feature_engineering/test_rolling_features.py b/test/unit/feature_engineering/test_rolling_features.py
index 345de6c3..eea15170 100644
--- a/test/unit/feature_engineering/test_rolling_features.py
+++ b/test/unit/feature_engineering/test_rolling_features.py
@@ -8,7 +8,23 @@
 import pytest
 
 from openstef.enums import AggregateFunction
-from openstef.feature_engineering.rolling_features import add_rolling_aggregate_features
+from openstef.feature_engineering.rolling_features import (
+    add_rolling_aggregate_features,
+    convert_timedelta_to_isoformat,
+)
+
+
+@pytest.mark.parametrize(
+    "td, expected_str",
+    [
+        (timedelta(days=1), "P1D"),
+        (timedelta(hours=24), "P1D"),
+        (timedelta(hours=1), "PT1H"),
+        (timedelta(minutes=15), "PT15M"),
+    ],
+)
+def test_convert_timedelta_to_isoformat(td, expected_str):
+    assert convert_timedelta_to_isoformat(td) == expected_str
 
 
 @pytest.mark.parametrize("rolling_window", [timedelta(days=1), timedelta(hours=24)])
@@ -36,9 +52,9 @@ def test_add_rolling_aggregate_features(rolling_window):
     )
 
     # Verify the columns are created
-    assert f"rolling_median_load_{rolling_window}" in output_data.columns
-    assert f"rolling_max_load_{rolling_window}" in output_data.columns
-    assert f"rolling_min_load_{rolling_window}" in output_data.columns
+    assert "rolling_median_load_P1D" in output_data.columns
+    assert "rolling_max_load_P1D" in output_data.columns
+    assert "rolling_min_load_P1D" in output_data.columns
 
     # Validate the rolling features
     rolling_window_load = data["load"].rolling(window=rolling_window)
@@ -46,15 +62,9 @@ def test_add_rolling_aggregate_features(rolling_window):
     rolling_max_expected = rolling_window_load.max()
     rolling_min_expected = rolling_window_load.min()
 
-    assert np.allclose(
-        output_data[f"rolling_median_load_{rolling_window}"], rolling_median_expected
-    )
-    assert np.allclose(
-        output_data[f"rolling_max_load_{rolling_window}"], rolling_max_expected
-    )
-    assert np.allclose(
-        output_data[f"rolling_min_load_{rolling_window}"], rolling_min_expected
-    )
+    assert np.allclose(output_data["rolling_median_load_P1D"], rolling_median_expected)
+    assert np.allclose(output_data["rolling_max_load_P1D"], rolling_max_expected)
+    assert np.allclose(output_data["rolling_min_load_P1D"], rolling_min_expected)
 
 
 def test_add_rolling_aggregate_features_flatline():
@@ -79,14 +89,14 @@ def test_add_rolling_aggregate_features_flatline():
     output_data = add_rolling_aggregate_features(data, pj=pj)
 
     # Verify the columns are created
-    assert "rolling_median_load_1 day, 0:00:00" in output_data.columns
-    assert "rolling_max_load_1 day, 0:00:00" in output_data.columns
-    assert "rolling_min_load_1 day, 0:00:00" in output_data.columns
+    assert "rolling_median_load_P1D" in output_data.columns
+    assert "rolling_max_load_P1D" in output_data.columns
+    assert "rolling_min_load_P1D" in output_data.columns
 
     # Validate the rolling features
-    assert np.all(output_data[f"rolling_median_load_1 day, 0:00:00"] == all_ones)
-    assert np.all(output_data[f"rolling_max_load_1 day, 0:00:00"] == all_ones)
-    assert np.all(output_data[f"rolling_min_load_1 day, 0:00:00"] == all_ones)
+    assert np.all(output_data[f"rolling_median_load_P1D"] == all_ones)
+    assert np.all(output_data[f"rolling_max_load_P1D"] == all_ones)
+    assert np.all(output_data[f"rolling_min_load_P1D"] == all_ones)
 
 
 def test_add_rolling_aggregate_features_nans():
@@ -115,20 +125,16 @@ def test_add_rolling_aggregate_features_nans():
     )
 
     # Verify the columns are created
-    assert "rolling_median_load_1:00:00" in output_data.columns
-    assert "rolling_max_load_1:00:00" in output_data.columns
-    assert "rolling_min_load_1:00:00" in output_data.columns
+    assert "rolling_median_load_PT1H" in output_data.columns
+    assert "rolling_max_load_PT1H" in output_data.columns
+    assert "rolling_min_load_PT1H" in output_data.columns
 
     # Validate the rolling features
     assert np.allclose(
-        output_data["rolling_median_load_1:00:00"], [1, 1.5, 1.5, 2, 4, 5, 5.5, 6.5]
-    )
-    assert np.allclose(
-        output_data["rolling_max_load_1:00:00"], [1, 2, 2, 4, 5, 6, 7, 8]
-    )
-    assert np.allclose(
-        output_data["rolling_min_load_1:00:00"], [1, 1, 1, 1, 2, 4, 4, 5]
+        output_data["rolling_median_load_PT1H"], [1, 1.5, 1.5, 2, 4, 5, 5.5, 6.5]
     )
+    assert np.allclose(output_data["rolling_max_load_PT1H"], [1, 2, 2, 4, 5, 6, 7, 8])
+    assert np.allclose(output_data["rolling_min_load_PT1H"], [1, 1, 1, 1, 2, 4, 4, 5])
 
 
 def test_add_rolling_aggregate_features_non_datetime_index():
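
Note on the helper introduced in this patch: convert_timedelta_to_isoformat delegates the formatting entirely to pydantic. The snippet below is an illustrative sketch, not part of the patch; it assumes pydantic v2, whose TypeAdapter serializes datetime.timedelta as an ISO 8601 duration string when dumping with mode="json". The printed values mirror the cases asserted in test_convert_timedelta_to_isoformat.

    from datetime import timedelta

    from pydantic import TypeAdapter

    # Pydantic v2 dumps timedelta as an ISO 8601 duration in JSON mode;
    # convert_timedelta_to_isoformat is a thin wrapper around this call.
    adapter = TypeAdapter(timedelta)

    print(adapter.dump_python(timedelta(days=1), mode="json"))      # P1D
    print(adapter.dump_python(timedelta(hours=24), mode="json"))    # P1D (24h normalizes to 1 day)
    print(adapter.dump_python(timedelta(hours=1), mode="json"))     # PT1H
    print(adapter.dump_python(timedelta(minutes=15), mode="json"))  # PT15M

These ISO 8601 suffixes replace the previous str(timedelta) suffixes such as "1 day, 0:00:00", so the generated column names (e.g. rolling_mean_load_P1D) no longer contain spaces or commas.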