Commit

feature(KTP-753): Added new baseline dazls model as dazls default model. (#535)

* feature(KTP-753): Added new baseline dazls model as dazls default model.

Signed-off-by: Egor Dmitriev <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* chore(KTP-753): Removed old DAZLS models.

Signed-off-by: Egor Dmitriev <[email protected]>

* fix(KTP-753): Removed old DAZLS models. Fixed tests

Signed-off-by: Egor Dmitriev <[email protected]>

---------

Signed-off-by: Egor Dmitriev <[email protected]>
Signed-off-by: black <[email protected]>
Co-authored-by: black <[email protected]>
egordm and actions-user authored Apr 30, 2024
1 parent be8f58e commit 4ecc905
Showing 25 changed files with 50 additions and 200 deletions.
3 changes: 0 additions & 3 deletions openstef/data/dazls_model_3.2.49/dazls_stored_3.2.49.sav

This file was deleted.

18 changes: 18 additions & 0 deletions openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md
@@ -0,0 +1,18 @@

# Model details : dazls\_model\_baseline\_model

## Description
**Model Name**: dazls\_model\_baseline\_model

**Author**: KTP, Alliander

**Model type**: Energy splitting model

**Model Architecture**: LinearRegression

**Date**: 2024-04-26

## Intended use
This is a DAZLS model aimed at determining the energy splits for substations.
Each of these splits is determined based on a set of features that are available in production,
and in this case have their origin in the Dutch energy grid.
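
For context, a minimal sketch of the baseline architecture named in this model card: a `MinMaxScaler` feeding a `LinearRegression` wrapped in a `TransformedTargetRegressor`, mirroring the pipeline introduced in `openstef/model/regressors/dazls.py` later in this commit. The toy frames `X` and `y` below are hypothetical placeholders.

```python
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Scale the inputs, fit a linear model, and map predictions back to the
# original target scale via the inverse transform of the target scaler.
baseline = Pipeline(
    [
        ("scaler", MinMaxScaler(clip=True)),
        (
            "regressor",
            TransformedTargetRegressor(
                regressor=LinearRegression(),
                transformer=MinMaxScaler(clip=True),
            ),
        ),
    ]
)

# Hypothetical toy data; the real feature and target columns are listed
# in openstef/model/regressors/dazls.py.
X = pd.DataFrame({"radiation": [0.1, 0.4, 0.8], "windspeed_100m": [3.0, 5.0, 7.5]})
y = pd.DataFrame({"total_wind_part": [0.2, 0.5, 0.9], "total_solar_part": [0.1, 0.3, 0.6]})

baseline.fit(X, y)
print(baseline.predict(X))
```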

14 changes: 0 additions & 14 deletions openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md

This file was deleted.

135 changes: 28 additions & 107 deletions openstef/model/regressors/dazls.py
@@ -4,65 +4,41 @@
"""This module defines the DAZL model."""
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle


class Dazls(BaseEstimator):
"""DAZLS model.
The model carries out wind and solar power prediction for unseen target substations using training data from
other substations with known components.
This model has two sub-models:
- domain model : a model taking a set of 'input' features of a substation and make an 'initial' prediction.
Input features can be features such as: weather, geospatial, total load, etc.
These features are always directly related to the components' size in some way.
- adaptation model : a model taking a set of 'meta' features of a substation and refines the domain model's
prediction. Next to the features, it is trained on the domain model's predictions.
'Meta' features are features related to the uncertainty of the data, and include:
variance of the total load, standard deviation of the total load, etc.
Any data-driven model can be plugged and used as the base for the domain and the adaptation model.
CAUTION : 'Meta' features should be kept out of the domain model, and vice versa input features should be
kept out the adaptation model.
For a full reference, see:
Teng, S.Y., van Nooten, C. C., van Doorn, J.M., Ottenbros, A., Huijbregts, M., Jansen, J.J.
Improving Near Real-Time Predictions of Renewable Electricity Production at Substation Level (Submitted)
Args:
- BaseEstimator (object) : a base model that can be used to carry out predictions.
The model carries out wind and solar power prediction for unseen target substations using training data from other
substations with known components.
"""

model_: Pipeline

def __init__(self):
"""Initialize DAZL model."""
self.__name__ = "DAZLS"
self.domain_model_scaler = MinMaxScaler(clip=True)
self.adaptation_model_scaler = MinMaxScaler(clip=True)
self.target_scaler = MinMaxScaler(clip=True)
self.domain_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
self.adaptation_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")

regressor = TransformedTargetRegressor(
regressor=LinearRegression(),
transformer=MinMaxScaler(clip=True),
)

self.model_ = Pipeline(
[("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
)

# The input columns for the domain and adaptation models (with description)
self.domain_model_input_columns = [
self.baseline_input_columns = [
"radiation", # Weather parameter
"windspeed_100m", # Weather parameter
"total_substation", # Substation's measured total load
"lat", # Latitude
"lon", # Longitude
"hour", # Hour of the day
"minute", # Minute of the hour
]
self.adaptation_model_input_columns = [
"var_total", # Variance of the total load
"sem_total", # Standard Error of the Mean of the total load
"total_load",
]
self.target_columns = ["total_wind_part", "total_solar_part"]

@@ -76,34 +52,15 @@ def fit(self, features, target):
Args:
features: inputs for domain and adaptation model (domain_model_input, adaptation_model_input)
target: the expected output (y_train)
"""
x, x2, y = (
features.loc[:, self.domain_model_input_columns],
features.loc[:, self.adaptation_model_input_columns],
x, y = (
features.loc[:, self.baseline_input_columns],
target.loc[:, self.target_columns],
)
domain_model_input, adaptation_model_input, y_train = shuffle(
x, x2, y, random_state=999
) # just shuffling

self.domain_model_scaler.fit(domain_model_input)
self.adaptation_model_scaler.fit(adaptation_model_input)
self.target_scaler.fit(y_train)
domain_model_input = self.domain_model_scaler.transform(domain_model_input)
adaptation_model_input = self.adaptation_model_scaler.transform(
adaptation_model_input
)
y_train = self.target_scaler.transform(y_train)

self.domain_model.fit(domain_model_input, y_train)
domain_model_pred = self.domain_model.predict(domain_model_input)
adaptation_model_input = np.concatenate(
(adaptation_model_input, domain_model_pred), axis=1
)
self.adaptation_model.fit(adaptation_model_input, y_train)
self.model_.fit(x, y)

def predict(self, x: np.array, return_sub_preds: bool = False):
def predict(self, x: np.array):
"""Make a prediction.
For the prediction we use the test data x. We use domain_model_input_columns and
@@ -119,41 +76,10 @@ def predict(self, x: np.array, return_sub_preds: bool = False):
Returns:
prediction: The output prediction after both models.
"""
domain_model_test_data, adaptation_model_test_data = (
x.loc[:, self.domain_model_input_columns],
x.loc[:, self.adaptation_model_input_columns],
)
# Rescale test data for both models (if required)
domain_model_test_data_scaled = self.domain_model_scaler.transform(
domain_model_test_data
)
adaptation_model_test_data_scaled = self.adaptation_model_scaler.transform(
adaptation_model_test_data
)
# Use the scaled data to make domain_model_prediction
domain_model_test_data_pred = self.domain_model.predict(
domain_model_test_data_scaled
)
# Use the domain_model_prediction to make adaptation_model_prediction
adaptation_model_test_data_pred = self.adaptation_model.predict(
np.concatenate(
[adaptation_model_test_data_scaled, domain_model_test_data_pred], axis=1
)
)
# Rescale adaptation_model_prediction (if required)
prediction = self.target_scaler.inverse_transform(
adaptation_model_test_data_pred
)
model_test_data = x.loc[:, self.baseline_input_columns]

if return_sub_preds:
prediction_domain = self.target_scaler.inverse_transform(
domain_model_test_data_pred
)
return prediction, prediction_domain
else:
return prediction
return self.model_.predict(model_test_data)

def score(self, truth, prediction):
"""Evaluation of the prediction's output.
@@ -164,7 +90,6 @@ def score(self, truth, prediction):
Returns:
RMSE and R2 scores
"""
rmse = (mean_squared_error(truth, prediction)) ** 0.5
r2_score_value = r2_score(truth, prediction)
@@ -175,17 +100,13 @@ def __str__(self):
Returns:
Summary represented by a string
"""
summary_str = (
f"{self.__name__} model summary:\n\n"
f"Domain Model: {self.domain_model} \n"
f"\tInput columns: {self.domain_model_input_columns} \n"
f"\tScaler: {self.domain_model_scaler} \n\n"
f"Adaptation Model: {self.adaptation_model} \n"
f"\tInput columns: {self.adaptation_model_input_columns} \n"
f"\tScaler: {self.adaptation_model_scaler} \n\n"
f"Target columns: {self.target_columns}"
f"Model: {self.model_} \n"
f"\tInput columns: {self.baseline_input_columns} \n"
f"\tScaler: {self.model_['scaler']} \n\n"
f"\tRegressor: {self.model_['regressor']} \n\n"
)

return summary_str
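
A hedged end-to-end usage sketch of the simplified class after this change: the dataframes below are synthetic placeholders, and only the column names are taken from `baseline_input_columns` and `target_columns` above.

```python
import numpy as np
import pandas as pd

from openstef.model.regressors.dazls import Dazls

rng = np.random.default_rng(0)
n = 100

# Synthetic feature frame containing the baseline input columns.
features = pd.DataFrame(
    {
        "radiation": rng.random(n),
        "windspeed_100m": rng.random(n),
        "total_substation": rng.random(n),
        "lat": np.full(n, 52.0),
        "lon": np.full(n, 5.0),
        "hour": rng.integers(0, 24, n),
        "minute": rng.integers(0, 60, n),
    }
)
# Synthetic target frame with the wind and solar splits.
target = pd.DataFrame(
    {
        "total_wind_part": rng.random(n),
        "total_solar_part": rng.random(n),
    }
)

model = Dazls()
model.fit(features, target)              # fits the scaler + linear regression pipeline
prediction = model.predict(features)     # ndarray with one column per target (wind, solar)
scores = model.score(target, prediction) # RMSE and R2, per the score() docstring
```
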
21 changes: 2 additions & 19 deletions openstef/pipeline/create_component_forecast.py
@@ -18,7 +18,7 @@

# Set the path for the Dazls stored model
DAZLS_STORED = str(
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.7" / "dazls_stored_3.4.7_"
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
)


@@ -113,24 +113,7 @@ def create_components_forecast_pipeline(
# Save and load the model as .sav file (or as .z file)
# For the code contact: [email protected]
dazls_model = Dazls()
dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
dazls_model.domain_model_scaler = joblib.load(
DAZLS_STORED + "domain_model_scaler.z"
)
dazls_model.domain_model_input_columns = joblib.load(
DAZLS_STORED + "domain_model_features.z"
)

dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
dazls_model.adaptation_model_scaler = joblib.load(
DAZLS_STORED + "adaptation_model_scaler.z"
)
dazls_model.adaptation_model_input_columns = joblib.load(
DAZLS_STORED + "adaptation_model_features.z"
)

dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")

logger.info("DAZLS model loaded", dazls_model=str(dazls_model))

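The loading code above now expects a single `baseline_model.z` artifact next to the model card. Below is a sketch of how such an artifact could be produced and round-tripped with joblib, using placeholder training data; the real training workflow is not part of this commit.

```python
import joblib
import numpy as np
import pandas as pd

from openstef.model.regressors.dazls import Dazls

DAZLS_STORED = "openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_"

# Fit on placeholder data (the real training data is not part of this diff).
rng = np.random.default_rng(1)
cols = ["radiation", "windspeed_100m", "total_substation", "lat", "lon", "hour", "minute"]
features = pd.DataFrame(rng.random((50, len(cols))), columns=cols)
target = pd.DataFrame(rng.random((50, 2)), columns=["total_wind_part", "total_solar_part"])

model = Dazls()
model.fit(features, target)

# Persist only the fitted pipeline; create_components_forecast_pipeline()
# reassembles a Dazls instance and assigns the loaded object to `model_`.
joblib.dump(model.model_, DAZLS_STORED + "baseline_model.z")

loaded = Dazls()
loaded.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
```
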
27 changes: 2 additions & 25 deletions test/unit/pipeline/test_create_component_forecast.py
@@ -28,36 +28,13 @@ def test_load_dazls_model(self):
Assert that loading the old model generates an exception and the new model does not
"""

old_model_file = (
PROJECT_ROOT / "openstef/data/dazls_model_3.2.49/dazls_stored_3.2.49.sav"
)
new_model_file = str(
PROJECT_ROOT / "openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_"
PROJECT_ROOT / "openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_"
)

self.assertRaises(Exception, joblib.load, old_model_file)
dazls_model = Dazls()

dazls_model.domain_model = joblib.load(new_model_file + "domain_model.z")
dazls_model.domain_model_scaler = joblib.load(
new_model_file + "domain_model_scaler.z"
)
dazls_model.domain_model_input_columns = joblib.load(
new_model_file + "domain_model_features.z"
)

dazls_model.adaptation_model = joblib.load(
new_model_file + "adaptation_model.z"
)
dazls_model.adaptation_model_scaler = joblib.load(
new_model_file + "adaptation_model_scaler.z"
)
dazls_model.adaptation_model_input_columns = joblib.load(
new_model_file + "adaptation_model_features.z"
)

dazls_model.target_columns = joblib.load(new_model_file + "target.z")
dazls_model.target_scaler = joblib.load(new_model_file + "target_scaler.z")
dazls_model.model_ = joblib.load(new_model_file + "baseline_model.z")

assert dazls_model

