Commit

feature(KTP-753): Added new baseline dazls model as dazls default model. (#535)

* feature(KTP-753): Added new baseline dazls model as dazls default model.

Signed-off-by: Egor Dmitriev <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* chore(KTP-753): Removed old DAZLS models.

Signed-off-by: Egor Dmitriev <[email protected]>

* fix(KTP-753): Removed old DAZLS models. Fixed tests

Signed-off-by: Egor Dmitriev <[email protected]>

---------

Signed-off-by: Egor Dmitriev <[email protected]>
Signed-off-by: black <[email protected]>
Co-authored-by: black <[email protected]>
egordm and actions-user authored Apr 30, 2024
1 parent be8f58e commit 4ecc905
Showing 25 changed files with 50 additions and 200 deletions.
3 changes: 0 additions & 3 deletions openstef/data/dazls_model_3.2.49/dazls_stored_3.2.49.sav

This file was deleted.

18 changes: 18 additions & 0 deletions openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md
@@ -0,0 +1,18 @@

# Model details : dazls\_model\_baseline\_model

## Description
**Model Name**: dazls\_model\_baseline\_model

**Author**: KTP, Alliander

**Model type**: Energy splitting model

**Model Architecture**: LinearRegression

**Date**: 2024-04-26

## Intended use
This is a DAZLS model aimed at determining the energy splits for substations.
Each of these splits is determined based on a set of features that are available in production,
and in this case have their origin in the Dutch energy grid.
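
For context, a minimal sketch of the baseline architecture named in this model card: a `MinMaxScaler` feeding a `LinearRegression` wrapped in a `TransformedTargetRegressor`, mirroring the pipeline introduced in `openstef/model/regressors/dazls.py` later in this commit. The toy frames `X` and `y` below are hypothetical placeholders.

```python
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Scale the inputs, fit a linear model, and map predictions back to the
# original target scale via the inverse transform of the target scaler.
baseline = Pipeline(
    [
        ("scaler", MinMaxScaler(clip=True)),
        (
            "regressor",
            TransformedTargetRegressor(
                regressor=LinearRegression(),
                transformer=MinMaxScaler(clip=True),
            ),
        ),
    ]
)

# Hypothetical toy data; the real feature and target columns are listed
# in openstef/model/regressors/dazls.py.
X = pd.DataFrame({"radiation": [0.1, 0.4, 0.8], "windspeed_100m": [3.0, 5.0, 7.5]})
y = pd.DataFrame({"total_wind_part": [0.2, 0.5, 0.9], "total_solar_part": [0.1, 0.3, 0.6]})

baseline.fit(X, y)
print(baseline.predict(X))
```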

14 changes: 0 additions & 14 deletions openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md

This file was deleted.

135 changes: 28 additions & 107 deletions openstef/model/regressors/dazls.py
@@ -4,65 +4,41 @@
"""This module defines the DAZL model."""
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle


class Dazls(BaseEstimator):
"""DAZLS model.
The model carries out wind and solar power prediction for unseen target substations using training data from
other substations with known components.
This model has two sub-models:
- domain model : a model taking a set of 'input' features of a substation and make an 'initial' prediction.
Input features can be features such as: weather, geospatial, total load, etc.
These features are always directly related to the components' size in some way.
- adaptation model : a model taking a set of 'meta' features of a substation and refines the domain model's
prediction. Next to the features, it is trained on the domain model's predictions.
'Meta' features are features related to the uncertainty of the data, and include:
variance of the total load, standard deviation of the total load, etc.
Any data-driven model can be plugged and used as the base for the domain and the adaptation model.
CAUTION : 'Meta' features should be kept out of the domain model, and vice versa input features should be
kept out the adaptation model.
For a full reference, see:
Teng, S.Y., van Nooten, C. C., van Doorn, J.M., Ottenbros, A., Huijbregts, M., Jansen, J.J.
Improving Near Real-Time Predictions of Renewable Electricity Production at Substation Level (Submitted)
Args:
- BaseEstimator (object) : a base model that can be used to carry out predictions.
The model carries out wind and solar power prediction for unseen target substations using training data from other
substations with known components.
"""

model_: Pipeline

def __init__(self):
"""Initialize DAZL model."""
self.__name__ = "DAZLS"
self.domain_model_scaler = MinMaxScaler(clip=True)
self.adaptation_model_scaler = MinMaxScaler(clip=True)
self.target_scaler = MinMaxScaler(clip=True)
self.domain_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
self.adaptation_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")

regressor = TransformedTargetRegressor(
regressor=LinearRegression(),
transformer=MinMaxScaler(clip=True),
)

self.model_ = Pipeline(
[("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
)

# The input columns for the domain and adaptation models (with description)
self.domain_model_input_columns = [
self.baseline_input_columns = [
"radiation", # Weather parameter
"windspeed_100m", # Weather parameter
"total_substation", # Substation's measured total load
"lat", # Latitude
"lon", # Longitude
"hour", # Hour of the day
"minute", # Minute of the hour
]
self.adaptation_model_input_columns = [
"var_total", # Variance of the total load
"sem_total", # Standard Error of the Mean of the total load
"total_load",
]
self.target_columns = ["total_wind_part", "total_solar_part"]

@@ -76,34 +52,15 @@ def fit(self, features, target):
Args:
features: inputs for domain and adaptation model (domain_model_input, adaptation_model_input)
target: the expected output (y_train)
"""
x, x2, y = (
features.loc[:, self.domain_model_input_columns],
features.loc[:, self.adaptation_model_input_columns],
x, y = (
features.loc[:, self.baseline_input_columns],
target.loc[:, self.target_columns],
)
domain_model_input, adaptation_model_input, y_train = shuffle(
x, x2, y, random_state=999
) # just shuffling

self.domain_model_scaler.fit(domain_model_input)
self.adaptation_model_scaler.fit(adaptation_model_input)
self.target_scaler.fit(y_train)
domain_model_input = self.domain_model_scaler.transform(domain_model_input)
adaptation_model_input = self.adaptation_model_scaler.transform(
adaptation_model_input
)
y_train = self.target_scaler.transform(y_train)

self.domain_model.fit(domain_model_input, y_train)
domain_model_pred = self.domain_model.predict(domain_model_input)
adaptation_model_input = np.concatenate(
(adaptation_model_input, domain_model_pred), axis=1
)
self.adaptation_model.fit(adaptation_model_input, y_train)
self.model_.fit(x, y)

def predict(self, x: np.array, return_sub_preds: bool = False):
def predict(self, x: np.array):
"""Make a prediction.
For the prediction we use the test data x. We use domain_model_input_columns and
@@ -119,41 +76,10 @@ def predict(self, x: np.array, return_sub_preds: bool = False):
Returns:
prediction: The output prediction after both models.
"""
domain_model_test_data, adaptation_model_test_data = (
x.loc[:, self.domain_model_input_columns],
x.loc[:, self.adaptation_model_input_columns],
)
# Rescale test data for both models (if required)
domain_model_test_data_scaled = self.domain_model_scaler.transform(
domain_model_test_data
)
adaptation_model_test_data_scaled = self.adaptation_model_scaler.transform(
adaptation_model_test_data
)
# Use the scaled data to make domain_model_prediction
domain_model_test_data_pred = self.domain_model.predict(
domain_model_test_data_scaled
)
# Use the domain_model_prediction to make adaptation_model_prediction
adaptation_model_test_data_pred = self.adaptation_model.predict(
np.concatenate(
[adaptation_model_test_data_scaled, domain_model_test_data_pred], axis=1
)
)
# Rescale adaptation_model_prediction (if required)
prediction = self.target_scaler.inverse_transform(
adaptation_model_test_data_pred
)
model_test_data = x.loc[:, self.baseline_input_columns]

if return_sub_preds:
prediction_domain = self.target_scaler.inverse_transform(
domain_model_test_data_pred
)
return prediction, prediction_domain
else:
return prediction
return self.model_.predict(model_test_data)

def score(self, truth, prediction):
"""Evaluation of the prediction's output.
@@ -164,7 +90,6 @@ def score(self, truth, prediction):
Returns:
RMSE and R2 scores
"""
rmse = (mean_squared_error(truth, prediction)) ** 0.5
r2_score_value = r2_score(truth, prediction)
@@ -175,17 +100,13 @@ def __str__(self):
Returns:
Summary represented by a string
"""
summary_str = (
f"{self.__name__} model summary:\n\n"
f"Domain Model: {self.domain_model} \n"
f"\tInput columns: {self.domain_model_input_columns} \n"
f"\tScaler: {self.domain_model_scaler} \n\n"
f"Adaptation Model: {self.adaptation_model} \n"
f"\tInput columns: {self.adaptation_model_input_columns} \n"
f"\tScaler: {self.adaptation_model_scaler} \n\n"
f"Target columns: {self.target_columns}"
f"Model: {self.model_} \n"
f"\tInput columns: {self.baseline_input_columns} \n"
f"\tScaler: {self.model_['scaler']} \n\n"
f"\tRegressor: {self.model_['regressor']} \n\n"
)

return summary_str
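
A hedged end-to-end usage sketch of the simplified class after this change: the dataframes below are synthetic placeholders, and only the column names are taken from `baseline_input_columns` and `target_columns` above.

```python
import numpy as np
import pandas as pd

from openstef.model.regressors.dazls import Dazls

rng = np.random.default_rng(0)
n = 100

# Synthetic feature frame containing the baseline input columns.
features = pd.DataFrame(
    {
        "radiation": rng.random(n),
        "windspeed_100m": rng.random(n),
        "total_substation": rng.random(n),
        "lat": np.full(n, 52.0),
        "lon": np.full(n, 5.0),
        "hour": rng.integers(0, 24, n),
        "minute": rng.integers(0, 60, n),
    }
)
# Synthetic target frame with the wind and solar splits.
target = pd.DataFrame(
    {
        "total_wind_part": rng.random(n),
        "total_solar_part": rng.random(n),
    }
)

model = Dazls()
model.fit(features, target)              # fits the scaler + linear regression pipeline
prediction = model.predict(features)     # ndarray with one column per target (wind, solar)
scores = model.score(target, prediction) # RMSE and R2, per the score() docstring
```
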
21 changes: 2 additions & 19 deletions openstef/pipeline/create_component_forecast.py
@@ -18,7 +18,7 @@

# Set the path for the Dazls stored model
DAZLS_STORED = str(
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.7" / "dazls_stored_3.4.7_"
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
)


@@ -113,24 +113,7 @@ def create_components_forecast_pipeline(
# Save and load the model as .sav file (or as .z file)
# For the code contact: [email protected]
dazls_model = Dazls()
dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
dazls_model.domain_model_scaler = joblib.load(
DAZLS_STORED + "domain_model_scaler.z"
)
dazls_model.domain_model_input_columns = joblib.load(
DAZLS_STORED + "domain_model_features.z"
)

dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
dazls_model.adaptation_model_scaler = joblib.load(
DAZLS_STORED + "adaptation_model_scaler.z"
)
dazls_model.adaptation_model_input_columns = joblib.load(
DAZLS_STORED + "adaptation_model_features.z"
)

dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")

logger.info("DAZLS model loaded", dazls_model=str(dazls_model))

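The loading code above now expects a single `baseline_model.z` artifact next to the model card. Below is a sketch of how such an artifact could be produced and round-tripped with joblib, using placeholder training data; the real training workflow is not part of this commit.

```python
import joblib
import numpy as np
import pandas as pd

from openstef.model.regressors.dazls import Dazls

DAZLS_STORED = "openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_"

# Fit on placeholder data (the real training data is not part of this diff).
rng = np.random.default_rng(1)
cols = ["radiation", "windspeed_100m", "total_substation", "lat", "lon", "hour", "minute"]
features = pd.DataFrame(rng.random((50, len(cols))), columns=cols)
target = pd.DataFrame(rng.random((50, 2)), columns=["total_wind_part", "total_solar_part"])

model = Dazls()
model.fit(features, target)

# Persist only the fitted pipeline; create_components_forecast_pipeline()
# reassembles a Dazls instance and assigns the loaded object to `model_`.
joblib.dump(model.model_, DAZLS_STORED + "baseline_model.z")

loaded = Dazls()
loaded.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
```
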
27 changes: 2 additions & 25 deletions test/unit/pipeline/test_create_component_forecast.py
@@ -28,36 +28,13 @@ def test_load_dazls_model(self):
Assert that loading the old model generates an exception and the new model does not
"""

old_model_file = (
PROJECT_ROOT / "openstef/data/dazls_model_3.2.49/dazls_stored_3.2.49.sav"
)
new_model_file = str(
PROJECT_ROOT / "openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_"
PROJECT_ROOT / "openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_"
)

self.assertRaises(Exception, joblib.load, old_model_file)
dazls_model = Dazls()

dazls_model.domain_model = joblib.load(new_model_file + "domain_model.z")
dazls_model.domain_model_scaler = joblib.load(
new_model_file + "domain_model_scaler.z"
)
dazls_model.domain_model_input_columns = joblib.load(
new_model_file + "domain_model_features.z"
)

dazls_model.adaptation_model = joblib.load(
new_model_file + "adaptation_model.z"
)
dazls_model.adaptation_model_scaler = joblib.load(
new_model_file + "adaptation_model_scaler.z"
)
dazls_model.adaptation_model_input_columns = joblib.load(
new_model_file + "adaptation_model_features.z"
)

dazls_model.target_columns = joblib.load(new_model_file + "target.z")
dazls_model.target_scaler = joblib.load(new_model_file + "target_scaler.z")
dazls_model.model_ = joblib.load(new_model_file + "baseline_model.z")

assert dazls_model

