Merge branch 'develop' into main

OpenSTEF · Mar 5, 2021 · e64ad41 · e64ad41
2 parents 436b6c3 + 3d0a122
commit e64ad41
Show file tree

Hide file tree

Showing 17 changed files with 355 additions and 68 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,13 +1,8 @@
-<!--
-SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
-
-SPDX-License-Identifier: MPL-2.0
--->
 ---
 name: Bug report
 about: Create a report to help us improve
 title: "[BUG] *descriptive_name*"
-labels: ''
+labels: 'bug'
 assignees: ''
 
 ---

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md.license b/.github/ISSUE_TEMPLATE/bug_report.md.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
+
+SPDX-License-Identifier: MPL-2.0
diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md
@@ -1,8 +1,3 @@
-<!--
-SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
-
-SPDX-License-Identifier: MPL-2.0
--->
 ---
 name: Documentation request
 about: Suggest documentation for this project

diff --git a/.github/ISSUE_TEMPLATE/documentation.md.license b/.github/ISSUE_TEMPLATE/documentation.md.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
+
+SPDX-License-Identifier: MPL-2.0
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,14 +1,8 @@
-<!--
-SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
-
-SPDX-License-Identifier: MPL-2.0
--->
-
 ---
 name: Feature request
 about: Suggest an idea for this project
 title: "[Feature] "
-labels: ''
+labels: 'feature'
 assignees: ''
 
 ---

diff --git a/.github/ISSUE_TEMPLATE/feature_request.md.license b/.github/ISSUE_TEMPLATE/feature_request.md.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
+
+SPDX-License-Identifier: MPL-2.0
diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
@@ -0,0 +1,17 @@
+**Fixes issue**: `# name and number of the issue`
+
+### Changes proposed in this PR include:
+
+> Here you can elaborate on the chosen solution strategy: which changes you made and which goal they serve. Perhaps also mention which things you are still unsure of.
+
+-
+-
+- ..
+
+### Could you please pay extra attention to the points below when reviewing the PR:
+
+> Here you can point out modules or complex implementations that require special attention, e.g. have a look at module `foo.py` and `bar.py`.
+
+- 
+- 
+- ..
diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md.license b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>
+
+SPDX-License-Identifier: MPL-2.0
diff --git a/.github/workflows/docs-check.yaml b/.github/workflows/docs-check.yaml
@@ -3,8 +3,16 @@
 # SPDX-License-Identifier: MPL-2.0
 name: Docs Check
 
+# Check docs when directly editing develop or main
+# and on pull request
 on:
-- pull_request
+  push:
+    branches:
+      - develop
+      - main
+  pull_request:
+    branches:
+      - "**"
 
 jobs:
   docs:
@@ -17,5 +25,8 @@ jobs:
     - name: Build documentation
       uses: ammaraskar/sphinx-action@master
       with:
-        pre-build-command: "cp requirements.txt docs/requirements.txt | sphinx-apidoc -o docs openstf"
+        pre-build-command: |
+          cp requirements.txt docs/requirements.txt
+          echo "sphinx_rtd_theme" >> docs/requirements.txt
+          sphinx-apidoc -o docs openstf
         docs-folder: "docs/"
diff --git a/.github/workflows/docs-publish.yaml b/.github/workflows/docs-publish.yaml
@@ -19,7 +19,10 @@ jobs:
     - name: Build documentation
       uses: ammaraskar/sphinx-action@master
       with:
-        pre-build-command: "cp requirements.txt docs/requirements.txt |sphinx-apidoc -o docs openstf"
+        pre-build-command: |
+          cp requirements.txt docs/requirements.txt
+          echo "sphinx_rtd_theme" >> docs/requirements.txt
+          sphinx-apidoc -o docs openstf
         docs-folder: "docs/"
     # Upload artifact so it is available from the action-window
     - name: Upload artifact

diff --git a/README.md b/README.md
@@ -48,7 +48,7 @@ python -m openstf task <task_name>
 This project is licensed under the Mozilla Public License, version 2.0 - see LICENSE for details.
 
 ## Licenses third-party libraries
-This project includes third-party libraries, which are licensed under their own respective Open-Source licenses. SPDX-License-Identifier headers are used to show which license is applicable. The concerning license files can be found in the LINCESES directory.
+This project includes third-party libraries, which are licensed under their own respective Open-Source licenses. SPDX-License-Identifier headers are used to show which license is applicable. The relevant license files can be found in the LICENSES directory.
 
 ## Contributing
 

diff --git a/openstf/model/split_energy.py b/openstf/model/split_energy.py
@@ -12,9 +12,12 @@
 
 import openstf.monitoring.teams as monitoring
 
+COEF_MAX_FRACTION_DIFF = 0.3
+
 
 def split_energy(pid):
-    """Function that caries out the energy splitting for a specific prediction job with id pid
+    """Function that carries out the energy splitting for a specific prediction job with
+    id pid.
 
     Args:
         pid (int): Prediction job id
@@ -42,23 +45,79 @@ def split_energy(pid):
     error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1]
     mae = error.abs().mean()
     coefdict.update({"MAE": mae})
+    coefsdf = convert_coefdict_to_coefsdf(pj, input_split_function, coefdict)
+
+    # Get the coefs of previous runs and check if new coefs are valid
+    last_coefsdict = db.get_energy_split_coefs(pj)
+    last_coefsdf = convert_coefdict_to_coefsdf(pj, input_split_function, last_coefsdict)
+    invalid_coefs = determine_invalid_coefs(coefsdf, last_coefsdf)
+    if not invalid_coefs.empty:
+        # If coefs not valid, do not update the coefs in the db and send teams
+        # message that something strange is happening
+        monitoring.post_teams_alert(
+            f"New splitting coefficient(s) for pid **{pj['id']}** deviate strongly "
+            f"from previously stored coefficients.",
+            invalid_coefs=invalid_coefs,
+            coefsdf=coefsdf,
+        )
+        # Use the last known coefficients for further processing
+        return last_coefsdf
+    else:
+        # Save Results
+        db.write_energy_splitting_coefficients(coefsdf, if_exists="append")
+        logger.info(
+            "Succesfully wrote energy split coefficients to database", pid=pj["id"]
+        )
+        return coefsdf
+
+
+def determine_invalid_coefs(new_coefs, last_coefs):
+    """Determine which new coefficients are invalid and return them.
+
+    Args:
+        new_coefs (pd.DataFrame): df of new coefficients for standard load
+            profiles (i.e. wind, solar, household)
+        last_coefs (pd.DataFrame): df of last coefficients for standard load
+            profiles (i.e. wind, solar, household)
 
-    # Get average coefs of previous runs
-    mean_coefs = db.get_energy_split_coefs(pj, mean=True)
-    # Loop over keys and check if the difference with the average value is not more than 100%
-    # In case the difference is more tha 100% of the average set KPI coefs as expected to False
-    # If no previous coefs are stored an mean_coefs is empty and this loop wil not run
-    for key in mean_coefs.keys():
-        diff = mean_coefs[key] - coefdict[key]
-        if diff > mean_coefs[key]:
-            # Send teams message something strange is happening
-            monitoring.post_teams_alert(
-                "New splitting coefficients for pid {} deviate strongly from previously stored coefficients".format(
-                    pj["id"]
-                )
-            )
-
-    # Prepare dataframe to store in SQL database
+    Returns:
+        pd.DataFrame: df of invalid coefficients
+    """
+    merged_coefs = pd.merge(
+        last_coefs, new_coefs, on="coef_name", how="left", suffixes=["_last", "_new"]
+    )
+    # Calculate the absolute difference between the last and the new coefficients.
+    # A coefficient that is missing from the new coefficients gets difference inf,
+    # so that it is flagged as invalid. A coefficient that is only present in the
+    # new coefficients is dropped by the left merge and is therefore never flagged.
+    merged_coefs["difference"] = (
+        (merged_coefs.coef_value_last - merged_coefs.coef_value_new)
+        .abs()
+        .fillna(np.inf)
+    )
+    # Check if the absolute difference between last coefficients and new coefficients
+    # is more than COEF_MAX_FRACTION_DIFF x absolute value of last coefficient
+    invalid_coefs = merged_coefs[
+        merged_coefs.difference
+        > (COEF_MAX_FRACTION_DIFF * merged_coefs.coef_value_last).abs()
+    ]
+    return invalid_coefs
+
+
+def convert_coefdict_to_coefsdf(pj, input_split_function, coefdict):
+    """Convert dictionary of coefficients to dataframe with additional data for db
+    storage.
+
+    Args:
+        pj (PredictionJob): prediction job
+        input_split_function (pd.DataFrame): df of columns of standard load profiles,
+            i.e. wind, solar, household
+        coefdict (dict): dict of coefficient per standard load profile
+
+    Returns:
+        pd.DataFrame: df of coefficients to insert in sql
+    """
+    # Prepare the additional columns needed for storage in the SQL database
     sql_column_labels = ["pid", "date_start", "date_end", "created"]
     sql_colum_values = [
         pj["id"],
@@ -72,26 +131,22 @@ def split_energy(pid):
     for i, column in enumerate(sql_column_labels):
         coefsdf[column] = sql_colum_values[i]
 
-    # Save Results
-    db.write_energy_splitting_coefficients(coefsdf, if_exists="append")
-    logger.info("Succesfully wrote energy split coefficients to database", pid=pj["id"])
     return coefsdf
 
 
 def find_components(df, zero_bound=True):
     """Function that does the actual energy splitting
 
     Args:
-        df: Pandas data frame with input data. The dataframe should contain these
-            columns in exactly this order: [load, wind_ref, pv_ref, mulitple tdcv colums]
-        zerobound: Tells us wheter coefficients can be negative, true if this cannot be
-            the case.
+        df (pandas.DataFrame): Input data. The dataframe should contain these columns
+            in exactly this order: [load, wind_ref, pv_ref, multiple tdcv columns]
+        zero_bound (bool): If zero_bound is True coefficients can't be negative.
 
     Returns:
         tuple:
-            [0] components: pandas dataframe containing the wind and solar components
-            [0] pandas.DataFrame:
-            [1] coefs: dict containing the coefficients that result from the fitting"""
+            [0] pandas.DataFrame: Containing the wind and solar components
+            [1] dict: The coefficients that result from the fitting
+    """
 
     # Define function to fit
     def weighted_sum(x, *args):
@@ -119,10 +174,11 @@ def weighted_sum(x, *args):
         bounds = ("-inf", "inf")
 
     # Carry out fitting
+    # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa
     coefs, cov = scipy.optimize.curve_fit(
         weighted_sum,
-        df.iloc[:, 1:].values.T,
-        load.values,
+        xdata=df.iloc[:, 1:].values.T,
+        ydata=load.values,
         p0=p0,
         bounds=bounds,
         method="trf",

diff --git a/openstf/monitoring/teams.py b/openstf/monitoring/teams.py
@@ -80,29 +80,96 @@ def post_teams(msg, url=None):
     card.send()
 
 
-def post_teams_alert(msg, url=None):
+def post_teams_alert(msg, invalid_coefs=None, coefsdf=None, url=None):
     """Same as post_teams, but posts to alert channel.
 
     Args:
-    msg (mixed): For simple messages a string can be passed. For more
+        msg (mixed): For simple messages a string can be passed. For more
             complex messages pass a dict. The following keys are supported:
             text, links, sections. Each section is a dict and can contain the
             following keys: text, title, images, facts, markdown. Also see:
             https://docs.microsoft.com/en-us/outlook/actionable-messages/send-via-connectors
-
+        invalid_coefs (pd.DataFrame, optional): df of information of invalid
+            coefficients. Defaults to None.
+        coefsdf (pd.DataFrame, optional): df of new coefficients. Defaults to None.
     Note:
         This function is namespace-specific.
-
     """
     config = ConfigManager.get_instance()
-
+    # Add invalid coefficients and manual coefficients-query to message
+    if invalid_coefs is not None and coefsdf is not None:
+        # add invalid coefficient information to message in dict-format
+        invalid_coefs_text = "".join(
+            [
+                f"\n* **{row.coef_name}**: {round(row.coef_value_new, 2)}, "
+                f"(previous: {round(row.coef_value_last, 2)})"
+                for index, row in invalid_coefs.iterrows()
+            ]
+        )
+        query = build_sql_query_string(coefsdf, "energy_split_coefs")
+        query_text = "If you would like to update the coefficients manually in the "
+        f"database, use this query:"
+        msg = {
+            "fallback": msg,
+            "title": "Invalid energy splitting coefficients",
+            "sections": [
+                {
+                    "text": msg,
+                    "markdown": True,
+                },
+                {
+                    "text": invalid_coefs_text,
+                    "markdown": True,
+                },
+                {
+                    "title": "Manual query",
+                    "text": query_text,
+                    "markdown": True,
+                },
+                {
+                    "text": query,
+                    "markdown": True,
+                },
+            ],
+        }
     if url is None:
         if hasattr(config, "teams") is True:
             url = config.teams.alert_url
 
     return post_teams(msg, url=url)
 
 
+def build_sql_query_string(df, table):
+    """Build sql insert query string for Teams message output from df.
+
+    Args:
+        df (pd.DataFrame): df of table values to insert in sql
+        table (string): table to insert df into
+
+    Returns:
+        string: sql query string of insert statement
+    """
+    # round all values to two decimals
+    df = df.round(2)
+    # convert datetime to string format
+    datetime_columns = df.columns[
+        df.columns.isin(["date_start", "date_end", "created"])
+    ]
+    for col in datetime_columns:
+        df[col] = df[col].astype("str")
+
+    sql_texts = [
+        "```INSERT INTO " + table + " (" + str(", ".join(df.columns)) + ") VALUES  \n"
+    ]
+    for index, row in df.iterrows():
+        if index != df.index[0]:
+            sql_texts.append(",  \n")  # 2 spaces and \n create a new line
+        sql_texts.append(str(tuple(row.values)))
+    sql_texts.append("```")
+    query = "".join(sql_texts)
+    return query
+
+
 def send_report_teams_better(pj, feature_importance):
     """Send a report to teams for monitoring input for an improved model.
 

diff --git a/requirements.txt b/requirements.txt
@@ -20,9 +20,3 @@ setuptools~=45.2.0
 sklearn~=0.0
 wheel~=0.36.2
 xgboost~=1.3.3
-lightgbm~=3.1.1
-
-# for ddocumentation
-sphinx
-sphinx_rtd_theme
-
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <[email protected]>

		SPDX-License-Identifier: MPL-2.0