Merge branch 'main' into summation-fresh

openclimatefix · Feb 27, 2025 · 3834ae8 · 3834ae8
2 parents 83aa98b + 76e5c8a
commit 3834ae8
Show file tree

Hide file tree

Showing 6 changed files with 110 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -33,6 +33,8 @@ The following environment variables are used in the app:
 - `USE_OCF_DATA_SAMPLER`: Option to use OCF data sampler. Defaults to true.
 - `FORECAST_VALIDATE_ZIG_ZAG_WARNING`: Threshold for warning on forecast zig-zag, defaults to 250 MW.
 - `FORECAST_VALIDATE_ZIG_ZAG_ERROR`: Threshold for error on forecast zig-zag, defaults to 500 MW.
+- `FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT`: The sun elevation lower limit, in degrees, for forecast validation. Defaults to 10.
+When the sun elevation is above this limit, we expect positive forecast values; otherwise an error is raised.
 
 ### Examples
 
@@ -90,7 +92,7 @@ After the ML models have run, we check the following
 - The forecast is not above 110% of the national capacity. An error is raised if any forecast value is above 110% of the national capacity.
 - The forecast is not above 100 GW, any forecast value above 30 GW we get a warning but any forecast value above 100 GW we raise an error. 
 - If the forecast goes up, then down, then up, more than 500 MW we raise an error. A warning is made for 250 MW. This stops zig-zag forecasts. 
-- TODO: Check positive values in day: https://github.com/openclimatefix/uk-pvnet-app/issues/200
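+- The forecast is positive during the day: if the sun elevation is above `FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT` (default 10 degrees), an error is raised if any forecast value is zero or negative.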
 
 ## Development
 

diff --git a/src/pvnet_app/forecast_compiler.py b/src/pvnet_app/forecast_compiler.py
@@ -286,6 +286,7 @@ def compile_forecasts(self) -> None:
         national_forecast_values = da_abs_national.sel(
             output_label="forecast_mw", gsp_id=0).values
 
+        national_forecast_values = pd.Series(data=national_forecast_values, index=self.valid_times)
         validate_forecast(
             national_forecast_values=national_forecast_values,
             national_capacity=self.national_capacity,
@@ -501,4 +502,4 @@ def change_keys_to_ocf_datapipes_keys(batch):
         nwp_config = batch[BatchKey.nwp]
         for nwp_source in nwp_config.keys():
             batch[BatchKey.nwp][nwp_source][NWPBatchKey.nwp] = batch[BatchKey.nwp][nwp_source]["nwp"]
-            del batch[BatchKey.nwp][nwp_source]["nwp"]
+            del batch[BatchKey.nwp][nwp_source]["nwp"]
diff --git a/src/pvnet_app/validate_forecast.py b/src/pvnet_app/validate_forecast.py
@@ -1,9 +1,11 @@
 import os
 import numpy as np
+import pandas as pd
+import pvlib
 from collections.abc import Callable
 
 def validate_forecast(
-    national_forecast_values: np.ndarray,
+    national_forecast_values: pd.Series,
     national_capacity: float,
     logger_func: Callable[[str], None],
 ) -> None:
@@ -19,7 +21,7 @@ def validate_forecast(
         Exception: if above certain critical thresholds.
     """
     # Compute the maximum from the entire forecast array
-    max_forecast_mw = float(np.max(national_forecast_values))
+    max_forecast_mw = national_forecast_values.max()
 
     # Check it doesn't exceed 10% above national capacity
     if max_forecast_mw > 1.1 * national_capacity:
@@ -45,7 +47,7 @@ def validate_forecast(
     # Compute differences between consecutive timestamps
     zig_zag_gap_warning = float(os.getenv('FORECAST_VALIDATE_ZIG_ZAG_WARNING', 250))
     zig_zag_gap_error = float(os.getenv('FORECAST_VALIDATE_ZIG_ZAG_ERROR', 500))
-    diff = np.diff(national_forecast_values)
+    diff = national_forecast_values.diff().values
     large_jumps = \
         (diff[0:-2] > zig_zag_gap_warning) & \
         (diff[1:-1] < -zig_zag_gap_warning) & \
@@ -62,3 +64,23 @@ def validate_forecast(
     if np.any(critical_jumps):
         raise Exception(
             "FAIL: Forecast has critical fluctuations (≥500 MW up and down).")
+
+    # Read the sun elevation lower limit (in degrees) from the environment, defaulting to 10
+    sun_elevation_lower_limit = float(os.getenv('FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT', 10))
+
+    # Validate based on sun elevation > 10 degrees
+    solpos = pvlib.solarposition.get_solarposition(
+        time=national_forecast_values.index,
+        latitude=55.3781,  # UK central latitude
+        longitude=-3.4360,  # UK central longitude
+        method='nrel_numpy'
+    )
+
+    # Check if forecast values are > 0 when sun elevation > 10 degrees
+    elevation_above_limit = solpos["elevation"] > sun_elevation_lower_limit
+
+    # Ensure the index of elevation_above_limit matches the index of national_forecast_values
+    elevation_above_limit = elevation_above_limit.reindex(national_forecast_values.index, fill_value=False)
+
+    if (national_forecast_values[elevation_above_limit] <= 0).any():
+        raise Exception(f"Forecast values must be > 0 when sun elevation > {sun_elevation_lower_limit} degree.")
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -43,6 +43,7 @@ def test_app(test_t0, db_session, nwp_ukv_data, nwp_ecmwf_data, sat_5_data_zero_
         os.environ["SAVE_GSP_SUM"] = "True"
         os.environ["DAY_AHEAD_MODEL"] = "False"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Run prediction
         # These imports need to come after the environ vars have been set
@@ -107,6 +108,7 @@ def test_app_no_sat(test_t0, db_session, nwp_ukv_data, nwp_ecmwf_data, db_url):
         os.environ["DAY_AHEAD_MODEL"] = "False"
         os.environ["USE_OCF_DATA_SAMPLER"] = "True"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Run prediction
         # Thes import needs to come after the environ vars have been set
@@ -173,6 +175,7 @@ def test_app_day_ahead_data_sampler(test_t0, db_session, nwp_ukv_data, nwp_ecmwf
         os.environ["RUN_EXTRA_MODELS"] = "False"
         os.environ["USE_OCF_DATA_SAMPLER"] = "True"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Import at runtime to ensure environment variables are set
         from pvnet_app.app import app

diff --git a/tests/test_app_legacy.py b/tests/test_app_legacy.py
@@ -38,6 +38,7 @@ def test_app_ecwmf_only(test_t0, db_session, nwp_ecmwf_data, db_url):
         os.environ["USE_OCF_DATA_SAMPLER"] = "False"
         os.environ["USE_ECMWF_ONLY"] = "True"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Run prediction
         # Thes import needs to come after the environ vars have been set
@@ -106,6 +107,7 @@ def test_app(test_t0, db_session, nwp_ukv_data, nwp_ecmwf_data, sat_5_data, db_u
         os.environ["USE_OCF_DATA_SAMPLER"] = "False"
         os.environ["USE_ECMWF_ONLY"] = "False"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Run prediction
         # Thes import needs to come after the environ vars have been set
@@ -173,6 +175,7 @@ def test_app_day_ahead_model(test_t0, db_session, nwp_ukv_data, nwp_ecmwf_data,
         os.environ["RUN_EXTRA_MODELS"] = "False"
         os.environ["USE_OCF_DATA_SAMPLER"] = "False"
         os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "100000"
+        os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "90"
 
         # Run prediction
         # Thes import needs to come after the environ vars have been set

diff --git a/tests/test_validate_forecast.py b/tests/test_validate_forecast.py
@@ -1,6 +1,8 @@
 import logging
 
 import numpy as np
+import pandas as pd
+import pvlib
 import os
 import pytest
 
@@ -15,10 +17,13 @@ def test_validate_forecast_ok():
     # Ccapture log messages in a list so assertions can be done on them if needed
     logs = []
 
-    def dummy_logger(msg: str): logs.append(msg)
+    def dummy_logger(msg: str):
+        logs.append(msg)
 
     # Forecast is significantly below capacity => no warnings or errors
-    national_forecast_values = np.array([10, 20, 30])  # MW
+    national_forecast_values = pd.Series(
+        [10, 20, 30], index=pd.date_range("2025-01-01", "2025-01-01 01:00", 3)
+    )  # MW
     national_capacity = 50  # MW
 
     validate_forecast(
@@ -36,10 +41,15 @@ def test_validate_forecast_above_110percent_raises():
     Test that validate_forecast raises an Exception when the maximum
     forecast value exceeds 110% of capacity.
     """
+
+    national_forecast_values = pd.Series(
+        [60], index=pd.to_datetime(["2025-01-01 00:00"])
+    )  # MW
+
     # 60 MW > 1.1 * 50 MW => should raise an Exception
     with pytest.raises(Exception) as excinfo:
         validate_forecast(
-            national_forecast_values=np.array([60]),
+            national_forecast_values=national_forecast_values,
             national_capacity=50,
             logger_func=lambda x: None,  # We don't care about logs here
         )
@@ -51,10 +61,15 @@ def test_validate_forecast_warns_when_over_30gw(caplog):
     Test that validate_forecast warns if the forecast exceeds 30 GW (30,000 MW).
     We'll use pytest's 'caplog' fixture to check for the warning message.
     """
+
+    national_forecast_values = pd.Series(
+        [31_000], index=pd.date_range("2025-01-01", "2025-01-01 01:00", 1)
+    )  # MW
+
     # 31,000 MW is above 30 GW => Should generate a warning log
     with caplog.at_level(logging.INFO):
         validate_forecast(
-            national_forecast_values=np.array([31_000]),
+            national_forecast_values=national_forecast_values,
             national_capacity=100_000,
             logger_func=logging.info,
         )
@@ -66,15 +81,18 @@ def test_validate_forecast_above_100_gw_raises():
     """
     Test that validate_forecast raises an Exception if forecast is above 100 GW.
     """
+    national_forecast_values = pd.Series(
+        [101_000], index=[pd.date_range("2025-01-01", "2025-01-01 01:00", 1)]
+    )  # MW
+
     # 101,000 MW is above 100 GW => Should raise an Exception
     with pytest.raises(Exception) as excinfo:
         validate_forecast(
-            national_forecast_values=np.array([101_000]),
+            national_forecast_values=national_forecast_values,
             national_capacity=200_000,
             logger_func=lambda x: None,
         )
-    assert "Hard FAIL: The maximum of the forecast is above 100 GW!" in str(
-        excinfo.value)
+    assert "Hard FAIL: The maximum of the forecast is above 100 GW!" in str(excinfo.value)
 
 
 def test_validate_forecast_no_fluctuations():
@@ -85,43 +103,83 @@ def logger_func(message):
         logged_messages.append(message)
 
     os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "500"
-    national_forecast_values = np.array([1000, 1100, 1050, 1200, 1150])
     national_capacity = 2000
+    national_forecast_values = pd.Series(
+        [1000, 1100, 1050, 1200, 1150], index=pd.date_range(start="2025-01-01 00:00",  periods=5, freq="30min")
+    )  # MW
 
     # No warnings or exceptions expected
     validate_forecast(national_forecast_values, national_capacity, logger_func)
 
     assert not logged_messages, "Unexpected warnings logged!"
 
 
-def test_validate_forecast_with_warning():
+def test_validate_forecast_zig_zag_with_warning():
     """Test case where a warning should be logged due to fluctuations ≥250 MW up and down."""
     logged_messages = []
 
     def logger_func(message):
         logged_messages.append(message)
 
     os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "500"
-    national_forecast_values = np.array([1000, 1300, 800, 1200, 500])
     national_capacity = 2000
+    national_forecast_values = pd.Series(
+        [1000, 1300, 800, 1200, 500], index=pd.date_range("2025-01-01", "2025-01-01 01:00", 5)
+    )  # MW
 
     validate_forecast(national_forecast_values, national_capacity, logger_func)
 
-    assert any("WARNING: Forecast has sudden fluctuations" in msg for msg in logged_messages), \
-        "Expected warning not found!"
+    assert any(
+        "WARNING: Forecast has sudden fluctuations" in msg for msg in logged_messages
+    ), "Expected warning not found!"
 
 
-def test_validate_forecast_with_exception():
+def test_validate_forecast_zig_zag_with_exception():
     """Test case where an exception should be raised due to critical fluctuations ≥500 MW up and down."""
     logged_messages = []
 
     def logger_func(message):
         logged_messages.append(message)
 
     os.environ["FORECAST_VALIDATE_ZIG_ZAG_ERROR"] = "500"
-    national_forecast_values = np.array([1000, 1600, 800, 1301, 500])
     national_capacity = 2000
+    national_forecast_values = pd.Series(
+        [1000, 1600, 800, 1301, 500], index=pd.date_range("2025-01-01", "2025-01-01 01:00", 5)
+    )  # MW
 
     with pytest.raises(Exception, match="FAIL: Forecast has critical fluctuations"):
-        validate_forecast(national_forecast_values,
-                          national_capacity, logger_func)
+        validate_forecast(national_forecast_values, national_capacity, logger_func)
+
+
+def test_validate_forecast_sun_elevation_check():
+    """
+    Test that validate_forecast raises an Exception when forecast values are ≤ 0
+    while sun elevation is above FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT.
+    """
+    # Set the threshold, then read it back from the same variable validate_forecast uses
+    os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"] = "10"
+    sun_elevation_lower_limit = float(os.environ["FORECAST_VALIDATION_SUN_ELEVATION_LOWER_LIMIT"])
+
+    # Create a time range for the test
+    time_range = pd.date_range("2025-01-01 06:00", "2025-01-01 18:00", freq="30min", tz="UTC")
+
+    # Create forecast values (some values are ≤ 0 to trigger the exception)
+    forecast_values = pd.Series(
+        [0, 50, 100, -1, 75] * 5,
+        index=time_range,
+    )
+
+    with pytest.raises(Exception) as excinfo:
+        validate_forecast(
+            national_forecast_values=forecast_values,
+            national_capacity=1000,
+            logger_func=lambda x: None,  # Don't check logs here
+        )
+
+    # The limit is formatted as a float, matching how validate_forecast builds its message
+    expected_message = (
+        f"Forecast values must be > 0 when sun elevation > {sun_elevation_lower_limit}"
+    )
+    assert expected_message in str(
+        excinfo.value
+    ), f"Expected message not found! Got: {str(excinfo.value)}"