Merge pull request #83 from GeoscienceAustralia/validation_updates
Publish updates to validation code and STAC notebook
erialC-P authored Apr 12, 2024
2 parents d5e8cc6 + b15d94c commit 331aac1
Showing 12 changed files with 1,838 additions and 3,382 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -20,9 +20,9 @@
---

The DEA Intertidal product suite maps the changing extent, elevation and topography of Australia's exposed intertidal zone. It is the next generation of DEA's intertidal products that have been used across government and industry to help better characterise and understand this complex zone that defines the interface between land and sea.
The DEA Intertidal product suite maps the changing elevation, exposure and tidal characteristics of Australia's exposed intertidal zone, the complex zone that defines the interface between land and sea. It is the next generation of DEA's intertidal products, which have been used across government and industry to help better characterise and understand this dynamic environment.

Incorporating both Sentinel-2 and Landsat data, the product suite adds a temporal component to the elevation product for the intertidal zone, enabling users to better monitor and understand some of the most dynamic regions of Australia's coastlines. With an improved tidal modelling capability, the product suite has been expanded to include a continental scale mapping of intertidal exposure over time, enabling scientists and managers to integrate the data into ecological and migratory species applications and modelling.
Incorporating both Sentinel-2 and Landsat data, the product suite provides an annual 10 m resolution elevation product for the intertidal zone, enabling users to better monitor and understand some of the most dynamic regions of Australia's coastlines. Utilising an improved tidal modelling capability, the product suite includes a continental scale mapping of intertidal exposure over time, enabling scientists and managers to integrate the data into ecological and migratory species applications and modelling.

## Repository structure

31 changes: 10 additions & 21 deletions intertidal/utils.py
@@ -134,7 +134,7 @@ def export_intertidal_rasters(

def intertidal_hillshade(
elevation,
extents,
freq,
azdeg=315,
altdeg=45,
dyx=10,
@@ -143,14 +143,14 @@
):
"""
Create a hillshade array for an intertidal zone given an elevation
array and an extents array.
array and a frequency array.
Parameters
----------
elevation : str or xr.DataArray
An xr.DataArray or a path to the elevation raster file.
extents : str or xr.DataArray
xr.DataArray or a path to the extents raster file.
Elevation data
freq : str or xr.DataArray
NDWI frequency data
azdeg : float, optional
The azimuth angle of the light source, in degrees. Default is 315.
altdeg : float, optional
@@ -174,23 +174,12 @@
import matplotlib.pyplot as plt
import xarray as xr

# Read data
if isinstance(elevation, str):
elevation = xr.open_rasterio(elevation).squeeze("band")
if isinstance(extents, str):
extents = xr.open_rasterio(extents).squeeze("band")

# Fill upper and bottom of intertidal zone with min and max heights
# so that hillshade can be applied across the entire raster
# elevation_filled = xr.where(extents == 0, elevation.min(), elevation)
# elevation_filled = xr.where(extents == 2, elevation.max(), elevation_filled)
elevation_filled = xr.where(extents == 0, elevation.max(), elevation)
elevation_filled = xr.where(extents == 2, elevation.min(), elevation_filled)
elevation_filled = xr.where(extents == 3, elevation.max(), elevation_filled)
elevation_filled = xr.where(extents == 4, elevation.max(), elevation_filled)
elev_min, elev_max = elevation.quantile([0, 1])
elevation_filled = xr.where(elevation.isnull() & (freq < 50), elev_max, elevation).fillna(elev_min)

from scipy.ndimage import gaussian_filter

input_data = gaussian_filter(elevation_filled, sigma=1)

# Create hillshade based on elevation data
@@ -207,17 +196,17 @@

# Mask out non-intertidal pixels
hillshade = np.where(
np.expand_dims(extents.values == 1, axis=-1), hillshade, np.nan
np.expand_dims(elevation.notnull().values, axis=-1), hillshade, np.nan
)

# Create a new xarray data array from the numpy array
hillshaded_da = xr.DataArray(
hillshade,
hillshade * 255,
dims=["y", "x", "variables"],
coords={
"y": elevation.y,
"x": elevation.x,
"variables": ["red", "green", "blue", "alpha"],
"variables": ["r", "g", "b", "a"],
},
)
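The fill-then-shade pattern used in `intertidal_hillshade` above — replace elevation gaps with range extremes so shading is continuous, then apply an azimuth/altitude light source — can be sketched in isolation with matplotlib's `LightSource` (a standalone toy example with made-up values, not the DEA code itself):

```python
import numpy as np
from matplotlib.colors import LightSource

# Toy elevation grid with a NaN gap, mimicking intertidal elevation data
elevation = np.array(
    [[0.0, 0.5, 1.0],
     [0.2, np.nan, 1.2],
     [0.4, 0.9, 1.4]]
)

# Fill the gap with the minimum elevation so hillshading is continuous
filled = np.where(np.isnan(elevation), np.nanmin(elevation), elevation)

# LightSource implements the same azimuth/altitude hillshading model;
# azdeg=315, altdeg=45 match the function's defaults, dx/dy the 10 m pixels
ls = LightSource(azdeg=315, altdeg=45)
shaded = ls.hillshade(filled, vert_exag=1, dx=10, dy=10)

# `shaded` is an intensity array in [0, 1] with the same shape as the input
```

In the real function the shaded result is then masked back to valid intertidal pixels, so the filled values only influence shading at the zone's edges.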

118 changes: 27 additions & 91 deletions intertidal/validation.py
@@ -2,76 +2,6 @@
from odc.algo import mask_cleanup


def eval_metrics(x, y, round=3, all_regress=False):
"""
Calculate a set of common statistical metrics
based on two input actual and predicted vectors.
These include:
- Pearson correlation
- Root Mean Squared Error
- Mean Absolute Error
- R-squared
- Bias
- Linear regression parameters (slope,
p-value, intercept, standard error)
Parameters
----------
x : numpy.array
An array providing "actual" variable values
y : numpy.array
An array providing "predicted" variable values
round : int
Number of decimal places to round each metric
to. Defaults to 3
all_regress : bool
Whether to return linear regression p-value,
intercept and standard error (in addition to
only regression slope). Defaults to False
Returns
-------
A pandas.Series containing calculated metrics
"""

import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from math import sqrt
from scipy import stats

# Create dataframe to drop na
xy_df = pd.DataFrame({"x": x, "y": y}).dropna()

# Compute linear regression
lin_reg = stats.linregress(x=xy_df.x, y=xy_df.y)

# Calculate statistics
stats_dict = {
"Correlation": xy_df.corr().iloc[0, 1],
"RMSE": sqrt(mean_squared_error(xy_df.x, xy_df.y)),
"MAE": mean_absolute_error(xy_df.x, xy_df.y),
"R-squared": lin_reg.rvalue ** 2,
"Bias": (xy_df.y - xy_df.x).mean(),
"Regression slope": lin_reg.slope,
}

# Additional regression params
if all_regress:
stats_dict.update(
{
"Regression p-value": lin_reg.pvalue,
"Regression intercept": lin_reg.intercept,
"Regression standard error": lin_reg.stderr,
}
)

    # Return metrics as a pandas Series
return pd.Series(stats_dict).round(round)
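The statistics computed by the removed `eval_metrics` function reduce to a few scipy/numpy expressions (the original also used sklearn's `mean_squared_error` and `mean_absolute_error`, which are equivalent to the numpy forms below). A toy check with made-up actual/predicted values:

```python
import numpy as np
from scipy import stats

# Toy "actual" (x) vs "predicted" (y) elevation values
x = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
y = np.array([0.1, 0.4, 1.1, 1.4, 2.2])

# Linear regression gives slope, intercept, r-value, p-value, stderr
lin_reg = stats.linregress(x=x, y=y)

rmse = np.sqrt(np.mean((x - y) ** 2))  # root mean squared error
mae = np.mean(np.abs(x - y))           # mean absolute error
bias = (y - x).mean()                  # positive = predictions too high
r_squared = lin_reg.rvalue ** 2
```

Dropping NaN pairs first (as the original does via a `DataFrame.dropna()`) matters because `linregress` and the error metrics all propagate NaNs.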


def map_raster(
ds,
dst_nodata=np.nan,
@@ -92,7 +22,7 @@ def map_raster(
cmap = [cmap] if not isinstance(cmap, list) else cmap
vmin = [vmin] if not isinstance(vmin, list) else vmin
vmax = [vmax] if not isinstance(vmax, list) else vmax

# Multiply out visualisation params to length of ds
cmap = cmap * len(ds) if len(cmap) == 1 else cmap
vmin = vmin * len(ds) if len(vmin) == 1 else vmin
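The wrap-then-multiply idiom above lets `map_raster` accept either a single styling parameter or one per layer. Sketched in isolation with toy values (not the DEA code itself):

```python
# A scalar colormap name supplied for three layers
cmap = "viridis"
n_layers = 3  # stands in for len(ds)

# Wrap a scalar in a single-element list
cmap = [cmap] if not isinstance(cmap, list) else cmap

# Broadcast a single value out to one entry per layer;
# an explicit per-layer list is left untouched
cmap = cmap * n_layers if len(cmap) == 1 else cmap
```

A list of length 1 is repeated; a list already matching the layer count passes through unchanged.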
@@ -139,28 +69,34 @@ def map_raster(
display(m)


def preprocess_validation(modelled_ds, validation_ds, clean_modelled=None, clean_validation=None):
def preprocess_validation(
validation_ds, modelled_ds, uncertainty_ds, lat, hat, clean_slope=True
):
# Remove zero slope areas
if clean_slope:
import xrspatial.slope

# Calculate slope then identify invalid flat areas that are
# highly likely to be ocean. Buffer these by 1 pixel so we
# remove any pixels partially obscured by ocean after
# reprojecting to 10 m resolution pixels.
validation_slope = xrspatial.slope(agg=validation_ds)
validation_flat = mask_cleanup(
validation_slope == 0, mask_filters=[("dilation", 1)]
)
validation_ds = validation_ds.where(~validation_flat)

# Identify valid intertidal pixels for comparison
intertidal = (validation_ds >= lat) & (validation_ds <= hat)

# Reproject to match array
validation_ds = validation_ds.odc.reproject(
modelled_ds.odc.geobox, resampling="nearest", dst_nodata=np.nan
# Analyse only intertidal pixels that contain valid data in both
valid_data = (
intertidal.values & modelled_ds.notnull().values & validation_ds.notnull().values
)

# Analyse only pixels that contain valid data in both
modelled_nodata = modelled_ds.isnull()
validation_nodata = validation_ds.isnull()

# Optionally clean modelled dataset using mask_filters (e.g. `[('dilation', 1)]`)
if clean_modelled is not None:
modelled_nodata = mask_cleanup(modelled_nodata, mask_filters=clean_modelled)

# Optionally clean validation dataset using mask_filters (e.g. `[('dilation', 1)]`)
if clean_validation is not None:
validation_nodata = mask_cleanup(validation_nodata, mask_filters=clean_validation)

# Export 1D modelled and validation data for valid data area
invalid_data = modelled_nodata | validation_nodata
validation_z = validation_ds.values[~invalid_data.values]
modelled_z = modelled_ds.values[~invalid_data.values]
validation_z = validation_ds.values[valid_data]
modelled_z = modelled_ds.values[valid_data]
uncertainty_z = uncertainty_ds.values[valid_data]

return validation_z, modelled_z
return validation_z, modelled_z, uncertainty_z
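The core of the new `preprocess_validation` logic — keep only pixels that fall between the lowest and highest astronomical tide and hold valid data in both datasets, then flatten to 1D — can be sketched with plain numpy arrays (toy values; the real function works on reprojected xarray rasters):

```python
import numpy as np

# Toy 2x2 rasters: NaN marks nodata in each dataset
validation = np.array([[0.5, np.nan],
                       [1.2, 3.0]])
modelled = np.array([[0.4, 0.8],
                     [np.nan, 2.9]])

lat, hat = 0.0, 2.0  # toy lowest/highest astronomical tide bounds

# Intertidal pixels lie between LAT and HAT (NaN comparisons are False)
intertidal = (validation >= lat) & (validation <= hat)

# Keep pixels that are intertidal AND valid in both datasets
valid = intertidal & ~np.isnan(modelled) & ~np.isnan(validation)

# Boolean indexing flattens the kept pixels to 1D vectors
validation_z = validation[valid]
modelled_z = modelled[valid]
```

Here only the top-left pixel survives: the top-right is validation nodata, the bottom-left is modelled nodata, and 3.0 sits above HAT.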