diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml index 41619708..7de86393 100644 --- a/.github/workflows/unit_test.yml +++ b/.github/workflows/unit_test.yml @@ -30,4 +30,4 @@ jobs: shell: bash run: | cd $GITHUB_WORKSPACE/src/pypromice - python3 -m unittest -v process/aws.py get.py tx/tx.py qc/static_qc_test.py + python3 -m unittest -v process/aws.py get.py tx/tx.py qc/persistence_test.py diff --git a/setup.py b/setup.py index ce2ca38e..84808a3d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pypromice", - version="1.2.1", + version="1.3.0", author="GEUS Glaciology and Climate", description="PROMICE/GC-Net data processing toolbox", long_description=long_description, diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 079a7804..49c5f619 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -11,7 +11,7 @@ import os import xarray as xr -from pypromice.qc.static_qc import apply_static_qc +from pypromice.qc.persistence import persistence_qc from pypromice.process.value_clipping import clip_values logger = logging.getLogger(__name__) @@ -62,7 +62,7 @@ def toL2( except Exception: logger.exception('Flagging and fixing failed:') if ds.attrs['format'] == 'TX': - ds = apply_static_qc(ds) # Detect and filter data points that seems to be static + ds = persistence_qc(ds) # Detect and filter data points that seems to be static T_100 = _getTempK(T_0) ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], diff --git a/src/pypromice/qc/static_qc.py b/src/pypromice/qc/persistence.py similarity index 84% rename from src/pypromice/qc/static_qc.py rename to src/pypromice/qc/persistence.py index 6f9eb5ef..d2ea5ef3 100644 --- a/src/pypromice/qc/static_qc.py +++ b/src/pypromice/qc/persistence.py @@ -6,9 +6,9 @@ from typing import Mapping, Optional, Union __all__ = [ - "apply_static_qc", - "find_static_regions", - "count_consecutive_static_values", + "persistence_qc", + "find_persistent_regions", + "count_consecutive_persistent_values", "count_consecutive_true", ] @@ -22,12 +22,12 @@ } -def apply_static_qc( +def persistence_qc( ds: xr.Dataset, variable_thresholds: Optional[Mapping] = None, ) -> xr.Dataset: """ - Detect and filter data points that seems to be static within a certain period. + Detect and filter data points that seems to be persistent within a certain period. TODO: It could be nice to have a reference to the logger or description of the behaviour here. The AWS logger program is know to return the last successfully read value if it fails reading from the sensor. @@ -58,7 +58,7 @@ def apply_static_qc( if variable_thresholds is None: variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS - logger.debug(f"Running apply_static_qc using {variable_thresholds}") + logger.debug(f"Running persistence_qc using {variable_thresholds}") for k in variable_thresholds.keys(): var_all = [ @@ -66,16 +66,16 @@ def apply_static_qc( k + "_l", k + "_i", ] # apply to upper, lower boom, and instant - max_diff = variable_thresholds[k]["max_diff"] # loading static limit + max_diff = variable_thresholds[k]["max_diff"] # loading persistent limit period = variable_thresholds[k]["period"] # loading diff period for v in var_all: if v in df: - mask = find_static_regions(df[v], period, max_diff) + mask = find_persistent_regions(df[v], period, max_diff) n_masked = mask.sum() n_samples = len(mask) logger.debug( - f"Applying static QC in {v}. Filtering {n_masked}/{n_samples} samples" + f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" ) # setting outliers to NaN df.loc[mask, v] = np.nan @@ -89,7 +89,7 @@ def apply_static_qc( return ds_out -def find_static_regions( +def find_persistent_regions( data: pd.Series, min_repeats: int, max_diff: float, @@ -97,14 +97,14 @@ def find_static_regions( """ Algorithm that ensures values can stay the same within the outliers_mask """ - consecutive_true_df = count_consecutive_static_values(data, max_diff) - static_regions = consecutive_true_df >= min_repeats + consecutive_true_df = count_consecutive_persistent_values(data, max_diff) + persistent_regions = consecutive_true_df >= min_repeats # Ignore entries which already nan in the input data - static_regions[data.isna()] = False - return static_regions + persistent_regions[data.isna()] = False + return persistent_regions -def count_consecutive_static_values( +def count_consecutive_persistent_values( data: pd.Series, max_diff: float, ) -> pd.Series: diff --git a/src/pypromice/qc/static_qc_test.py b/src/pypromice/qc/persistence_test.py similarity index 75% rename from src/pypromice/qc/static_qc_test.py rename to src/pypromice/qc/persistence_test.py index e9aac828..5cd3d928 100644 --- a/src/pypromice/qc/static_qc_test.py +++ b/src/pypromice/qc/persistence_test.py @@ -4,11 +4,11 @@ import numpy.testing import pandas as pd -from pypromice.qc.static_qc import find_static_regions +from pypromice.qc.persistence import find_persistent_regions -class StaticQATestCase(unittest.TestCase): - def test_1_hour_static(self): +class PersistenceQATestCase(unittest.TestCase): + def test_1_hour_persistent(self): self._test_1_hour_repeat(10) def test_1_hour_second_index(self): @@ -28,13 +28,13 @@ def _test_1_hour_repeat(self, index: int): expected_output = input_series.map(lambda _: False) expected_output[index + 1] = True - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) - def test_no_static_period(self): + def test_no_persistent_period(self): time_range = pd.date_range( start="2023-01-26", end="2023-01-27", freq="h", tz="utc", inclusive="left" ) @@ -42,13 +42,13 @@ def test_no_static_period(self): min_repeats = 1 expected_output = input_series.map(lambda _: False) - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) - def test_static_period_longer_than_period_threshold(self): + def test_persistent_period_longer_than_period_threshold(self): time_range = pd.date_range( start="2023-01-26", end="2023-01-28", freq="h", tz="utc", inclusive="left" ) @@ -62,13 +62,13 @@ def test_static_period_longer_than_period_threshold(self): expected_output = input_series.map(lambda _: False) expected_output[expected_filter_start:expected_filter_end] = True - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) - def test_period_threshold_longer_than_static_period(self): + def test_period_threshold_longer_than_persistent_period(self): time_range = pd.date_range( start="2023-01-26", end="2023-01-28", freq="h", tz="utc", inclusive="left" ) @@ -79,13 +79,13 @@ def test_period_threshold_longer_than_static_period(self): input_series[index_start:index_end] = input_series[index_start] expected_output = input_series.map(lambda _: False) - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) - def test_static_period_at_the_end(self): + def test_persistent_period_at_the_end(self): time_range = pd.date_range( start="2023-01-26", end="2023-01-28", freq="h", tz="utc", inclusive="left" ) @@ -97,11 +97,11 @@ def test_static_period_at_the_end(self): expected_output = input_series.map(lambda _: False) expected_output[expected_filter_start:] = True - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) def test_dont_filter_nan_values(self): time_range = pd.date_range( @@ -119,13 +119,13 @@ def test_dont_filter_nan_values(self): # The output mask shouldn't filter nan values. expected_output = input_series.map(lambda _: False) - static_mask = find_static_regions( + persistent_mask = find_persistent_regions( input_series, min_repeats=min_repeats, max_diff=0.001 ) - pd.testing.assert_series_equal(expected_output, static_mask, check_names=False) + pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False) - def test_series_with_nan_values_between_static_values(self): + def test_series_with_nan_values_between_persistent_values(self): time_range = pd.date_range( start="2023-01-26", end="2023-01-27", freq="h", tz="utc", inclusive="left" ) @@ -141,6 +141,10 @@ def test_series_with_nan_values_between_static_values(self): # Note: The station region mask shall not filter nan values expected_mask[16] = True - output_mask = find_static_regions(series, min_repeats=period, max_diff=0.01) + output_mask = find_persistent_regions(series, min_repeats=period, max_diff=0.01) np.testing.assert_equal(expected_mask, output_mask) + + +if __name__ == "__main__": + unittest.main()