DAS-2232 initial commit after PR feedback
sudha-murthy committed Sep 26, 2024
1 parent e2ff61f commit 756f7c0
Showing 8 changed files with 190 additions and 136 deletions.
15 changes: 12 additions & 3 deletions CHANGELOG.md
@@ -1,9 +1,18 @@
## v1.1.0
### 2024-09-10

This version of HOSS provides support for products without CF compliance like SMAP L3
Methods added to get dimension scales from coordinate attributes and grid mapping with overrides
in the json file
This version of HOSS provides support for products that do not comply with CF standards, such as
SMAP L3. New methods retrieve dimension scales from coordinate attributes and grid mappings, using
overrides specified in the hoss_config.json configuration file. `get_coordinate_variables` returns
coordinate datasets when dimension scales are not present in the source file, and the prefetch
request retrieves those coordinate datasets. The `is_variable_one_dimensional` function checks the
dimensionality of the coordinate datasets. `update_dimension_variables` extracts a row and a column
from the 2D coordinate datasets and uses the crs attribute to determine the projection of the
granule, converting the lat/lon arrays to projected x-y dimension scales.
`get_override_projected_dimensions` provides the projected dimension-scale names after the
conversion. `get_variable_crs` has also been updated to handle the case where the grid mapping
variable does not exist in the granule and an override is provided in an updated hoss_config.json.

## v1.0.5
### 2024-08-19
217 changes: 120 additions & 97 deletions hoss/dimension_utilities.py
@@ -23,7 +23,12 @@
from numpy.ma.core import MaskedArray
from varinfo import VariableFromDmr, VarInfoFromDmr

from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange
from hoss.exceptions import (
InvalidNamedDimension,
InvalidRequestedRange,
MissingCoordinateDataset,
MissingValidCoordinateDataset,
)
from hoss.projection_utilities import (
get_variable_crs,
get_x_y_extents_from_geographic_points,
@@ -59,43 +64,7 @@ def is_index_subset(message: Message) -> bool:
)


def get_override_projected_dimensions(
varinfo: VarInfoFromDmr,
override_variable_name: str,
) -> str:
"""returns the x-y projection variable names that would
match the geo coordinate names
"""
projection_variable_name = None
override_variable = varinfo.get_variable(override_variable_name)
if override_variable.is_latitude():
projection_variable_name = 'projected_y'
elif override_variable.is_longitude():
projection_variable_name = 'projected_x'
return projection_variable_name


def get_override_dimensions(
varinfo: VarInfoFromDmr,
required_variables: Set[str],
) -> set[str]:
"""Determine the dimensions that need to be "pre-fetched" from OPeNDAP
If dimensions are not available, get variables that can be
used to generate dimensions
"""
# check if coordinate variables are provided
    # this should be configurable in hoss_config.json
try:
override_dimensions = varinfo.get_references_for_attribute(
required_variables, 'coordinates'
)
return override_dimensions
except AttributeError:
return set()


def prefetch_dimension_variables(
def get_prefetch_variables(
opendap_url: str,
varinfo: VarInfoFromDmr,
required_variables: Set[str],
@@ -109,64 +78,105 @@ def prefetch_dimension_variables(
spatial and temporal dimensions, but to support generic dimension
subsets, all required dimensions must be prefetched, along with any
associated bounds variables referred to via the "bounds" metadata
attribute.
    attribute. In cases where dimension variables do not exist, coordinate
    variables will be prefetched instead and used to calculate dimension-scale values.
"""
required_dimensions = varinfo.get_required_dimensions(required_variables)
if len(required_dimensions) == 0:
required_dimensions = get_override_dimensions(varinfo, required_variables)
logger.info('coordinates: ' f'{required_dimensions}')

    if not required_dimensions:
        coordinate_variables = get_coordinate_variables(varinfo, required_variables)
        logger.info(f'coordinates: {coordinate_variables}')
        required_dimensions = set(coordinate_variables)
    logger.info(f'required_dimensions: {required_dimensions}')
    bounds = varinfo.get_references_for_attribute(required_dimensions, 'bounds')
    logger.info(f'bounds: {bounds}')
    required_dimensions.update(bounds)

logger.info(
'Variables being retrieved in prefetch request: '
f'{format_variable_set_string(required_dimensions)}'
)

required_dimensions_nc4 = get_opendap_nc4(
opendap_url, required_dimensions, output_dir, logger, access_token, config
)

# Create bounds variables if necessary.
add_bounds_variables(required_dimensions_nc4, required_dimensions, varinfo, logger)
return required_dimensions_nc4
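
For orientation, here is a condensed, self-contained sketch of the selection logic described in the docstring above. The helper name and the example sets are illustrative; the real code derives these sets from `varinfo`:

def select_prefetch_variables(
    required_dimensions: set[str],
    coordinate_variables: set[str],
    bounds_references: set[str],
) -> set[str]:
    """Sketch: choose what to prefetch, mirroring get_prefetch_variables."""
    # Fall back to coordinate variables when no dimension scales exist.
    prefetch = set(required_dimensions) or set(coordinate_variables)
    # Any referenced bounds variables ride along with the chosen set.
    prefetch.update(bounds_references)
    return prefetch

print(select_prefetch_variables(set(), {'/latitude', '/longitude'}, set()))
# {'/latitude', '/longitude'} (set ordering may vary)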


def is_variable_one_dimensional(
prefetch_dataset: Dataset, dimension_variable: VariableFromDmr
) -> bool:
"""Check if a dimension variable is 1D"""
dimensions_array = prefetch_dataset[dimension_variable.full_name_path][:]
return dimensions_array.ndim == 1
def get_override_projected_dimensions(
    varinfo: VarInfoFromDmr,
    override_variable_name: str,
) -> str | None:
    """Return the projected dimension name matching a geographic
    coordinate variable: a `latitude` coordinate variable maps to the
    'projected_y' dimension scale, and a `longitude` coordinate
    variable maps to 'projected_x'.
    """
    projected_dimension_name = None
    override_variable = varinfo.get_variable(override_variable_name)
    if override_variable is not None:
        if override_variable.is_latitude():
            projected_dimension_name = 'projected_y'
        elif override_variable.is_longitude():
            projected_dimension_name = 'projected_x'
    return projected_dimension_name


def get_coordinate_variables(
    varinfo: VarInfoFromDmr,
    requested_variables: Set[str],
) -> list[str]:
    """Return the coordinate variables referenced in the `coordinates`
    metadata attributes of the requested variables. The latitude
    coordinate is placed before the longitude coordinate in the
    returned list.
    """
    try:
        coordinate_variables_set = varinfo.get_references_for_attribute(
            requested_variables, 'coordinates'
        )
        coordinate_variables = []
        for coordinate in coordinate_variables_set:
            if varinfo.get_variable(coordinate).is_latitude():
                coordinate_variables.insert(0, coordinate)
            elif varinfo.get_variable(coordinate).is_longitude():
                coordinate_variables.insert(1, coordinate)

        return coordinate_variables
    except AttributeError:
        return []
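
As a standalone illustration of the ordering contract (latitude first, longitude second), here is a minimal sketch using a stand-in for the `varinfo` lookup; the `StubVariable` class is hypothetical and not part of HOSS:

from dataclasses import dataclass

@dataclass
class StubVariable:
    """Hypothetical stand-in for a varinfo variable."""
    name: str

    def is_latitude(self) -> bool:
        return 'lat' in self.name

    def is_longitude(self) -> bool:
        return 'lon' in self.name

def order_coordinates(references: set[str]) -> list[str]:
    """Sketch: place latitude before longitude, as get_coordinate_variables does."""
    ordered = []
    for reference in references:
        variable = StubVariable(reference)
        if variable.is_latitude():
            ordered.insert(0, reference)
        elif variable.is_longitude():
            ordered.insert(1, reference)
    return ordered

print(order_coordinates({'/group/longitude', '/group/latitude'}))
# ['/group/latitude', '/group/longitude']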


def update_dimension_variables(
prefetch_dataset: Dataset,
required_dimensions: Set[str],
varinfo: VarInfoFromDmr,
) -> Dict[str, ndarray]:
"""Augment a NetCDF4 file with artificial 1D dimensions variable for each
2D dimension variable"
"""Generate artificial 1D dimensions variable for each
2D dimension or coordinate variable
For each dimension variable:
(1) Check if the dimension variable is 1D.
(2) If it is not 1D and is 2D create a dimensions array from
within the `write_1D_dimensions`
function.
(3) Then write the 1D dimensions variable to the NetCDF4 URL.
(2) If it is not 1D and is 2D get the dimension sizes
(3) Get the corner points from the coordinate variables
(4) Get the x-y max-min values
(5) Generate the x-y dimscale array and return to the calling method
"""
for dimension_name in required_dimensions:
dimension_variable = varinfo.get_variable(dimension_name)
if not is_variable_one_dimensional(prefetch_dataset, dimension_variable):
if prefetch_dataset[dimension_variable.full_name_path][:].ndim > 1:
col_size = prefetch_dataset[dimension_variable.full_name_path][:].shape[0]
row_size = prefetch_dataset[dimension_variable.full_name_path][:].shape[1]
            crs = get_variable_crs(dimension_name, varinfo)

            geo_grid_corners = get_geo_grid_corners(
                prefetch_dataset, required_dimensions, varinfo
            )

x_y_extents = get_x_y_extents_from_geographic_points(geo_grid_corners, crs)

@@ -181,7 +191,7 @@ def update_dimension_variables(
# create the xy dim scales
x_dim = np.arange(x_min, x_max, x_resolution)

# ascending versus descending..should be based on the coordinate grid
# The origin is the top left. Y values are in decreasing order.
y_dim = np.arange(y_max, y_min, -y_resolution)
return {'projected_y': y_dim, 'projected_x': x_dim}
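
To make steps (3)-(5) concrete, here is a self-contained sketch that builds the projected dimension scales with pyproj and numpy directly. The EPSG codes, corner points, and grid shape are illustrative assumptions, not values read from a real granule:

import numpy as np
from pyproj import CRS, Transformer

# Hypothetical geographic corner points (lon, lat) and grid shape.
corners = [(-179.9, 85.0), (179.9, 85.0), (179.9, -85.0), (-179.9, -85.0)]
row_size, col_size = 406, 964  # illustrative SMAP-like grid dimensions

# Project the corners to x-y space (EPSG:6933 is EASE-Grid 2.0 Global).
transformer = Transformer.from_crs(
    CRS('EPSG:4326'), CRS('EPSG:6933'), always_xy=True
)
x_values, y_values = transformer.transform(*zip(*corners))
x_min, x_max = min(x_values), max(x_values)
y_min, y_max = min(y_values), max(y_values)

# Derive per-cell resolutions and build the 1D dimension scales.
x_resolution = (x_max - x_min) / col_size
y_resolution = (y_max - y_min) / row_size
x_dim = np.arange(x_min, x_max, x_resolution)
y_dim = np.arange(y_max, y_min, -y_resolution)  # top-left origin: y descends

print(x_dim.size, y_dim.size)  # approximately col_size and row_size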

@@ -193,10 +203,11 @@ def get_geo_grid_corners(
) -> list[Tuple[float]]:
"""
This method is used to return the lat lon corners from a 2D
coordinate dataset. This does a check for values below -180
which could be fill values. Does not check if there are fill
values in the corner points to go down to the next row and col
    coordinate dataset. It uses a row and a column of the latitude and
    longitude arrays to determine the corner points. It checks for values
    below -180.0, which could be fill values, but it does not yet fall
    back to the next row or column when the corner points themselves
    contain fill values; that case still needs to be addressed.
"""
    # Initialize to empty arrays so the size checks below also catch the
    # case where no latitude or longitude variable is found.
    lat_arr = np.array([])
    lon_arr = np.array([])
    for dimension_name in required_dimensions:
dimension_variable = varinfo.get_variable(dimension_name)
@@ -205,20 +216,29 @@
elif dimension_variable.is_longitude():
lon_arr = prefetch_dataset[dimension_variable.full_name_path][:]

if not lat_arr.size:
raise MissingCoordinateDataset('latitude')
if not lon_arr.size:
raise MissingCoordinateDataset('longitude')

# skip fill values when calculating min values
# topleft = minlon, maxlat
# bottomright = maxlon, minlat
top_left_row_idx = 0
top_left_col_idx = 0
lon_row = lon_arr[top_left_row_idx, :]
lon_row_valid_indices = np.where(lon_row >= -180.0)[0]
if not lon_row_valid_indices.size:
raise MissingValidCoordinateDataset('longitude')
top_left_col_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmin()]
minlon = lon_row[top_left_col_idx]
top_right_col_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmax()]
maxlon = lon_row[top_right_col_idx]

lat_col = lat_arr[:, top_right_col_idx]
lat_col_valid_indices = np.where(lat_col >= -180.0)[0]
if not lat_col_valid_indices.size:
raise MissingValidCoordinateDataset('latitude')
bottom_right_row_idx = lat_col_valid_indices[
lat_col[lat_col_valid_indices].argmin()
]
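
The valid-index bookkeeping above is subtle: argmin/argmax run over the valid subset, and the results must be mapped back to full-array indices. Here is a self-contained numpy sketch of the same pattern; the -9999.0 fill value is an illustrative assumption:

import numpy as np

lon_row = np.array([-9999.0, -170.0, -120.0, 45.0, 170.0, -9999.0])

# Indices of entries that look like real longitudes (>= -180.0).
lon_row_valid_indices = np.where(lon_row >= -180.0)[0]

# argmin/argmax are positions within the valid subset, so index back
# through lon_row_valid_indices to recover full-row positions.
min_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmin()]
max_idx = lon_row_valid_indices[lon_row[lon_row_valid_indices].argmax()]

print(min_idx, lon_row[min_idx])  # 1 -170.0
print(max_idx, lon_row[max_idx])  # 4 170.0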
@@ -560,40 +580,23 @@ def add_index_range(
"""
variable = varinfo.get_variable(variable_name)
range_strings = []
if variable.dimensions == []:
override_dimensions = get_override_dimensions(varinfo, [variable_name])
if len(override_dimensions) > 0:
for override in reversed(list(override_dimensions)):
dimension = get_override_projected_dimensions(varinfo, override)
dimension_range = index_ranges.get(dimension)
if (
dimension_range is not None
and dimension_range[0] <= dimension_range[1]
):
range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]')
else:
range_strings.append('[]')
if variable.dimensions:
range_strings = get_range_strings(variable.dimensions, index_ranges)
else:
coordinate_variables = get_coordinate_variables(varinfo, [variable_name])
if coordinate_variables:
dimensions = []
for coordinate in coordinate_variables:
dimensions.append(
get_override_projected_dimensions(varinfo, coordinate)
)
range_strings = get_range_strings(dimensions, index_ranges)
else:
            # The requested variable may itself be an overridden projected dimension.
override = get_override_projected_dimensions(varinfo, variable_name)
if override is not None and override in index_ranges.keys():
for dimension in reversed(index_ranges.keys()):
dimension_range = index_ranges.get(dimension)
if (
dimension_range is not None
and dimension_range[0] <= dimension_range[1]
):
range_strings.append(
f'[{dimension_range[0]}:{dimension_range[1]}]'
)
else:
range_strings.append('[]')
else:
for dimension in variable.dimensions:
dimension_range = index_ranges.get(dimension)

if dimension_range is not None and dimension_range[0] <= dimension_range[1]:
range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]')
dimensions = ['projected_y', 'projected_x']
if override is not None and override in dimensions:
range_strings = get_range_strings(dimensions, index_ranges)
else:
range_strings.append('[]')

@@ -604,6 +607,24 @@ def add_index_range(
return f'{variable_name}{indices_string}'
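
For a concrete picture of the resulting OPeNDAP constraint, here is a minimal sketch of how index ranges become the bracketed suffix. The variable name and ranges are made up; a dimension without a stored range falls back to '[]', which OPeNDAP reads as the full extent:

index_ranges = {'projected_y': (10, 20), 'projected_x': (5, 15)}

range_strings = []
for dimension in ('projected_y', 'projected_x', 'time'):  # 'time' has no range
    dimension_range = index_ranges.get(dimension)
    if dimension_range is not None and dimension_range[0] <= dimension_range[1]:
        range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]')
    else:
        range_strings.append('[]')

print(f"/science_variable{''.join(range_strings)}")
# /science_variable[10:20][5:15][]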


def get_range_strings(
variable_dimensions: list,
index_ranges: IndexRanges,
) -> list:
"""Calculates the index ranges for each dimension of the variable
and returns the list of index ranges
"""
range_strings = []
for dimension in variable_dimensions:
dimension_range = index_ranges.get(dimension)
if dimension_range is not None and dimension_range[0] <= dimension_range[1]:
range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]')
else:
range_strings.append('[]')

return range_strings


def get_fill_slice(dimension: str, fill_ranges: IndexRanges) -> slice:
"""Check the dictionary of dimensions that need to be filled for the
given dimension. If present, the minimum index will be greater than the
@@ -713,6 +734,8 @@ def get_dimension_bounds(
"""
try:
# For pseudo-variables, `varinfo.get_variable` returns `None` and
# therefore has no `references` attribute.
bounds = varinfo.get_variable(dimension_name).references.get('bounds')
except AttributeError:
bounds = None
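
The try/except here guards a pattern worth spelling out: for pseudo-variables the lookup returns None, and attribute access on None raises AttributeError. A standalone sketch follows, with a hypothetical stand-in class:

class StubDimension:
    """Hypothetical stand-in for a varinfo variable with references."""
    references = {'bounds': {'/time_bnds'}}

def lookup_bounds(variable):
    """Sketch: mirror the guarded bounds lookup in get_dimension_bounds."""
    try:
        # `variable` is None for pseudo-variables, raising AttributeError.
        return variable.references.get('bounds')
    except AttributeError:
        return None

print(lookup_bounds(StubDimension()))  # {'/time_bnds'}
print(lookup_bounds(None))             # None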
30 changes: 30 additions & 0 deletions hoss/exceptions.py
@@ -108,6 +108,36 @@ def __init__(self):
)


class MissingCoordinateDataset(CustomError):
"""This exception is raised when HOSS tries to get latitude and longitude
datasets and they are missing or empty. These datasets are referred to
in the science variables with coordinate attributes.
"""

def __init__(self, referring_variable):
super().__init__(
'MissingCoordinateDataset',
f'Coordinate: "{referring_variable}" is '
'not present in source granule file.',
)


class MissingValidCoordinateDataset(CustomError):
"""This exception is raised when HOSS tries to get latitude and longitude
datasets and they are missing or empty. These datasets are referred to
in the science variables with coordinate attributes.
"""

def __init__(self, referring_variable):
super().__init__(
'MissingValidCoordinateDataset',
f'Coordinate: "{referring_variable}" is '
'not valid in source granule file.',
)
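
A brief sketch of how this exception surfaces in practice, mirroring the validity check in hoss/dimension_utilities.py; the array contents are illustrative, while the -180.0 threshold matches that check:

import numpy as np

from hoss.exceptions import MissingValidCoordinateDataset

lat_col = np.array([-9999.0, -9999.0])  # illustrative: nothing but fill values

lat_col_valid_indices = np.where(lat_col >= -180.0)[0]
try:
    if not lat_col_valid_indices.size:
        raise MissingValidCoordinateDataset('latitude')
except MissingValidCoordinateDataset as error:
    print(error)  # expected to include the message defined above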


class UnsupportedShapeFileFormat(CustomError):
"""This exception is raised when the shape file included in the input
Harmony message is not GeoJSON.
