Skip to content

Commit

Permalink
DAS-2034 - Remove custom retry logic from HOSS in favour of harmony-s…
Browse files Browse the repository at this point in the history
…ervice-lib.
  • Loading branch information
owenlittlejohns authored Dec 20, 2023
1 parent 036f6ad commit 3ba073f
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 139 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## v1.0.1
### 2023-12-19

This version of HOSS removes custom, redundant download retry logic. Instead
retries are relied upon from `harmony-service-lib` and for each stage in a
Harmony workflow.

## v1.0.0
### 2023-10-06

Expand Down
2 changes: 1 addition & 1 deletion docker/service_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.0
1.0.1
1 change: 1 addition & 0 deletions hoss/bbox_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_request_shape_file(message: Message, working_dir: str,
raise UnsupportedShapeFileFormat(message.subset.shape.type)

shape_file_url = message.subset.shape.process('href')
adapter_logger.info('Downloading request shape file')
local_shape_file_path = download(shape_file_url, working_dir,
logger=adapter_logger,
access_token=message.accessToken,
Expand Down
3 changes: 2 additions & 1 deletion hoss/dimension_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
import numpy as np

from harmony.message import Message
from harmony.message_utility import rgetattr
from harmony.util import Config
from varinfo import VarInfoFromDmr

from hoss.bbox_utilities import flatten_list
from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange
from hoss.utilities import (format_variable_set_string, get_opendap_nc4,
get_value_or_default, rgetattr)
get_value_or_default)


IndexRange = Tuple[int]
Expand Down
11 changes: 0 additions & 11 deletions hoss/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,3 @@ class UrlAccessFailed(CustomError):
def __init__(self, url, status_code):
super().__init__('UrlAccessFailed',
f'{status_code} error retrieving: {url}')


class UrlAccessFailedWithRetries(CustomError):
""" This exception is raised when an HTTP request for a given URL has
failed a specified number of times.
"""
def __init__(self, url):
super().__init__('UrlAccessFailedWithRetries',
f'URL: {url} was unsuccessfully requested the '
'maximum number of times.')
3 changes: 2 additions & 1 deletion hoss/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import List, Set

from harmony.message import Message, Source, Variable as HarmonyVariable
from harmony.message_utility import rgetattr
from harmony.util import Config
from netCDF4 import Dataset
from numpy.ma import masked
Expand All @@ -17,7 +18,7 @@
from hoss.dimension_utilities import (add_index_range, get_fill_slice,
IndexRanges, is_index_subset,
get_requested_index_ranges,
prefetch_dimension_variables, rgetattr)
prefetch_dimension_variables)
from hoss.spatial import get_spatial_index_ranges
from hoss.temporal import get_temporal_index_ranges
from hoss.utilities import (download_url, format_variable_set_string,
Expand Down
75 changes: 21 additions & 54 deletions hoss/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import Config, download as util_download

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries


HTTP_REQUEST_ATTEMPTS = 3
from hoss.exceptions import UrlAccessFailed


def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]:
Expand Down Expand Up @@ -90,12 +87,12 @@ def download_url(url: str, destination: str, logger: Logger,
access_token: str = None, config: Config = None,
data=None) -> str:
""" Use built-in Harmony functionality to download from a URL. This is
expected to be used for obtaining the granule `.dmr` and the granule
itself (only the required variables).
expected to be used for obtaining the granule `.dmr`, a prefetch of
only dimensions and bounds variables, and the subsetted granule itself.
OPeNDAP can return intermittent 500 errors. This function will retry
the original request in the event of a 500 error, but not for other
error types. In those instances, the original HTTPError is re-raised.
OPeNDAP can return intermittent 500 errors. Retries will be performed
by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP
errors are captured and re-raised as custom exceptions.
The return value is the location in the file-store of the downloaded
content from the URL.
Expand All @@ -106,32 +103,21 @@ def download_url(url: str, destination: str, logger: Logger,
if data is not None:
logger.info(f'POST request data: "{format_dictionary_string(data)}"')

request_completed = False
attempts = 0

while not request_completed and attempts < HTTP_REQUEST_ATTEMPTS:
attempts += 1

try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
request_completed = True
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
if attempts < HTTP_REQUEST_ATTEMPTS:
logger.info('500 error returned, retrying request.')
else:
raise UrlAccessFailedWithRetries(url) from harmony_exception
except Exception as harmony_exception:
# Not a 500 error, so raise immediately and exit the loop.
raise UrlAccessFailed(url, 'Unknown') from harmony_exception
try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
raise UrlAccessFailed(url, 500) from harmony_exception
except Exception as harmony_exception:
raise UrlAccessFailed(url, 'Unknown') from harmony_exception

return response

Expand Down Expand Up @@ -159,22 +145,3 @@ def get_value_or_default(value: Optional[float], default: float) -> float:
"""
return value if value is not None else default


def rgetattr(input_object, requested_attribute, *args):
""" This is a recursive version of the inbuilt `getattr` method, such that
it can be called to retrieve nested attributes. For example:
the Message.subset.shape within the input Harmony message.
Note, if a default value is specified, this will be returned if any
attribute in the specified chain is absent from the supplied object.
"""
if '.' not in requested_attribute:
result = getattr(input_object, requested_attribute, *args)
else:
attribute_pieces = requested_attribute.split('.')
result = rgetattr(getattr(input_object, attribute_pieces[0], *args),
'.'.join(attribute_pieces[1:]), *args)

return result
2 changes: 1 addition & 1 deletion pip_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This file should contain requirements to be installed via Pip.
# Open source packages available from PyPI
earthdata-varinfo ~= 1.0.0
harmony-service-lib ~= 1.0.23
harmony-service-lib ~= 1.0.25
netCDF4 ~= 1.6.4
numpy ~= 1.24.2
pyproj ~= 3.6.1
Expand Down
87 changes: 17 additions & 70 deletions tests/unit/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import config

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries
from hoss.exceptions import UrlAccessFailed
from hoss.utilities import (download_url, format_dictionary_string,
format_variable_set_string,
get_constraint_expression, get_file_mimetype,
get_opendap_nc4, get_value_or_default,
HTTP_REQUEST_ATTEMPTS, move_downloaded_nc4,
rgetattr)
move_downloaded_nc4)


class TestUtilities(TestCase):
Expand Down Expand Up @@ -42,11 +41,9 @@ def test_get_file_mimetype(self):

@patch('hoss.utilities.util_download')
def test_download_url(self, mock_util_download):
""" Ensure that the `harmony.util.download` function is called. Also
ensure that if a 500 error is returned, the request is retried. If
a different HTTPError occurs, the caught HTTPError should be
re-raised. Finally, check the maximum number of request attempts is
not exceeded.
""" Ensure that the `harmony.util.download` function is called. If an
error occurs, the caught exception should be re-raised with a
custom exception with a human-readable error message.
"""
output_directory = 'output/dir'
Expand Down Expand Up @@ -88,14 +85,22 @@ def test_download_url(self, mock_util_download):
)
mock_util_download.reset_mock()

with self.subTest('500 error triggers a retry.'):
with self.subTest('500 error is caught and handled.'):
mock_util_download.side_effect = [self.harmony_500_error,
http_response]

response = download_url(test_url, output_directory, self.logger)
with self.assertRaises(UrlAccessFailed):
download_url(test_url, output_directory, self.logger,
access_token, self.config)

self.assertEqual(response, http_response)
self.assertEqual(mock_util_download.call_count, 2)
mock_util_download.assert_called_once_with(
test_url,
output_directory,
self.logger,
access_token=access_token,
data=None,
cfg=self.config
)
mock_util_download.reset_mock()

with self.subTest('Non-500 error does not retry, and is re-raised.'):
Expand All @@ -116,17 +121,6 @@ def test_download_url(self, mock_util_download):
)
mock_util_download.reset_mock()

with self.subTest('Maximum number of attempts not exceeded.'):
mock_util_download.side_effect = [
self.harmony_500_error
] * (HTTP_REQUEST_ATTEMPTS + 1)
with self.assertRaises(UrlAccessFailedWithRetries):
download_url(test_url, output_directory, self.logger)

self.assertEqual(mock_util_download.call_count,
HTTP_REQUEST_ATTEMPTS)
mock_util_download.reset_mock()

@patch('hoss.utilities.move_downloaded_nc4')
@patch('hoss.utilities.util_download')
def test_get_opendap_nc4(self, mock_download, mock_move_download):
Expand Down Expand Up @@ -259,50 +253,3 @@ def test_get_value_or_default(self):

with self.subTest('Value = None returns the supplied default'):
self.assertEqual(get_value_or_default(None, 20), 20)

def test_rgetattr(self):
""" Ensure that the recursive function can retrieve nested attributes
and uses the default argument when required.
"""
class Child:
def __init__(self, name):
self.name = name

class Parent:
def __init__(self, name, child_name):
self.name = name
self.child = Child(child_name)
self.none = None

test_parent = Parent('parent_name', 'child_name')

with self.subTest('Parent level attribute'):
self.assertEqual(rgetattr(test_parent, 'name'), 'parent_name')

with self.subTest('Nested attribute'):
self.assertEqual(rgetattr(test_parent, 'child.name'), 'child_name')
with self.subTest('Missing parent with default'):
self.assertEqual(rgetattr(test_parent, 'absent', 'default'),
'default')

with self.subTest('Missing child attribute with default'):
self.assertEqual(rgetattr(test_parent, 'child.absent', 'default'),
'default')
with self.subTest('Child requested, parent missing, default'):
self.assertEqual(
rgetattr(test_parent, 'none.something', 'default'),
'default'
)

with self.subTest('Missing parent, with no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'absent')

with self.subTest('Missing child, with no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'child.absent')

with self.subTest('Child requested, parent missing, no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'none.something')

0 comments on commit 3ba073f

Please sign in to comment.