From 3ba073f076af771ec5b0d0adb665a3a78160c09d Mon Sep 17 00:00:00 2001 From: owenlittlejohns Date: Wed, 20 Dec 2023 08:02:40 -0800 Subject: [PATCH] DAS-2034 - Remove custom retry logic from HOSS in favour of harmony-service-lib. --- CHANGELOG.md | 7 +++ docker/service_version.txt | 2 +- hoss/bbox_utilities.py | 1 + hoss/dimension_utilities.py | 3 +- hoss/exceptions.py | 11 ----- hoss/subset.py | 3 +- hoss/utilities.py | 75 +++++++++---------------------- pip_requirements.txt | 2 +- tests/unit/test_utilities.py | 87 +++++++----------------------------- 9 files changed, 52 insertions(+), 139 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 156d06b..037358d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## v1.0.1 +### 2023-12-19 + +This version of HOSS removes custom, redundant download retry logic. Instead +retries are relied upon from `harmony-service-lib` and for each stage in a +Harmony workflow. + ## v1.0.0 ### 2023-10-06 diff --git a/docker/service_version.txt b/docker/service_version.txt index 3eefcb9..7dea76e 100644 --- a/docker/service_version.txt +++ b/docker/service_version.txt @@ -1 +1 @@ -1.0.0 +1.0.1 diff --git a/hoss/bbox_utilities.py b/hoss/bbox_utilities.py index d87af68..e762082 100644 --- a/hoss/bbox_utilities.py +++ b/hoss/bbox_utilities.py @@ -58,6 +58,7 @@ def get_request_shape_file(message: Message, working_dir: str, raise UnsupportedShapeFileFormat(message.subset.shape.type) shape_file_url = message.subset.shape.process('href') + adapter_logger.info('Downloading request shape file') local_shape_file_path = download(shape_file_url, working_dir, logger=adapter_logger, access_token=message.accessToken, diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index 85c0394..2db6397 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -17,13 +17,14 @@ import numpy as np from harmony.message import Message +from harmony.message_utility import rgetattr from harmony.util import Config from varinfo import VarInfoFromDmr from hoss.bbox_utilities import flatten_list from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange from hoss.utilities import (format_variable_set_string, get_opendap_nc4, - get_value_or_default, rgetattr) + get_value_or_default) IndexRange = Tuple[int] diff --git a/hoss/exceptions.py b/hoss/exceptions.py index 216c7f5..a3a05eb 100644 --- a/hoss/exceptions.py +++ b/hoss/exceptions.py @@ -119,14 +119,3 @@ class UrlAccessFailed(CustomError): def __init__(self, url, status_code): super().__init__('UrlAccessFailed', f'{status_code} error retrieving: {url}') - - -class UrlAccessFailedWithRetries(CustomError): - """ This exception is raised when an HTTP request for a given URL has - failed a specified number of times. - - """ - def __init__(self, url): - super().__init__('UrlAccessFailedWithRetries', - f'URL: {url} was unsuccessfully requested the ' - 'maximum number of times.') diff --git a/hoss/subset.py b/hoss/subset.py index 2f4afba..d30c9c8 100644 --- a/hoss/subset.py +++ b/hoss/subset.py @@ -8,6 +8,7 @@ from typing import List, Set from harmony.message import Message, Source, Variable as HarmonyVariable +from harmony.message_utility import rgetattr from harmony.util import Config from netCDF4 import Dataset from numpy.ma import masked @@ -17,7 +18,7 @@ from hoss.dimension_utilities import (add_index_range, get_fill_slice, IndexRanges, is_index_subset, get_requested_index_ranges, - prefetch_dimension_variables, rgetattr) + prefetch_dimension_variables) from hoss.spatial import get_spatial_index_ranges from hoss.temporal import get_temporal_index_ranges from hoss.utilities import (download_url, format_variable_set_string, diff --git a/hoss/utilities.py b/hoss/utilities.py index 9da61ca..bde6ef0 100644 --- a/hoss/utilities.py +++ b/hoss/utilities.py @@ -15,10 +15,7 @@ from harmony.exceptions import ForbiddenException, ServerException from harmony.util import Config, download as util_download -from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries - - -HTTP_REQUEST_ATTEMPTS = 3 +from hoss.exceptions import UrlAccessFailed def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]: @@ -90,12 +87,12 @@ def download_url(url: str, destination: str, logger: Logger, access_token: str = None, config: Config = None, data=None) -> str: """ Use built-in Harmony functionality to download from a URL. This is - expected to be used for obtaining the granule `.dmr` and the granule - itself (only the required variables). + expected to be used for obtaining the granule `.dmr`, a prefetch of + only dimensions and bounds variables, and the subsetted granule itself. - OPeNDAP can return intermittent 500 errors. This function will retry - the original request in the event of a 500 error, but not for other - error types. In those instances, the original HTTPError is re-raised. + OPeNDAP can return intermittent 500 errors. Retries will be performed + by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP + errors are captured and re-raised as custom exceptions. The return value is the location in the file-store of the downloaded content from the URL. @@ -106,32 +103,21 @@ def download_url(url: str, destination: str, logger: Logger, if data is not None: logger.info(f'POST request data: "{format_dictionary_string(data)}"') - request_completed = False - attempts = 0 - - while not request_completed and attempts < HTTP_REQUEST_ATTEMPTS: - attempts += 1 - - try: - response = util_download( - url, - destination, - logger, - access_token=access_token, - data=data, - cfg=config - ) - request_completed = True - except ForbiddenException as harmony_exception: - raise UrlAccessFailed(url, 400) from harmony_exception - except ServerException as harmony_exception: - if attempts < HTTP_REQUEST_ATTEMPTS: - logger.info('500 error returned, retrying request.') - else: - raise UrlAccessFailedWithRetries(url) from harmony_exception - except Exception as harmony_exception: - # Not a 500 error, so raise immediately and exit the loop. - raise UrlAccessFailed(url, 'Unknown') from harmony_exception + try: + response = util_download( + url, + destination, + logger, + access_token=access_token, + data=data, + cfg=config + ) + except ForbiddenException as harmony_exception: + raise UrlAccessFailed(url, 400) from harmony_exception + except ServerException as harmony_exception: + raise UrlAccessFailed(url, 500) from harmony_exception + except Exception as harmony_exception: + raise UrlAccessFailed(url, 'Unknown') from harmony_exception return response @@ -159,22 +145,3 @@ def get_value_or_default(value: Optional[float], default: float) -> float: """ return value if value is not None else default - - -def rgetattr(input_object, requested_attribute, *args): - """ This is a recursive version of the inbuilt `getattr` method, such that - it can be called to retrieve nested attributes. For example: - the Message.subset.shape within the input Harmony message. - - Note, if a default value is specified, this will be returned if any - attribute in the specified chain is absent from the supplied object. - - """ - if '.' not in requested_attribute: - result = getattr(input_object, requested_attribute, *args) - else: - attribute_pieces = requested_attribute.split('.') - result = rgetattr(getattr(input_object, attribute_pieces[0], *args), - '.'.join(attribute_pieces[1:]), *args) - - return result diff --git a/pip_requirements.txt b/pip_requirements.txt index 7caaae7..41ba9c5 100644 --- a/pip_requirements.txt +++ b/pip_requirements.txt @@ -1,7 +1,7 @@ # This file should contain requirements to be installed via Pip. # Open source packages available from PyPI earthdata-varinfo ~= 1.0.0 -harmony-service-lib ~= 1.0.23 +harmony-service-lib ~= 1.0.25 netCDF4 ~= 1.6.4 numpy ~= 1.24.2 pyproj ~= 3.6.1 diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py index 1698f08..696805e 100644 --- a/tests/unit/test_utilities.py +++ b/tests/unit/test_utilities.py @@ -5,13 +5,12 @@ from harmony.exceptions import ForbiddenException, ServerException from harmony.util import config -from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries +from hoss.exceptions import UrlAccessFailed from hoss.utilities import (download_url, format_dictionary_string, format_variable_set_string, get_constraint_expression, get_file_mimetype, get_opendap_nc4, get_value_or_default, - HTTP_REQUEST_ATTEMPTS, move_downloaded_nc4, - rgetattr) + move_downloaded_nc4) class TestUtilities(TestCase): @@ -42,11 +41,9 @@ def test_get_file_mimetype(self): @patch('hoss.utilities.util_download') def test_download_url(self, mock_util_download): - """ Ensure that the `harmony.util.download` function is called. Also - ensure that if a 500 error is returned, the request is retried. If - a different HTTPError occurs, the caught HTTPError should be - re-raised. Finally, check the maximum number of request attempts is - not exceeded. + """ Ensure that the `harmony.util.download` function is called. If an + error occurs, the caught exception should be re-raised with a + custom exception with a human-readable error message. """ output_directory = 'output/dir' @@ -88,14 +85,22 @@ def test_download_url(self, mock_util_download): ) mock_util_download.reset_mock() - with self.subTest('500 error triggers a retry.'): + with self.subTest('500 error is caught and handled.'): mock_util_download.side_effect = [self.harmony_500_error, http_response] - response = download_url(test_url, output_directory, self.logger) + with self.assertRaises(UrlAccessFailed): + download_url(test_url, output_directory, self.logger, + access_token, self.config) - self.assertEqual(response, http_response) - self.assertEqual(mock_util_download.call_count, 2) + mock_util_download.assert_called_once_with( + test_url, + output_directory, + self.logger, + access_token=access_token, + data=None, + cfg=self.config + ) mock_util_download.reset_mock() with self.subTest('Non-500 error does not retry, and is re-raised.'): @@ -116,17 +121,6 @@ def test_download_url(self, mock_util_download): ) mock_util_download.reset_mock() - with self.subTest('Maximum number of attempts not exceeded.'): - mock_util_download.side_effect = [ - self.harmony_500_error - ] * (HTTP_REQUEST_ATTEMPTS + 1) - with self.assertRaises(UrlAccessFailedWithRetries): - download_url(test_url, output_directory, self.logger) - - self.assertEqual(mock_util_download.call_count, - HTTP_REQUEST_ATTEMPTS) - mock_util_download.reset_mock() - @patch('hoss.utilities.move_downloaded_nc4') @patch('hoss.utilities.util_download') def test_get_opendap_nc4(self, mock_download, mock_move_download): @@ -259,50 +253,3 @@ def test_get_value_or_default(self): with self.subTest('Value = None returns the supplied default'): self.assertEqual(get_value_or_default(None, 20), 20) - - def test_rgetattr(self): - """ Ensure that the recursive function can retrieve nested attributes - and uses the default argument when required. - - """ - class Child: - def __init__(self, name): - self.name = name - - class Parent: - def __init__(self, name, child_name): - self.name = name - self.child = Child(child_name) - self.none = None - - test_parent = Parent('parent_name', 'child_name') - - with self.subTest('Parent level attribute'): - self.assertEqual(rgetattr(test_parent, 'name'), 'parent_name') - - with self.subTest('Nested attribute'): - self.assertEqual(rgetattr(test_parent, 'child.name'), 'child_name') - with self.subTest('Missing parent with default'): - self.assertEqual(rgetattr(test_parent, 'absent', 'default'), - 'default') - - with self.subTest('Missing child attribute with default'): - self.assertEqual(rgetattr(test_parent, 'child.absent', 'default'), - 'default') - with self.subTest('Child requested, parent missing, default'): - self.assertEqual( - rgetattr(test_parent, 'none.something', 'default'), - 'default' - ) - - with self.subTest('Missing parent, with no default'): - with self.assertRaises(AttributeError): - rgetattr(test_parent, 'absent') - - with self.subTest('Missing child, with no default'): - with self.assertRaises(AttributeError): - rgetattr(test_parent, 'child.absent') - - with self.subTest('Child requested, parent missing, no default'): - with self.assertRaises(AttributeError): - rgetattr(test_parent, 'none.something')