Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAS-2034 - Remove custom retry logic from HOSS in favour of harmony-service-lib. #4

Merged
merged 4 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## v1.0.1
### 2023-12-19

This version of HOSS removes custom, redundant download retry logic. Instead
retries are relied upon from `harmony-service-lib` and for each stage in a
Harmony workflow.

## v1.0.0
### 2023-10-06

Expand Down
2 changes: 1 addition & 1 deletion docker/service_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.0
1.0.1
1 change: 1 addition & 0 deletions hoss/bbox_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_request_shape_file(message: Message, working_dir: str,
raise UnsupportedShapeFileFormat(message.subset.shape.type)

shape_file_url = message.subset.shape.process('href')
adapter_logger.info('Downloading request shape file')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially I included the path to the shape file, but this is somewhat meaningless as Harmony creates a temporary file, which obfuscates any original user naming convention (the file name is essentially a generated ID).

local_shape_file_path = download(shape_file_url, working_dir,
logger=adapter_logger,
access_token=message.accessToken,
Expand Down
11 changes: 0 additions & 11 deletions hoss/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,3 @@ class UrlAccessFailed(CustomError):
def __init__(self, url, status_code):
super().__init__('UrlAccessFailed',
f'{status_code} error retrieving: {url}')


class UrlAccessFailedWithRetries(CustomError):
""" This exception is raised when an HTTP request for a given URL has
failed a specified number of times.

"""
def __init__(self, url):
super().__init__('UrlAccessFailedWithRetries',
f'URL: {url} was unsuccessfully requested the '
'maximum number of times.')
56 changes: 21 additions & 35 deletions hoss/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import Config, download as util_download

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries


HTTP_REQUEST_ATTEMPTS = 3
from hoss.exceptions import UrlAccessFailed


def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]:
Expand Down Expand Up @@ -90,12 +87,12 @@ def download_url(url: str, destination: str, logger: Logger,
access_token: str = None, config: Config = None,
data=None) -> str:
""" Use built-in Harmony functionality to download from a URL. This is
expected to be used for obtaining the granule `.dmr` and the granule
itself (only the required variables).
expected to be used for obtaining the granule `.dmr`, a prefetch of
only dimensions and bounds variables, and the subsetted granule itself.

OPeNDAP can return intermittent 500 errors. This function will retry
the original request in the event of a 500 error, but not for other
error types. In those instances, the original HTTPError is re-raised.
OPeNDAP can return intermittent 500 errors. Retries will be performed
by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP
errors are captured and re-raised as custom exceptions.

The return value is the location in the file-store of the downloaded
content from the URL.
Expand All @@ -106,32 +103,21 @@ def download_url(url: str, destination: str, logger: Logger,
if data is not None:
logger.info(f'POST request data: "{format_dictionary_string(data)}"')

request_completed = False
attempts = 0

while not request_completed and attempts < HTTP_REQUEST_ATTEMPTS:
attempts += 1

try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
request_completed = True
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
if attempts < HTTP_REQUEST_ATTEMPTS:
logger.info('500 error returned, retrying request.')
else:
raise UrlAccessFailedWithRetries(url) from harmony_exception
except Exception as harmony_exception:
# Not a 500 error, so raise immediately and exit the loop.
raise UrlAccessFailed(url, 'Unknown') from harmony_exception
try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
raise UrlAccessFailed(url, 500) from harmony_exception
except Exception as harmony_exception:
raise UrlAccessFailed(url, 'Unknown') from harmony_exception

return response

Expand Down
30 changes: 13 additions & 17 deletions tests/unit/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import config

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries
from hoss.exceptions import UrlAccessFailed
from hoss.utilities import (download_url, format_dictionary_string,
format_variable_set_string,
get_constraint_expression, get_file_mimetype,
get_opendap_nc4, get_value_or_default,
HTTP_REQUEST_ATTEMPTS, move_downloaded_nc4,
rgetattr)
move_downloaded_nc4, rgetattr)


class TestUtilities(TestCase):
Expand Down Expand Up @@ -92,10 +91,18 @@ def test_download_url(self, mock_util_download):
mock_util_download.side_effect = [self.harmony_500_error,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This subtest name is incorrect now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I updated that test name (and also the doc string for the overall test that mentioned retries would be tested)

http_response]

response = download_url(test_url, output_directory, self.logger)
with self.assertRaises(UrlAccessFailed):
download_url(test_url, output_directory, self.logger,
access_token, self.config)

self.assertEqual(response, http_response)
self.assertEqual(mock_util_download.call_count, 2)
mock_util_download.assert_called_once_with(
test_url,
output_directory,
self.logger,
access_token=access_token,
data=None,
cfg=self.config
)
mock_util_download.reset_mock()

with self.subTest('Non-500 error does not retry, and is re-raised.'):
Expand All @@ -116,17 +123,6 @@ def test_download_url(self, mock_util_download):
)
mock_util_download.reset_mock()

with self.subTest('Maximum number of attempts not exceeded.'):
mock_util_download.side_effect = [
self.harmony_500_error
] * (HTTP_REQUEST_ATTEMPTS + 1)
with self.assertRaises(UrlAccessFailedWithRetries):
download_url(test_url, output_directory, self.logger)

self.assertEqual(mock_util_download.call_count,
HTTP_REQUEST_ATTEMPTS)
mock_util_download.reset_mock()

@patch('hoss.utilities.move_downloaded_nc4')
@patch('hoss.utilities.util_download')
def test_get_opendap_nc4(self, mock_download, mock_move_download):
Expand Down