From 4d0de302b3e19b8982722f98db81479259896408 Mon Sep 17 00:00:00 2001 From: wphyojpl <38299756+wphyojpl@users.noreply.github.com> Date: Mon, 9 Dec 2024 05:12:55 -0800 Subject: [PATCH] fix: Wrong location archive keys (#448) * feat: removed duplicated classes + tests * feat: update setup.py with the library * feat: bump version in requirements.txt * feat: update import statements * fix: update test cases * fix: removing boto3 in lambda zip * fix: bump uds lib + make checksum + size valid for empty str + remove duplicates * fix: need to fall back to semi-brute force to avoid duplication * fix: wrong location for archive keys * fix: update test * fix: remove unused file --- .../uds_api/dapa/granules_dapa_query_es.py | 27 ++-- .../uds_api/__init__.py | 0 .../uds_api/dapa/__init__.py | 0 .../dapa/test_granules_dapa_query_es.py | 142 ++++++++++++++++++ tests/integration_tests/test_uds_api.py | 2 +- 5 files changed, 159 insertions(+), 12 deletions(-) create mode 100644 tests/cumulus_lambda_functions/uds_api/__init__.py create mode 100644 tests/cumulus_lambda_functions/uds_api/dapa/__init__.py create mode 100644 tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py diff --git a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py index a4cf9873..913c2f02 100644 --- a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py +++ b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py @@ -75,6 +75,8 @@ def __get_time_range_terms(self): ] def __create_pagination_links(self, page_marker_str): + if self.__pagination_link_obj is None: + return [] new_queries = deepcopy(self.__pagination_link_obj.org_query_params) new_queries['limit'] = int(new_queries['limit'] if 'limit' in new_queries else self.__limit) current_page = f"{self.__pagination_link_obj.requesting_base_url}?{'&'.join([f'{k}={v}' for k, v in new_queries.items()])}" @@ -114,8 +116,20 @@ def archive_single_granule(self, granule_id): daac_archiver.send_to_daac_internal(cnm_response) return + def __restructure_each_granule_result(self, each_granules_query_result_stripped): + if 'event_time' in each_granules_query_result_stripped: + each_granules_query_result_stripped.pop('event_time') + if 'bbox' in each_granules_query_result_stripped: + each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox']) + for each_archiving_key in GranulesIndexMapping.archiving_keys: + if each_archiving_key in each_granules_query_result_stripped: + each_granules_query_result_stripped['properties'][each_archiving_key] = each_granules_query_result_stripped.pop(each_archiving_key) + return + def get_single_granule(self, granule_id): granules_query_dsl = { + 'size': 1, + 'sort': [{'id': {'order': 'asc'}}], 'query': {'bool': {'must': [{ 'term': {'id': granule_id} }]}} @@ -132,10 +146,7 @@ def get_single_granule(self, granule_id): each_granules_query_result_stripped = granules_query_result['hits']['hits'][0]['_source'] self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False) each_granules_query_result_stripped['links'].append(self_link) - if 'event_time' in each_granules_query_result_stripped: - each_granules_query_result_stripped.pop('event_time') - if 'bbox' in each_granules_query_result_stripped: - each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox']) + self.__restructure_each_granule_result(each_granules_query_result_stripped) return each_granules_query_result_stripped def start(self): @@ -152,13 +163,7 @@ def start(self): for each_granules_query_result_stripped in granules_query_result_stripped: self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False) each_granules_query_result_stripped['links'].append(self_link) - if 'event_time' in each_granules_query_result_stripped: - each_granules_query_result_stripped.pop('event_time') - if 'bbox' in each_granules_query_result_stripped: - each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox']) - for each_archiving_key in GranulesIndexMapping.archiving_keys: - if each_archiving_key in each_granules_query_result_stripped: - each_granules_query_result_stripped['properties'][each_archiving_key] = each_granules_query_result_stripped.pop(each_archiving_key) + self.__restructure_each_granule_result(each_granules_query_result_stripped) pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join(granules_query_result['hits']['hits'][-1]['sort']) return { 'statusCode': 200, diff --git a/tests/cumulus_lambda_functions/uds_api/__init__.py b/tests/cumulus_lambda_functions/uds_api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/uds_api/dapa/__init__.py b/tests/cumulus_lambda_functions/uds_api/dapa/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py b/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py new file mode 100644 index 00000000..9cc3fdc4 --- /dev/null +++ b/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py @@ -0,0 +1,142 @@ +import os +from unittest import TestCase + +from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex +from cumulus_lambda_functions.uds_api.dapa.granules_dapa_query_es import GranulesDapaQueryEs + + +class TestGranulesDapaQueryEs(TestCase): + def setUp(self) -> None: + super().setUp() + self.custom_metadata_body = { + 'tag': {'type': 'keyword'}, + 'c_data1': {'type': 'long'}, + 'c_data2': {'type': 'boolean'}, + 'c_data3': {'type': 'keyword'}, + } + + self.tenant = 'UDS_LOCAL_TEST' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'KKK-09' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.03.20.14.40'.replace('.', '') # '2402011200' + self.collection_version = '001' + return + + def test_get_single_granule_01(self): + os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' + os.environ['ES_PORT'] = '9200' + collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + granule_id = f'{collection_id}:test_file09' + + mock_feature = { + 'archive_status': 'cnm_r_failed', + 'archive_error_message': 'testing 1 2 3', + 'archive_error_code': 'VALIDATION_ERROR', + + "type": "Feature", + "stac_version": "1.0.0", + "id": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09", + "properties": { + "datetime": "2024-11-26T23:37:15.288000Z", + "start_datetime": "2016-01-31T18:00:00.009000Z", + "end_datetime": "2016-01-31T19:59:59.991000Z", + "created": "1970-01-01T00:00:00Z", + "updated": "2024-11-26T23:38:01.692000Z", + "status": "completed", + "provider": "unity", + }, + "geometry": { + "type": "Point", + "coordinates": [ + 0.0, + 0.0 + ] + }, + "links": [ + { + "rel": "collection", + "href": "." + } + ], + "assets": { + "test_file09.nc": { + "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc", + "title": "test_file09.nc", + "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;", + "file:size": 0, + "file:checksum": "00000000000000000000000000000000", + "roles": [ + "data" + ] + }, + "test_file09.nc.cas": { + "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc.cas", + "title": "test_file09.nc.cas", + "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;", + "file:size": 0, + "file:checksum": "00000000000000000000000000000000", + "roles": [ + "metadata" + ] + }, + "test_file09.nc.stac.json": { + "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc.stac.json", + "title": "test_file09.nc.stac.json", + "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;", + "file:size": 0, + "file:checksum": "00000000000000000000000000000000", + "roles": [ + "metadata" + ] + }, + "test_file09.cmr.xml": { + "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.cmr.xml", + "title": "test_file09.cmr.xml", + "description": "size=1716;checksumType=md5;checksum=f842ba4e23e76ae81014a01c820b01f7;", + "file:size": 1716, + "file:checksum": "f842ba4e23e76ae81014a01c820b01f7", + "roles": [ + "metadata" + ] + } + }, + "bbox": { + "type": "envelope", + "coordinates": [ + [ + -180.0, + 90.0 + ], + [ + 180.0, + -90.0 + ] + ] + }, + "stac_extensions": [ + "https://stac-extensions.github.io/file/v2.1.0/schema.json" + ], + "collection": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001", + "event_time": 1732664287722 + } + + granules_db_index = GranulesDbIndex() + granules_db_index.add_entry(self.tenant, self.tenant_venue, mock_feature, granule_id) + granules_dapa_query = GranulesDapaQueryEs(collection_id, 1, None, None, None, None, f'localhost/api-prefix') + granules_result = granules_dapa_query.get_single_granule(granule_id) + print(granules_result) + sample = { + 'type': 'Feature', + 'stac_version': '1.0.0', + 'id': 'URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53', + 'properties': {'tag': '#sample', 'c_data1': [1, 10, 100, 1000], 'c_data2': [False, True, True, False, True], 'c_data3': ['Bellman Ford'], 'soil10': {'0_0': 0, '0_1': 1, '0_2': 0}, 'datetime': '2024-10-01T13:12:11.810000Z', 'start_datetime': '2016-01-31T18:00:00.009000Z', 'end_datetime': '2016-01-31T19:59:59.991000Z', 'created': '1970-01-01T00:00:00Z', 'updated': '2024-10-01T13:12:55.423000Z', 'status': 'completed', 'provider': 'unity', 'archive_status': 'cnm_r_failed', 'archive_error_message': '[{"uri": "https://uds-distribution-placeholder/uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53/abcd.1234.efgh.test_file-24.08.13.13.53.data.stac.json", "error": "mismatched size: 11 v. -1"}]', 'archive_error_code': 'VALIDATION_ERROR'}, + 'geometry': {'type': 'Point', 'coordinates': [0.0, 0.0]}, 'links': [{'rel': 'collection', 'href': '.'}, {'rel': 'self', 'href': '/Users/wphyo/Projects/unity/unity-data-services/tests/cumulus_lambda_functions/uds_api/dapa/localhost/api-prefix/collections/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/items/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53', 'type': 'application/json', 'title': 'URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53'}], 'assets': {'abcd.1234.efgh.test_file-24.08.13.13.53.data.stac.json': {'href': 's3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53/abcd.1234.efgh.test_file-24.08.13.13.53.data.stac.json', 'title': 'abcd.1234.efgh.test_file-24.08.13.13.53.data.stac.json', 'description': 'size=-1;checksumType=md5;checksum=unknown;', 'file:size': -1, 'file:checksum': 'unknown', 'roles': ['data']}, 'abcd.1234.efgh.test_file-24.08.13.13.53.cmr.xml': {'href': 's3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53/abcd.1234.efgh.test_file-24.08.13.13.53.cmr.xml', 'title': 'abcd.1234.efgh.test_file-24.08.13.13.53.cmr.xml', 'description': 'size=1812;checksumType=md5;checksum=38c9d99e56312b595faa5e99df30b175;', 'file:size': 1812, 'file:checksum': '38c9d99e56312b595faa5e99df30b175', 'roles': ['metadata']}, 'abcd.1234.efgh.test_file-24.08.13.13.53.nc.stac.json': {'href': 's3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53/abcd.1234.efgh.test_file-24.08.13.13.53.nc.stac.json', 'title': 'abcd.1234.efgh.test_file-24.08.13.13.53.nc.stac.json', 'description': 'size=-1;checksumType=md5;checksum=unknown;', 'file:size': -1, 'file:checksum': 'unknown', 'roles': ['metadata']}, 'abcd.1234.efgh.test_file-24.08.13.13.53.nc.cas': {'href': 's3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208:abcd.1234.efgh.test_file-24.08.13.13.53/abcd.1234.efgh.test_file-24.08.13.13.53.nc.cas', 'title': 'abcd.1234.efgh.test_file-24.08.13.13.53.nc.cas', 'description': 'size=-1;checksumType=md5;checksum=unknown;', 'file:size': -1, 'file:checksum': 'unknown', 'roles': ['metadata']}}, 'bbox': [-180.0, -90.0, 180.0, 90.0], 'stac_extensions': ['https://stac-extensions.github.io/file/v2.1.0/schema.json'], 'collection': 'URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2409301208'} + self.assertTrue('properties' in granules_result, 'missing properties in granule') + self.assertTrue('archive_status' in granules_result['properties'], 'missing archive_status in granule>properties') + self.assertTrue('archive_error_message' in granules_result['properties'], 'missing archive_error_message in granule>properties') + self.assertTrue('archive_error_code' in granules_result['properties'], 'missing archive_error_code in granule>properties') + self.assertFalse('archive_status' in granules_result, 'missing archive_status in granule') + self.assertFalse('archive_error_message' in granules_result, 'missing archive_error_message in granule') + self.assertFalse('archive_error_code' in granules_result, 'missing archive_error_code in granule') + + return diff --git a/tests/integration_tests/test_uds_api.py b/tests/integration_tests/test_uds_api.py index 0580e5a3..4869162a 100644 --- a/tests/integration_tests/test_uds_api.py +++ b/tests/integration_tests/test_uds_api.py @@ -184,7 +184,7 @@ def test_collections_get_single_granule(self): return def test_granules_get(self): - post_url = f'{self.uds_url}collections/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2408290522/items/' # MCP Dev + post_url = f'{self.uds_url}collections/urn:nasa:unity:unity:dev:SBG-L2A_RFL___1/items/' # MCP Dev headers = { 'Authorization': f'Bearer {self.bearer_token}', }