Skip to content

Commit

Permalink
fix: Wrong location archive keys (#448)
Browse files Browse the repository at this point in the history
* feat: removed duplicated classes + tests

* feat: update setup.py with the library

* feat: bump version in requirements.txt

* feat: update import statements

* fix: update test cases

* fix: removing boto3 in lambda zip

* fix: bump uds lib + make checksum + size valid for empty str + remove duplicates

* fix: need to fall back to semi-brute force to avoid duplication

* fix: wrong location for archive keys

* fix: update test

* fix: remove unused file
  • Loading branch information
wphyojpl authored Dec 9, 2024
1 parent f95b3f5 commit 4d0de30
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 12 deletions.
27 changes: 16 additions & 11 deletions cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def __get_time_range_terms(self):
]

def __create_pagination_links(self, page_marker_str):
if self.__pagination_link_obj is None:
return []
new_queries = deepcopy(self.__pagination_link_obj.org_query_params)
new_queries['limit'] = int(new_queries['limit'] if 'limit' in new_queries else self.__limit)
current_page = f"{self.__pagination_link_obj.requesting_base_url}?{'&'.join([f'{k}={v}' for k, v in new_queries.items()])}"
Expand Down Expand Up @@ -114,8 +116,20 @@ def archive_single_granule(self, granule_id):
daac_archiver.send_to_daac_internal(cnm_response)
return

def __restructure_each_granule_result(self, each_granules_query_result_stripped):
    """
    Reshape one granule document, in place, before it is returned to the caller.

    Three adjustments are applied to the given dictionary:
      1. the 'event_time' entry is dropped when present;
      2. the 'bbox' entry, when present, is replaced by the output of
         GranulesDbIndex.from_es_bbox;
      3. every key listed in GranulesIndexMapping.archiving_keys that sits at
         the top level is moved under 'properties'.

    :param each_granules_query_result_stripped: dict for a single granule; it is mutated directly.
    :return: None
    """
    granule = each_granules_query_result_stripped
    granule.pop('event_time', None)
    if 'bbox' in granule:
        granule['bbox'] = GranulesDbIndex.from_es_bbox(granule['bbox'])
    for archiving_key in GranulesIndexMapping.archiving_keys:
        if archiving_key not in granule:
            continue
        # Detach the value from the top level first, then re-attach it under 'properties'.
        archived_value = granule.pop(archiving_key)
        granule['properties'][archiving_key] = archived_value
    return

def get_single_granule(self, granule_id):
granules_query_dsl = {
'size': 1,
'sort': [{'id': {'order': 'asc'}}],
'query': {'bool': {'must': [{
'term': {'id': granule_id}
}]}}
Expand All @@ -132,10 +146,7 @@ def get_single_granule(self, granule_id):
each_granules_query_result_stripped = granules_query_result['hits']['hits'][0]['_source']
self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False)
each_granules_query_result_stripped['links'].append(self_link)
if 'event_time' in each_granules_query_result_stripped:
each_granules_query_result_stripped.pop('event_time')
if 'bbox' in each_granules_query_result_stripped:
each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox'])
self.__restructure_each_granule_result(each_granules_query_result_stripped)
return each_granules_query_result_stripped

def start(self):
Expand All @@ -152,13 +163,7 @@ def start(self):
for each_granules_query_result_stripped in granules_query_result_stripped:
self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False)
each_granules_query_result_stripped['links'].append(self_link)
if 'event_time' in each_granules_query_result_stripped:
each_granules_query_result_stripped.pop('event_time')
if 'bbox' in each_granules_query_result_stripped:
each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox'])
for each_archiving_key in GranulesIndexMapping.archiving_keys:
if each_archiving_key in each_granules_query_result_stripped:
each_granules_query_result_stripped['properties'][each_archiving_key] = each_granules_query_result_stripped.pop(each_archiving_key)
self.__restructure_each_granule_result(each_granules_query_result_stripped)
pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join(granules_query_result['hits']['hits'][-1]['sort'])
return {
'statusCode': 200,
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import os
from unittest import TestCase

from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex
from cumulus_lambda_functions.uds_api.dapa.granules_dapa_query_es import GranulesDapaQueryEs


class TestGranulesDapaQueryEs(TestCase):
    """
    Checks for GranulesDapaQueryEs.

    NOTE(review): the test below points ES_URL at a hosted Elasticsearch domain and
    writes an entry through GranulesDbIndex, so it presumably needs network access
    to that domain to pass -- confirm before running it in CI.
    """

    def setUp(self) -> None:
        """Prepare the tenant / venue / collection identifiers shared by the tests."""
        super().setUp()
        self.custom_metadata_body = {
            'tag': {'type': 'keyword'},
            'c_data1': {'type': 'long'},
            'c_data2': {'type': 'boolean'},
            'c_data3': {'type': 'keyword'},
        }

        self.tenant = 'UDS_LOCAL_TEST'  # 'uds_local_test'  # 'uds_sandbox'
        self.tenant_venue = 'DEV'  # 'DEV1'  # 'dev'
        self.collection_name = 'KKK-09'  # 'uds_collection'  # 'sbx_collection'
        self.collection_version = '001'
        return

    def test_get_single_granule_01(self):
        """
        Archive keys stored at the top level of a granule document must be
        returned under 'properties' by get_single_granule, and must no longer
        appear at the top level of the returned granule.
        """
        os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com'
        os.environ['ES_PORT'] = '9200'
        collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}'
        granule_id = f'{collection_id}:test_file09'

        # The three archive_* keys are deliberately placed at the top level,
        # outside 'properties', to exercise the relocation on read.
        mock_feature = {
            'archive_status': 'cnm_r_failed',
            'archive_error_message': 'testing 1 2 3',
            'archive_error_code': 'VALIDATION_ERROR',

            "type": "Feature",
            "stac_version": "1.0.0",
            "id": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09",
            "properties": {
                "datetime": "2024-11-26T23:37:15.288000Z",
                "start_datetime": "2016-01-31T18:00:00.009000Z",
                "end_datetime": "2016-01-31T19:59:59.991000Z",
                "created": "1970-01-01T00:00:00Z",
                "updated": "2024-11-26T23:38:01.692000Z",
                "status": "completed",
                "provider": "unity",
            },
            "geometry": {
                "type": "Point",
                "coordinates": [
                    0.0,
                    0.0
                ]
            },
            "links": [
                {
                    "rel": "collection",
                    "href": "."
                }
            ],
            "assets": {
                "test_file09.nc": {
                    "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc",
                    "title": "test_file09.nc",
                    "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;",
                    "file:size": 0,
                    "file:checksum": "00000000000000000000000000000000",
                    "roles": [
                        "data"
                    ]
                },
                "test_file09.nc.cas": {
                    "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc.cas",
                    "title": "test_file09.nc.cas",
                    "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;",
                    "file:size": 0,
                    "file:checksum": "00000000000000000000000000000000",
                    "roles": [
                        "metadata"
                    ]
                },
                "test_file09.nc.stac.json": {
                    "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.nc.stac.json",
                    "title": "test_file09.nc.stac.json",
                    "description": "size=0;checksumType=md5;checksum=00000000000000000000000000000000;",
                    "file:size": 0,
                    "file:checksum": "00000000000000000000000000000000",
                    "roles": [
                        "metadata"
                    ]
                },
                "test_file09.cmr.xml": {
                    "href": "s3://uds-sbx-cumulus-staging/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001/URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001:test_file09/test_file09.cmr.xml",
                    "title": "test_file09.cmr.xml",
                    "description": "size=1716;checksumType=md5;checksum=f842ba4e23e76ae81014a01c820b01f7;",
                    "file:size": 1716,
                    "file:checksum": "f842ba4e23e76ae81014a01c820b01f7",
                    "roles": [
                        "metadata"
                    ]
                }
            },
            "bbox": {
                "type": "envelope",
                "coordinates": [
                    [
                        -180.0,
                        90.0
                    ],
                    [
                        180.0,
                        -90.0
                    ]
                ]
            },
            "stac_extensions": [
                "https://stac-extensions.github.io/file/v2.1.0/schema.json"
            ],
            "collection": "URN:NASA:UNITY:UDS_LOCAL_TEST:DEV:KKK-09___001",
            "event_time": 1732664287722
        }

        granules_db_index = GranulesDbIndex()
        granules_db_index.add_entry(self.tenant, self.tenant_venue, mock_feature, granule_id)
        granules_dapa_query = GranulesDapaQueryEs(collection_id, 1, None, None, None, None, 'localhost/api-prefix')
        granules_result = granules_dapa_query.get_single_granule(granule_id)

        archive_keys = ('archive_status', 'archive_error_message', 'archive_error_code')
        self.assertIn('properties', granules_result, 'missing properties in granule')
        for each_archive_key in archive_keys:
            # Each archive key must have been relocated under 'properties' ...
            self.assertIn(each_archive_key, granules_result['properties'], f'missing {each_archive_key} in granule>properties')
        for each_archive_key in archive_keys:
            # ... and must no longer be present at the top level of the granule.
            self.assertNotIn(each_archive_key, granules_result, f'{each_archive_key} should not remain at granule top level')
        return
2 changes: 1 addition & 1 deletion tests/integration_tests/test_uds_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def test_collections_get_single_granule(self):
return

def test_granules_get(self):
post_url = f'{self.uds_url}collections/URN:NASA:UNITY:UDS_MY_LOCAL_ARCHIVE_TEST:DEV:UDS_UNIT_COLLECTION___2408290522/items/' # MCP Dev
post_url = f'{self.uds_url}collections/urn:nasa:unity:unity:dev:SBG-L2A_RFL___1/items/' # MCP Dev
headers = {
'Authorization': f'Bearer {self.bearer_token}',
}
Expand Down

0 comments on commit 4d0de30

Please sign in to comment.