From a605a529acc1835a97addd8988c636f480027324 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 08:58:15 -0700 Subject: [PATCH 01/24] chore: move the unit test --- .../lib/cognito_login/test_cognito_token_retriever.py | 0 tests/lib/__init__.py | 0 tests/lib/cognito_login/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => cumulus_lambda_functions}/lib/cognito_login/test_cognito_token_retriever.py (100%) delete mode 100644 tests/lib/__init__.py delete mode 100644 tests/lib/cognito_login/__init__.py diff --git a/tests/lib/cognito_login/test_cognito_token_retriever.py b/tests/cumulus_lambda_functions/lib/cognito_login/test_cognito_token_retriever.py similarity index 100% rename from tests/lib/cognito_login/test_cognito_token_retriever.py rename to tests/cumulus_lambda_functions/lib/cognito_login/test_cognito_token_retriever.py diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/lib/cognito_login/__init__.py b/tests/lib/cognito_login/__init__.py deleted file mode 100644 index e69de29b..00000000 From 4f7e60de4fa37c94aa6b5d3d6c4714bd0a56c7a3 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 10:25:53 -0700 Subject: [PATCH 02/24] feat: add create collection for cumulus --- .../cumulus_wrapper/query_collections.py | 30 ++++++++ .../cumulus_wrapper/__init__.py | 0 .../cumulus_wrapper/test_query_collection.py | 71 +++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py create mode 100644 tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py diff --git a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py b/cumulus_lambda_functions/cumulus_wrapper/query_collections.py index 8788b52c..827a31e8 100644 --- a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py +++ b/cumulus_lambda_functions/cumulus_wrapper/query_collections.py @@ -46,6 
+46,36 @@ def get_size(self, private_api_prefix: str): total_size = query_result['meta']['count'] return {'total_size': total_size} + def create_creation(self, new_collection: dict, private_api_prefix: str): + payload = { + 'httpMethod': 'POST', + 'resource': '/{proxy+}', + 'path': f'/{self.__collections_key}', + 'headers': { + 'Content-Type': 'application/json', + }, + 'body': json.dumps(new_collection) + } + LOGGER.debug(f'payload: {payload}') + try: + query_result = self._invoke_api(payload, private_api_prefix) + """ + {'statusCode': 500, 'body': '', 'headers': {}} + """ + if query_result['statusCode'] >= 500: + LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}') + return {'server_error': query_result} + if query_result['statusCode'] >= 400: + LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}') + return {'client_error': query_result} + query_result = json.loads(query_result['body']) + LOGGER.debug(f'json query_result: {query_result}') + if 'message' not in query_result: + return {'server_error': f'invalid response: {query_result}'} + except Exception as e: + LOGGER.exception('error while invoking') + return {'server_error': f'error while invoking:{str(e)}'} + return {'status': query_result['message']} def __get_stats(self, collection_id, private_api_prefix: str): payload = { diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py b/tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py new file mode 100644 index 00000000..ebbbc2af --- /dev/null +++ b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py @@ -0,0 +1,71 @@ +from datetime import datetime +from unittest import TestCase + +from 
cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery + + +class TestQueryCollection(TestCase): + def test_01(self): + lambda_prefix = 'am-uds-dev-cumulus' + collection_query = CollectionsQuery('NA', 'NA') + collection_version = int(datetime.utcnow().timestamp()) + sample_collection = { + "name": "UNITY_CUMULUS_DEV_UNIT_TEST", + "version": str(collection_version), + "process": "modis", + # "dataType": "MOD09GQ", + "duplicateHandling": "skip", + # "provider_path": "cumulus-test-data/pdrs", + "granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$", + "granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+", + "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", + "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", + "files": [ + { + "bucket": "internal", + "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", + "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS", + "type": "data", + "reportToEms": True + }, + { + "bucket": "internal", + "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS$", + "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", + "reportToEms": True, + "type": "metadata" + }, + { + "bucket": "internal", + "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", + "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", + "reportToEms": True, + "type": "metadata" + }, + { + "bucket": "internal", + "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", + "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", + "reportToEms": True, + "type": "metadata" + } + ], + } + # sample_collection = { + # "createdAt": 1647992847582, + # "reportToEms": True, + # "updatedAt": 1647992847582, + # "timestamp": 1647992849273 + # } + response = collection_query.create_creation(sample_collection, lambda_prefix) + self.assertTrue('status' in response, f'status not in response: {response}') + 
self.assertEqual('Record saved', response['status'], f'wrong status: {response}') + return + + def test_02(self): + lambda_prefix = 'am-uds-dev-cumulus' + collection_query = CollectionsQuery('NA', 'NA') + + collections = collection_query.query_direct_to_private_api(lambda_prefix) + self.assertTrue('results' in collections, f'results not in collections: {collections}') + return From b19cfce9a3235d8f419959188cb0fcc18ddf089e Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 13:21:28 -0700 Subject: [PATCH 03/24] feat: adding stac to cumulus for json --- .../cumulus_stac/collection_transformer.py | 81 ++++++++++++++++--- .../upload_granules.py | 7 +- setup.py | 1 + .../test_collection_transformer.py | 8 +- 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py index fd608660..f4f82284 100644 --- a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py +++ b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py @@ -1,7 +1,9 @@ import json from datetime import datetime -from urllib.parse import quote_plus +from urllib.parse import quote_plus, urlparse, unquote_plus +import pystac +from pystac import Link from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract @@ -281,11 +283,12 @@ class CollectionTransformer(StacTransformerAbstract): - def __init__(self): + def __init__(self, report_to_ems:bool = True): self.__stac_collection_schema = json.loads(STAC_COLLECTION_SCHEMA) self.__cumulus_collection_schema = {} + self.__report_to_ems = report_to_ems - def __convert_to_stac_links(self, collection_file_obj: dict): + def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'item'): """ expected output { @@ -310,7 +313,7 @@ def __convert_to_stac_links(self, collection_file_obj: dict): if collection_file_obj is None: return {} stac_link = { - 'rel': 
'item', + 'rel': rel_type, } if 'type' in collection_file_obj: stac_link['type'] = collection_file_obj['type'] @@ -418,14 +421,70 @@ def to_stac(self, source: dict) -> dict: "process": [source['process'] if 'process' in source else ''], "totalGranules": [source['total_size'] if 'total_size' in source else -1], }, - "links": [{ - "rel": "root", - "type": "application/json", - "title": f"{source['name']}___{source['version']}", - "href": "./collection.json" - }] + [self.__convert_to_stac_links(k) for k in source['files']], + "links": [self.__convert_to_stac_links({ + "regex": source['url_path'], + "sampleFileName": source['sampleFileName'], + "type": "application/json", + + }, 'root')] + [self.__convert_to_stac_links(k) for k in source['files']], } return stac_collection + def get_href(self, input_href: str): + parse_result = urlparse(input_href) + if parse_result.query == '': + return '' + query_dict = [k.split('=') for k in parse_result.query.split('&')] + query_dict = {k[0]: unquote_plus(k[1]) for k in query_dict} + return query_dict + + def __convert_from_stac_links(self, link_obj: dict): + output_file_object = { + 'reportToEms': self.__report_to_ems + } + if 'type' in link_obj: + output_file_object['type'] = link_obj['type'] + if 'title' in link_obj: + output_file_object['sampleFileName'] = link_obj['title'] + if 'href' in link_obj: + href_dict = self.get_href(link_obj['href']) + if 'bucket' in href_dict: + output_file_object['bucket'] = href_dict['bucket'] + if 'regex' in href_dict: + output_file_object['regex'] = href_dict['regex'] + return output_file_object + def from_stac(self, source: dict) -> dict: - return {} + input_dapa_collection = pystac.Collection.from_dict(source) + if not input_dapa_collection.validate(): + raise ValueError(f'invalid source dapa: {input_dapa_collection}') + output_collection_cumulus = { + # "createdAt": 1647992847582, + "reportToEms": self.__report_to_ems, + "duplicateHandling": "skip", + # "updatedAt": 1647992847582, + # 
"timestamp": 1647992849273 + } + summaries = input_dapa_collection.summaries.lists + if 'granuleId' in summaries: + output_collection_cumulus['granuleId'] = summaries['granuleId'][0] + if 'granuleIdExtraction' in summaries: + output_collection_cumulus['granuleIdExtraction'] = summaries['granuleIdExtraction'][0] + if 'process' in summaries: + output_collection_cumulus['process'] = summaries['process'][0] + name_version = input_dapa_collection.id.split('___') + output_collection_cumulus['name'] = name_version[0] + output_collection_cumulus['version'] = name_version[1] + output_files = [] + for each_link_obj in input_dapa_collection.links: + each_link_obj: Link = each_link_obj + each_file_obj = self.__convert_from_stac_links(each_link_obj.to_dict()) + if each_link_obj.rel == 'root': + if 'regex' in each_file_obj: + output_collection_cumulus['url_path'] = each_file_obj['regex'] + if 'sampleFileName' in each_file_obj: + output_collection_cumulus['sampleFileName'] = each_file_obj['sampleFileName'] + else: + output_files.append(each_file_obj) + output_collection_cumulus['files'] = output_files + return output_collection_cumulus diff --git a/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py b/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py index 1c5a79f1..7d22c649 100644 --- a/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py +++ b/cumulus_lambda_functions/cumulus_upload_granules/upload_granules.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse, unquote_plus from cumulus_lambda_functions.cumulus_dapa_client.dapa_client import DapaClient +from cumulus_lambda_functions.cumulus_stac.collection_transformer import CollectionTransformer from cumulus_lambda_functions.lib.aws.aws_s3 import AwsS3 LOGGER = logging.getLogger(__name__) @@ -49,11 +50,7 @@ def __set_props_from_env(self): return self def __get_href(self, input_href: str): - parse_result = urlparse(input_href) - if parse_result.query == '': - return '' - 
query_dict = [k.split('=') for k in parse_result.query.split('&')] - query_dict = {k[0]: unquote_plus(k[1]) for k in query_dict} + query_dict = CollectionTransformer().get_href(input_href) if 'regex' not in query_dict: raise ValueError(f'missing regex in {input_href}') return query_dict['regex'] diff --git a/setup.py b/setup.py index e7979f1a..0a7287a0 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ ] flask_requires = [ + 'pystac', 'jsonschema', 'flask===2.0.1', 'flask_restful===0.3.9', 'flask-restx===0.5.0', # to create Flask server 'gevent===21.8.0', 'greenlet===1.1.1', # to run flask server 'werkzeug===2.0.1', diff --git a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py index 9968ea66..094f9fd4 100644 --- a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py +++ b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py @@ -56,7 +56,7 @@ def test_01(self): "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", "timestamp": 1647992849273 } - raw = { + converted_stac = { "type": "Collection", "stac_version": "1.0.0", # "stac_extensions": [], @@ -82,6 +82,8 @@ def test_01(self): }, ] } - raw = CollectionTransformer().to_stac(source) - self.assertEqual(None, stac_validator.validate(raw), f'invalid stac format: {stac_validator}') + converted_stac = CollectionTransformer().to_stac(source) + self.assertEqual(None, stac_validator.validate(converted_stac), f'invalid stac format: {stac_validator}') + converted_cumulus = CollectionTransformer().from_stac(converted_stac) + self.assertEqual(1, 2, f'wrong converted_cumulus: {json.dumps(converted_cumulus, indent=4)}') return From a5d58ed8724d5fa55756d9461cbf26d5844244ec Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 13:59:23 -0700 Subject: [PATCH 04/24] feat: finished transforming from stac to cumulus --- 
.../cumulus_stac/collection_transformer.py | 8 ++++++++ .../cumulus_stac/test_collection_transformer.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py index f4f82284..301d5297 100644 --- a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py +++ b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py @@ -6,6 +6,7 @@ from pystac import Link from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract +from cumulus_lambda_functions.lib.time_utils import TimeUtils STAC_COLLECTION_SCHEMA = '''{ "$schema": "http://json-schema.org/draft-07/schema#", @@ -487,4 +488,11 @@ def from_stac(self, source: dict) -> dict: else: output_files.append(each_file_obj) output_collection_cumulus['files'] = output_files + if len(input_dapa_collection.extent.temporal.intervals) > 0: + date_interval = input_dapa_collection.extent.temporal.intervals[0] + if len(date_interval) == 2: + if date_interval[0] is not None: + output_collection_cumulus['dateFrom'] = date_interval[0].strftime(TimeUtils.MMDD_FORMAT) + if date_interval[1] is not None: + output_collection_cumulus['dateTo'] = date_interval[1].strftime(TimeUtils.MMDD_FORMAT) return output_collection_cumulus diff --git a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py index 094f9fd4..f988631d 100644 --- a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py +++ b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py @@ -51,7 +51,7 @@ def test_01(self): "granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+", "reportToEms": True, "version": "001", - "duplicateHandling": "replace", + "duplicateHandling": "skip", "updatedAt": 1647992847582, "url_path": 
"{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", "timestamp": 1647992849273 @@ -85,5 +85,11 @@ def test_01(self): converted_stac = CollectionTransformer().to_stac(source) self.assertEqual(None, stac_validator.validate(converted_stac), f'invalid stac format: {stac_validator}') converted_cumulus = CollectionTransformer().from_stac(converted_stac) - self.assertEqual(1, 2, f'wrong converted_cumulus: {json.dumps(converted_cumulus, indent=4)}') + for k, v in source.items(): + if k in ['updatedAt', 'timestamp', 'createdAt']: + continue + self.assertTrue(k in converted_cumulus, f'missing {k}') + if k not in ['files', 'dateFrom', 'dateTo']: + self.assertEqual(v, converted_cumulus[k], f'wrong value for {k}') + self.assertEqual(sorted(json.dumps(source['files'])), sorted(json.dumps(converted_cumulus['files'])), f"wrong files content: {source['files']} vs. {converted_cumulus['files']}") return From 68328e0a5975117c7db68aeaa518304f14915f6e Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 18:00:22 -0700 Subject: [PATCH 05/24] fix: try creating collection with min requirement --- .../cumulus_stac/collection_transformer.py | 18 ++++++++++++------ .../cumulus_wrapper/test_query_collection.py | 10 +++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py index 301d5297..c62b7808 100644 --- a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py +++ b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py @@ -289,6 +289,14 @@ def __init__(self, report_to_ems:bool = True): self.__cumulus_collection_schema = {} self.__report_to_ems = report_to_ems + def generate_target_link_url(self, regex: str = None, bucket: str = None): + href_link = ['unknown_bucket', 'unknown_regex'] + if regex is not None and regex != '': + href_link[1] = regex + if bucket is not None and bucket 
!= '': + href_link[0] = bucket + return f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}" + def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'item'): """ expected output @@ -320,12 +328,10 @@ def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'it stac_link['type'] = collection_file_obj['type'] if 'sampleFileName' in collection_file_obj: stac_link['title'] = collection_file_obj['sampleFileName'] - href_link = ['unknown_bucket', 'unknown_regex'] - if 'bucket' in collection_file_obj: - href_link[0] = collection_file_obj['bucket'] - if 'regex' in collection_file_obj: - href_link[1] = collection_file_obj['regex'] - stac_link['href'] = f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}" + stac_link['href'] = self.generate_target_link_url( + collection_file_obj['regex'] if 'regex' in collection_file_obj else None, + collection_file_obj['bucket'] if 'bucket' in collection_file_obj else None, + ) return stac_link # def to_pystac_link_obj(self, input_dict: dict): diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py index ebbbc2af..9c986f39 100644 --- a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py +++ b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py @@ -10,15 +10,15 @@ def test_01(self): collection_query = CollectionsQuery('NA', 'NA') collection_version = int(datetime.utcnow().timestamp()) sample_collection = { + # "dataType": "MOD09GQ", + # "provider_path": "cumulus-test-data/pdrs", "name": "UNITY_CUMULUS_DEV_UNIT_TEST", "version": str(collection_version), - "process": "modis", - # "dataType": "MOD09GQ", - "duplicateHandling": "skip", - # "provider_path": "cumulus-test-data/pdrs", + # "process": "modis", + # "duplicateHandling": "skip", "granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$", "granuleIdExtraction":
"(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+", - "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", + # "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", "files": [ { From 3b0246b95bf25d569b3042629441f3d40c55d85f Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 31 Aug 2022 18:01:00 -0700 Subject: [PATCH 06/24] feat: add class to create dapa collection obj --- .../cumulus_collection_dapa_creation.py | 67 +++++++++++++++++++ .../cumulus_collections_dapa/__init__.py | 0 .../test_cumulus_collection_dapa_creation.py | 23 +++++++ 3 files changed, 90 insertions(+) create mode 100644 cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py create mode 100644 tests/cumulus_lambda_functions/cumulus_collections_dapa/__init__.py create mode 100644 tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py diff --git a/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py b/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py new file mode 100644 index 00000000..8aae1479 --- /dev/null +++ b/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py @@ -0,0 +1,67 @@ +from datetime import datetime + +import pystac +from pystac import Link, Collection, Extent, SpatialExtent, TemporalExtent, Summaries + +from cumulus_lambda_functions.cumulus_stac.collection_transformer import CollectionTransformer +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + + +class CumulusCollectionDapaCreation: + def __init__(self): + self.__id = '' + self.__granule_id_extraction_regex = '' + self.__process = '' + self.__collection_title = '' + 
self.__granule_id_regex = '' + self.__sample_filename = '' + self.__files = [] + self.__collection_transformer = CollectionTransformer() + + def with_title(self, title: str): + self.__collection_title = title + return self + + def with_process(self, process: str): + self.__process = process + return self + + def with_id(self, collection_id: str): + self.__id = collection_id + if '___' not in collection_id: + LOGGER.warning(f'no ID in {collection_id}. using 001') + self.__id = f'{self.__id}___001' + return self + + def with_graule_id_regex(self, granule_id_regex): + self.__granule_id_regex = granule_id_regex + return self + + def with_granule_id_extraction_regex(self, granule_id_extraction_regex): + self.__granule_id_extraction_regex = granule_id_extraction_regex + return self + + def add_file_type(self, title: str, regex: str, bucket: str, media_type: str, rel: str = 'item'): + if rel == 'root': + LOGGER.debug('updating media_type for rel = root') + media_type = 'application/json' + self.__files.append(Link(rel=rel, target=self.__collection_transformer.generate_target_link_url(regex, bucket), media_type=media_type, title=title)) + return self + + def start(self): + # TODO validate + stac_collection = Collection(id=self.__id, + description='TODO', + extent=Extent(SpatialExtent([0, 0, 0, 0]), + TemporalExtent([[datetime.utcnow(), datetime.utcnow()]])), + title=self.__collection_title, + summaries=Summaries({ + 'granuleId': [self.__granule_id_regex], + 'granuleIdExtraction': [self.__granule_id_extraction_regex], + 'process': [self.__process] + }), + ) + stac_collection.add_links(self.__files) + return stac_collection.to_dict(include_self_link=False) diff --git a/tests/cumulus_lambda_functions/cumulus_collections_dapa/__init__.py b/tests/cumulus_lambda_functions/cumulus_collections_dapa/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py 
b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py new file mode 100644 index 00000000..17c9b4c9 --- /dev/null +++ b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py @@ -0,0 +1,23 @@ +import json +from datetime import datetime +from unittest import TestCase + +from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_collection_dapa_creation import \ + CumulusCollectionDapaCreation + + +class TestCumulusCollectionDapaCreation(TestCase): + def test_01(self): + dapa_collection = CumulusCollectionDapaCreation()\ + .with_id(f'CUMULUS_DAPA_UNIT_TEST___{int(datetime.utcnow().timestamp())}')\ + .with_graule_id_regex("^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$")\ + .with_granule_id_extraction_regex("(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+")\ + .with_title("P1570515ATMSSCIENCEAXT11344000000001.PDS")\ + .with_process('modis')\ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", 'internal', 'data', 'item') + + aa = dapa_collection.start() + print(json.dumps(aa, indent=4)) + return From 8bff47338e26381d58d9d49715d4596c11c97b6f Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 6 Sep 2022 13:59:45 -0700 Subject: [PATCH 07/24] fix: add class to create dapa collection object for unity --- .../unity_collection_stac.py} | 10 ++++++---- .../cumulus_wrapper/query_collections.py | 2 +- .../test_unity_collection_stac.py} | 15 +++++++++------ .../cumulus_wrapper/test_query_collection.py | 2 +- 4 files changed, 17 insertions(+), 12 deletions(-) rename 
cumulus_lambda_functions/{cumulus_collections_dapa/cumulus_collection_dapa_creation.py => cumulus_stac/unity_collection_stac.py} (88%) rename tests/cumulus_lambda_functions/{cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py => cumulus_stac/test_unity_collection_stac.py} (65%) diff --git a/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py b/cumulus_lambda_functions/cumulus_stac/unity_collection_stac.py similarity index 88% rename from cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py rename to cumulus_lambda_functions/cumulus_stac/unity_collection_stac.py index 8aae1479..df1245bd 100644 --- a/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_collection_dapa_creation.py +++ b/cumulus_lambda_functions/cumulus_stac/unity_collection_stac.py @@ -1,6 +1,5 @@ from datetime import datetime -import pystac from pystac import Link, Collection, Extent, SpatialExtent, TemporalExtent, Summaries from cumulus_lambda_functions.cumulus_stac.collection_transformer import CollectionTransformer @@ -9,7 +8,7 @@ LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) -class CumulusCollectionDapaCreation: +class UnityCollectionStac: def __init__(self): self.__id = '' self.__granule_id_extraction_regex = '' @@ -54,7 +53,7 @@ def start(self): # TODO validate stac_collection = Collection(id=self.__id, description='TODO', - extent=Extent(SpatialExtent([0, 0, 0, 0]), + extent=Extent(SpatialExtent([[0, 0, 0, 0]]), TemporalExtent([[datetime.utcnow(), datetime.utcnow()]])), title=self.__collection_title, summaries=Summaries({ @@ -64,4 +63,7 @@ def start(self): }), ) stac_collection.add_links(self.__files) - return stac_collection.to_dict(include_self_link=False) + new_collection = stac_collection.to_dict(include_self_link=False) + if 'links' in new_collection and len(new_collection['links']) > 0 and new_collection['links'][0]['rel'] == 'root': + 
new_collection['links'][0]['href'] = './collection.json' + return new_collection diff --git a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py b/cumulus_lambda_functions/cumulus_wrapper/query_collections.py index 827a31e8..b19eaf9e 100644 --- a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py +++ b/cumulus_lambda_functions/cumulus_wrapper/query_collections.py @@ -46,7 +46,7 @@ def get_size(self, private_api_prefix: str): total_size = query_result['meta']['count'] return {'total_size': total_size} - def create_creation(self, new_collection: dict, private_api_prefix: str): + def create_collection(self, new_collection: dict, private_api_prefix: str): payload = { 'httpMethod': 'POST', 'resource': '/{proxy+}', diff --git a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py b/tests/cumulus_lambda_functions/cumulus_stac/test_unity_collection_stac.py similarity index 65% rename from tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py rename to tests/cumulus_lambda_functions/cumulus_stac/test_unity_collection_stac.py index 17c9b4c9..df970581 100644 --- a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_collection_dapa_creation.py +++ b/tests/cumulus_lambda_functions/cumulus_stac/test_unity_collection_stac.py @@ -2,13 +2,15 @@ from datetime import datetime from unittest import TestCase -from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_collection_dapa_creation import \ - CumulusCollectionDapaCreation +import pystac +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import \ + UnityCollectionStac -class TestCumulusCollectionDapaCreation(TestCase): + +class TestUnityCollectionStac(TestCase): def test_01(self): - dapa_collection = CumulusCollectionDapaCreation()\ + dapa_collection = UnityCollectionStac()\ .with_id(f'CUMULUS_DAPA_UNIT_TEST___{int(datetime.utcnow().timestamp())}')\ 
.with_graule_id_regex("^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$")\ .with_granule_id_extraction_regex("(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+")\ @@ -18,6 +20,7 @@ def test_01(self): .add_file_type("P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", 'internal', 'metadata', 'item') \ .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", 'internal', 'data', 'item') - aa = dapa_collection.start() - print(json.dumps(aa, indent=4)) + stac_collection = dapa_collection.start() + validation_result = pystac.Collection.from_dict(stac_collection).validate() + self.assertTrue(isinstance(validation_result, list), f'wrong validation for : {json.dumps(stac_collection, indent=4)}. details: {validation_result}') return diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py index 9c986f39..7d2de4d0 100644 --- a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py +++ b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py @@ -57,7 +57,7 @@ def test_01(self): # "updatedAt": 1647992847582, # "timestamp": 1647992849273 # } - response = collection_query.create_creation(sample_collection, lambda_prefix) + response = collection_query.create_collection(sample_collection, lambda_prefix) self.assertTrue('status' in response, f'status not in response: {response}') self.assertEqual('Record saved', response['status'], f'wrong status: {response}') return From 6850df5517ca9b170a3a636f6b2313aab133c2c4 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 6 Sep 2022 16:14:45 -0700 Subject: [PATCH 08/24] feat: add class to create STAC collection --- .../cumulus_create_collection_dapa.py | 57 +++++++++++++++++++ .../lambda_function.py | 6 ++ .../cumulus_stac/collection_transformer.py | 5 +- .../test_cumulus_create_collection_dapa.py | 32 +++++++++++ 
.../test_collection_transformer.py | 4 +- 5 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 cumulus_lambda_functions/cumulus_collections_dapa/cumulus_create_collection_dapa.py create mode 100644 tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py diff --git a/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_create_collection_dapa.py b/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_create_collection_dapa.py new file mode 100644 index 00000000..6c2227ff --- /dev/null +++ b/cumulus_lambda_functions/cumulus_collections_dapa/cumulus_create_collection_dapa.py @@ -0,0 +1,57 @@ +import json +import os + +import pystac + +from cumulus_lambda_functions.cumulus_stac.collection_transformer import CollectionTransformer +from cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + + +class CumulusCreateCollectionDapa: + def __init__(self, event): + self.__event = event + self.__request_body = None + self.__cumulus_collection_query = CollectionsQuery('', '') + self.__cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX') + + def start(self): + if 'body' not in self.__event: + raise ValueError(f'missing body in {self.__event}') + self.__request_body = json.loads(self.__event['body']) + LOGGER.debug(f'request body: {self.__request_body}') + validation_result = pystac.Collection.from_dict(self.__request_body).validate() + if not isinstance(validation_result, list): + LOGGER.error(f'request body is not valid STAC collection: {validation_result}') + return { + 'statusCode': 500, + 'body': {'message': f'request body is not valid STAC Collection schema. 
check details', + 'details': validation_result} + } + try: + cumulus_collection_doc = CollectionTransformer().from_stac(self.__request_body) + creation_result = self.__cumulus_collection_query.create_collection(cumulus_collection_doc, self.__cumulus_lambda_prefix) + if 'status' not in creation_result: + return { + 'statusCode': 500, + 'body': { + 'message': {creation_result} + } + } + except Exception as e: + LOGGER.exception('error while creating new collection in Cumulus') + return { + 'statusCode': 500, + 'body': { + 'message': f'error while creating new collection in Cumulus. check details', + 'details': str(e) + } + } + return { + 'statusCode': 200, + 'body': { + 'message': creation_result + } + } diff --git a/cumulus_lambda_functions/cumulus_collections_dapa/lambda_function.py b/cumulus_lambda_functions/cumulus_collections_dapa/lambda_function.py index 459d4c7d..598fb32b 100644 --- a/cumulus_lambda_functions/cumulus_collections_dapa/lambda_function.py +++ b/cumulus_lambda_functions/cumulus_collections_dapa/lambda_function.py @@ -1,4 +1,5 @@ from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_collections_dapa import CumulusCollectionsDapa +from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_create_collection_dapa import CumulusCreateCollectionDapa from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator @@ -12,3 +13,8 @@ def lambda_handler(event, context): LambdaLoggerGenerator.remove_default_handlers() # TODO implement return CumulusCollectionsDapa(event).start() + + +def lambda_handler_ingestion(event, context): + LambdaLoggerGenerator.remove_default_handlers() + return CumulusCreateCollectionDapa(event).start() diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py index c62b7808..1232ec5c 100644 --- a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py +++ 
b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py @@ -284,10 +284,11 @@ class CollectionTransformer(StacTransformerAbstract): - def __init__(self, report_to_ems:bool = True): + def __init__(self, report_to_ems:bool = True, include_date_range=False): self.__stac_collection_schema = json.loads(STAC_COLLECTION_SCHEMA) self.__cumulus_collection_schema = {} self.__report_to_ems = report_to_ems + self.__include_date_range = include_date_range def generate_target_link_url(self, regex: str = None, bucket: str = None): href_link = ['unknown_bucket', 'unknown_regex'] @@ -496,7 +497,7 @@ def from_stac(self, source: dict) -> dict: output_collection_cumulus['files'] = output_files if len(input_dapa_collection.extent.temporal.intervals) > 0: date_interval = input_dapa_collection.extent.temporal.intervals[0] - if len(date_interval) == 2: + if len(date_interval) == 2 and self.__include_date_range is True: if date_interval[0] is not None: output_collection_cumulus['dateFrom'] = date_interval[0].strftime(TimeUtils.MMDD_FORMAT) if date_interval[1] is not None: diff --git a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py new file mode 100644 index 00000000..f0e0b838 --- /dev/null +++ b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py @@ -0,0 +1,32 @@ +import json +import os +from datetime import datetime +from unittest import TestCase + +from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_create_collection_dapa import CumulusCreateCollectionDapa +from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac + + +class TestCumulusCreateCollectionDapa(TestCase): + def test_01(self): + dapa_collection = UnityCollectionStac() \ + .with_id(f'CUMULUS_DAPA_UNIT_TEST___{int(datetime.utcnow().timestamp())}') \ + 
.with_graule_id_regex("^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$") \ + .with_granule_id_extraction_regex("(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+") \ + .with_title("P1570515ATMSSCIENCEAXT11344000000001.PDS") \ + .with_process('modis') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", + "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", + "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", + 'internal', 'data', 'item') + os.environ['CUMULUS_LAMBDA_PREFIX'] = 'am-uds-dev-cumulus' + stac_collection = dapa_collection.start() + event = { + 'body': json.dumps(stac_collection) + } + creation = CumulusCreateCollectionDapa(event).start() + self.assertTrue('statusCode' in creation, f'missing statusCode: {creation}') + self.assertEqual(200, creation['statusCode'], f'wrong statusCode: {creation}') + return diff --git a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py index f988631d..af4a67e0 100644 --- a/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py +++ b/tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py @@ -82,9 +82,9 @@ def test_01(self): }, ] } - converted_stac = CollectionTransformer().to_stac(source) + converted_stac = CollectionTransformer(include_date_range=True).to_stac(source) self.assertEqual(None, stac_validator.validate(converted_stac), f'invalid stac format: {stac_validator}') - converted_cumulus = CollectionTransformer().from_stac(converted_stac) + converted_cumulus = CollectionTransformer(include_date_range=True).from_stac(converted_stac) for k, v in source.items(): if k in ['updatedAt', 'timestamp', 'createdAt']: continue From 
00b3508a3c1a799c6e0d04c2ab7ed5f7785d1190 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 08:39:36 -0700 Subject: [PATCH 09/24] feat: update to python3.9 + new lambda for collection creation --- tf-module/unity-cumulus/main.tf | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tf-module/unity-cumulus/main.tf b/tf-module/unity-cumulus/main.tf index 33fd759d..d6cf6d60 100644 --- a/tf-module/unity-cumulus/main.tf +++ b/tf-module/unity-cumulus/main.tf @@ -38,7 +38,7 @@ resource "aws_lambda_function" "snpp_lvl0_generate_cmr" { function_name = "${var.prefix}-snpp_lvl0_generate_cmr" role = var.lambda_processing_role_arn handler = "cumulus_lambda_functions.snpp_lvl0_generate_cmr.lambda_function.lambda_handler" - runtime = "python3.7" + runtime = "python3.9" timeout = 300 environment { variables = { @@ -58,7 +58,7 @@ resource "aws_lambda_function" "snpp_lvl1_generate_cmr" { function_name = "${var.prefix}-snpp_lvl1_generate_cmr" role = var.lambda_processing_role_arn handler = "cumulus_lambda_functions.snpp_level1a_generate_cmr.lambda_function.lambda_handler" - runtime = "python3.7" + runtime = "python3.9" timeout = 300 environment { variables = { @@ -78,7 +78,7 @@ resource "aws_lambda_function" "cumulus_granules_dapa" { function_name = "${var.prefix}-cumulus_granules_dapa" role = var.lambda_processing_role_arn handler = "cumulus_lambda_functions.cumulus_granules_dapa.lambda_function.lambda_handler" - runtime = "python3.7" + runtime = "python3.9" timeout = 300 environment { @@ -100,7 +100,7 @@ resource "aws_lambda_function" "cumulus_collections_dapa" { function_name = "${var.prefix}-cumulus_collections_dapa" role = var.lambda_processing_role_arn handler = "cumulus_lambda_functions.cumulus_collections_dapa.lambda_function.lambda_handler" - runtime = "python3.7" + runtime = "python3.9" timeout = 300 environment { @@ -122,7 +122,7 @@ resource "aws_lambda_function" "cumulus_collections_ingest_cnm_dapa" { function_name 
= "${var.prefix}-cumulus_collections_ingest_cnm_dapa" role = var.lambda_processing_role_arn handler = "cumulus_lambda_functions.cumulus_granules_dapa_ingest_cnm.lambda_function.lambda_handler" - runtime = "python3.7" + runtime = "python3.9" timeout = 300 environment { @@ -132,6 +132,28 @@ resource "aws_lambda_function" "cumulus_collections_ingest_cnm_dapa" { } } + vpc_config { + subnet_ids = var.cumulus_lambda_subnet_ids + security_group_ids = local.security_group_ids_set ? var.security_group_ids : [aws_security_group.unity_cumulus_lambda_sg[0].id] + } + tags = var.tags +} + +resource "aws_lambda_function" "cumulus_collections_creation_dapa" { + filename = local.lambda_file_name + function_name = "${var.prefix}-cumulus_collections_creation_dapa" + role = var.lambda_processing_role_arn + handler = "cumulus_lambda_functions.cumulus_granules_dapa_ingest_cnm.lambda_function.lambda_handler_ingestion" + runtime = "python3.9" + timeout = 300 + + environment { + variables = { + LOG_LEVEL = var.log_level + CUMULUS_LAMBDA_PREFIX = var.prefix + } + } + vpc_config { subnet_ids = var.cumulus_lambda_subnet_ids security_group_ids = local.security_group_ids_set ? 
var.security_group_ids : [aws_security_group.unity_cumulus_lambda_sg[0].id] From 159e93a3dd8c4ced27916bcf7fd5b9db85376cf2 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 09:39:34 -0700 Subject: [PATCH 10/24] chore: python3.9 for github action --- .github/workflows/dockerbuild.yml | 2 +- .github/workflows/makefile.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dockerbuild.yml b/.github/workflows/dockerbuild.yml index fabbff99..dd824895 100644 --- a/.github/workflows/dockerbuild.yml +++ b/.github/workflows/dockerbuild.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 with: - python-version: '3.7' + python-version: '3.9' - run: | # make file runnable, might not be necessary chmod +x "${GITHUB_WORKSPACE}/ci.cd/store_version.sh" diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index f08a2c48..882c8ae4 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 with: - python-version: '3.7' + python-version: '3.9' - run: | python3 "${GITHUB_WORKSPACE}/setup.py" install - run: | From ce6de70b8291e9b8560c044531aa4f7ec0465397 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 09:51:22 -0700 Subject: [PATCH 11/24] fix: ci/cd on develop branch with additional release postfix --- .github/workflows/makefile.yml | 6 +++--- ci.cd/create_aws_lambda_zip.sh | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 882c8ae4..89ed74a4 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -2,9 +2,9 @@ name: Makefile CI on: push: - branches: [ main ] -# pull_request: -# branches: [ main ] + branches: [ main, develop ] + pull_request: + branches: [ develop ] env: ARTIFACT_BASE_NAME: cumulus_lambda_functions diff --git 
a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index 2a3ef1ea..ccef39e0 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -22,6 +22,19 @@ cp ${zip_file} build/ cd $project_root_dir/tf-module/unity-cumulus zip -9 ${terraform_zip_file} * **/* +# github.job +github_branch=${GITHUB_REF##*/} +github_job=${github.job} +echo $github_branch +echo github_job +software_version_trailing="" +if [["github_branch"=="main"]] +then + software_version_trailing="" +else + software_version_trailing="-job-${github_job}" + +fi software_version=`python3 ${project_root_dir}/setup.py --version` -echo "software_version=${software_version}" >> ${GITHUB_ENV} +echo "software_version=${software_version}${software_version_trailing}" >> ${GITHUB_ENV} cat ${GITHUB_ENV} From 829bcab16f5353821946bcc72d9ccb2c2b33b3dd Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:21:11 -0700 Subject: [PATCH 12/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index ccef39e0..62dd9ca6 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -24,11 +24,12 @@ zip -9 ${terraform_zip_file} * **/* # github.job github_branch=${GITHUB_REF##*/} +echo "branch: ${$github_branch}" github_job=${github.job} -echo $github_branch -echo github_job +echo "job: ${github_job}" +echo "run_id: ${github.run_id}" software_version_trailing="" -if [["github_branch"=="main"]] +if [["$github_branch"=="main"]] then software_version_trailing="" else From 27679e2872967280302d88d87b0129ed78c2b4c4 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:24:40 -0700 Subject: [PATCH 13/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci.cd/create_aws_lambda_zip.sh 
b/ci.cd/create_aws_lambda_zip.sh index 62dd9ca6..7168e8f4 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -24,12 +24,13 @@ zip -9 ${terraform_zip_file} * **/* # github.job github_branch=${GITHUB_REF##*/} -echo "branch: ${$github_branch}" -github_job=${github.job} -echo "job: ${github_job}" -echo "run_id: ${github.run_id}" +echo "branch: ${github_branch}" +echo "GITHUB_ENV: ${GITHUB_ENV}" +github_job="TODO" +#github_job=${github.job} +#echo "run_id: ${github.run_id}" software_version_trailing="" -if [["$github_branch"=="main"]] +if ["$github_branch"=="main"] then software_version_trailing="" else From 7195e7b38ae84bba962e81fe8830e0f8b66b7b0c Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:27:09 -0700 Subject: [PATCH 14/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index 7168e8f4..68cdb8a9 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -25,16 +25,16 @@ zip -9 ${terraform_zip_file} * **/* # github.job github_branch=${GITHUB_REF##*/} echo "branch: ${github_branch}" -echo "GITHUB_ENV: ${GITHUB_ENV}" +cat $GITHUB_ENV github_job="TODO" #github_job=${github.job} #echo "run_id: ${github.run_id}" software_version_trailing="" -if ["$github_branch"=="main"] +if ["$github_branch"=="main"]; then software_version_trailing="" else - software_version_trailing="-job-${github_job}" + software_version_trailing="-${github_branch}-${github_job}" fi software_version=`python3 ${project_root_dir}/setup.py --version` From 3d6a98fd19f42cf1d700a312dce04f7df02b4792 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:29:57 -0700 Subject: [PATCH 15/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci.cd/create_aws_lambda_zip.sh 
b/ci.cd/create_aws_lambda_zip.sh index 68cdb8a9..a360af23 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -26,11 +26,13 @@ zip -9 ${terraform_zip_file} * **/* github_branch=${GITHUB_REF##*/} echo "branch: ${github_branch}" cat $GITHUB_ENV +echo "DONE displaying $GITHUB_ENV" github_job="TODO" #github_job=${github.job} #echo "run_id: ${github.run_id}" software_version_trailing="" -if ["$github_branch"=="main"]; +main_branch="main" +if ["$github_branch"=="$main_branch"]; then software_version_trailing="" else From a5968a90d552d058ef3410d71acf495f8c661013 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:33:16 -0700 Subject: [PATCH 16/24] fix: testing different output number --- .github/workflows/makefile.yml | 4 ++++ ci.cd/create_aws_lambda_zip.sh | 7 ++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 89ed74a4..669409f3 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -15,6 +15,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Generate build number + uses: einaregilsson/build-number@v3 + with: + token: ${{secrets.github_token}} - uses: actions/setup-python@v3 with: python-version: '3.9' diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index a360af23..d5f62fe9 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -29,15 +29,12 @@ cat $GITHUB_ENV echo "DONE displaying $GITHUB_ENV" github_job="TODO" #github_job=${github.job} -#echo "run_id: ${github.run_id}" +echo "run_id: ${BUILD_NUMBER}" software_version_trailing="" main_branch="main" -if ["$github_branch"=="$main_branch"]; +if ["$github_branch"!="$main_branch"]; then - software_version_trailing="" -else software_version_trailing="-${github_branch}-${github_job}" - fi software_version=`python3 ${project_root_dir}/setup.py --version` echo 
"software_version=${software_version}${software_version_trailing}" >> ${GITHUB_ENV} From bbaf2c7ad675acbe57b0d3b93213db218e090da1 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:35:33 -0700 Subject: [PATCH 17/24] fix: testing different output number --- .github/workflows/makefile.yml | 4 ---- ci.cd/create_aws_lambda_zip.sh | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 669409f3..89ed74a4 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -15,10 +15,6 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Generate build number - uses: einaregilsson/build-number@v3 - with: - token: ${{secrets.github_token}} - uses: actions/setup-python@v3 with: python-version: '3.9' diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index d5f62fe9..d24d276d 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -29,7 +29,7 @@ cat $GITHUB_ENV echo "DONE displaying $GITHUB_ENV" github_job="TODO" #github_job=${github.job} -echo "run_id: ${BUILD_NUMBER}" +echo "run_id: ${GITHUB_RUN_ID}" software_version_trailing="" main_branch="main" if ["$github_branch"!="$main_branch"]; From 38d148079cf4857e500731770fec36d171718af3 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 10:37:42 -0700 Subject: [PATCH 18/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index d24d276d..54edb8df 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -24,18 +24,13 @@ zip -9 ${terraform_zip_file} * **/* # github.job github_branch=${GITHUB_REF##*/} -echo "branch: ${github_branch}" -cat $GITHUB_ENV -echo "DONE displaying $GITHUB_ENV" -github_job="TODO" -#github_job=${github.job} -echo "run_id: ${GITHUB_RUN_ID}" 
software_version_trailing="" main_branch="main" -if ["$github_branch"!="$main_branch"]; +if [["$github_branch"!="$main_branch"]]; then - software_version_trailing="-${github_branch}-${github_job}" + software_version_trailing="-${github_branch}-${GITHUB_RUN_ID}" fi software_version=`python3 ${project_root_dir}/setup.py --version` +cat ${project_root_dir}/setup.py echo "software_version=${software_version}${software_version_trailing}" >> ${GITHUB_ENV} cat ${GITHUB_ENV} From 2e6ba525194d2b15c71b8e49ec93310b0e0efb54 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 11:14:53 -0700 Subject: [PATCH 19/24] fix: testing different output number --- .github/workflows/makefile.yml | 6 +++--- ci.cd/create_aws_lambda_zip.sh | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 89ed74a4..72e24062 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -52,7 +52,7 @@ jobs: prerelease: false - name: Create PreRelease id: create_prerelease - if: ${{ contains(github.ref, 'main') }} +# if: ${{ contains(github.ref, 'main') }} uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token @@ -66,7 +66,7 @@ jobs: prerelease: true - name: Upload PreRelease Asset 1 id: upload-prerelease-asset-1 - if: ${{ contains(github.ref, 'main') }} +# if: ${{ contains(github.ref, 'main') }} uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -77,7 +77,7 @@ jobs: asset_content_type: application/zip - name: Upload PreRelease Asset 2 id: upload-prerelease-asset-2 - if: ${{ contains(github.ref, 'main') }} +# if: ${{ contains(github.ref, 'main') }} uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index 54edb8df..0ddd0e7a 100644 --- 
a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -26,8 +26,10 @@ zip -9 ${terraform_zip_file} * **/* github_branch=${GITHUB_REF##*/} software_version_trailing="" main_branch="main" -if [["$github_branch"!="$main_branch"]]; +if ["$github_branch"="$main_branch"]; then + software_version="" +else software_version_trailing="-${github_branch}-${GITHUB_RUN_ID}" fi software_version=`python3 ${project_root_dir}/setup.py --version` From d85d28c773a77b8efde360e40edd72438f63cb02 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 11:17:18 -0700 Subject: [PATCH 20/24] fix: testing different output number --- ci.cd/create_aws_lambda_zip.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci.cd/create_aws_lambda_zip.sh b/ci.cd/create_aws_lambda_zip.sh index 0ddd0e7a..51052696 100644 --- a/ci.cd/create_aws_lambda_zip.sh +++ b/ci.cd/create_aws_lambda_zip.sh @@ -26,13 +26,12 @@ zip -9 ${terraform_zip_file} * **/* github_branch=${GITHUB_REF##*/} software_version_trailing="" main_branch="main" -if ["$github_branch"="$main_branch"]; +if [ "$github_branch" = "$main_branch" ]; then software_version="" else software_version_trailing="-${github_branch}-${GITHUB_RUN_ID}" fi software_version=`python3 ${project_root_dir}/setup.py --version` -cat ${project_root_dir}/setup.py echo "software_version=${software_version}${software_version_trailing}" >> ${GITHUB_ENV} cat ${GITHUB_ENV} From 384a3d2c5918f4d7e1cb54f843f8f77c676ed193 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 14:38:38 -0700 Subject: [PATCH 21/24] chore: add testcase.. 
in progress --- .../test_cumulus_create_collection_dapa.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py index f0e0b838..cf249c61 100644 --- a/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py +++ b/tests/cumulus_lambda_functions/cumulus_collections_dapa/test_cumulus_create_collection_dapa.py @@ -3,8 +3,12 @@ from datetime import datetime from unittest import TestCase +import requests + from cumulus_lambda_functions.cumulus_collections_dapa.cumulus_create_collection_dapa import CumulusCreateCollectionDapa from cumulus_lambda_functions.cumulus_stac.unity_collection_stac import UnityCollectionStac +from cumulus_lambda_functions.lib.cognito_login.cognito_token_retriever import CognitoTokenRetriever +from cumulus_lambda_functions.lib.constants import Constants class TestCumulusCreateCollectionDapa(TestCase): @@ -30,3 +34,39 @@ def test_01(self): self.assertTrue('statusCode' in creation, f'missing statusCode: {creation}') self.assertEqual(200, creation['statusCode'], f'wrong statusCode: {creation}') return + + def test_02(self): + os.environ[Constants.USERNAME] = '/unity/uds/user/wphyo/username' + os.environ[Constants.PASSWORD] = '/unity/uds/user/wphyo/dwssap' + os.environ[Constants.PASSWORD_TYPE] = Constants.PARAM_STORE + os.environ[Constants.CLIENT_ID] = '7a1fglm2d54eoggj13lccivp25' # JPL Cloud + + os.environ[Constants.COGNITO_URL] = 'https://cognito-idp.us-west-2.amazonaws.com' + bearer_token = CognitoTokenRetriever().start() + + post_url = 'https://k3a3qmarxh.execute-api.us-west-2.amazonaws.com/dev/am-uds-dapa/collections/' + headers = { + 'Authorization': f'Bearer {bearer_token}', + 'Content-Type': 'application/json', + } + + dapa_collection = UnityCollectionStac() \ + 
.with_id(f'CUMULUS_DAPA_UNIT_TEST___{int(datetime.utcnow().timestamp())}') \ + .with_graule_id_regex("^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$") \ + .with_granule_id_extraction_regex("(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+") \ + .with_title("P1570515ATMSSCIENCEAXT11344000000001.PDS") \ + .with_process('modis') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", + "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", + "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", 'internal', 'metadata', 'item') \ + .add_file_type("P1570515ATMSSCIENCEAXT11344000000000.PDS", "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", + 'internal', 'data', 'item') + stac_collection = dapa_collection.start() + + query_result = requests.post(url=post_url, + headers=headers, + json=stac_collection, + ) + self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}') + return From ddef51387805e8877fa3709406df36271e6a5cdc Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 15:05:58 -0700 Subject: [PATCH 22/24] fix: wrong handler for new lambda --- tf-module/unity-cumulus/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf-module/unity-cumulus/main.tf b/tf-module/unity-cumulus/main.tf index d6cf6d60..480a517f 100644 --- a/tf-module/unity-cumulus/main.tf +++ b/tf-module/unity-cumulus/main.tf @@ -143,7 +143,7 @@ resource "aws_lambda_function" "cumulus_collections_creation_dapa" { filename = local.lambda_file_name function_name = "${var.prefix}-cumulus_collections_creation_dapa" role = var.lambda_processing_role_arn - handler = "cumulus_lambda_functions.cumulus_granules_dapa_ingest_cnm.lambda_function.lambda_handler_ingestion" + handler = "cumulus_lambda_functions.cumulus_collections_dapa.lambda_function.lambda_handler_ingestion" runtime = "python3.9" timeout = 300 From b2bbd4207d10f0ce78a9c3cc63e6d0282922d502 
Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 7 Sep 2022 15:14:58 -0700 Subject: [PATCH 23/24] fix: require libraries at the worng location --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index df01ea63..667856db 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,13 @@ from setuptools import find_packages, setup install_requires = [ + 'pystac', 'jsonschema', 'fastjsonschema', 'xmltodict', 'requests' ] flask_requires = [ - 'pystac', 'jsonschema', 'flask===2.0.1', 'flask_restful===0.3.9', 'flask-restx===0.5.0', # to create Flask server 'gevent===21.8.0', 'greenlet===1.1.1', # to run flask server 'werkzeug===2.0.1', From 347121d46c0744ad625704172c37f1effc24090e Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 8 Sep 2022 16:18:33 -0700 Subject: [PATCH 24/24] fix: add mock url if url_path is misisng --- .../cumulus_stac/collection_transformer.py | 2 +- .../cumulus_wrapper/test_query_collection.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py index 1232ec5c..a641da81 100644 --- a/cumulus_lambda_functions/cumulus_stac/collection_transformer.py +++ b/cumulus_lambda_functions/cumulus_stac/collection_transformer.py @@ -430,7 +430,7 @@ def to_stac(self, source: dict) -> dict: "totalGranules": [source['total_size'] if 'total_size' in source else -1], }, "links": [self.__convert_to_stac_links({ - "regex": source['url_path'], + "regex": source['url_path'] if 'url_path' in source else './collection.json', "sampleFileName": source['sampleFileName'], "type": "application/json", diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py index 7d2de4d0..7e089571 100644 --- a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py +++ 
b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py @@ -65,7 +65,8 @@ def test_01(self): def test_02(self): lambda_prefix = 'am-uds-dev-cumulus' collection_query = CollectionsQuery('NA', 'NA') - + collection_query.with_limit(2) collections = collection_query.query_direct_to_private_api(lambda_prefix) self.assertTrue('results' in collections, f'results not in collections: {collections}') + self.assertEqual(2, len(collections['results']), f'wrong length: {collections}') return