
release/9.4.0 #488


Merged
10 commits merged on Dec 16, 2024
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [9.4.0] - 2024-12-16
### Changed
- [#485](https://github.com/unity-sds/unity-data-services/pull/485) feat: delete granule endpoint

## [9.3.0] - 2024-12-11
### Changed
- [#482](https://github.com/unity-sds/unity-data-services/pull/482) feat: updated name spaces and naming of resources

## [9.2.0] - 2024-12-09
### Changed
- [#478](https://github.com/unity-sds/unity-data-services/pull/478) feat: uds lib update

## [9.1.2] - 2024-12-09
### Fixed
- [#448](https://github.com/unity-sds/unity-data-services/pull/448) fix: wrong location archive keys

## [9.1.1] - 2024-12-09
### Fixed
- [#475](https://github.com/unity-sds/unity-data-services/pull/475) fix: index to es is not setting bbox correctly

## [9.1.0] - 2024-12-03
### Changed
- [#472](https://github.com/unity-sds/unity-data-services/pull/472) feat: amalgamation download type
44 changes: 44 additions & 0 deletions cumulus_lambda_functions/cumulus_wrapper/query_granules.py
@@ -21,6 +21,7 @@ def __init__(self, cumulus_base: str, cumulus_token: str):
super().__init__(cumulus_base, cumulus_token)
self._conditions.append('status=completed')
self._item_transformer = ItemTransformer()
self.__collection_id = None

def with_filter(self, filter_key, filter_values: list):
if len(filter_values) < 1:
@@ -34,6 +35,7 @@ def with_filter(self, filter_key, filter_values: list):

def with_collection_id(self, collection_id: str):
self._conditions.append(f'{self.__collection_id_key}={collection_id}')
self.__collection_id = collection_id
return self

def with_bbox(self):
@@ -130,6 +132,48 @@ def query_direct_to_private_api(self, private_api_prefix: str, transform=True):
return {'server_error': f'error while invoking:{str(e)}'}
return {'results': stac_list}

def delete_entry(self, private_api_prefix: str, granule_id: str):
payload = {
'httpMethod': 'DELETE',
'resource': '/{proxy+}',
'path': f'/{self.__granules_key}/{self.__collection_id}/{granule_id}',
'queryStringParameters': {**{k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}},
# 'queryStringParameters': {'limit': '30'},
'headers': {
'Content-Type': 'application/json',
},
# 'body': json.dumps({"action": "removeFromCmr"})
}
LOGGER.debug(f'payload: {payload}')
try:
query_result = self._invoke_api(payload, private_api_prefix)
"""
{'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
"""
if query_result['statusCode'] >= 500:
LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
return {'server_error': query_result}
if query_result['statusCode'] >= 400:
LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
return {'client_error': query_result}
query_result = json.loads(query_result['body'])
LOGGER.info(f'json query_result: {query_result}')
"""
{
"detail": "Record deleted"
}
"""
if 'detail' not in query_result:
LOGGER.error(f'missing key: detail. invalid response json: {query_result}')
return {'server_error': f'missing key: detail. invalid response json: {query_result}'}
if query_result['detail'] != 'Record deleted':
LOGGER.error(f'Wrong Message: {query_result}')
return {'server_error': f'Wrong Message: {query_result}'}
except Exception as e:
LOGGER.exception('error while invoking')
return {'server_error': f'error while invoking:{str(e)}'}
return {}

def query(self, transform=True):
conditions_str = '&'.join(self._conditions)
LOGGER.info(f'cumulus_base: {self.cumulus_base}')
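
For reference, the new `delete_entry` reuses the private-API invocation pattern from `query_direct_to_private_api` and flattens the builder's accumulated condition strings into query parameters. A minimal sketch (not the project's code) of the resulting payload, with hypothetical collection and granule ids; `split('=', 1)` is used here and matches the diff's dict comprehension for conditions containing a single `=`:

```python
# Hypothetical condition strings as accumulated by the query builder.
conditions = ['status=completed', 'collectionId=URN:NASA:UNITY:MY_TENANT:DEV:SAMPLE___001']

payload = {
    'httpMethod': 'DELETE',
    'resource': '/{proxy+}',
    'path': '/granules/URN:NASA:UNITY:MY_TENANT:DEV:SAMPLE___001/sample-granule-01',
    # flatten 'k=v' condition strings into a query-parameter dict
    'queryStringParameters': {k: v for k, v in (c.split('=', 1) for c in conditions)},
    'headers': {'Content-Type': 'application/json'},
}
print(payload['queryStringParameters'])
# {'status': 'completed', 'collectionId': 'URN:NASA:UNITY:MY_TENANT:DEV:SAMPLE___001'}
```

Because `with_collection_id` now both appends a condition and caches the id (the new `self.__collection_id` field), the same builder instance can construct the DELETE path without re-parsing its conditions.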
4 changes: 3 additions & 1 deletion cumulus_lambda_functions/granules_to_es/granules_indexer.py
@@ -47,7 +47,7 @@ def __get_potential_files(self):
potential_files = []
self.__input_file_list = self.__cumulus_record['files']
for each_file in self.__input_file_list:
if 'type' in each_file and each_file['type'].strip().lower() != self.__valid_filetype_name:
if 'type' in each_file and self.__valid_filetype_name not in each_file['type'].strip().lower():
LOGGER.debug(f'Not metadata. skipping {each_file}')
continue
if 'fileName' not in each_file and 'name' in each_file: # add fileName if there is only name
@@ -93,6 +93,8 @@ def start(self):
else:
LOGGER.warning(f'unable to find STAC JSON file in {potential_files}')
stac_item = ItemTransformer().to_stac(self.__cumulus_record)
if stac_input_meta is not None and stac_input_meta.bbox is not None:
stac_item['bbox'] = stac_input_meta.bbox
if 'bbox' in stac_item:
stac_item['bbox'] = GranulesDbIndex.to_es_bbox(stac_item['bbox'])
collection_identifier = UdsCollections.decode_identifier(self.__cumulus_record['collectionId'])
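
The two added lines in `start()` let a bbox parsed from the granule's own STAC metadata file override the one `ItemTransformer` derives from the Cumulus record, before the Elasticsearch conversion. A sketch of that flow with illustrative values; `to_es_bbox_assumed` is an assumption standing in for `GranulesDbIndex.to_es_bbox`, whose real output format lives in the project:

```python
# Illustrative values only.
stac_item = {'bbox': [-118.3, 33.9, -118.1, 34.1]}   # derived from the Cumulus record
stac_input_meta_bbox = [-120.0, 33.0, -117.0, 35.0]  # parsed from the granule's STAC JSON

if stac_input_meta_bbox is not None:
    stac_item['bbox'] = stac_input_meta_bbox  # the metadata-file bbox wins

def to_es_bbox_assumed(bbox):
    # assumed geo_shape envelope encoding: upper-left then lower-right corner
    west, south, east, north = bbox
    return {'type': 'envelope', 'coordinates': [[west, north], [east, south]]}

stac_item['bbox'] = to_es_bbox_assumed(stac_item['bbox'])
```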
17 changes: 17 additions & 0 deletions cumulus_lambda_functions/lib/uds_db/granules_db_index.py
@@ -201,6 +201,23 @@ def get_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
raise ValueError(f"no such granule: {doc_id}")
return result

def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
result = self.__es.query({
'size': 9999,
'query': {'term': {'_id': doc_id}}
}, read_alias_name)
if result is None:
raise ValueError(f"no such granule: {doc_id}")
for each_granule in result['hits']['hits']:
delete_result = self.__es.delete_by_query({
'query': {'term': {'_id': each_granule['_id']}}
}, each_granule['_index'])
LOGGER.debug(f'delete_result: {delete_result}')
if delete_result is None:
raise ValueError(f"error deleting {each_granule}")
return result

def update_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ):
write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
json_body['event_time'] = TimeUtils.get_current_unix_milli()
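
The new `delete_entry` resolves the document through the tenant/venue read alias first, then deletes from the concrete backing index each hit came from. A sketch of the same two-step flow using the stock `elasticsearch` client; the client, endpoint, alias name, and doc id are assumptions, since the project goes through its own ES wrapper:

```python
from elasticsearch import Elasticsearch  # assumed client; the project uses its own wrapper

es = Elasticsearch('https://example-opensearch:443')  # hypothetical endpoint
doc_id = 'URN:NASA:UNITY:MY_TENANT:DEV:SAMPLE___001:sample-granule-01'
read_alias = 'granules_read_my_tenant_dev'  # f'{prefix}_{tenant}_{venue}', lowercased

found = es.search(index=read_alias, body={'size': 9999, 'query': {'term': {'_id': doc_id}}})
if not found['hits']['hits']:
    raise ValueError(f'no such granule: {doc_id}')
for hit in found['hits']['hits']:
    # delete against hit['_index'], the concrete index behind the alias,
    # so each rolling index under the alias is cleaned up individually
    es.delete_by_query(index=hit['_index'], body={'query': {'term': {'_id': hit['_id']}}})
```

Deleting by `_index` rather than through a write alias matters when several rolling indices sit behind one read alias: the document is removed from whichever index actually holds it.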
@@ -17,6 +17,20 @@ def __init__(self, input_stac_dict: dict):
self.__prod_dt = None
self.__insert_dt = None
self.__custom_properties = {}
self.__bbox = None

@property
def bbox(self):
return self.__bbox

@bbox.setter
def bbox(self, val):
"""
:param val:
:return: None
"""
self.__bbox = val
return

@property
def custom_properties(self):
@@ -132,6 +146,7 @@ def start(self) -> GranuleMetadataProps:
stac_item: Item = ItemTransformer().from_stac(self.__input_stac_dict)
self.__custom_properties = deepcopy(stac_item.properties)
self.__remove_default_keys_in_custom_props()
self.__bbox = stac_item.bbox
# self.__custom_properties['collection_id'] = stac_item.collection_id # TODO version is included
# collection_led_granule_id = stac_item.id if stac_item.id.startswith(stac_item.collection_id) else f'{stac_item.collection_id}:{stac_item.id}'
# self.__custom_properties['granule_id'] = collection_led_granule_id # This needs to be start with collection_id to be consistent with cumulus granule_id which starts with collection
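
A short usage sketch for the new `bbox` property, assuming `StacInputMetadata` is the class from the module in this diff (the file header was not captured in this render, so the import path is omitted) and a hypothetical minimal STAC item:

```python
sample_stac = {
    'type': 'Feature',
    'stac_version': '1.0.0',
    'id': 'sample-granule-01',
    'geometry': {'type': 'Point', 'coordinates': [-118.2, 34.0]},
    'bbox': [-118.3, 33.9, -118.1, 34.1],
    'properties': {'datetime': '2024-12-16T00:00:00Z'},
    'assets': {},
    'links': [],
}

stac_input_meta = StacInputMetadata(sample_stac)
granule_md_props = stac_input_meta.start()  # start() now records stac_item.bbox
print(stac_input_meta.bbox)                 # [-118.3, 33.9, -118.1, 34.1]
```

This is what lets the indexer's `start()` (see granules_indexer.py above) prefer the metadata-file bbox over the Cumulus-derived one.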
27 changes: 16 additions & 11 deletions cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py
@@ -75,6 +75,8 @@ def __get_time_range_terms(self):
]

def __create_pagination_links(self, page_marker_str):
if self.__pagination_link_obj is None:
return []
new_queries = deepcopy(self.__pagination_link_obj.org_query_params)
new_queries['limit'] = int(new_queries['limit'] if 'limit' in new_queries else self.__limit)
current_page = f"{self.__pagination_link_obj.requesting_base_url}?{'&'.join([f'{k}={v}' for k, v in new_queries.items()])}"
@@ -114,8 +116,20 @@ def archive_single_granule(self, granule_id):
daac_archiver.send_to_daac_internal(cnm_response)
return

def __restructure_each_granule_result(self, each_granules_query_result_stripped):
if 'event_time' in each_granules_query_result_stripped:
each_granules_query_result_stripped.pop('event_time')
if 'bbox' in each_granules_query_result_stripped:
each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox'])
for each_archiving_key in GranulesIndexMapping.archiving_keys:
if each_archiving_key in each_granules_query_result_stripped:
each_granules_query_result_stripped['properties'][each_archiving_key] = each_granules_query_result_stripped.pop(each_archiving_key)
return

def get_single_granule(self, granule_id):
granules_query_dsl = {
'size': 1,
'sort': [{'id': {'order': 'asc'}}],
'query': {'bool': {'must': [{
'term': {'id': granule_id}
}]}}
@@ -132,10 +146,7 @@ def get_single_granule(self, granule_id):
each_granules_query_result_stripped = granules_query_result['hits']['hits'][0]['_source']
self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False)
each_granules_query_result_stripped['links'].append(self_link)
if 'event_time' in each_granules_query_result_stripped:
each_granules_query_result_stripped.pop('event_time')
if 'bbox' in each_granules_query_result_stripped:
each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox'])
self.__restructure_each_granule_result(each_granules_query_result_stripped)
return each_granules_query_result_stripped

def start(self):
Expand All @@ -152,13 +163,7 @@ def start(self):
for each_granules_query_result_stripped in granules_query_result_stripped:
self_link = Link(rel='self', target=f'{self.__base_url}/{WebServiceConstants.COLLECTIONS}/{self.__collection_id}/items/{each_granules_query_result_stripped["id"]}', media_type='application/json', title=each_granules_query_result_stripped["id"]).to_dict(False)
each_granules_query_result_stripped['links'].append(self_link)
if 'event_time' in each_granules_query_result_stripped:
each_granules_query_result_stripped.pop('event_time')
if 'bbox' in each_granules_query_result_stripped:
each_granules_query_result_stripped['bbox'] = GranulesDbIndex.from_es_bbox(each_granules_query_result_stripped['bbox'])
for each_archiving_key in GranulesIndexMapping.archiving_keys:
if each_archiving_key in each_granules_query_result_stripped:
each_granules_query_result_stripped['properties'][each_archiving_key] = each_granules_query_result_stripped.pop(each_archiving_key)
self.__restructure_each_granule_result(each_granules_query_result_stripped)
pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join(granules_query_result['hits']['hits'][-1]['sort'])
return {
'statusCode': 200,
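
`get_single_granule` and `start` previously duplicated this post-processing; it is now factored into `__restructure_each_granule_result`. A standalone sketch of the same transformation on one stripped hit; the bbox decoding and the archiving key names are assumptions, not the real `GranulesIndexMapping` constants:

```python
def from_es_bbox_assumed(es_bbox):
    # assumed inverse of the envelope encoding: back to [west, south, east, north]
    (west, north), (east, south) = es_bbox['coordinates']
    return [west, south, east, north]

ARCHIVING_KEYS_ASSUMED = ['archive_status', 'archive_error_message']  # illustrative names

hit = {
    'id': 'sample-granule-01',
    'event_time': 1734307200000,  # internal bookkeeping, stripped before returning
    'bbox': {'type': 'envelope', 'coordinates': [[-118.3, 34.1], [-118.1, 33.9]]},
    'properties': {},
    'links': [],
    'archive_status': 'cnm_s_successful',
}

hit.pop('event_time', None)
hit['bbox'] = from_es_bbox_assumed(hit['bbox'])
for key in ARCHIVING_KEYS_ASSUMED:
    if key in hit:
        hit['properties'][key] = hit.pop(key)  # archive fields move under properties
```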
50 changes: 50 additions & 0 deletions cumulus_lambda_functions/uds_api/granules_api.py
@@ -2,6 +2,11 @@
import os
from typing import Union

from mdps_ds_lib.lib.aws.aws_s3 import AwsS3
from pystac import Item

from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery

from cumulus_lambda_functions.daac_archiver.daac_archiver_logic import DaacArchiverLogic
from cumulus_lambda_functions.uds_api.dapa.daac_archive_crud import DaacArchiveCrud, DaacDeleteModel, DaacAddModel, \
DaacUpdateModel
@@ -239,6 +244,51 @@ async def get_single_granule_dapa(request: Request, collection_id: str, granule_id: str):
raise HTTPException(status_code=500, detail=str(e))
return granules_result

@router.delete("/{collection_id}/items/{granule_id}")
@router.delete("/{collection_id}/items/{granule_id}/")
async def delete_single_granule_dapa(request: Request, collection_id: str, granule_id: str):
authorizer: UDSAuthorizorAbstract = UDSAuthorizerFactory() \
.get_instance(UDSAuthorizerFactory.cognito,
es_url=os.getenv('ES_URL'),
es_port=int(os.getenv('ES_PORT', '443'))
)
auth_info = FastApiUtils.get_authorization_info(request)
collection_identifier = UdsCollections.decode_identifier(collection_id)
if not authorizer.is_authorized_for_collection(DBConstants.delete, collection_id,
auth_info['ldap_groups'],
collection_identifier.tenant,
collection_identifier.venue):
LOGGER.debug(f'user: {auth_info["username"]} is not authorized for {collection_id}')
raise HTTPException(status_code=403, detail=json.dumps({
'message': 'not authorized to execute this action'
}))
try:
LOGGER.debug(f'deleting granule: {granule_id}')
cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX')
cumulus = GranulesQuery('https://na/dev', 'NA')
cumulus.with_collection_id(collection_id)
cumulus_delete_result = cumulus.delete_entry(cumulus_lambda_prefix, granule_id) # TODO not sure it is correct granule ID
LOGGER.debug(f'cumulus_delete_result: {cumulus_delete_result}')
es_delete_result = GranulesDbIndex().delete_entry(collection_identifier.tenant,
collection_identifier.venue,
granule_id
)
LOGGER.debug(f'es_delete_result: {es_delete_result}')
# es_delete_result = [Item.from_dict(k['_source']) for k in es_delete_result['hits']['hits']]
# if delete_files is False:
# LOGGER.debug(f'Not deleting files as it is set to false in the request')
# return {}
# s3 = AwsS3()
# for each_granule in es_delete_result:
# s3_urls = [v.href for k, v in each_granule.assets.items()]
# LOGGER.debug(f'deleting S3 for {each_granule.id} - s3_urls: {s3_urls}')
# delete_result = s3.delete_multiple(s3_urls=s3_urls)
# LOGGER.debug(f'delete_result for {each_granule.id} - delete_result: {delete_result}')
except Exception as e:
LOGGER.exception('failed during delete_single_granule_dapa')
raise HTTPException(status_code=500, detail=str(e))
return {}


@router.put("/{collection_id}/archive/{granule_id}")
@router.put("/{collection_id}/archive/{granule_id}/")
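
A hedged client-side sketch of the new route; the base URL and token are placeholders, not values from this repository. Grounded in the handler above: 403 when the caller's groups lack DELETE on the collection, 500 wrapping downstream Cumulus/ES errors, and an empty JSON object on success. The commented-out block in the diff shows that deleting the backing S3 files is intentionally deferred:

```python
import requests  # hypothetical client call; values below are placeholders

base_url = 'https://api.example.com/am-uds-dapa'
collection_id = 'URN:NASA:UNITY:MY_TENANT:DEV:SAMPLE___001'
granule_id = f'{collection_id}:sample-granule-01'

resp = requests.delete(
    f'{base_url}/collections/{collection_id}/items/{granule_id}',
    headers={'Authorization': 'Bearer <cognito-access-token>'},
)
resp.raise_for_status()
print(resp.json())  # {} on success
```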
2 changes: 1 addition & 1 deletion requirements.txt
@@ -15,7 +15,7 @@ jsonschema==4.23.0
jsonschema-specifications==2023.12.1
lark==0.12.0
mangum==0.18.0
mdps-ds-lib==1.1.0
mdps-ds-lib==1.1.1.dev000200
pydantic==2.9.2
pydantic_core==2.23.4
pygeofilter==0.2.4
2 changes: 1 addition & 1 deletion setup.py
@@ -12,7 +12,7 @@

setup(
name="cumulus_lambda_functions",
version="9.1.0",
version="9.4.0",
packages=find_packages(),
install_requires=install_requires,
package_data={