diff --git a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py index 9ace9ce..da5c060 100644 --- a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py +++ b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py @@ -236,18 +236,21 @@ def add_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict): read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip() - if 'sort' not in search_dsl: - search_result = self.__es.query(search_dsl, - querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query( - search_dsl, querying_index=read_alias_name) + if 'sort' not in search_dsl: # We cannot paginate w/o sort. So, max is 10k items: + # This also assumes "size" should be part of search_dsl + search_result = self.__es.query(search_dsl, querying_index=read_alias_name) LOGGER.debug(f'search_finished: {len(search_result["hits"]["hits"])}') return search_result + # we can run paginate search original_size = search_dsl['size'] if 'size' in search_dsl else 20 + total_size = -1 result = [] duplicates = set([]) while len(result) < original_size: search_dsl['size'] = (original_size - len(result)) * 2 - search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(search_dsl, querying_index=read_alias_name) + search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name) + if total_size == -1: # -1 = not fetched yet; read the total from the first page only + total_size = self.__es.get_result_size(search_result) if len(search_result['hits']['hits']) < 1: break for each in search_result['hits']['hits']: @@ -257,10 +260,12 @@ def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict): search_dsl['search_after'] = search_result['hits']['hits'][-1]['sort'] LOGGER.debug(f'search_finished: {len(result)}') + if len(result) > original_size: + result 
= result[:original_size] return { 'hits': { "total": { - "value": len(result) + "value": total_size, }, 'hits': result } diff --git a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py index 1685f90..706c807 100644 --- a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py +++ b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py @@ -39,7 +39,7 @@ def __generate_es_dsl(self): if self.__filter_input is not None: query_terms.append(CqlParser('properties').transform(self.__filter_input)) query_dsl = { - 'track_total_hits': True, + 'track_total_hits': self.__offset is None, 'size': self.__limit, # "collapse": {"field": "id"}, 'sort': [ @@ -228,11 +228,11 @@ def start(self): each_granules_query_result_stripped['links'].append(self_link) self.__restructure_each_granule_result(each_granules_query_result_stripped) - pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']]) + pagination_link = '' if len(granules_query_result['hits']['hits']) < 1 else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']]) return { 'statusCode': 200, 'body': { - 'numberMatched': {'total_size': result_size}, + 'numberMatched': {'total_size': -1 if self.__offset is not None else result_size}, 'numberReturned': len(granules_query_result['hits']['hits']), 'stac_version': '1.0.0', 'type': 'FeatureCollection', # TODO correct name? 
diff --git a/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py b/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py index 9cc3fdc..9d687d5 100644 --- a/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py +++ b/tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py @@ -22,6 +22,25 @@ def setUp(self) -> None: self.collection_version = '001' return + def test_start_01(self): + os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' + os.environ['ES_PORT'] = '9200' + self.tenant = 'UDS_LOCAL_TEST_3' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'DDD-01' # 'uds_collection' # 'sbx_collection' + self.collection_version = '001' + + collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}' + granule_id = f'{collection_id}:test_file09' + + granules_dapa_query = GranulesDapaQueryEs(collection_id, 10, '1736291597733,URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file10', None, None, None, f'localhost/api-prefix') + granules_result = granules_dapa_query.start() + print(granules_result) + print([k['id'] for k in granules_result['body']['features']]) + # ['URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file20', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file19', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file14', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file17', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file18', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file12', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file13', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file15', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file06', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file01'] + # 
['URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file05', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file03', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file09', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file16', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file11', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file04', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file08', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file02', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file07', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file10'] + return + def test_get_single_granule_01(self): os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' os.environ['ES_PORT'] = '9200' diff --git a/tests/integration_tests/test_uds_api.py b/tests/integration_tests/test_uds_api.py index 8ad1646..c503444 100644 --- a/tests/integration_tests/test_uds_api.py +++ b/tests/integration_tests/test_uds_api.py @@ -184,8 +184,8 @@ def test_collections_get_single_granule(self): return def test_granules_get(self): - # post_url = f'{self.uds_url}collections/urn:nasa:unity:unity:dev:SBG-L2A_RFL___1/items/' # MCP Dev - post_url = f'{self.uds_url}collections/urn:nasa:unity:asips:int:P1590011-T___1/items/' # MCP OPS + post_url = f'{self.uds_url}collections/URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001/items/?limit=10' # MCP Dev + # post_url = f'{self.uds_url}collections/urn:nasa:unity:asips:int:P1590011-T___1/items/?limit=10' # MCP OPS headers = { 'Authorization': f'Bearer {self.bearer_token}', } @@ -195,6 +195,7 @@ def test_granules_get(self): ) response_json = json.loads(query_result.text) print(json.dumps(response_json, indent=4)) + print(f"length: {len(response_json['features'])}") self.assertEqual(query_result.status_code, 200, f'wrong status code. 
{query_result.text}') links = {k['rel']: k['href'] for k in response_json['links'] if k['rel'] != 'root'} for k, v in links.items():