Merge pull request #527 from unity-sds/develop
release/9.6.0
ngachung authored Feb 3, 2025
2 parents b210a79 + ffe4493 commit f2fa8f2
Showing 42 changed files with 968 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/makefile.yml
@@ -23,7 +23,7 @@ jobs:
       - run: |
           python3 "${GITHUB_WORKSPACE}/setup.py" install_lib
       - run: |
-          python3 -m pip uninstall botocore boto3 -y
+          python3 -m pip uninstall botocore boto3 s3transfer -y
       - run: |
           # make file runnable, might not be necessary
           chmod +x "${GITHUB_WORKSPACE}/ci.cd/create_aws_lambda_zip.sh"
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [9.6.0] - 2025-02-03
### Changed
- [#517](https://github.com/unity-sds/unity-data-services/pull/517) feat: stac browser oidc cookie

## [9.5.2] - 2025-01-31
### Fixed
- [#524](https://github.com/unity-sds/unity-data-services/pull/524) fix: default boto3 from aws already has s3transfer library

## [9.5.1] - 2025-01-17
### Fixed
- [#502](https://github.com/unity-sds/unity-data-services/pull/502) fix: get granules pagination

## [9.5.0] - 2025-01-17
### Changed
- [#499](https://github.com/unity-sds/unity-data-services/pull/499) feat: duplicate granules diff index

## [9.4.2] - 2025-01-17
### Fixed
- [#498](https://github.com/unity-sds/unity-data-services/pull/498) fix: restructure bbox for geoshape

## [9.4.1] - 2024-12-18
### Fixed
- [#489](https://github.com/unity-sds/unity-data-services/pull/489) fix: delete bug
121 changes: 98 additions & 23 deletions cumulus_lambda_functions/lib/uds_db/granules_db_index.py
@@ -34,12 +34,25 @@ def __init__(self):

     @staticmethod
     def to_es_bbox(bbox_array):
+        # lon = x, lat = y
+        # lon, lat, lon, lat
+        # x can be 170 to -170
+        # 170, 0, -170, 10
+        minX, minY, maxX, maxY = bbox_array
+
+        # Ensure the values are properly sorted
+        # if minX > maxX:
+        #     minX, maxX = maxX, minX
+        if minY > maxY:
+            minY, maxY = maxY, minY
+
         return {
             "type": "envelope",
-            "coordinates": [
-                [bbox_array[0], bbox_array[3]],  # Top-left corner (minLon, maxLat)
-                [bbox_array[2], bbox_array[1]]  # Bottom-right corner (maxLon, minLat)
-            ]
+            "coordinates": [[minX, maxY], [maxX, minY]],
+            # "coordinates": [
+            #     [bbox_array[0], bbox_array[3]], # Top-left corner (minLon, maxLat)
+            #     [bbox_array[2], bbox_array[1]] # Bottom-right corner (maxLon, minLat)
+            # ]
         }

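A quick sanity check of the new conversion (a sketch; the enclosing class name `GranulesDbIndex` is assumed from the module name). Note that only the latitude pair is re-sorted: a bbox that crosses the antimeridian legitimately has `minX > maxX` (e.g. 170 to -170), which is why the longitude swap stays commented out.

```python
# Hedged usage sketch; GranulesDbIndex is assumed to be the class holding to_es_bbox.
from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex

# bbox order is [minLon, minLat, maxLon, maxLat]; this one crosses the antimeridian
print(GranulesDbIndex.to_es_bbox([170, 0, -170, 10]))
# {'type': 'envelope', 'coordinates': [[170, 10], [-170, 0]]}
```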
@@ -152,16 +165,20 @@ def create_new_index(self, tenant, tenant_venue, es_mapping: dict):
         self.__es.swap_index_for_alias(write_perc_alias_name, current_perc_index_name, new_perc_index_name)
         try:
             self.__es.migrate_index_data(current_perc_index_name, new_perc_index_name)
-        except Exception as e:
+        except:
             LOGGER.exception(f'failed to migrate index data: {(current_perc_index_name, new_perc_index_name)}')
         return

-    def get_latest_index(self, tenant, tenant_venue):
+    def get_latest_index_name(self, tenant, tenant_venue):
         write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
         write_alias_name = self.__es.get_alias(write_alias_name)
         if len(write_alias_name) != 1:
             raise ValueError(f'missing index for {tenant}_{tenant_venue}. {write_alias_name}')
         latest_index_name = [k for k in write_alias_name.keys()][0]
+        return latest_index_name
+
+    def get_latest_index(self, tenant, tenant_venue):
+        latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
         index_mapping = self.__es.get_index_mapping(latest_index_name)
         if index_mapping is None:
             raise ValueError(f'missing index: {latest_index_name}')
@@ -201,53 +218,106 @@ def get_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
             raise ValueError(f"no such granule: {doc_id}")
         return result

-    def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
+    def __query_by_id_local(self, tenant: str, tenant_venue: str, doc_id: str, ):
         read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
-        result = self.__es.query({
+        dsl = {
             'size': 9999,
-            'query': {'term': {'_id': doc_id}}
-        }, read_alias_name)
-        if result is None:
-            raise ValueError(f"no such granule: {doc_id}")
-        for each_granule in result['hits']['hits']:
+            'sort': [
+                {'properties.datetime': {'order': 'desc'}},
+                {'id': {'order': 'asc'}}
+            ],
+            'query': {
+                'term': {'_id': doc_id}
+            }
+        }
+        result = self.__es.query(dsl, read_alias_name)
+        if result is None or len(result['hits']['hits']) < 1:
+            return []
+        return result['hits']['hits']
+
+    def __delete_old_entries(self, dsl_result):
+        for each_granule in dsl_result:
             LOGGER.debug(f"deleting {each_granule['_id']} from {each_granule['_index']}")
             delete_result = self.__es.delete_by_query({
                 'query': {'term': {'id': each_granule['_id']}}
             }, each_granule['_index'])
+            LOGGER.debug(f'delete_result: {delete_result}')
             if delete_result is None:
                 raise ValueError(f"error deleting {each_granule}")
         return

+    def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
+        result = self.__query_by_id_local(tenant, tenant_venue, doc_id)
+        if len(result) < 1:
+            raise ValueError(f"no such granule: {doc_id}")
+        self.__delete_old_entries(result)
+        return result

     def update_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ):
+        # find existing doc_id
+        # if not found, throw error. Cannot update
+        # if found, check index.
+        # if latest index, proceed with update
+        # if older index, proceed with get + delete
+        # tweak meta locally, and add it.
         write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
         json_body['event_time'] = TimeUtils.get_current_unix_milli()
-        self.__es.update_one(json_body, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
-        LOGGER.debug(f'custom_metadata indexed')
+        existing_entries = self.__query_by_id_local(tenant, tenant_venue, doc_id)
+        if len(existing_entries) < 1:
+            raise ValueError(f'unable to update {doc_id} as it is not found. ')
+        latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
+        existing_entry = existing_entries[0]
+        if existing_entry['_index'] == latest_index_name:
+            LOGGER.debug(f'{doc_id} in latest index: {latest_index_name}. continuing with update')
+            self.__es.update_one(json_body, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
+            self.__delete_old_entries(existing_entries[1:])
+            return
+        LOGGER.debug(f'{doc_id} in older index: {latest_index_name} v. {existing_entry["_index"]}')
+        new_doc = {**existing_entry['_source'], **json_body}
+        self.__es.index_one(new_doc, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
+        self.__delete_old_entries(existing_entries)
         return

     def add_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ):
+        # find existing doc_id
+        # if not found, add it
+        # if found, and it is in latest index, add it.
+        # if found, and it is in older index, add current one, and delete the older one.
+
         write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
         json_body['event_time'] = TimeUtils.get_current_unix_milli()
-        # TODO validate custom metadata vs the latest index to filter extra items
+        existing_entries = self.__query_by_id_local(tenant, tenant_venue, doc_id)
+        if len(existing_entries) < 1:
+            self.__es.index_one(json_body, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
+            return
+        latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
+        existing_entry = existing_entries[0]
+        if existing_entry['_index'] == latest_index_name:
+            self.__es.index_one(json_body, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
+            self.__delete_old_entries(existing_entries[1:])
+            return
         self.__es.index_one(json_body, doc_id, index=write_alias_name)  # TODO assuming granule_id is prefixed with collection id
         LOGGER.debug(f'custom_metadata indexed')
+        self.__delete_old_entries(existing_entries)
+        # TODO validate custom metadata vs the latest index to filter extra items
         return

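Taken together, the public methods now funnel through `__query_by_id_local` and `__delete_old_entries` so a granule ends up in exactly one index, even after the write alias has been swapped to a newer one. A hedged usage sketch (the class name, tenant/venue values, and payload are illustrative assumptions):

```python
index = GranulesDbIndex()  # class name assumed, as above
doc_id = 'my_collection___001:granule_001'  # hypothetical id; the TODOs above assume granule_id is prefixed with the collection id
payload = {'id': doc_id, 'properties': {'datetime': '2025-02-03T00:00:00Z'}}

index.add_entry('demo_tenant', 'dev', payload, doc_id)           # index via write alias; purge stale copies from older indices
index.update_entry('demo_tenant', 'dev', {'tag': 'v2'}, doc_id)  # in-place update if in latest index, else merge + re-index + purge
index.delete_entry('demo_tenant', 'dev', doc_id)                 # raises ValueError when the granule does not exist
```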
     def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
         read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
-        if 'sort' not in search_dsl:
-            search_result = self.__es.query(search_dsl,
-                                            querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(
-                search_dsl, querying_index=read_alias_name)
+        if 'sort' not in search_dsl:  # We cannot paginate w/o sort. So, max is 10k items:
+            # This also assumes "size" should be part of search_dsl
+            search_result = self.__es.query(search_dsl, querying_index=read_alias_name)
             LOGGER.debug(f'search_finished: {len(search_result["hits"]["hits"])}')
             return search_result
         # we can run paginate search
         original_size = search_dsl['size'] if 'size' in search_dsl else 20
+        total_size = -999
         result = []
         duplicates = set([])
         while len(result) < original_size:
             search_dsl['size'] = (original_size - len(result)) * 2
-            search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(search_dsl, querying_index=read_alias_name)
+            search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name)
+            if total_size == -999:
+                total_size = self.__es.get_result_size(search_result)
             if len(search_result['hits']['hits']) < 1:
                 break
             for each in search_result['hits']['hits']:
@@ -257,11 +327,16 @@ def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
             search_dsl['search_after'] = search_result['hits']['hits'][-1]['sort']

         LOGGER.debug(f'search_finished: {len(result)}')
+        if len(result) > original_size:
+            result = result[:original_size]
         return {
             'hits': {
                 "total": {
-                    "value": len(result)
+                    "value": total_size,
                 },
                 'hits': result
             }
         }

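The rewritten `dsl_search` only paginates when the caller supplies a `sort` clause (`search_after` needs a stable sort order), and it now reports the true match count from `get_result_size` rather than the length of the returned page. A hedged sketch of a paginated call, reusing the sort from `__query_by_id_local`:

```python
search_dsl = {
    'size': 20,  # page size; internally doubled while filling a page and trimmed afterwards
    'sort': [
        {'properties.datetime': {'order': 'desc'}},
        {'id': {'order': 'asc'}},
    ],
    'query': {'match_all': {}},
}
page = GranulesDbIndex().dsl_search('demo_tenant', 'dev', search_dsl)  # names assumed, as above
print(page['hits']['total']['value'], len(page['hits']['hits']))  # total matches vs. items in this page
```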


@@ -39,7 +39,7 @@ def __generate_es_dsl(self):
         if self.__filter_input is not None:
             query_terms.append(CqlParser('properties').transform(self.__filter_input))
         query_dsl = {
-            'track_total_hits': True,
+            'track_total_hits': self.__offset is None,
             'size': self.__limit,
             # "collapse": {"field": "id"},
             'sort': [
@@ -228,11 +228,11 @@ def start(self):
             each_granules_query_result_stripped['links'].append(self_link)
             self.__restructure_each_granule_result(each_granules_query_result_stripped)

-        pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
+        pagination_link = '' if len(granules_query_result['hits']['hits']) < 1 else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
         return {
             'statusCode': 200,
             'body': {
-                'numberMatched': {'total_size': result_size},
+                'numberMatched': {'total_size': -1 if self.__offset is not None else result_size},
                 'numberReturned': len(granules_query_result['hits']['hits']),
                 'stac_version': '1.0.0',
                 'type': 'FeatureCollection',  # TODO correct name?
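For context on the `pagination_link` change above: the cursor is simply the last hit's `sort` values joined with commas, and it is now emitted whenever any hits come back, not only when a full page was returned. A minimal illustration with made-up sort values:

```python
last_hit_sort = [1738540800000, 'granule_001']  # hypothetical [epoch-millis, id] sort values of the last hit
pagination_link = ','.join(k if isinstance(k, str) else str(k) for k in last_hit_sort)
print(pagination_link)  # 1738540800000,granule_001
```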
7 changes: 7 additions & 0 deletions cumulus_lambda_functions/uds_api/misc_api.py
@@ -46,6 +46,13 @@ async def stac_entry(request: Request, response: Response):
@router.get(f'/stac_entry')
@router.get(f'/stac_entry/')
async def stac_entry(request: Request, response: Response):
    """
    How to re-load UCS
    https://github.com/unity-sds/unity-data-services/issues/381#issuecomment-2201165672
    :param request:
    :param response:
    :return:
    """
    request_headers = dict(request.headers)
    LOGGER.debug(f'stac_entry - request_headers: {request_headers}')
    print(request_headers)
2 changes: 1 addition & 1 deletion cumulus_lambda_functions/uds_api/stac_browser/index.html
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><meta id="meta-description" name="description" content=""><title>STAC Browser</title><script defer="defer" src="/to/be/replaced/stac_browser/js/chunk-vendors.eae96ced.js"></script><script defer="defer" src="/to/be/replaced/stac_browser/js/app.79583f94.js"></script><link href="/to/be/replaced/stac_browser/css/chunk-vendors.de510de6.css" rel="stylesheet"><link href="/to/be/replaced/stac_browser/css/app.b5efa536.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but STAC Browser doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="stac-browser"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><meta id="meta-description" name="description" content=""><title>STAC Browser</title><script defer="defer" src="/to/be/replaced/stac_browser/js/chunk-vendors.eae96ced.js"></script><script defer="defer" src="/to/be/replaced/stac_browser/js/app.ad7851bb.js"></script><link href="/to/be/replaced/stac_browser/css/chunk-vendors.de510de6.css" rel="stylesheet"><link href="/to/be/replaced/stac_browser/css/app.b5efa536.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but STAC Browser doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="stac-browser"></div></body></html>


7 changes: 7 additions & 0 deletions cwl/stage-in-daac/README.md
@@ -0,0 +1,7 @@
# Test stage in data from DAAC

Update `stac_json` value in stage-in-job-01.yml

Run the following command with a CWL runner.

`cwltool stage-in-workflow.cwl stage-in-job-01.yml`
2 changes: 2 additions & 0 deletions cwl/stage-in-daac/stage-in-job-01.yml
@@ -0,0 +1,2 @@
download_dir: "granules"
stac_json: "https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/EMITL1BRAD_001/items?limit=2"
31 changes: 31 additions & 0 deletions cwl/stage-in-daac/stage-in-workflow.cwl
@@ -0,0 +1,31 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.2
class: Workflow
label: Workflow that executes the Sounder SIPS end-to-end chirp rebinning workflow

$namespaces:
  cwltool: http://commonwl.org/cwltool#

requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  NetworkAccess:
    networkAccess: true

inputs:
  download_dir: string
  stac_json: string

outputs:
  data:
    type: Directory
    outputSource: stage-in/download_dir

steps:
  stage-in:
    run: "stage-in.cwl"
    in:
      stac_json: stac_json
      download_dir: download_dir
    out: [download_dir]
39 changes: 39 additions & 0 deletions cwl/stage-in-daac/stage-in.cwl
@@ -0,0 +1,39 @@
cwlVersion: v1.2
class: CommandLineTool

baseCommand: ["DOWNLOAD"]

requirements:
  DockerRequirement:
    dockerPull: ghcr.io/unity-sds/unity-data-services:9.4.0
  NetworkAccess:
    networkAccess: true
  EnvVarRequirement:
    envDef:
      DOWNLOAD_DIR: $(runtime.outdir)/$(inputs.download_dir)
      STAC_JSON: $(inputs.stac_json)
      LOG_LEVEL: '10'
      PARALLEL_COUNT: '-1'
      DOWNLOAD_RETRY_WAIT_TIME: '30'
      DOWNLOAD_RETRY_TIMES: '5'
      DOWNLOADING_ROLES: 'data'

      EDL_BASE_URL: 'https://urs.earthdata.nasa.gov/'
      EDL_USERNAME: '/sps/processing/workflows/edl_username'
      EDL_PASSWORD: '/sps/processing/workflows/edl_password'
      EDL_PASSWORD_TYPE: 'PARAM_STORE'

      VERIFY_SSL: 'TRUE'
      STAC_AUTH_TYPE: 'NONE'

inputs:
  download_dir:
    type: string
  stac_json:
    type: string

outputs:
  download_dir:
    type: Directory
    outputBinding:
      glob: "$(inputs.download_dir)"
7 changes: 7 additions & 0 deletions cwl/stage-in-json-str/README.md
@@ -0,0 +1,7 @@
# Test stage in data from an inline STAC JSON string

Update `stac_json` value in stage-in-job-01.yml

Run the following command with a CWL runner.

`cwltool stage-in-workflow.cwl stage-in-job-01.yml`
2 changes: 2 additions & 0 deletions cwl/stage-in-json-str/stage-in-job-01.yml
@@ -0,0 +1,2 @@
download_dir: "granules"
stac_json: '{"type":"FeatureCollection","stac_version":"1.0.0","numberMatched":2,"numberReturned":2,"features":[{"properties":{"datetime":"2016-08-22T00:05:22.000Z","start_datetime":"2016-08-22T00:05:22.000Z","end_datetime":"2016-08-22T00:11:22.000Z"},"bbox":[-7.02,-60.32,26.31,-36.16],"assets":{"data":{"title":"Download SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc","href":"https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc"}},"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[6.18,-36.16],[-7.02,-56.04],[23.24,-60.32],[26.31,-38.94],[6.18,-36.16]]]},"stac_extensions":[],"id":"G2040068613-GES_DISC","stac_version":"1.0.0","collection":"C2011289787-GES_DISC","links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.stac"},{"rel":"parent","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"collection","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.json"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.umm_json"}]},{"properties":{"datetime":"2016-08-22T00:11:22.000Z","start_datetime":"2016-08-22T00:11:22.000Z","end_datetime":"2016-08-22T00:17:22.000Z"},"bbox":[-43.78,-81.77028018298317,23.22,-56.18],"assets":{"data":{"title":"Download SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc","href":"https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc"}},"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-7.16,-56.18],[-43.78,-71.72],[20.73,-81.77],[23.22,-60.47],[-7.16,-56.18]]]},"stac_extensions":[],"id":"G2040068619-GES_DISC","stac_version":"1.0.0","collection":"C2011289787-GES_DISC","links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.stac"},{"rel":"parent","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"collection","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.json"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.umm_json"}]}],"links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/granules.stac?collection_concept_id=C2011289787-GES_DISC&temporal%5B%5D=2016-08-22T00%3A10%3A00%2C2016-08-22T00%3A15%3A00&page_num=1"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"}],"context":{"returned":2,"limit":1000000,"matched":2}}'

