release/9.6.0 #527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 12 commits merged on Feb 3, 2025
2 changes: 1 addition & 1 deletion .github/workflows/makefile.yml
@@ -23,7 +23,7 @@ jobs:
- run: |
python3 "${GITHUB_WORKSPACE}/setup.py" install_lib
- run: |
python3 -m pip uninstall botocore boto3 -y
python3 -m pip uninstall botocore boto3 s3transfer -y
- run: |
# make file runnable, might not be necessary
chmod +x "${GITHUB_WORKSPACE}/ci.cd/create_aws_lambda_zip.sh"
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [9.6.0] - 2025-02-03
### Changed
- [#517](https://github.com/unity-sds/unity-data-services/pull/517) feat: stac browser oidc cookie

## [9.5.2] - 2025-01-31
### Fixed
- [#524](https://github.com/unity-sds/unity-data-services/pull/524) fix: default boto3 from aws already has s3transfer library

## [9.5.1] - 2025-01-17
### Fixed
- [#502](https://github.com/unity-sds/unity-data-services/pull/502) fix: get granules pagination

## [9.5.0] - 2025-01-17
### Changed
- [#499](https://github.com/unity-sds/unity-data-services/pull/499) feat: duplicate granules diff index

## [9.4.2] - 2025-01-17
### Fixed
- [#498](https://github.com/unity-sds/unity-data-services/pull/498) fix: restructure bbox for geoshape

## [9.4.1] - 2024-12-18
### Fixed
- [#489](https://github.com/unity-sds/unity-data-services/pull/489) fix: delete bug
121 changes: 98 additions & 23 deletions cumulus_lambda_functions/lib/uds_db/granules_db_index.py
@@ -34,12 +34,25 @@ def __init__(self):

@staticmethod
def to_es_bbox(bbox_array):
# lon = x, lat = y
# lon, lat, lon, lat
# x can be 170 to -170
# 170, 0, -170, 10
minX, minY, maxX, maxY = bbox_array

# Ensure the values are properly sorted
# if minX > maxX:
# minX, maxX = maxX, minX
if minY > maxY:
minY, maxY = maxY, minY

return {
"type": "envelope",
"coordinates": [
[bbox_array[0], bbox_array[3]], # Top-left corner (minLon, maxLat)
[bbox_array[2], bbox_array[1]] # Bottom-right corner (maxLon, minLat)
]
"coordinates": [[minX, maxY], [maxX, minY]],
# "coordinates": [
# [bbox_array[0], bbox_array[3]], # Top-left corner (minLon, maxLat)
# [bbox_array[2], bbox_array[1]] # Bottom-right corner (maxLon, minLat)
# ]
}
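
For reference, the new envelope logic re-orders only the latitudes and deliberately leaves the longitudes in their original order, so an anti-meridian-crossing bbox such as `170, 0, -170, 10` still produces a valid Elasticsearch `envelope` (upper-left corner first, lower-right corner second). A minimal sketch of the behaviour, with a hypothetical `geo_shape` query in which the `bbox` field name is only an assumption:

```python
def to_es_bbox(bbox_array):
    # bbox is [minLon, minLat, maxLon, maxLat]; longitudes may wrap the anti-meridian,
    # so only the latitudes are re-ordered before building the envelope.
    min_x, min_y, max_x, max_y = bbox_array
    if min_y > max_y:
        min_y, max_y = max_y, min_y
    return {'type': 'envelope', 'coordinates': [[min_x, max_y], [max_x, min_y]]}

print(to_es_bbox([170, 0, -170, 10]))
# {'type': 'envelope', 'coordinates': [[170, 10], [-170, 0]]}

# Hypothetical geo_shape filter consuming the envelope (the 'bbox' field name is an assumption):
query = {
    'query': {
        'geo_shape': {
            'bbox': {'shape': to_es_bbox([170, 0, -170, 10]), 'relation': 'intersects'}
        }
    }
}
```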

@staticmethod
@@ -152,16 +165,20 @@ def create_new_index(self, tenant, tenant_venue, es_mapping: dict):
self.__es.swap_index_for_alias(write_perc_alias_name, current_perc_index_name, new_perc_index_name)
try:
self.__es.migrate_index_data(current_perc_index_name, new_perc_index_name)
except Exception as e:
except:
LOGGER.exception(f'failed to migrate index data: {(current_perc_index_name, new_perc_index_name)}')
return

def get_latest_index(self, tenant, tenant_venue):
def get_latest_index_name(self, tenant, tenant_venue):
write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
write_alias_name = self.__es.get_alias(write_alias_name)
if len(write_alias_name) != 1:
raise ValueError(f'missing index for {tenant}_{tenant_venue}. {write_alias_name}')
latest_index_name = [k for k in write_alias_name.keys()][0]
return latest_index_name

def get_latest_index(self, tenant, tenant_venue):
latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
index_mapping = self.__es.get_index_mapping(latest_index_name)
if index_mapping is None:
raise ValueError(f'missing index: {latest_index_name}')
@@ -201,53 +218,106 @@ def get_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
raise ValueError(f"no such granule: {doc_id}")
return result

def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
def __query_by_id_local(self, tenant: str, tenant_venue: str, doc_id: str, ):
read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
result = self.__es.query({
dsl = {
'size': 9999,
'query': {'term': {'_id': doc_id}}
}, read_alias_name)
if result is None:
raise ValueError(f"no such granule: {doc_id}")
for each_granule in result['hits']['hits']:
'sort': [
{'properties.datetime': {'order': 'desc'}},
{'id': {'order': 'asc'}}
],
'query': {
'term': {'_id': doc_id}
}
}
result = self.__es.query(dsl, read_alias_name)
if result is None or len(result['hits']['hits']) < 1:
return []
return result['hits']['hits']

def __delete_old_entries(self, dsl_result):
for each_granule in dsl_result:
LOGGER.debug(f"deleting {each_granule['_id']} from {each_granule['_index']}")
delete_result = self.__es.delete_by_query({
'query': {'term': {'id': each_granule['_id']}}
}, each_granule['_index'])
LOGGER.debug(f'delete_result: {delete_result}')
if delete_result is None:
raise ValueError(f"error deleting {each_granule}")
return

def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ):
result = self.__query_by_id_local(tenant, tenant_venue, doc_id)
if len(result) < 1:
raise ValueError(f"no such granule: {doc_id}")
self.__delete_old_entries(result)
return result

def update_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ):
# find existing doc_id
# if not found, throw error. Cannot update
# if found, check index.
# if latest index, proceed with update
# if older index, proceed with get + delete
# tweak meta locally, and add it.
write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
json_body['event_time'] = TimeUtils.get_current_unix_milli()
self.__es.update_one(json_body, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
LOGGER.debug(f'custom_metadata indexed')
existing_entries = self.__query_by_id_local(tenant, tenant_venue, doc_id)
if len(existing_entries) < 1:
raise ValueError(f'unable to update {doc_id} as it is not found. ')
latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
existing_entry = existing_entries[0]
if existing_entry['_index'] == latest_index_name:
LOGGER.debug(f'{doc_id} in latest index: {latest_index_name}. continuing with update')
self.__es.update_one(json_body, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
self.__delete_old_entries(existing_entries[1:])
return
LOGGER.debug(f'{doc_id} in older index: {latest_index_name} v. {existing_entry["_index"]}')
new_doc = {**existing_entry['_source'], **json_body}
self.__es.index_one(new_doc, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
self.__delete_old_entries(existing_entries)
return

def add_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ):
# find existing doc_id
# if not found, add it
# if found, and it is in latest index, add it.
# if found, and it is in older index, add current one, and delete the older one.

write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
json_body['event_time'] = TimeUtils.get_current_unix_milli()
# TODO validate custom metadata vs the latest index to filter extra items
existing_entries = self.__query_by_id_local(tenant, tenant_venue, doc_id)
if len(existing_entries) < 1:
self.__es.index_one(json_body, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
return
latest_index_name = self.get_latest_index_name(tenant, tenant_venue)
existing_entry = existing_entries[0]
if existing_entry['_index'] == latest_index_name:
self.__es.index_one(json_body, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
self.__delete_old_entries(existing_entries[1:])
return
self.__es.index_one(json_body, doc_id, index=write_alias_name) # TODO assuming granule_id is prefixed with collection id
LOGGER.debug(f'custom_metadata indexed')
self.__delete_old_entries(existing_entries)
# TODO validate custom metadata vs the latest index to filter extra items
return

def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
if 'sort' not in search_dsl:
search_result = self.__es.query(search_dsl,
querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(
search_dsl, querying_index=read_alias_name)
if 'sort' not in search_dsl: # We cannot paginate w/o sort. So, max is 10k items:
# This also assumes "size" should be part of search_dsl
search_result = self.__es.query(search_dsl, querying_index=read_alias_name)
LOGGER.debug(f'search_finished: {len(search_result["hits"]["hits"])}')
return search_result
# we can run paginate search
original_size = search_dsl['size'] if 'size' in search_dsl else 20
total_size = -999
result = []
duplicates = set([])
while len(result) < original_size:
search_dsl['size'] = (original_size - len(result)) * 2
search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(search_dsl, querying_index=read_alias_name)
search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name)
if total_size == -999:
total_size = self.__es.get_result_size(search_result)
if len(search_result['hits']['hits']) < 1:
break
for each in search_result['hits']['hits']:
@@ -257,11 +327,16 @@ def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
search_dsl['search_after'] = search_result['hits']['hits'][-1]['sort']

LOGGER.debug(f'search_finished: {len(result)}')
if len(result) > original_size:
result = result[:original_size]
return {
'hits': {
"total": {
"value": len(result)
"value": total_size,
},
'hits': result
}
}
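
As a usage sketch (assuming the class above is exported as `GranulesDbIndex` and that the Elasticsearch connection settings are available in the environment), a caller that wants more than a single 10k-item response must supply a `sort` clause and can page by feeding the last hit's `sort` values back in as `search_after`:

```python
from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex

granules_index = GranulesDbIndex()  # assumes ES host/credentials are configured in the environment

search_dsl = {
    'size': 20,
    'sort': [
        {'properties.datetime': {'order': 'desc'}},
        {'id': {'order': 'asc'}},
    ],
    'query': {'match_all': {}},
}
page_1 = granules_index.dsl_search('my_tenant', 'dev', search_dsl)  # tenant / venue are placeholders

# Per the loop above, each returned hit keeps its raw Elasticsearch 'sort' values,
# so the next page is requested by passing them back as search_after.
if page_1['hits']['hits']:
    search_dsl['search_after'] = page_1['hits']['hits'][-1]['sort']
    page_2 = granules_index.dsl_search('my_tenant', 'dev', search_dsl)
```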



(changes to an additional file — filename not rendered)
@@ -39,7 +39,7 @@ def __generate_es_dsl(self):
if self.__filter_input is not None:
query_terms.append(CqlParser('properties').transform(self.__filter_input))
query_dsl = {
'track_total_hits': True,
'track_total_hits': self.__offset is None,
'size': self.__limit,
# "collapse": {"field": "id"},
'sort': [
@@ -228,11 +228,11 @@ def start(self):
each_granules_query_result_stripped['links'].append(self_link)
self.__restructure_each_granule_result(each_granules_query_result_stripped)

pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
pagination_link = '' if len(granules_query_result['hits']['hits']) < 1 else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
return {
'statusCode': 200,
'body': {
'numberMatched': {'total_size': result_size},
'numberMatched': {'total_size': -1 if self.__offset is not None else result_size},
'numberReturned': len(granules_query_result['hits']['hits']),
'stac_version': '1.0.0',
'type': 'FeatureCollection', # TODO correct name?
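
With this change the pagination link is emitted whenever any hits are returned, and it is simply the last hit's sort values joined with commas; `numberMatched` is reported as `-1` when an offset is supplied, because `track_total_hits` is disabled in that case. A small illustrative sketch of the token construction (the hit below is made up):

```python
# Illustrative only: how the pagination token is assembled from the last hit's sort values.
last_hit = {'sort': [1738540800000, 'URN:NASA:UNITY:TENANT:VENUE:COLLECTION___1:GRANULE-01']}
pagination_token = ','.join(k if isinstance(k, str) else str(k) for k in last_hit['sort'])
print(pagination_token)
# 1738540800000,URN:NASA:UNITY:TENANT:VENUE:COLLECTION___1:GRANULE-01
```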
7 changes: 7 additions & 0 deletions cumulus_lambda_functions/uds_api/misc_api.py
@@ -46,6 +46,13 @@ async def stac_entry(request: Request, response: Response):
@router.get(f'/stac_entry')
@router.get(f'/stac_entry/')
async def stac_entry(request: Request, response: Response):
"""
How to re-load UCS
https://github.com/unity-sds/unity-data-services/issues/381#issuecomment-2201165672
:param request:
:param response:
:return:
"""
request_headers = dict(request.headers)
LOGGER.debug(f'stac_entry - request_headers: {request_headers}')
print(request_headers)
2 changes: 1 addition & 1 deletion cumulus_lambda_functions/uds_api/stac_browser/index.html
@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><meta id="meta-description" name="description" content=""><title>STAC Browser</title><script defer="defer" src="/to/be/replaced/stac_browser/js/chunk-vendors.eae96ced.js"></script><script defer="defer" src="/to/be/replaced/stac_browser/js/app.79583f94.js"></script><link href="/to/be/replaced/stac_browser/css/chunk-vendors.de510de6.css" rel="stylesheet"><link href="/to/be/replaced/stac_browser/css/app.b5efa536.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but STAC Browser doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="stac-browser"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><meta id="meta-description" name="description" content=""><title>STAC Browser</title><script defer="defer" src="/to/be/replaced/stac_browser/js/chunk-vendors.eae96ced.js"></script><script defer="defer" src="/to/be/replaced/stac_browser/js/app.ad7851bb.js"></script><link href="/to/be/replaced/stac_browser/css/chunk-vendors.de510de6.css" rel="stylesheet"><link href="/to/be/replaced/stac_browser/css/app.b5efa536.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but STAC Browser doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="stac-browser"></div></body></html>

(One file was deleted, and two further files have large diffs that are not rendered.)
7 changes: 7 additions & 0 deletions cwl/stage-in-daac/README.md
@@ -0,0 +1,7 @@
# Test stage in data from DAAC

Update `stac_json` value in stage-in-job-01.yml

Run the following command with a CWL runner.

`cwltool stage-in-workflow.cwl stage-in-job-01.yml`
2 changes: 2 additions & 0 deletions cwl/stage-in-daac/stage-in-job-01.yml
@@ -0,0 +1,2 @@
download_dir: "granules"
stac_json: "https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/EMITL1BRAD_001/items?limit=2"
31 changes: 31 additions & 0 deletions cwl/stage-in-daac/stage-in-workflow.cwl
@@ -0,0 +1,31 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.2
class: Workflow
label: Workflow that executes the Sounder SIPS end-to-end chirp rebinning workflow

$namespaces:
cwltool: http://commonwl.org/cwltool#

requirements:
SubworkflowFeatureRequirement: {}
StepInputExpressionRequirement: {}
InlineJavascriptRequirement: {}
NetworkAccess:
networkAccess: true

inputs:
download_dir: string
stac_json: string

outputs:
data:
type: Directory
outputSource: stage-in/download_dir

steps:
stage-in:
run: "stage-in.cwl"
in:
stac_json: stac_json
download_dir: download_dir
out: [download_dir]
39 changes: 39 additions & 0 deletions cwl/stage-in-daac/stage-in.cwl
@@ -0,0 +1,39 @@
cwlVersion: v1.2
class: CommandLineTool

baseCommand: ["DOWNLOAD"]

requirements:
DockerRequirement:
dockerPull: ghcr.io/unity-sds/unity-data-services:9.4.0
NetworkAccess:
networkAccess: true
EnvVarRequirement:
envDef:
DOWNLOAD_DIR: $(runtime.outdir)/$(inputs.download_dir)
STAC_JSON: $(inputs.stac_json)
LOG_LEVEL: '10'
PARALLEL_COUNT: '-1'
DOWNLOAD_RETRY_WAIT_TIME: '30'
DOWNLOAD_RETRY_TIMES: '5'
DOWNLOADING_ROLES: 'data'

EDL_BASE_URL: 'https://urs.earthdata.nasa.gov/'
EDL_USERNAME: '/sps/processing/workflows/edl_username'
EDL_PASSWORD: '/sps/processing/workflows/edl_password'
EDL_PASSWORD_TYPE: 'PARAM_STORE'

VERIFY_SSL: 'TRUE'
STAC_AUTH_TYPE: 'NONE'

inputs:
download_dir:
type: string
stac_json:
type: string

outputs:
download_dir:
type: Directory
outputBinding:
glob: "$(inputs.download_dir)"
7 changes: 7 additions & 0 deletions cwl/stage-in-json-str/README.md
@@ -0,0 +1,7 @@
# Test stage in data from an inline STAC JSON string

Update `stac_json` value in stage-in-job-01.yml

Run the following command with a CWL runner.

`cwltool stage-in-workflow.cwl stage-in-job-01.yml`
2 changes: 2 additions & 0 deletions cwl/stage-in-json-str/stage-in-job-01.yml
@@ -0,0 +1,2 @@
download_dir: "granules"
stac_json: '{"type":"FeatureCollection","stac_version":"1.0.0","numberMatched":2,"numberReturned":2,"features":[{"properties":{"datetime":"2016-08-22T00:05:22.000Z","start_datetime":"2016-08-22T00:05:22.000Z","end_datetime":"2016-08-22T00:11:22.000Z"},"bbox":[-7.02,-60.32,26.31,-36.16],"assets":{"data":{"title":"Download SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc","href":"https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc"}},"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[6.18,-36.16],[-7.02,-56.04],[23.24,-60.32],[26.31,-38.94],[6.18,-36.16]]]},"stac_extensions":[],"id":"G2040068613-GES_DISC","stac_version":"1.0.0","collection":"C2011289787-GES_DISC","links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.stac"},{"rel":"parent","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"collection","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.json"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068613-GES_DISC.umm_json"}]},{"properties":{"datetime":"2016-08-22T00:11:22.000Z","start_datetime":"2016-08-22T00:11:22.000Z","end_datetime":"2016-08-22T00:17:22.000Z"},"bbox":[-43.78,-81.77028018298317,23.22,-56.18],"assets":{"data":{"title":"Download SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc","href":"https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc"}},"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-7.16,-56.18],[-43.78,-71.72],[20.73,-81.77],[23.22,-60.47],[-7.16,-56.18]]]},"stac_extensions":[],"id":"G2040068619-GES_DISC","stac_version":"1.0.0","collection":"C2011289787-GES_DISC","links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.stac"},{"rel":"parent","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"collection","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/C2011289787-GES_DISC.stac"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.json"},{"rel":"via","href":"https://cmr.earthdata.nasa.gov:443/search/concepts/G2040068619-GES_DISC.umm_json"}]}],"links":[{"rel":"self","href":"https://cmr.earthdata.nasa.gov:443/search/granules.stac?collection_concept_id=C2011289787-GES_DISC&temporal%5B%5D=2016-08-22T00%3A10%3A00%2C2016-08-22T00%3A15%3A00&page_num=1"},{"rel":"root","href":"https://cmr.earthdata.nasa.gov:443/search/"}],"context":{"returned":2,"limit":1000000,"matched":2}}'