Skip to content

Commit 876d994

Browse files
authored
Merge 903bbbf into 27406e3
2 parents 27406e3 + 903bbbf commit 876d994

File tree

9 files changed

+139
-130
lines changed

9 files changed

+139
-130
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [1.10.3] - 2022-12-19
9+
### Added
10+
- [#104](https://github.com/unity-sds/unity-data-services/pull/104) added `updated` time to collection & item STAC dictionaries
11+
### Changed
12+
- [#104](https://github.com/unity-sds/unity-data-services/pull/104) use pystac library objects to create collection and item STAC dictionaries
13+
814
## [1.9.3] - 2022-12-19
915
### Added
1016
- [#103](https://github.com/unity-sds/unity-data-services/pull/103) return a dictionary including HREFs instead of a string REGISTERED

ci.cd/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@ update_lambda_function_mcp_dev_4:
4242
update_lambda_function_mcp_dev_5:
4343
aws lambda update-function-code --s3-key unity_cumulus_lambda/cumulus_lambda_functions_deployment.zip --s3-bucket uds-dev-cumulus-public --function-name arn:aws:lambda:us-west-2:237868187491:function:uds-dev-cumulus-cumulus_collections_creation_dapa_facade --publish &>/dev/null
4444

45+
mcp_dev: upload_lambda_mcp_dev update_lambda_function_mcp_dev_1 update_lambda_function_mcp_dev_2 update_lambda_function_mcp_dev_4 update_lambda_function_mcp_dev_5
46+
4547

cumulus_lambda_functions/cumulus_stac/collection_transformer.py

Lines changed: 42 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
from urllib.parse import quote_plus, urlparse, unquote_plus
44

55
import pystac
6+
from pystac.utils import datetime_to_str
7+
68
from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
7-
from pystac import Link
9+
from pystac import Link, Collection, Extent, SpatialExtent, TemporalExtent, Summaries
810

911
from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract
1012
from cumulus_lambda_functions.lib.time_utils import TimeUtils
@@ -287,6 +289,7 @@
287289

288290
class CollectionTransformer(StacTransformerAbstract):
289291
def __init__(self, report_to_ems:bool = True, include_date_range=False):
292+
super().__init__()
290293
self.__stac_collection_schema = json.loads(STAC_COLLECTION_SCHEMA)
291294
self.__cumulus_collection_schema = {}
292295
self.__report_to_ems = report_to_ems
@@ -307,7 +310,7 @@ def generate_target_link_url(self, regex: str = None, bucket: str = None):
307310
href_link[0] = bucket
308311
return f"./collection.json?bucket={href_link[0]}&regex={quote_plus(href_link[1])}"
309312

310-
def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'item'):
313+
def __convert_to_stac_link_obj(self, collection_file_obj: dict, rel_type: str = 'item'):
311314
"""
312315
expected output
313316
{
@@ -329,23 +332,17 @@ def __convert_to_stac_links(self, collection_file_obj: dict, rel_type: str = 'it
329332
:param collection_file_obj:
330333
:return: dict
331334
"""
332-
if collection_file_obj is None:
333-
return {}
334-
stac_link = {
335-
'rel': rel_type,
336-
}
335+
temp_link = Link(target=self.generate_target_link_url(
336+
collection_file_obj['regex'] if 'regex' in collection_file_obj else None,
337+
collection_file_obj['bucket'] if 'bucket' in collection_file_obj else None,
338+
),
339+
rel=rel_type
340+
)
337341
if 'type' in collection_file_obj:
338-
stac_link['type'] = collection_file_obj['type']
342+
temp_link.media_type = collection_file_obj['type']
339343
if 'sampleFileName' in collection_file_obj:
340-
stac_link['title'] = collection_file_obj['sampleFileName']
341-
stac_link['href'] = self.generate_target_link_url(
342-
collection_file_obj['regex'] if 'regex' in collection_file_obj else None,
343-
collection_file_obj['bucket'] if 'bucket' in collection_file_obj else None,
344-
)
345-
return stac_link
346-
347-
# def to_pystac_link_obj(self, input_dict: dict):
348-
# return
344+
temp_link.title = collection_file_obj['sampleFileName']
345+
return temp_link
349346

350347
def to_stac(self, source: dict) -> dict:
351348
source_sample = {
@@ -392,60 +389,39 @@ def to_stac(self, source: dict) -> dict:
392389
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}",
393390
"timestamp": 1647992849273
394391
}
395-
# TemporalIntervals([
396-
# datetime.strptime(source['dateFrom'])
397-
# ])
398-
# stac_collection = pystac.Collection(
399-
# id=f"{source['name']}___{source['version']}",
400-
# description='TODO',
401-
# extent=Extent(
402-
# SpatialExtent([[0, 0, 0, 0]]),
403-
# TemporalExtent([[source['dateFrom'] if 'dateFrom' in source else None,
404-
# source['dateTo'] if 'dateTo' in source else None]])
405-
# ),
406-
# summaries=Summaries({
407-
# "granuleId": [source['granuleId'] if 'granuleId' in source else ''],
408-
# "granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
409-
# "process": [source['process'] if 'process' in source else ''],
410-
# "totalGranules": [source['total_size'] if 'total_size' in source else -1],
411-
# }),
412-
# )
413-
# stac_collection.get_root_link().target = './collection.json'
414-
# stac_collection.add_links([Link.from_dict(k) for k in [self.__convert_to_stac_links(k) for k in source['files']]])
415-
stac_collection = {
416-
"type": "Collection",
417-
"stac_version": "1.0.0",
418-
# "stac_extensions": [],
419-
"id": f"{source['name']}___{source['version']}",
420-
"description": "TODO",
421-
"license": "proprietary",
422-
# "keywords": [],
423-
"providers": [],
424-
"extent": {
425-
"spatial": {
426-
"bbox": [[0, 0, 0, 0]]
427-
},
428-
"temporal": {
429-
"interval": [[source['dateFrom'] if 'dateFrom' in source else None,
430-
source['dateTo'] if 'dateTo' in source else None
431-
]]
432-
}
433-
},
434-
"assets": {},
435-
"summaries": {
392+
temporal_extent = []
393+
if 'dateFrom' in source:
394+
temporal_extent.append(self.get_time_obj(source['dateFrom']))
395+
if 'dateTo' in source:
396+
temporal_extent.append(self.get_time_obj(source['dateTo']))
397+
stac_collection = Collection(
398+
id=f"{source['name']}___{source['version']}",
399+
# href=f"https://ideas-api-to-be-hosted/slcp/collections/{input_collection['ShortName']}::{input_collection['VersionId']}",
400+
description="TODO",
401+
extent=Extent(
402+
SpatialExtent([[0.0, 0.0, 0.0, 0.0]]),
403+
TemporalExtent([temporal_extent])
404+
),
405+
license="proprietary",
406+
providers=[],
407+
# title=input_collection['LongName'],
408+
# keywords=[input_collection['SpatialKeywords']['Keyword']],
409+
summaries=Summaries({
410+
"updated": [datetime_to_str(TimeUtils().parse_from_unix(source['updatedAt'], True).get_datetime_obj())],
436411
"granuleId": [source['granuleId'] if 'granuleId' in source else ''],
437412
"granuleIdExtraction": [source['granuleIdExtraction'] if 'granuleIdExtraction' in source else ''],
438413
"process": [source['process'] if 'process' in source else ''],
439414
"totalGranules": [source['total_size'] if 'total_size' in source else -1],
440-
},
441-
"links": [self.__convert_to_stac_links({
442-
"regex": source['url_path'] if 'url_path' in source else './collection.json',
443-
"sampleFileName": source['sampleFileName'],
444-
"type": "application/json",
415+
}),
416+
# assets={}
417+
)
418+
stac_collection.links = [self.__convert_to_stac_link_obj({
419+
"regex": source['url_path'] if 'url_path' in source else './collection.json',
420+
"sampleFileName": source['sampleFileName'],
421+
"type": "application/json",
445422

446-
}, 'root')] + [self.__convert_to_stac_links(k) for k in source['files']],
447-
}
448-
return stac_collection
423+
}, 'root')] + [self.__convert_to_stac_link_obj(k) for k in source['files']]
424+
return stac_collection.to_dict(include_self_link=False, transform_hrefs=False)
449425

450426
def get_href(self, input_href: str):
451427
parse_result = urlparse(input_href)

cumulus_lambda_functions/cumulus_stac/item_transformer.py

Lines changed: 31 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import json
22

3+
from pystac import Item, Asset, Link
4+
from pystac.utils import datetime_to_str
5+
36
from cumulus_lambda_functions.cumulus_stac.stac_transformer_abstract import StacTransformerAbstract
47
from cumulus_lambda_functions.lib.json_validator import JsonValidator
58
from cumulus_lambda_functions.lib.time_utils import TimeUtils
@@ -292,6 +295,7 @@
292295

293296
class ItemTransformer(StacTransformerAbstract):
294297
def __init__(self):
298+
super().__init__()
295299
self.__stac_item_schema = json.loads(STAC_ITEM_SCHEMA)
296300
self.__cumulus_granule_schema = {}
297301

@@ -307,33 +311,13 @@ def __get_asset_name(self, input_dict):
307311
return 'metadata__data'
308312
return input_dict['type']
309313

310-
def __get_assets(self, input_dict):
311-
"""
312-
Sample:
313-
{
314-
"bucket": "am-uds-dev-cumulus-internal",
315-
"key": "ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS",
316-
"size": 760,
317-
"fileName": "P1570515ATMSSCIENCEAAT16032024518500.PDS",
318-
"source": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/031//P1570515ATMSSCIENCEAAT16032024518500.PDS",
319-
"type": "data"
320-
}
321-
:param input_dict:
322-
:return:
323-
"""
324-
asset_dict = {
325-
'href': f"s3://{input_dict['bucket']}/{input_dict['key']}",
326-
'title': input_dict['fileName'],
327-
'description': input_dict['fileName'],
328-
# 'type': '',
329-
# 'roles': '',
330-
}
331-
return asset_dict
332-
333-
def __get_datetime_from_source(self, source: dict, datetime_key: str):
334-
if datetime_key not in source:
335-
return '1970-01-01T00:00:00Z'
336-
return f"{source[datetime_key]}{'' if source[datetime_key].endswith('Z') else 'Z'}"
314+
def __get_asset_obj(self, input_dict):
315+
asset = Asset(
316+
href=f"s3://{input_dict['bucket']}/{input_dict['key']}",
317+
title=input_dict['fileName'],
318+
description=input_dict['fileName'],
319+
)
320+
return asset
337321

338322
def to_stac(self, source: dict) -> dict:
339323
"""
@@ -402,35 +386,28 @@ def to_stac(self, source: dict) -> dict:
402386

403387
cumulus_file_validator = JsonValidator(CUMULUS_FILE_SCHEMA)
404388
validated_files = [k for k in source['files'] if cumulus_file_validator.validate(k) is None]
405-
minimum_stac_item = {
406-
"stac_version": "1.0.0",
407-
"stac_extensions": [],
408-
"type": "Feature",
409-
"id": source['granuleId'],
410-
"bbox": [0, 0, 0, 0, ],
411-
"geometry": {
412-
"type": "Point",
413-
"coordinates": [0, 0]
389+
stac_item = Item(
390+
id=source['granuleId'],
391+
bbox=[0.0, 0.0, 0.0, 0.0],
392+
properties={
393+
# "datetime": f"{TimeUtils.decode_datetime(source['createdAt'], False)}Z",
394+
"start_datetime": datetime_to_str(self.get_time_obj(source['beginningDateTime'])),
395+
"end_datetime": datetime_to_str(self.get_time_obj(source['endingDateTime'])),
396+
"created": datetime_to_str(self.get_time_obj(source['productionDateTime'])),
397+
"updated": datetime_to_str(TimeUtils().parse_from_unix(source['updatedAt'], True).get_datetime_obj()),
414398
},
415-
"properties": {
416-
"datetime": f"{TimeUtils.decode_datetime(source['createdAt'], False)}Z",
417-
"start_datetime": self.__get_datetime_from_source(source, 'beginningDateTime'),
418-
"end_datetime": self.__get_datetime_from_source(source, 'endingDateTime'),
419-
"created": self.__get_datetime_from_source(source, 'productionDateTime'),
420-
# "created": source['processingEndDateTime'], # TODO
399+
collection=source['collectionId'],
400+
assets={self.__get_asset_name(k): self.__get_asset_obj(k) for k in validated_files},
401+
geometry={
402+
"type": "Point",
403+
"coordinates": [0.0, 0.0]
421404
},
422-
"collection": source['collectionId'],
423-
"links": [
424-
{
425-
"rel": "collection",
426-
"href": ".",
427-
# "type": "application/json",
428-
# "title": "Simple Example Collection"
429-
}
430-
],
431-
"assets": {self.__get_asset_name(k): self.__get_assets(k) for k in validated_files}
432-
}
433-
return minimum_stac_item
405+
datetime=TimeUtils().parse_from_unix(source['createdAt'], True).get_datetime_obj(),
406+
)
407+
stac_item.links = [
408+
Link(rel='collection', target='.')
409+
]
410+
return stac_item.to_dict(include_self_link=False, transform_hrefs=False)
434411

435412
def from_stac(self, source: dict) -> dict:
436413
return {}
Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,37 @@
1+
import logging
12
from abc import ABC, abstractmethod
23

4+
from cumulus_lambda_functions.lib.time_utils import TimeUtils
5+
6+
LOGGER = logging.getLogger(__name__)
7+
38

49
class StacTransformerAbstract(ABC):
10+
def __init__(self) -> None:
11+
super().__init__()
12+
self._dt_formats = [
13+
'%y-%m-%dT%H:%M:%S.%f%z',
14+
'%y-%m-%dT%H:%M:%S%z',
15+
'%Y-%m-%dT%H:%M:%S.%f%z',
16+
'%Y-%m-%dT%H:%M:%S%z',
17+
'%Y-%m-%dT%H:%M:%S.%f',
18+
]
19+
20+
def get_time_obj(self, datetime_str: str):
21+
if datetime_str is None:
22+
return None
23+
for each_fmt in self._dt_formats:
24+
try:
25+
dt_utils = TimeUtils().parse_from_str(datetime_str, each_fmt)
26+
return dt_utils.get_datetime_obj()
27+
except ValueError as ve1:
28+
LOGGER.debug(f'format and value do not match: {each_fmt} v. {datetime_str}')
29+
raise ValueError(f'unknown format: {datetime_str}')
30+
531
@abstractmethod
632
def to_stac(self, source: dict) -> dict:
7-
return
33+
return {}
834

935
@abstractmethod
1036
def from_stac(self, source: dict) -> dict:
11-
return
37+
return {}

cumulus_lambda_functions/lib/time_utils.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import calendar
33
import time
4-
from datetime import datetime
4+
from datetime import datetime, timezone, timedelta
55
from time import mktime
66
import re
77

@@ -17,6 +17,27 @@ class TimeUtils:
1717
MMDD_FORMAT = '%Y-%m-%dT%H:%M:%S'
1818
GB_1 = 1000000000
1919
YR_IN_SECOND = 31536000
20+
21+
def __init__(self):
22+
self.__time_obj = datetime.utcnow()
23+
24+
def parse_from_str(self, timestamp_str: str, fmt='%Y-%m-%dT%H:%M:%S%z', in_ms=False):
25+
self.__time_obj = datetime.strptime(timestamp_str, fmt)
26+
return self
27+
28+
def parse_from_unix(self, unix_timestamp, in_ms=False):
29+
converting_timestamp = unix_timestamp / 1000 if in_ms is True else unix_timestamp
30+
self.__time_obj = datetime.fromtimestamp(converting_timestamp, timezone(timedelta(0, 0, 0, 0)))
31+
return self
32+
33+
def get_datetime_obj(self):
34+
return self.__time_obj
35+
36+
def get_datetime_unix(self, in_ms=False):
37+
return int(self.__time_obj.timestamp()) if not in_ms else int(self.__time_obj.timestamp() * 1000)
38+
39+
def get_datetime_str(self, fmt='%Y-%m-%dT%H:%M:%S %z', in_ms=True):
40+
return self.__time_obj.strftime(fmt).replace('0000', '00:00')
2041
@staticmethod
2142
def get_current_year():
2243
return datetime.utcnow().year

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
setup(
1919
name="cumulus_lambda_functions",
20-
version="1.9.3",
20+
version="1.10.3",
2121
packages=find_packages(),
2222
install_requires=install_requires,
2323
tests_require=['mock', 'nose', 'sphinx', 'sphinx_rtd_theme', 'coverage', 'pystac', 'python-dotenv', 'jsonschema'],

tests/cumulus_lambda_functions/cumulus_stac/test_collection_transformer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from cumulus_lambda_functions.lib.json_validator import JsonValidator
88

99

10-
class TestItemTransformer(TestCase):
10+
class TestCollectionTransformer(TestCase):
1111
def test_01(self):
1212
stac_validator = JsonValidator(json.loads(STAC_COLLECTION_SCHEMA))
1313
source = {

tests/cumulus_lambda_functions/cumulus_stac/test_item_transformer.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,12 @@ def test_01(self):
108108
source = json.loads(source)
109109
stac_item = ItemTransformer().to_stac(source)
110110
sample_stac_item = {'stac_version': '1.0.0', 'stac_extensions': [], 'type': 'Feature',
111-
'id': 'P1570515ATMSSCIENCEAAT16032024518500.PDS', 'bbox': [0, 0, 0, 0], 'geometry': {"coordinates": [0, 0], "type": "Point"},
112-
'properties': {'datetime': '1970-01-01T00:27:28.050499079Z',
113-
'start_datetime': '2016-01-31T18:00:00.009057Z',
114-
'end_datetime': '2016-01-31T19:59:59.991043Z',
115-
'created': '2016-02-01T02:45:59.639000Z'},
111+
'id': 'P1570515ATMSSCIENCEAAT16032024518500.PDS', 'bbox': [0.0, 0.0, 0.0, 0.0], 'geometry': {"coordinates": [0.0, 0.0], "type": "Point"},
112+
'properties': { "start_datetime": "2016-01-31T18:00:00.009057Z",
113+
"end_datetime": "2016-01-31T19:59:59.991043Z",
114+
"created": "2016-02-01T02:45:59.639000Z",
115+
"updated": "2022-03-23T15:48:21.578000Z",
116+
"datetime": "2022-03-23T15:48:19.079000Z"},
116117
'collection': 'ATMS_SCIENCE_Group___001', 'links': [{"href": ".", "rel": "collection"}], 'assets': {'data': {
117118
'href': 's3://am-uds-dev-cumulus-internal/ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16032024518500.PDS',
118119
'title': 'P1570515ATMSSCIENCEAAT16032024518500.PDS',

0 commit comments

Comments
 (0)