Skip to content

Commit

Permalink
Merge pull request #319 from geoadmin/develop
Browse files Browse the repository at this point in the history
Release v1.7.0
  • Loading branch information
hansmannj authored Sep 30, 2021
2 parents 6c0aa3f + ee50645 commit b6324f6
Show file tree
Hide file tree
Showing 14 changed files with 842 additions and 20 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ confidence=
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=missing-docstring,
fixme, # TODO BGDIINF_SB-1983 remove once all the todos have been done
missing-module-docstring,
unused-argument,
unused-variable,
Expand Down
25 changes: 25 additions & 0 deletions app/stac_api/migrations/0018_assetupload_md5_parts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.1.13 on 2021-09-15 14:05

import django.core.serializers.json
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    '''Add the ``md5_parts`` JSON field to the ``assetupload`` model.

    The field is optional (``blank=True``), defaults to an empty list and is
    not editable.
    '''

    # This migration is applied on top of migration 0017 of the same app.
    dependencies = [('stac_api', '0017_data_collection_summaries_lang')]

    operations = [
        migrations.AddField(
            'assetupload',
            'md5_parts',
            models.JSONField(
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                default=list,
                blank=True,
                editable=False,
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions app/stac_api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,11 @@ class Status(models.TextChoices):
number_parts = models.IntegerField(
validators=[MinValueValidator(1), MaxValueValidator(100)], null=False, blank=False
) # S3 doesn't support more than 10'000 parts
# TODO BGDIINF_SB-1983 make the md5_parts mandatory by setting blank=False and removing
# the default=list
md5_parts = models.JSONField(
encoder=DjangoJSONEncoder, blank=True, default=list, editable=False
)
urls = models.JSONField(default=list, encoder=DjangoJSONEncoder, blank=True)
created = models.DateTimeField(auto_now_add=True)
ended = models.DateTimeField(blank=True, null=True, default=None)
Expand Down
20 changes: 13 additions & 7 deletions app/stac_api/s3_multipart_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def create_multipart_upload(self, key, asset, checksum_multihash):
)
return response['UploadId']

def create_presigned_url(self, key, asset, part, upload_id):
def create_presigned_url(self, key, asset, part, upload_id, part_md5):
'''Create a presigned url for an upload part on the backend
Args:
Expand All @@ -104,22 +104,28 @@ def create_presigned_url(self, key, asset, part, upload_id):
Part number for which to create a presigned url for upload part
upload_id: string
Upload ID for which to create a presigned url
part_md5: string
base64 MD5 digest of the part
Returns: dict(string, int, datetime)
Dict {'url': string, 'part': int, 'expires': datetime}
'''
expires = utc_aware(
datetime.utcnow() + timedelta(seconds=settings.AWS_PRESIGNED_URL_EXPIRES)
)
params = {
'Bucket': settings.AWS_STORAGE_BUCKET_NAME,
'Key': key,
'UploadId': upload_id,
'PartNumber': part
}
# TODO BGDIINF_SB-1983 part_md5 should be mandatory
if part_md5:
params['ContentMD5'] = part_md5
url = self.call_s3_api(
self.s3.generate_presigned_url,
'upload_part',
Params={
'Bucket': settings.AWS_STORAGE_BUCKET_NAME,
'Key': key,
'UploadId': upload_id,
'PartNumber': part
},
Params=params,
ExpiresIn=settings.AWS_PRESIGNED_URL_EXPIRES,
HttpMethod='PUT',
log_extra={
Expand Down
11 changes: 11 additions & 0 deletions app/stac_api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from stac_api.validators import validate_checksum_multihash_sha256
from stac_api.validators import validate_geoadmin_variant
from stac_api.validators import validate_item_properties_datetimes
from stac_api.validators import validate_md5_parts
from stac_api.validators import validate_name
from stac_api.validators_serializer import validate_json_payload
from stac_api.validators_serializer import validate_uniqueness_and_create
Expand Down Expand Up @@ -771,6 +772,7 @@ class Meta:
'completed',
'aborted',
'number_parts',
'md5_parts',
'urls',
'ended',
'parts'
Expand All @@ -783,6 +785,8 @@ class Meta:
allow_blank=False,
validators=[validate_checksum_multihash_sha256]
)
# TODO BGDIINF_SB-1983 make the md5_parts required
md5_parts = serializers.JSONField(required=False)

# write only fields
ended = serializers.DateTimeField(write_only=True, required=False)
Expand All @@ -797,6 +801,13 @@ class Meta:
completed = serializers.SerializerMethodField()
aborted = serializers.SerializerMethodField()

def validate(self, attrs):
    '''Object level validation: check md5_parts against number_parts.

    Args:
        attrs: dict
            Field values to validate

    Returns:
        dict - the unchanged attrs
    '''
    # TODO BGDIINF_SB-1983 md5_parts should be required not optional
    try:
        md5_parts = attrs['md5_parts']
    except KeyError:
        # No md5_parts given, nothing to check
        return attrs

    # Check the md5 parts length
    validate_md5_parts(md5_parts, attrs['number_parts'])
    return attrs

def get_completed(self, obj):
if obj.status == AssetUpload.Status.COMPLETED:
return isoformat(obj.ended)
Expand Down
21 changes: 21 additions & 0 deletions app/stac_api/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import json
import logging
from base64 import b64decode
from datetime import datetime
from datetime import timezone
from decimal import Decimal
Expand Down Expand Up @@ -362,3 +363,23 @@ def get_browser_url(request, view, collection='', item=''):
return f'{base}#/item/{b58encode(collection_path).decode()}/{b58encode(item_path).decode()}'
logger.error('Unknown view "%s", return the STAC browser base url %s', view, base)
return base


def is_valid_b64(value):
    '''Check if the value is a valid b64 encoded string

    The check is strict: any character outside of the standard base64 alphabet
    (including whitespace) as well as a wrong padding makes the value invalid.

    Args:
        value: string
            Value to check

    Returns:
        bool - True if valid, False otherwise (also False if value is not a string)
    '''
    if not isinstance(value, str):
        return False
    try:
        # validate=True is needed here: by default b64decode() silently discards every
        # character that is not part of the base64 alphabet instead of rejecting the value.
        b64decode(value, validate=True)
    except ValueError as err:
        # binascii.Error is a subclass of ValueError; non ASCII strings raise ValueError too.
        logger.debug('Invalid b64 value %s: %s', value, err)
        return False
    return True
90 changes: 90 additions & 0 deletions app/stac_api/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from stac_api.utils import fromisoformat
from stac_api.utils import geometry_from_bbox
from stac_api.utils import is_valid_b64

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -342,5 +343,94 @@ def validate_checksum_multihash_sha256(value):
raise ValidationError(_('Invalid multihash value; %(error)s'),
params={'error': error}, code='invalid') from None
if mhash.code != HASH_CODES['sha2-256']:
logger.error("Invalid multihash value: must be sha2-256 but is %s", CODE_HASHES[mhash.code])
raise ValidationError(_('Invalid multihash value: must be sha2-256 but is %(code)s'),
params={'code': CODE_HASHES[mhash.code]}, code='invalid')


def validate_md5_parts(md5_parts, number_parts):
    '''Validate the md5_parts field.

    The checks are done in the following order and the first failing one raises:
      1. md5_parts must be a list
      2. the list must contain exactly number_parts items with distinct part_number
      3. each item must be a dict with
         - a part_number field being an int between 1 and number_parts
         - a md5 field being a non empty b64 encoded string

    Args:
        md5_parts: any
            Value of the md5_parts field to validate
        number_parts: int
            Number of parts that md5_parts must describe

    Raises:
        ValidationError: with code 'invalid' when one of the checks fails
    '''

    if not isinstance(md5_parts, list):
        logger.error(
            "Invalid md5_parts field %s, must be a list and is %s", md5_parts, type(md5_parts)
        )
        raise ValidationError(_('Invalid md5_parts field: must be a list but is %(type)s'),
                              params={'type': type(md5_parts)}, code='invalid')

    # Count the distinct part numbers. Items that are not a dict or that have no part_number
    # all share the number 0. No sorting is done here because only the count matters and
    # sorting would raise a TypeError on part numbers of mixed types (e.g. int and str).
    # Unhashable part numbers (e.g. list) are counted one by one; they are rejected by the
    # per item checks below if the total happens to match.
    distinct_part_numbers = set()
    unhashable_part_numbers = 0
    for item in md5_parts:
        part_number = item.get('part_number', 0) if isinstance(item, dict) else 0
        try:
            distinct_part_numbers.add(part_number)
        except TypeError:
            unhashable_part_numbers += 1
    if len(distinct_part_numbers) + unhashable_part_numbers != number_parts:
        logger.error(
            "Invalid md5_parts field value=%s: "
            "list has too few, too many or duplicate part_number item(s), "
            "it should have a total of %d non duplicated item(s)",
            md5_parts,
            number_parts
        )
        raise ValidationError(
            _('Missing, too many or duplicate part_number in md5_parts field list: '
              'list should have %(size)d item(s).'),
            params={'size': number_parts},
            code='invalid'
        )

    for i, item in enumerate(md5_parts):
        if not isinstance(item, dict):
            logger.error(
                "Invalid md5_parts[%d] field value=%s, must be a dict and is %s",
                i,
                item,
                type(item)
            )
            raise ValidationError(_('Invalid md5_parts[%(i)d] value: must be dict but is %(type)s'),
                                  params={'i': i, 'type': type(item)}, code='invalid')

        if 'part_number' not in item:
            logger.error(
                "Invalid md5_parts[%d] field value=%s, part_number field missing",
                i,
                item,
            )
            raise ValidationError(_('Invalid md5_parts[%(i)d] value: part_number field missing'),
                                  params={'i': i}, code='invalid')

        part_number = item['part_number']
        # bool is a subclass of int in python, it has to be rejected explicitly otherwise the
        # JSON value true would be accepted as part number 1.
        if (
            isinstance(part_number, bool) or not isinstance(part_number, int) or
            part_number < 1 or part_number > number_parts
        ):
            logger.error(
                "Invalid md5_parts[%d].part_number field value=%s: "
                "part_number field must be an int between 1 and %d",
                i,
                part_number,
                number_parts
            )
            raise ValidationError(
                _('Invalid md5_parts[%(i)d].part_number value: '
                  'part_number field must be an int between 1 and %(number_parts)d'),
                params={'i': i, 'number_parts': number_parts}, code='invalid'
            )

        if 'md5' not in item:
            logger.error(
                "Invalid md5_parts[%d] field value=%s, md5 field missing",
                i,
                item,
            )
            raise ValidationError(_('Invalid md5_parts[%(i)d] value: md5 field missing'),
                                  params={'i': i}, code='invalid')

        md5 = item['md5']
        # is_valid_b64() returns False for non string values
        valid_b64 = is_valid_b64(md5)
        if not isinstance(md5, str) or md5 == '' or not valid_b64:
            logger.error(
                "Invalid md5_parts[%d].md5 field value=%s: "
                "md5 field must be a non empty b64 encoded string; type=%s valid_b64=%s",
                i,
                md5,
                type(md5),
                valid_b64
            )
            # Report the index of the faulty item (same index as in the log message above)
            raise ValidationError(
                _('Invalid md5_parts[%(i)d].md5 value: '
                  'md5 field must be a non empty b64 encoded string'),
                params={'i': i}, code='invalid'
            )
22 changes: 18 additions & 4 deletions app/stac_api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
from collections import OrderedDict
from datetime import datetime
from operator import itemgetter

from django.conf import settings
from django.db import IntegrityError
Expand Down Expand Up @@ -622,10 +623,23 @@ def create_multipart_upload(self, executor, serializer, validated_data, asset):
key, asset, validated_data['checksum_multihash']
)
urls = []
for part in range(
1, (validated_data['number_parts'] if 'number_parts' in validated_data else 0) + 1
):
urls.append(executor.create_presigned_url(key, asset, part, upload_id))
# TODO BGDIINF_SB-1983 md5_parts should be mandatory
if 'md5_parts' in validated_data:
sorted_md5_parts = sorted(validated_data['md5_parts'], key=itemgetter('part_number'))
else:
# dummy parts md5
sorted_md5_parts = map(
lambda i: {
'part_number': i, 'md5': None
},
range(1, validated_data['number_parts'] + 1)
)
for part in sorted_md5_parts:
urls.append(
executor.create_presigned_url(
key, asset, part['part_number'], upload_id, part['md5']
)
)

clean_up_required = False
try:
Expand Down
Loading

0 comments on commit b6324f6

Please sign in to comment.