Skip to content

Commit 9f493e9

Browse files
authored
Patch20250310: fixup size check didn't read local file (#206)
* move upload status check into dedicated function for upload/resume logic * add test case for checking status timeout * fixup the size check when resumable uploading * bumpup version * bumpup version * bumpup version
1 parent 425ba4b commit 9f493e9

File tree

8 files changed

+79
-20
lines changed

8 files changed

+79
-20
lines changed

app/configs/app_config.py

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ class Env:
2929
# the number of items to active interative mode
3030
interative_threshold = 10
3131

32+
# number looping when waiting upload status
33+
output_truncate_count = 10
34+
max_waiting_count = 30
35+
3236
github_url = 'PilotDataPlatform/cli'
3337

3438
zone_int2string = {

app/resources/custom_error.py

+1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class Error:
6969
'The following files already exist in the upload destination: \n%s\n'
7070
'Do you want to cancel the upload [N] or skip duplicates and continue uploading [y]?'
7171
),
72+
'UPLOAD_TIMEOUT': 'Upload task was timeout. Please check the portal for the upload status.',
7273
'UPLOAD_ID_NOT_EXIST': (
7374
'The specified multipart upload does not exist. '
7475
'The upload ID may be invalid, or the upload may have been aborted or completed.'

app/services/file_manager/file_upload/file_upload.py

+9-19
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,8 @@ def simple_upload( # noqa: C901
254254
pool.close()
255255
pool.join()
256256

257-
unfinished_files = pre_upload_infos
258-
while len(unfinished_files) > 0:
259-
temp = []
260-
mhandler.SrvOutPutHandler.finalize_upload()
261-
for file_batchs in batch_generator(pre_upload_infos, batch_size=AppConfig.Env.upload_batch_size):
262-
temp.extend(upload_client.check_status(file_batchs))
263-
unfinished_files = temp
264-
257+
# check the status of the upload
258+
upload_client.upload_status_check(pre_upload_infos)
265259
num_of_file = len(pre_upload_infos)
266260
logger.info(f'Upload Time: {time.time() - upload_start_time:.2f}s for {num_of_file:d} files')
267261

@@ -310,14 +304,17 @@ def resume_get_unfinished_items(
310304
f'expected size: {file_info.get("total_size")}, '
311305
f'actual size: {x.get("result").get("size")}'
312306
)
313-
if file_info.get('total_size') != x.get('result').get('size'):
307+
local_file_size = os.path.getsize(file_info.get('local_path'))
308+
if file_info.get('total_size') != x.get('result').get('size') or local_file_size != x.get('result').get(
309+
'size'
310+
):
314311
SrvErrorHandler.customized_handle(
315312
ECustomizedError.INVALID_RESUMABLE_FILE_SIZE,
316313
if_exit=True,
317314
value=(
318315
file_info.get('object_path'),
319316
x.get('result').get('size'),
320-
file_info.get('total_size'),
317+
local_file_size,
321318
),
322319
)
323320

@@ -390,14 +387,7 @@ def resume_upload(
390387
pool.close()
391388
pool.join()
392389

393-
unfinished_files = unfinished_items
394-
while len(unfinished_files) > 0:
395-
temp = []
396-
mhandler.SrvOutPutHandler.finalize_upload()
397-
for file_batchs in batch_generator(unfinished_items, batch_size=AppConfig.Env.upload_batch_size):
398-
temp.extend(upload_client.check_status(file_batchs))
399-
unfinished_files = temp
400-
time.sleep(1)
401-
390+
# check the status of the upload
391+
upload_client.upload_status_check(unfinished_items)
402392
num_of_file = len(unfinished_items)
403393
logger.info(f'Upload Time: {time.time() - upload_start_time:.2f}s for {num_of_file:d} files')

app/services/file_manager/file_upload/upload_client.py

+29
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import math
99
import os
1010
import threading
11+
import time
1112
from logging import getLogger
1213
from multiprocessing.pool import ThreadPool
1314
from typing import Any
@@ -30,6 +31,7 @@
3031
from app.services.output_manager.error_handler import SrvErrorHandler
3132
from app.services.user_authentication.decorator import require_valid_token
3233
from app.utils.aggregated import ItemStatus
34+
from app.utils.aggregated import batch_generator
3335
from app.utils.aggregated import get_file_info_by_geid
3436

3537
from .exception import INVALID_CHUNK_ETAG
@@ -471,5 +473,32 @@ def check_status(self, file_objects: list[FileObject]) -> list[FileObject]:
471473

472474
return unfinished_files
473475

476+
def upload_status_check(self, file_objects: list[FileObject]) -> None:
477+
'''
478+
Summary:
479+
The function is to check the list of upload status.
480+
481+
Parameter:
482+
- file_objects(list[FileObject]): the list of file objects that need to be checked.
483+
484+
'''
485+
486+
unfinished_files = file_objects
487+
wait_count = 0
488+
while len(unfinished_files) > 0:
489+
temp = []
490+
if wait_count % AppConfig.Env.output_truncate_count == 0:
491+
mhandler.SrvOutPutHandler.finalize_upload()
492+
elif wait_count > AppConfig.Env.max_waiting_count:
493+
SrvErrorHandler.customized_handle(ECustomizedError.UPLOAD_TIMEOUT, True)
494+
495+
for file_batchs in batch_generator(file_objects, batch_size=AppConfig.Env.upload_batch_size):
496+
temp.extend(self.check_status(file_batchs))
497+
unfinished_files = temp
498+
wait_count += 1
499+
time.sleep(1)
500+
501+
return
502+
474503
def set_finish_upload(self):
475504
self.finish_upload = True

app/services/output_manager/error_handler.py

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class ECustomizedError(enum.Enum):
4242
UPLOAD_CANCEL = 'UPLOAD_CANCEL'
4343
UPLOAD_FAIL = 'UPLOAD_FAIL'
4444
UPLOAD_SKIP_DUPLICATION = 'UPLOAD_SKIP_DUPLICATION'
45+
UPLOAD_TIMEOUT = 'UPLOAD_TIMEOUT'
4546
# the error when multipart upload id is not exist
4647
UPLOAD_ID_NOT_EXIST = 'UPLOAD_ID_NOT_EXIST'
4748
MANIFEST_OF_FOLDER_FILE_EXIST = 'MANIFEST_OF_FOLDER_FILE_EXIST'

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "app"
3-
version = "3.15.0"
3+
version = "3.15.2"
44
description = "This service is designed to support pilot platform"
55
authors = ["Indoc Systems"]
66

tests/app/services/file_manager/file_upload/test_file_upload.py

+8
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,10 @@ def test_resume_upload(mocker):
342342
resume_upload_mock = mocker.patch(
343343
'app.services.file_manager.file_upload.file_upload.UploadClient.resume_upload', return_value=[]
344344
)
345+
mocker.patch(
346+
'os.path.getsize',
347+
return_value=1,
348+
)
345349

346350
resume_upload(manifest_json, 1)
347351

@@ -406,6 +410,10 @@ def test_resume_upload_integrity_check_failed(mocker, capfd):
406410
get_mock = mocker.patch(
407411
'app.services.file_manager.file_upload.file_upload.get_file_info_by_geid', return_value=[{'result': get_return}]
408412
)
413+
mocker.patch(
414+
'os.path.getsize',
415+
return_value=2,
416+
)
409417

410418
try:
411419
resume_upload(manifest_json, 1)

tests/app/services/file_manager/file_upload/test_upload_client.py

+26
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,32 @@ def test_check_status_fail(httpx_mock, mocker):
7171
assert len(result) == 1
7272

7373

74+
def test_check_upload_status_with_timeout(httpx_mock, mocker, capfd):
75+
upload_client = UploadClient('project_code', 'parent_folder_id')
76+
77+
mocker.patch('app.services.file_manager.file_upload.models.FileObject.generate_meta', return_value=(1, 1))
78+
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
79+
80+
httpx_mock.add_response(
81+
method='POST',
82+
url=AppConfig.Connections.url_bff + '/v1/query/geid',
83+
json={'result': [{'status': ItemStatus.REGISTERED, 'result': {'name': 'test', 'status': ItemStatus.ACTIVE}}]},
84+
status_code=200,
85+
)
86+
87+
test_obj = FileObject('test', 'test', 'test', 'test', 'test')
88+
try:
89+
AppConfig.Env.max_waiting_count = 1
90+
upload_client.upload_status_check([test_obj])
91+
except SystemExit:
92+
out, _ = capfd.readouterr()
93+
94+
expect_out = 'Upload task was timeout. Please check the portal for the upload status.\n'
95+
assert expect_out in out
96+
else:
97+
AssertionError('SystemExit not raised')
98+
99+
74100
def test_chunk_upload(httpx_mock, mocker):
75101
upload_client = UploadClient('project_code', 'parent_folder_id')
76102

0 commit comments

Comments
 (0)