diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 95d0727..d7b7bdc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,7 +45,7 @@ repos: - id: no-commit-to-branch args: [--branch, dev, --branch, int, --branch, main] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.2 + rev: v0.8.3 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/.pyproject_generation/pyproject_custom.toml b/.pyproject_generation/pyproject_custom.toml index dafdbd1..1711c96 100644 --- a/.pyproject_generation/pyproject_custom.toml +++ b/.pyproject_generation/pyproject_custom.toml @@ -1,6 +1,6 @@ [project] name = "ghga_connector" -version = "1.5.1" +version = "1.6.0" description = "GHGA Connector - A CLI client application for interacting with the GHGA system." dependencies = [ "typer~=0.12", @@ -9,6 +9,7 @@ dependencies = [ "hexkit[s3]>=3, <4", "rich>=13.7.1, <14", "tenacity >=9.0.0, <10", + "hishel>=0.1.1, < 0.2", ] diff --git a/README.md b/README.md index 99ee003..c4a12cf 100644 --- a/README.md +++ b/README.md @@ -26,13 +26,13 @@ We recommend using the provided Docker container. A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/ghga-connector): ```bash -docker pull ghga/ghga-connector:1.5.1 +docker pull ghga/ghga-connector:1.6.0 ``` Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile): ```bash # Execute in the repo's root dir: -docker build -t ghga/ghga-connector:1.5.1 . +docker build -t ghga/ghga-connector:1.6.0 . ``` For production-ready deployment, we recommend using Kubernetes, however, @@ -40,7 +40,7 @@ for simple use cases, you could execute the service using docker on a single server: ```bash # The entrypoint is preconfigured: -docker run -p 8080:8080 ghga/ghga-connector:1.5.1 --help +docker run -p 8080:8080 ghga/ghga-connector:1.6.0 --help ``` If you prefer not to use containers, you may install the service from source: diff --git a/lock/requirements-dev.txt b/lock/requirements-dev.txt index 7836acb..1fca13f 100644 --- a/lock/requirements-dev.txt +++ b/lock/requirements-dev.txt @@ -36,20 +36,20 @@ bcrypt==4.2.1 \ --hash=sha256:e84e0e6f8e40a242b11bce56c313edc2be121cec3e0ec2d76fce01f6af33c07c \ --hash=sha256:f85b1ffa09240c89aa2e1ae9f3b1c687104f7b2b9d2098da4e923f1b7082d331 # via crypt4gh -boto3==1.35.79 \ - --hash=sha256:1fa26217cd33ded82e55aed4460cd55f7223fa647916aa0d3c5d6828e6ec7135 \ - --hash=sha256:a673b0b6378c9ccbf045a31a43195b175e12aa5c37fb7635fcbfc8f48fb857b3 +boto3==1.35.83 \ + --hash=sha256:a4828d67b12892cb11fe9e6d86f40975a06db470676e61194968e3a32ec4c536 \ + --hash=sha256:df2e0d57241de0f9c31b62e73093c2126e4fd73b87b1897ecf280a1b87a2b825 # via hexkit -botocore==1.35.79 \ - --hash=sha256:245bfdda1b1508539ddd1819c67a8a2cc81780adf0715d3de418d64c4247f346 \ - --hash=sha256:e6b10bb9a357e3f5ca2e60f6dd15a85d311b9a476eb21b3c0c2a3b364a2897c8 +botocore==1.35.83 \ + --hash=sha256:ba363183e4df79fbcfd5f3600fd473bd45a1de03d0d0b5e78abd59f276971d27 \ + --hash=sha256:df5e4384838e50bbafd47e9b5fefb995e83cbb9412e7cd7c0db9555174d91bba # via # boto3 # hexkit # s3transfer -certifi==2024.8.30 \ - --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ - --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db # via # httpcore # httpx @@ -385,6 +385,10 @@ 
hexkit==3.8.1 \ # via # ghga-connector (pyproject.toml) # ghga-service-commons +hishel==0.1.1 \ + --hash=sha256:1f6421b78cc23fc43c610f651b7848c9b8eee2d29551d64a2ab0d45b319b6559 \ + --hash=sha256:5b51acc340303faeef2f5cfc1658acb1db1fdc3e3ad76406265a485f9707c5d6 + # via ghga-connector (pyproject.toml) httpcore==1.0.7 \ --hash=sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c \ --hash=sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd @@ -440,6 +444,7 @@ httpx==0.27.2 \ # via # -r lock/requirements-dev-template.in # ghga-service-commons + # hishel # pytest-httpx identify==2.6.3 \ --hash=sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02 \ @@ -652,9 +657,9 @@ pydantic-core==2.27.1 \ --hash=sha256:f69ed81ab24d5a3bd93861c8c4436f54afdf8e8cc421562b0c7504cf3be58206 \ --hash=sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b # via pydantic -pydantic-settings==2.6.1 \ - --hash=sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87 \ - --hash=sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0 +pydantic-settings==2.7.0 \ + --hash=sha256:ac4bfd4a36831a48dbf8b2d9325425b549a0a6f18cea118436d728eb4f1c4d66 \ + --hash=sha256:e00c05d5fa6cbbb227c84bd7487c5c1065084119b750df7c8c1a554aed236eb5 # via hexkit pygments==2.18.0 \ --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ @@ -682,9 +687,9 @@ pytest==8.3.4 \ # pytest-asyncio # pytest-cov # pytest-httpx -pytest-asyncio==0.24.0 \ - --hash=sha256:a811296ed596b69bf0b6f3dc40f83bcaf341b155a269052d82efa2b25ac7037b \ - --hash=sha256:d081d828e576d85f875399194281e92bf8a68d60d72d1a2faf2feddb6c46b276 +pytest-asyncio==0.25.0 \ + --hash=sha256:8c0610303c9e0442a5db8604505fc0f545456ba1528824842b37b4a626cbf609 \ + --hash=sha256:db5432d18eac6b7e28b46dcd9b69921b55c3b1086e85febfe04e70b18d9e81b3 # via -r lock/requirements-dev-template.in pytest-cov==6.0.0 \ --hash=sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35 \ @@ -777,25 +782,25 @@ rich==13.9.4 \ # via # ghga-connector (pyproject.toml) # typer -ruff==0.8.2 \ - --hash=sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f \ - --hash=sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea \ - --hash=sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248 \ - --hash=sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d \ - --hash=sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f \ - --hash=sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29 \ - --hash=sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22 \ - --hash=sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0 \ - --hash=sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1 \ - --hash=sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58 \ - --hash=sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5 \ - --hash=sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d \ - --hash=sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897 \ - --hash=sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa \ - --hash=sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93 \ - --hash=sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5 \ - --hash=sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c \ - 
--hash=sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8 +ruff==0.8.3 \ + --hash=sha256:01b14b2f72a37390c1b13477c1c02d53184f728be2f3ffc3ace5b44e9e87b90d \ + --hash=sha256:19048f2f878f3ee4583fc6cb23fb636e48c2635e30fb2022b3a1cd293402f964 \ + --hash=sha256:1ae441ce4cf925b7f363d33cd6570c51435972d697e3e58928973994e56e1452 \ + --hash=sha256:53babd6e63e31f4e96ec95ea0d962298f9f0d9cc5990a1bbb023a6baf2503a82 \ + --hash=sha256:5be450bb18f23f0edc5a4e5585c17a56ba88920d598f04a06bd9fd76d324cb20 \ + --hash=sha256:5e7558304353b84279042fc584a4f4cb8a07ae79b2bf3da1a7551d960b5626d3 \ + --hash=sha256:6567be9fb62fbd7a099209257fef4ad2c3153b60579818b31a23c886ed4147ea \ + --hash=sha256:75fb782f4db39501210ac093c79c3de581d306624575eddd7e4e13747e61ba18 \ + --hash=sha256:7f26bc76a133ecb09a38b7868737eded6941b70a6d34ef53a4027e83913b6502 \ + --hash=sha256:8d5d273ffffff0acd3db5bf626d4b131aa5a5ada1276126231c4174543ce20d6 \ + --hash=sha256:8faeae3827eaa77f5721f09b9472a18c749139c891dbc17f45e72d8f2ca1f8fc \ + --hash=sha256:9c0a60a825e3e177116c84009d5ebaa90cf40dfab56e1358d1df4e29a9a14b13 \ + --hash=sha256:c356e770811858bd20832af696ff6c7e884701115094f427b64b25093d6d932d \ + --hash=sha256:d7c65bc0cadce32255e93c57d57ecc2cca23149edd52714c0c5d6fa11ec328cd \ + --hash=sha256:db503486e1cf074b9808403991663e4277f5c664d3fe237ee0d994d1305bb060 \ + --hash=sha256:e4d66a21de39f15c9757d00c50c8cdd20ac84f55684ca56def7891a025d7e939 \ + --hash=sha256:f7df94f57d7418fa7c3ffb650757e0c2b96cf2501a0b192c18e4fb5571dfada9 \ + --hash=sha256:fe2756edf68ea79707c8d68b78ca9a58ed9af22e430430491ee03e718b5e4936 # via -r lock/requirements-dev-template.in s3transfer==0.10.4 \ --hash=sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e \ @@ -922,24 +927,24 @@ urllib3==1.26.20 \ # docker # requests # testcontainers -uv==0.5.8 \ - --hash=sha256:0f2bcdd00a49ad1669e217a2787448cac1653c9968d74bfa3732f3c25ca26f69 \ - --hash=sha256:2b3076c79746d4f83257c9dea5ba0833b0711aeff8e6695670eadd140a0cf67f \ - --hash=sha256:2ee40bc9c08fea0e71092838c0fc36df83f741807d8be9acf2fd4c4757b3171e \ - --hash=sha256:365eb6bbb551c5623a73b1ed530f4e69083016f70f0cf5ca1a30ec66413bcda2 \ - --hash=sha256:4a3325af8ed1effa7076967472c063b0000d609fd6f561c7751e43bab30297f1 \ - --hash=sha256:56715389d240ac989af2188cd3bfc2b603d31b42330e915dacfe113b34d8e65b \ - --hash=sha256:5989bbbbca072edc1875036c76aed74ec3dfc4741de7d1f060e181717efea6ac \ - --hash=sha256:8058ab06d2f69355694f6e9a36edc45164474c516b4e2895bd67f8232d9022ed \ - --hash=sha256:84f26ce1736d075d1df34f7c3f6b0b728cecd9a4da3e5160d5d887587830e7ce \ - --hash=sha256:8a8cbe1ffa0ef5c2f1c90622e07211a8f93f48daa2be1bd4592bb8cda52b0285 \ - --hash=sha256:a7956787658fb9253fba49741886409402a48039bee64b1697397d27284919af \ - --hash=sha256:aa03c338e19456d3a6544a94293bd2905837ae22720cc161c83ea0fd13c3b09f \ - --hash=sha256:c56022edc0f61febbdef89e6f699a0e991932c493b7293635b4814e102d040d2 \ - --hash=sha256:c91d0a2b8218af2aa0385b867da8c13a620db22077686793c7231f012cb40619 \ - --hash=sha256:defd5da3685f43f74698634ffc197aaf9b836b8ba0de0e57b34d7bc74d856fa9 \ - --hash=sha256:e146062e4cc39db334cbde38d56d2c6301dd9cf6739ce07ce5a4d71b4cbc2d00 \ - --hash=sha256:f8ade0430b6618ae0e21e52f61f6f3943dd6f3184ef6dc4491087b27940427f9 +uv==0.5.10 \ + --hash=sha256:064e977957e61aaaf7215bbd8f8566bcb22d7662c8adc929d039010fdb686436 \ + --hash=sha256:06eb14988a75cc178241747a9437d23faad7d62e2d9b955db7e8a8098853341a \ + --hash=sha256:253a02e03bf83bc0ec4e17242f54a4af2fef6191fcfb392b2613defd2b2a2f89 \ + 
--hash=sha256:27f27eba58b9a71c3a7905ca966c69adf5a4a1df1dd14ef4d064c40cbaabc49e \ + --hash=sha256:326603d44454a8856a5660bb406e99194f3c8d2cc4504c97c99871da59575022 \ + --hash=sha256:4e0b91598e67d8c1228b47894a61fffb9d82caf8f1080bb9f21df49530118db6 \ + --hash=sha256:502d9d10f5f139c850b1f6085a0c5719d49dd39d767504ce7c4245b47531f156 \ + --hash=sha256:5890ca6703c371cecc88c2a7bf32fc47187a865fc577df0d40d390fcbdec76f0 \ + --hash=sha256:68a6b992b7ebae9f3fa2f395348c95e6f05745246b067a26e7597a6730fcb690 \ + --hash=sha256:7337ed40bae6f37d9335bf7f83bb43d08b6c141212b1ca3b15a9194c4d438ffe \ + --hash=sha256:87dd4473ebf585fcd78a818bf8735ab39a157bef4f712e8b22e753b7344f6290 \ + --hash=sha256:8bc47bd623b1f8fa883b7afbf480286b946512d9ac7bf23105e7d63ef702ea7b \ + --hash=sha256:936759d8de8f78969756ee2b1558b4e9bd4b059922d0840cdd162a190c95ac50 \ + --hash=sha256:adc0dad56118127b3a1cc0126149a9b8c643fd4e4c5fa37be6af4bd84d33d30c \ + --hash=sha256:b61812ee4765f07db02ff616d4aac9c514857c0648459242a286243fe92d6223 \ + --hash=sha256:d0d0e75a4337076f43936b11d6cc4cb11e261948c719adb8e208b78454a122a0 \ + --hash=sha256:fa8607cc07cc9e666e531a9533b02d45bbb376ae314721434643c328298709b4 # via -r lock/requirements-dev-template.in uvicorn==0.29.0 \ --hash=sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de \ diff --git a/lock/requirements.txt b/lock/requirements.txt index b54b8b8..bdad576 100644 --- a/lock/requirements.txt +++ b/lock/requirements.txt @@ -4,6 +4,12 @@ annotated-types==0.7.0 \ # via # -c lock/requirements-dev.txt # pydantic +anyio==4.7.0 \ + --hash=sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48 \ + --hash=sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352 + # via + # -c lock/requirements-dev.txt + # httpx bcrypt==4.2.1 \ --hash=sha256:041fa0155c9004eb98a232d54da05c0b41d4b8e66b6fc3cb71b4b3f6144ba837 \ --hash=sha256:04e56e3fe8308a88b77e0afd20bec516f74aecf391cdd6e374f15cbed32783d6 \ @@ -33,20 +39,27 @@ bcrypt==4.2.1 \ # via # -c lock/requirements-dev.txt # crypt4gh -boto3==1.35.79 \ - --hash=sha256:1fa26217cd33ded82e55aed4460cd55f7223fa647916aa0d3c5d6828e6ec7135 \ - --hash=sha256:a673b0b6378c9ccbf045a31a43195b175e12aa5c37fb7635fcbfc8f48fb857b3 +boto3==1.35.83 \ + --hash=sha256:a4828d67b12892cb11fe9e6d86f40975a06db470676e61194968e3a32ec4c536 \ + --hash=sha256:df2e0d57241de0f9c31b62e73093c2126e4fd73b87b1897ecf280a1b87a2b825 # via # -c lock/requirements-dev.txt # hexkit -botocore==1.35.79 \ - --hash=sha256:245bfdda1b1508539ddd1819c67a8a2cc81780adf0715d3de418d64c4247f346 \ - --hash=sha256:e6b10bb9a357e3f5ca2e60f6dd15a85d311b9a476eb21b3c0c2a3b364a2897c8 +botocore==1.35.83 \ + --hash=sha256:ba363183e4df79fbcfd5f3600fd473bd45a1de03d0d0b5e78abd59f276971d27 \ + --hash=sha256:df5e4384838e50bbafd47e9b5fefb995e83cbb9412e7cd7c0db9555174d91bba # via # -c lock/requirements-dev.txt # boto3 # hexkit # s3transfer +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via + # -c lock/requirements-dev.txt + # httpcore + # httpx cffi==1.17.1 \ --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ @@ -168,18 +181,55 @@ docopt==0.6.2 \ # via # -c lock/requirements-dev.txt # crypt4gh +exceptiongroup==1.2.2 \ + --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \ + 
--hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc + # via + # -c lock/requirements-dev.txt + # anyio ghga-service-commons==3.2.0 \ --hash=sha256:05f2fd0b31f282aaacccdbaddde9a7b2a56f5c5bac418961217b2738ed60e662 \ --hash=sha256:0849fde66b8e0ad546d5e76d9be01ae0429cab21703afc342b0d3f53a9d17c04 # via # -c lock/requirements-dev.txt # ghga-connector (pyproject.toml) +h11==0.14.0 \ + --hash=sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d \ + --hash=sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761 + # via + # -c lock/requirements-dev.txt + # httpcore hexkit==3.8.1 \ --hash=sha256:51c9f00003f5ed8dfe0585617031c530f2d08f704955f5c5e3340e6f947d597a \ --hash=sha256:d33b390e861e41deb54217ce85d0a5f3c8fe6ddc3656514d45cc173fe2775553 # via # -c lock/requirements-dev.txt # ghga-connector (pyproject.toml) +hishel==0.1.1 \ + --hash=sha256:1f6421b78cc23fc43c610f651b7848c9b8eee2d29551d64a2ab0d45b319b6559 \ + --hash=sha256:5b51acc340303faeef2f5cfc1658acb1db1fdc3e3ad76406265a485f9707c5d6 + # via + # -c lock/requirements-dev.txt + # ghga-connector (pyproject.toml) +httpcore==1.0.7 \ + --hash=sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c \ + --hash=sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd + # via + # -c lock/requirements-dev.txt + # httpx +httpx==0.27.2 \ + --hash=sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0 \ + --hash=sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2 + # via + # -c lock/requirements-dev.txt + # hishel +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via + # -c lock/requirements-dev.txt + # anyio + # httpx jmespath==1.0.1 \ --hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \ --hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe @@ -317,9 +367,9 @@ pydantic-core==2.27.1 \ # via # -c lock/requirements-dev.txt # pydantic -pydantic-settings==2.6.1 \ - --hash=sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87 \ - --hash=sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0 +pydantic-settings==2.7.0 \ + --hash=sha256:ac4bfd4a36831a48dbf8b2d9325425b549a0a6f18cea118436d728eb4f1c4d66 \ + --hash=sha256:e00c05d5fa6cbbb227c84bd7487c5c1065084119b750df7c8c1a554aed236eb5 # via # -c lock/requirements-dev.txt # hexkit @@ -439,6 +489,13 @@ six==1.17.0 \ # via # -c lock/requirements-dev.txt # python-dateutil +sniffio==1.3.1 \ + --hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \ + --hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc + # via + # -c lock/requirements-dev.txt + # anyio + # httpx tenacity==9.0.0 \ --hash=sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b \ --hash=sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539 @@ -456,6 +513,7 @@ typing-extensions==4.12.2 \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # via # -c lock/requirements-dev.txt + # anyio # pydantic # pydantic-core # rich diff --git a/pyproject.toml b/pyproject.toml index 54f6027..b8b249b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Intended Audience :: Developers", ] name = "ghga_connector" -version = "1.5.1" +version = "1.6.0" description = "GHGA Connector - A CLI client 
application for interacting with the GHGA system."
 dependencies = [
     "typer~=0.12",
@@ -30,6 +30,7 @@ dependencies = [
     "hexkit[s3]>=3, <4",
     "rich>=13.7.1, <14",
     "tenacity >=9.0.0, <10",
+    "hishel>=0.1.1, < 0.2",
 ]

 [project.license]
diff --git a/src/ghga_connector/constants.py b/src/ghga_connector/constants.py
index 485e918..053adbf 100644
--- a/src/ghga_connector/constants.py
+++ b/src/ghga_connector/constants.py
@@ -22,3 +22,4 @@
 MAX_PART_NUMBER = 10000
 MAX_RETRIES = 5
 MAX_WAIT_TIME = 60 * 60
+CACHE_MIN_FRESH = 3
diff --git a/src/ghga_connector/core/api_calls/utils.py b/src/ghga_connector/core/api_calls/utils.py
index 0179c27..17dfc3d 100644
--- a/src/ghga_connector/core/api_calls/utils.py
+++ b/src/ghga_connector/core/api_calls/utils.py
@@ -31,6 +31,7 @@ def is_service_healthy(api_url: str, *, timeout_in_seconds: int = 5) -> bool:
 def check_url(api_url: str, *, timeout_in_seconds: int = 5) -> bool:
     """Checks, if an url is reachable within a certain time"""
     try:
+        # Don't cache health checks
        response = httpx.get(url=api_url, timeout=timeout_in_seconds)
     except httpx.RequestError:
         return False
diff --git a/src/ghga_connector/core/client.py b/src/ghga_connector/core/client.py
index 7675afc..ea2cd31 100644
--- a/src/ghga_connector/core/client.py
+++ b/src/ghga_connector/core/client.py
@@ -16,8 +16,11 @@
 from contextlib import asynccontextmanager, contextmanager
 from functools import cached_property
+from typing import Union

+import hishel
 import httpx
+from ghga_service_commons.http.correlation import attach_correlation_id_to_requests
 from tenacity import (
     AsyncRetrying,
     retry_if_exception_type,
@@ -71,6 +74,32 @@ def httpx_client():
         yield client


+def get_cache_transport(
+    wrapped_transport: Union[httpx.AsyncBaseTransport, None] = None,
+) -> hishel.AsyncCacheTransport:
+    """Construct an async cache transport with `hishel`.
+
+    The `wrapped_transport` parameter can be used for testing to inject, for example,
+    an httpx.ASGITransport pointing to a FastAPI app.
+    """
+    cache_transport = hishel.AsyncCacheTransport(
+        transport=wrapped_transport or httpx.AsyncHTTPTransport(),
+        storage=hishel.AsyncInMemoryStorage(ttl=1800),  # persist for 30 minutes
+        controller=hishel.Controller(
+            cacheable_methods=["POST", "GET"],
+            cacheable_status_codes=[200, 201],
+        ),
+    )
+    return cache_transport
+
+
+def get_mounts() -> dict[str, httpx.AsyncBaseTransport]:
+    """Return a dict of mounts for the cache transport."""
+    return {
+        "all://": get_cache_transport(),
+    }
+
+
 @asynccontextmanager
 async def async_client():
     """Yields a context manager async httpx client and closes it afterward"""
@@ -80,5 +109,7 @@ async def async_client():
             max_connections=CONFIG.max_concurrent_downloads,
             max_keepalive_connections=CONFIG.max_concurrent_downloads,
         ),
+        mounts=get_mounts(),
     ) as client:
+        attach_correlation_id_to_requests(client)
         yield client
diff --git a/src/ghga_connector/core/downloading/abstract_downloader.py b/src/ghga_connector/core/downloading/abstract_downloader.py
index deb050d..dfa6656 100644
--- a/src/ghga_connector/core/downloading/abstract_downloader.py
+++ b/src/ghga_connector/core/downloading/abstract_downloader.py
@@ -51,7 +51,7 @@ def get_file_header_envelope(self) -> Coroutine[bytes, Any, Any]:
         """

     @abstractmethod
-    async def download_to_queue(self, *, url: str, part_range: PartRange) -> None:
+    async def download_to_queue(self, *, part_range: PartRange) -> None:
         """
         Start downloading file parts in parallel into a queue.
         This should be wrapped into asyncio.task and is guarded by a semaphore to limit
diff --git a/src/ghga_connector/core/downloading/api_calls.py b/src/ghga_connector/core/downloading/api_calls.py
index 8c5811b..97fb18e 100644
--- a/src/ghga_connector/core/downloading/api_calls.py
+++ b/src/ghga_connector/core/downloading/api_calls.py
@@ -19,7 +19,7 @@

 import httpx

-from ghga_connector.constants import TIMEOUT_LONG
+from ghga_connector.constants import CACHE_MIN_FRESH, TIMEOUT_LONG
 from ghga_connector.core import WorkPackageAccessor, exceptions

 from .structs import (
@@ -30,11 +30,14 @@


 async def _get_authorization(
-    file_id: str, work_package_accessor: WorkPackageAccessor, url: str
-) -> UrlAndHeaders:
+    file_id: str, work_package_accessor: WorkPackageAccessor
+) -> httpx.Headers:
     """
     Fetch work order token using accessor and prepare DCS endpoint URL and headers for a
-    given endpoint identified by the `url` passed
+    given endpoint; the headers are returned and the caller builds the endpoint URL.
+
+    The calls will use the cache if possible while the cached responses are still fresh
+    for at least another `CACHE_MIN_FRESH` seconds.
     """
     # fetch a work order token
     decrypted_token = await work_package_accessor.get_work_order_token(file_id=file_id)
@@ -44,10 +47,11 @@
             "Accept": "application/json",
             "Authorization": f"Bearer {decrypted_token}",
             "Content-Type": "application/json",
+            "Cache-Control": f"min-fresh={CACHE_MIN_FRESH}",
         }
     )

-    return UrlAndHeaders(endpoint_url=url, headers=headers)
+    return headers


 async def get_envelope_authorization(
@@ -59,9 +63,10 @@
     """
     # build url
     url = f"{work_package_accessor.dcs_api_url}/objects/{file_id}/envelopes"
-    return await _get_authorization(
-        file_id=file_id, work_package_accessor=work_package_accessor, url=url
+    headers = await _get_authorization(
+        file_id=file_id, work_package_accessor=work_package_accessor
     )
+    return UrlAndHeaders(url, headers)


 async def get_file_authorization(
@@ -73,9 +78,10 @@
     """
     # build URL
     url = f"{work_package_accessor.dcs_api_url}/objects/{file_id}"
-    return await _get_authorization(
-        file_id=file_id, work_package_accessor=work_package_accessor, url=url
+    headers = await _get_authorization(
+        file_id=file_id, work_package_accessor=work_package_accessor
     )
+    return UrlAndHeaders(url, headers)


 async def get_download_url(
diff --git a/src/ghga_connector/core/downloading/downloader.py b/src/ghga_connector/core/downloading/downloader.py
index f57e46f..25940e8 100644
--- a/src/ghga_connector/core/downloading/downloader.py
+++ b/src/ghga_connector/core/downloading/downloader.py
@@ -17,6 +17,7 @@

 import asyncio
 import base64
+import gc
 from asyncio import PriorityQueue, Queue, Semaphore, Task, create_task
 from collections.abc import Coroutine
 from io import BufferedWriter
@@ -111,6 +112,9 @@ def __init__(  # noqa: PLR0913
     async def download_file(self, *, output_path: Path, part_size: int):
         """Download file to the specified location and manage lower level details."""
         # Split the file into parts based on the part size
+        self._message_display.display(
+            f"Fetching work order token and download URL for {self._file_id}"
+        )
         url_response = await self.fetch_download_url()
         part_ranges = calc_part_ranges(
             part_size=part_size, total_file_size=url_response.file_size
         )
         task_handler = TaskHandler()

         # start async part download to intermediate queue
         for part_range in part_ranges:
-            task_handler.schedule(
-                self.download_to_queue(
-                    url=url_response.download_url, part_range=part_range
-                )
-            )
+            task_handler.schedule(self.download_to_queue(part_range=part_range))

         # get file header envelope
         try:
@@ -157,8 +157,13 @@ async def download_file(self, *, output_path: Path, part_size: int):
             ),
             name="Write queue to file",
         )
-        await task_handler.gather()
-        await write_to_file
+        try:
+            await task_handler.gather()
+        except:
+            write_to_file.cancel()
+            raise
+        else:
+            await write_to_file

     async def fetch_download_url(self) -> URLResponse:
         """Fetch a work order token and retrieve the download url.
@@ -168,14 +173,10 @@
         2. the file size in bytes
         """
         try:
-            self._message_display.display(
-                f"Fetching work order token for {self._file_id}"
-            )
             url_and_headers = await get_file_authorization(
                 file_id=self._file_id,
                 work_package_accessor=self._work_package_accessor,
             )
-            self._message_display.display(f"Fetching download URL for {self._file_id}")
             response = await get_download_url(
                 client=self._client, url_and_headers=url_and_headers
             )
@@ -242,7 +243,7 @@
             ResponseExceptionTranslator(spec=spec).handle(response=response)
             raise exceptions.BadResponseCodeError(url=url, response_code=status_code)

-    async def download_to_queue(self, *, url: str, part_range: PartRange) -> None:
+    async def download_to_queue(self, *, part_range: PartRange) -> None:
         """
         Start downloading file parts in parallel into a queue.
         This should be wrapped into asyncio.task and is guarded by a semaphore to limit
@@ -250,6 +251,8 @@
         """
         # Guard with semaphore to ensure only a set amount of downloads runs in parallel
         async with self._semaphore:
+            url_and_headers = await self.fetch_download_url()
+            url = url_and_headers.download_url
             try:
                 await self.download_content_range(
                     url=url, start=part_range.start, end=part_range.stop
                 )
@@ -265,7 +268,12 @@
         end: int,
     ) -> None:
         """Download a specific range of a file's content using a presigned download url."""
-        headers = httpx.Headers({"Range": f"bytes={start}-{end}"})
+        headers = httpx.Headers(
+            {
+                "Range": f"bytes={start}-{end}",
+                "Cache-Control": "no-store",  # don't cache part downloads
+            }
+        )

         try:
             response: httpx.Response = await retry_handler(
@@ -320,3 +328,4 @@
                 downloaded_size += chunk_size
                 self._queue.task_done()
                 progress_bar.advance(chunk_size)
+        gc.collect()
diff --git a/src/ghga_connector/core/uploading/api_calls.py b/src/ghga_connector/core/uploading/api_calls.py
deleted file mode 100644
index 60cefd8..0000000
--- a/src/ghga_connector/core/uploading/api_calls.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
-# for the German Human Genome-Phenome Archive (GHGA)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -"""This file contains all api calls related to uploading files""" - -import base64 -import json -from collections.abc import Iterator -from pathlib import Path - -import crypt4gh.keys -import httpx - -from ghga_connector.constants import MAX_PART_NUMBER -from ghga_connector.core import ResponseExceptionTranslator, exceptions - -from .abstract_uploader import UploaderBase -from .structs import UploadStatus - - -class Uploader(UploaderBase): - """ - Class bundling functionality calling Upload Controller Service to initiate and - manage an ongoing upload - """ - - def __init__( - self, - *, - api_url: str, - client: httpx.AsyncClient, - file_id: str, - public_key_path: Path, - ) -> None: - self._part_size = 0 - self._upload_id = "" - self._api_url = api_url - self._client = client - self._file_id = file_id - self._public_key_path = public_key_path - - async def start_multipart_upload(self): - """Start multipart upload""" - try: - await self._initiate_multipart_upload() - - except exceptions.NoUploadPossibleError as error: - file_metadata = await self.get_file_metadata() - upload_id = file_metadata["current_upload_id"] - if upload_id is None: - raise error - - await self.patch_multipart_upload( - upload_status=UploadStatus.CANCELLED, - ) - - await self._initiate_multipart_upload() - - except Exception as error: - raise error - - async def finish_multipart_upload(self): - """Complete or clean up multipart upload""" - await self.patch_multipart_upload(upload_status=UploadStatus.UPLOADED) - - async def _initiate_multipart_upload(self) -> None: - """ - Perform a RESTful API call to initiate a multipart upload - Returns an upload id and a part size - """ - # build url and headers - url = f"{self._api_url}/uploads" - headers = {"Accept": "application/json", "Content-Type": "application/json"} - public_key = base64.b64encode( - crypt4gh.keys.get_public_key(self._public_key_path) - ).decode() - - post_data = {"file_id": self._file_id, "public_key": public_key} - serialized_data = json.dumps(post_data) - - # Make function call to get upload url - try: - response = await self._client.post( - url=url, headers=headers, content=serialized_data - ) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed(request_error=request_error, url=url) - raise exceptions.RequestFailedError(url=url) from request_error - - status_code = response.status_code - if status_code != 200: - spec = { - 400: { - "existingActiveUpload": lambda: exceptions.NoUploadPossibleError( - file_id=self._file_id - ), - "fileNotRegistered": lambda: exceptions.FileNotRegisteredError( - file_id=self._file_id - ), - }, - 403: { - "noFileAccess": lambda: exceptions.NoFileAccessError( - file_id=self._file_id - ) - }, - } - ResponseExceptionTranslator(spec=spec).handle(response=response) - raise exceptions.BadResponseCodeError(url=url, response_code=status_code) - - response_body = response.json() - - self._part_size = int(response_body["part_size"]) - self._upload_id = response_body["upload_id"] - - async def get_file_metadata(self) -> dict[str, str]: - """Get all file metadata""" - # build url and headers - url = f"{self._api_url}/files/{self._file_id}" - headers = {"Accept": "application/json", "Content-Type": "application/json"} - - try: - response = await self._client.get(url=url, headers=headers) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed(request_error=request_error, url=url) - raise exceptions.RequestFailedError(url=url) from request_error - - status_code = 
response.status_code - if status_code != 200: - spec = { - 403: { - "noFileAccess": lambda: exceptions.NoFileAccessError( - file_id=self._file_id - ) - }, - 404: { - "fileNotRegistered": lambda: exceptions.FileNotRegisteredError( - file_id=self._file_id - ) - }, - } - ResponseExceptionTranslator(spec=spec).handle(response=response) - raise exceptions.BadResponseCodeError(url=url, response_code=status_code) - - file_metadata = response.json() - - return file_metadata - - async def get_part_upload_url(self, *, part_no: int) -> str: - """Get a presigned url to upload a specific part""" - if not self._upload_id: - raise exceptions.UploadIdUnsetError() - - # build url and headers - url = f"{self._api_url}/uploads/{self._upload_id}/parts/{part_no}/signed_urls" - headers = {"Accept": "application/json", "Content-Type": "application/json"} - - # Make function call to get upload url - try: - response = await self._client.post(url=url, headers=headers) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed(request_error=request_error, url=url) - raise exceptions.RequestFailedError(url=url) from request_error - - status_code = response.status_code - if status_code != 200: - spec = { - 403: { - "noFileAccess": lambda: exceptions.NoUploadAccessError( - upload_id=self._upload_id - ) - }, - 404: { - "noSuchUpload": lambda: exceptions.UploadNotRegisteredError( - upload_id=self._upload_id - ) - }, - } - ResponseExceptionTranslator(spec=spec).handle(response=response) - raise exceptions.BadResponseCodeError(url=url, response_code=status_code) - - response_body = response.json() - presigned_url = response_body["url"] - - return presigned_url - - def get_part_upload_urls( - self, - *, - from_part: int = 1, - get_url_func=get_part_upload_url, - ) -> Iterator[str]: - """ - For a specific mutli-part upload identified by the `upload_id`, it returns an - iterator to iterate through file parts and obtain the corresponding upload urls. - - By default it start with the first part but you may also start from a specific part - in the middle of the file using the `from_part` argument. This might be useful to - resume an interrupted upload process. - - Please note: the upload corresponding to the `upload_id` must have already been - initiated. - - `get_url_func` only for testing purposes. 
- """ - if not self._upload_id: - raise exceptions.UploadIdUnsetError() - - for part_no in range(from_part, MAX_PART_NUMBER + 1): - yield get_url_func( - api_url=self._api_url, upload_id=self._upload_id, part_no=part_no - ) - - raise exceptions.MaxPartNoExceededError() - - async def get_upload_info(self) -> dict[str, str]: - """Get details on a specific upload""" - if not self._upload_id: - raise exceptions.UploadIdUnsetError() - - # build url and headers - url = f"{self._api_url}/uploads/{self._upload_id}" - headers = {"Accept": "*/*", "Content-Type": "application/json"} - - try: - response = await self._client.get(url=url, headers=headers) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed(request_error=request_error, url=url) - raise exceptions.RequestFailedError(url=url) from request_error - - status_code = response.status_code - if status_code != 200: - spec = { - 403: { - "noFileAccess": lambda: exceptions.NoUploadAccessError( - upload_id=self._upload_id - ) - }, - 404: { - "noSuchUpload": lambda: exceptions.UploadNotRegisteredError( - upload_id=self._upload_id - ) - }, - } - ResponseExceptionTranslator(spec=spec).handle(response=response) - raise exceptions.BadResponseCodeError(url=url, response_code=status_code) - - return response.json() - - async def patch_multipart_upload(self, *, upload_status: UploadStatus) -> None: - """ - Set the status of a specific upload attempt. - The API accepts "uploaded" or "accepted", - if the upload_id is currently set to "pending" - """ - if not self._upload_id: - raise exceptions.UploadIdUnsetError() - - # build url and headers - url = f"{self._api_url}/uploads/{self._upload_id}" - headers = {"Accept": "*/*", "Content-Type": "application/json"} - post_data = {"status": upload_status} - serialized_data = json.dumps(post_data) - - try: - response = await self._client.patch( - url=url, headers=headers, content=serialized_data - ) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed(request_error=request_error, url=url) - raise exceptions.RequestFailedError(url=url) from request_error - - status_code = response.status_code - if status_code != 204: - spec = { - 400: { - "uploadNotPending": lambda: exceptions.CantChangeUploadStatusError( - upload_id=self._upload_id, upload_status=upload_status - ), - "uploadStatusChange": lambda: exceptions.CantChangeUploadStatusError( - upload_id=self._upload_id, upload_status=upload_status - ), - }, - 403: { - "noFileAccess": lambda: exceptions.NoUploadAccessError( - upload_id=self._upload_id - ) - }, - 404: { - "noSuchUpload": lambda: exceptions.UploadNotRegisteredError( - upload_id=self._upload_id - ) - }, - } - ResponseExceptionTranslator(spec=spec).handle(response=response) - raise exceptions.BadResponseCodeError(url=url, response_code=status_code) - - async def upload_file_part(self, *, presigned_url: str, part: bytes) -> None: - """Upload File""" - try: - response = await self._client.put(presigned_url, content=part) - except httpx.RequestError as request_error: - exceptions.raise_if_connection_failed( - request_error=request_error, url=presigned_url - ) - raise exceptions.RequestFailedError(url=presigned_url) from request_error - - status_code = response.status_code - if status_code == 200: - return - - raise exceptions.BadResponseCodeError( - url=presigned_url, response_code=status_code - ) diff --git a/src/ghga_connector/core/work_package.py b/src/ghga_connector/core/work_package.py index d11c69d..cddbdc6 100644 --- 
a/src/ghga_connector/core/work_package.py +++ b/src/ghga_connector/core/work_package.py @@ -22,6 +22,8 @@ from ghga_service_commons.utils.crypt import decrypt from tenacity import RetryError +from ghga_connector.constants import CACHE_MIN_FRESH + from . import exceptions, retry_handler @@ -94,7 +96,12 @@ async def get_work_order_token(self, *, file_id: str) -> str: url = f"{self.api_url}/work-packages/{self.package_id}/files/{file_id}/work-order-tokens" # send authorization header as bearer token - headers = httpx.Headers({"Authorization": f"Bearer {self.access_token}"}) + headers = httpx.Headers( + { + "Authorization": f"Bearer {self.access_token}", + "Cache-Control": f"min-fresh={CACHE_MIN_FRESH}", + } + ) response = await self._call_url(fn=self.client.post, headers=headers, url=url) status_code = response.status_code diff --git a/tests/fixtures/mock_api/app.py b/tests/fixtures/mock_api/app.py index 8c9feac..f45270e 100644 --- a/tests/fixtures/mock_api/app.py +++ b/tests/fixtures/mock_api/app.py @@ -23,21 +23,23 @@ import base64 import json -import logging import os from datetime import datetime +from email.utils import format_datetime from enum import Enum -from typing import Literal, Union +from typing import Annotated, Literal import httpx -from fastapi import HTTPException, status -from ghga_service_commons.api.mock_router import MockRouter +import pytest +from fastapi import Depends, FastAPI, HTTPException, Request, Response, status +from fastapi.responses import JSONResponse +from ghga_service_commons.api.api import ApiConfigBase, configure_app +from ghga_service_commons.api.di import DependencyDummy from ghga_service_commons.httpyexpect.server.exceptions import HttpException from ghga_service_commons.utils.utc_dates import now_as_utc from pydantic import BaseModel -logger = logging.getLogger() -logger.setLevel(logging.INFO) +from ghga_connector.core.client import get_cache_transport class UploadStatus(str, Enum): @@ -63,12 +65,6 @@ class StatePost(BaseModel): file_id: str -class PresignedPostURL(BaseModel): - """Model containing an url""" - - url: str - - class Checksum(BaseModel): """A Checksum as per the DRS OpenApi specs.""" @@ -129,7 +125,7 @@ class DrsObjectServe(BaseModel): access_methods: list[AccessMethod] -class HttpEnvelopeResponse(httpx.Response): +class HttpEnvelopeResponse(Response): """Return base64 encoded envelope bytes""" response_id = "envelope" @@ -139,49 +135,50 @@ def __init__(self, *, envelope: str, status_code: int = 200): super().__init__(content=envelope, status_code=status_code) -def exception_handler(request: httpx.Request, exc: Union[HttpException, HTTPException]): - """Transform HttpException data into a proper response object""" - status_code = exc.status_code +def create_caching_headers(expires_after: int = 60) -> dict[str, str]: + """Return headers used in responses for caching by `hishel`""" + cache_control_header = ("Cache-Control", f"max-age={expires_after}, private") + date_header = ("date", format_datetime(now_as_utc())) + return {k: v for k, v in [cache_control_header, date_header]} - if isinstance(exc, HTTPException): - return httpx.Response(status_code=status_code, json={"detail": exc.detail}) - # if exception is HttpException - return httpx.Response( - status_code=status_code, - content=json.dumps( - { - "exception_id": exc.body.exception_id, - "description": exc.body.description, - "data": exc.body.data, - } - ).encode("utf-8"), - ) +mock_external_app = FastAPI() +url_expires_after = DependencyDummy("url_expires_after") +UrlLifespan = 
Annotated[int, Depends(url_expires_after)] -router = MockRouter( - exception_handler=exception_handler, - exceptions_to_handle=(HTTPException, HttpException), -) +async def update_presigned_url_placeholder(): + """Placeholder function to generate a new S3 download URL. + Patch this function only via `set_presigned_url_update_endpoint`. -@router.get("/") -def ready(): + This is stand-in logic for how the download controller creates a pre-signed + S3 download URL when its `/objects/{file_id}` endpoint is called. + """ + raise NotImplementedError() + + +@mock_external_app.get("/") +async def ready(): """Readiness probe.""" - return httpx.Response(status_code=status.HTTP_204_NO_CONTENT) + return Response(status_code=status.HTTP_204_NO_CONTENT) -@router.get("/health") -def health(): +@mock_external_app.get("/health") +async def health(): """Used to test if this service is alive""" - return httpx.Response( + return Response( status_code=status.HTTP_200_OK, content=json.dumps({"status": "OK"}) ) -@router.get("/objects/{file_id}") -def drs3_objects(file_id: str, request: httpx.Request): - """Mock for the drs3 /objects/{file_id} call""" +@mock_external_app.get("/objects/{file_id}") +async def drs3_objects(file_id: str, request: Request, expires_after: UrlLifespan): + """Mock for the drs3 /objects/{file_id} call. + + The `url_expires_after` parameter is an app dependency that is overridden by tests + that use this mock api. + """ # get authorization header authorization = request.headers["authorization"] @@ -202,13 +199,16 @@ def drs3_objects(file_id: str, request: httpx.Request): ) if file_id == "retry": - return httpx.Response( - status_code=status.HTTP_202_ACCEPTED, headers={"Retry-After": "10"} + return Response( + status_code=status.HTTP_202_ACCEPTED, + headers={"Retry-After": "10", "Cache-Control": "no-store"}, ) if file_id in ("downloadable", "big-downloadable", "envelope-missing"): - return httpx.Response( + await update_presigned_url_placeholder() + return Response( status_code=200, + headers=create_caching_headers(expires_after=expires_after), content=DrsObjectServe( file_id=file_id, self_uri=f"drs://localhost:8080//{file_id}", @@ -231,13 +231,15 @@ def drs3_objects(file_id: str, request: httpx.Request): ) -@router.get("/objects/{file_id}/envelopes") -def drs3_objects_envelopes(file_id: str): +@mock_external_app.get("/objects/{file_id}/envelopes") +async def drs3_objects_envelopes(file_id: str): """Mock for the dcs /objects/{file_id}/envelopes call""" if file_id in ("downloadable", "big-downloadable"): response_str = str.encode(os.environ["FAKE_ENVELOPE"]) envelope = base64.b64encode(response_str).decode("utf-8") - return HttpEnvelopeResponse(envelope=envelope) + response = HttpEnvelopeResponse(envelope=envelope) + response.headers["Cache-Control"] = "no-store" + return response raise HttpException( status_code=404, @@ -247,8 +249,8 @@ def drs3_objects_envelopes(file_id: str): ) -@router.get("/files/{file_id}") -def ulc_get_files(file_id: str): +@mock_external_app.get("/files/{file_id}") +async def ulc_get_files(file_id: str): """Mock for the ulc GET /files/{file_id} call.""" if file_id == "pending": return FileProperties( @@ -271,11 +273,11 @@ def ulc_get_files(file_id: str): ) -@router.get("/uploads/{upload_id}") -def ulc_get_uploads(upload_id: str): +@mock_external_app.get("/uploads/{upload_id}") +async def ulc_get_uploads(upload_id: str): """Mock for the ulc GET /uploads/{upload_id} call.""" if upload_id == "pending": - return httpx.Response( + return Response( status_code=200, 
content=UploadProperties( upload_id="pending", @@ -292,16 +294,16 @@ def ulc_get_uploads(upload_id: str): ) -@router.post("/uploads") -def ulc_post_files_uploads(request: httpx.Request): +@mock_external_app.post("/uploads") +async def ulc_post_files_uploads(request: Request): """Mock for the ulc POST /uploads call.""" - content = json.loads(request.content) + content = json.loads(await request.body()) state: StatePost = StatePost(**content) file_id = state.file_id if file_id == "uploadable": - return httpx.Response( + return Response( status_code=200, content=UploadProperties( upload_id="pending", @@ -310,7 +312,7 @@ def ulc_post_files_uploads(request: httpx.Request): ).model_dump_json(), ) if file_id == "uploadable-16": - return httpx.Response( + return Response( status_code=200, content=UploadProperties( upload_id="pending", @@ -320,7 +322,7 @@ def ulc_post_files_uploads(request: httpx.Request): ) if file_id == "uploadable-8": - return httpx.Response( + return Response( status_code=200, content=UploadProperties( upload_id="pending", @@ -344,14 +346,14 @@ def ulc_post_files_uploads(request: httpx.Request): ) -@router.post("/uploads/{upload_id}/parts/{part_no}/signed_urls") -def ulc_post_uploads_parts_files_signed_posts(upload_id: str, part_no: int): +@mock_external_app.post("/uploads/{upload_id}/parts/{part_no}/signed_urls") +async def ulc_post_uploads_parts_files_signed_posts(upload_id: str, part_no: int): """Mock for the ulc POST /uploads/{upload_id}/parts/{part_no}/signed_urls call.""" if upload_id == "pending": if part_no in (1, 2): urls = (os.environ["S3_UPLOAD_URL_1"], os.environ["S3_UPLOAD_URL_2"]) - return httpx.Response( - status_code=200, text=json.dumps({"url": urls[part_no - 1]}) + return Response( + status_code=200, content=json.dumps({"url": urls[part_no - 1]}) ) raise HttpException( @@ -362,16 +364,16 @@ def ulc_post_uploads_parts_files_signed_posts(upload_id: str, part_no: int): ) -@router.patch("/uploads/{upload_id}") -def ulc_patch_uploads(upload_id: str, request: httpx.Request): +@mock_external_app.patch("/uploads/{upload_id}") +async def ulc_patch_uploads(upload_id: str, request: Request): """Mock for the ulc PATCH /uploads/{upload_id} call""" - content = json.loads(request.content) + content = json.loads(await request.body()) state: StatePatch = StatePatch(**content) upload_status = state.status if upload_id == "uploaded": if upload_status == UploadStatus.CANCELLED: - return httpx.Response(status_code=status.HTTP_204_NO_CONTENT) + return Response(status_code=status.HTTP_204_NO_CONTENT) raise HttpException( status_code=400, @@ -382,7 +384,7 @@ def ulc_patch_uploads(upload_id: str, request: httpx.Request): if upload_id == "pending": if upload_status == UploadStatus.UPLOADED: - return httpx.Response(status_code=status.HTTP_204_NO_CONTENT) + return Response(status_code=status.HTTP_204_NO_CONTENT) raise HttpException( status_code=400, @@ -407,24 +409,32 @@ def ulc_patch_uploads(upload_id: str, request: httpx.Request): ) -@router.post("/work-packages/{package_id}/files/{file_id}/work-order-tokens") -def create_work_order_token(package_id: str, file_id: str): - """Mock Work Order Token endpoint""" +@mock_external_app.post("/work-packages/{package_id}/files/{file_id}/work-order-tokens") +async def create_work_order_token(package_id: str, file_id: str): + """Mock Work Order Token endpoint. + + Cached response will be valid for 5 seconds for testing purposes. 
+ Since client requests (should) use the min-fresh cache-control header value of 3 + seconds, the cached responses will be used for 2 seconds before making new requests. + """ # has to be at least 48 chars long - return httpx.Response( + headers = create_caching_headers(expires_after=5) + return JSONResponse( status_code=201, - json=base64.b64encode(b"1234567890" * 5).decode(), + content=base64.b64encode(b"1234567890" * 5).decode(), + headers=headers, ) -@router.get("/values/{value_name}") -def mock_wkvs(value_name: str): +@mock_external_app.get("/values/{value_name}") +async def mock_wkvs(value_name: str): """Mock the WKVS /values/value_name endpoint""" + api_url = "http://127.0.0.1" values: dict[str, str] = { "crypt4gh_public_key": "qx5g31H7rdsq7sgkew9ElkLIXvBje4RxDVcAHcJD8XY=", - "wps_api_url": "http://127.0.0.1/wps", - "dcs_api_url": "http://127.0.0.1/download", - "ucs_api_url": "http://127.0.0.1/upload", + "wps_api_url": api_url, + "dcs_api_url": api_url, + "ucs_api_url": api_url, } if value_name not in values: @@ -434,4 +444,29 @@ def mock_wkvs(value_name: str): description=f"The value {value_name} is not configured.", data={"value_name": value_name}, ) - return httpx.Response(status_code=200, json={value_name: values[value_name]}) + + return JSONResponse(status_code=200, content={value_name: values[value_name]}) + + +config = ApiConfigBase() +configure_app(mock_external_app, config) + + +def get_test_mounts(): + """Test-only version of `async_client` to route traffic to the specified app. + + Lets other traffic go out as usual, e.g. to the S3 testcontainer, while still using + the same caching logic as the real client. + """ + mock_app_transport = get_cache_transport(httpx.ASGITransport(app=mock_external_app)) + mounts = { + "all://127.0.0.1": mock_app_transport, # route traffic to the mock app + "all://host.docker.internal": get_cache_transport(), # let S3 traffic go out + } + return mounts + + +@pytest.fixture(scope="function") +def mock_external_calls(monkeypatch): + """Monkeypatch the async_client so it only intercepts calls to the mock app""" + monkeypatch.setattr("ghga_connector.core.client.get_mounts", get_test_mounts) diff --git a/tests/integration/test_cli.py b/tests/integration/test_cli.py index 90fc1f9..d9b2606 100644 --- a/tests/integration/test_cli.py +++ b/tests/integration/test_cli.py @@ -19,11 +19,10 @@ import base64 import os import pathlib -import re from contextlib import nullcontext from filecmp import cmp from pathlib import Path -from typing import Union +from typing import Any from unittest.mock import AsyncMock, patch import crypt4gh.keys @@ -38,12 +37,17 @@ retrieve_upload_parameters, ) from ghga_connector.constants import DEFAULT_PART_SIZE -from ghga_connector.core import async_client, exceptions +from ghga_connector.core import exceptions +from ghga_connector.core.client import async_client from ghga_connector.core.crypt import Crypt4GHEncryptor from ghga_connector.core.main import upload_file from tests.fixtures import state from tests.fixtures.config import get_test_config -from tests.fixtures.mock_api.app import router +from tests.fixtures.mock_api.app import ( + mock_external_app, + mock_external_calls, # noqa: F401 + url_expires_after, +) from tests.fixtures.s3 import ( # noqa: F401 S3Fixture, get_big_s3_object, @@ -52,8 +56,9 @@ ) from tests.fixtures.utils import PRIVATE_KEY_FILE, PUBLIC_KEY_FILE, mock_wps_token -URL_PATTERN = re.compile(r"^https?://127\.0\.0\.1.*") - +GET_PACKAGE_FILES_ATTR = ( + 
"ghga_connector.core.work_package.WorkPackageAccessor.get_package_files" +) ENVIRON_DEFAULTS = { "DEFAULT_PART_SIZE": str(16 * 1024 * 1024), "S3_DOWNLOAD_URL": "test://download.url", @@ -62,8 +67,8 @@ "S3_DOWNLOAD_FIELD_SIZE": str(146), "FAKE_ENVELOPE": "Fake_envelope", } - -unintercepted_hosts: list[str] = [] +FAKE_ENVELOPE = "Thisisafakeenvelope" +SHORT_LIFESPAN = 10 pytestmark = [ pytest.mark.asyncio, @@ -71,18 +76,68 @@ assert_all_responses_were_requested=False, assert_all_requests_were_expected=False, can_send_already_matched_responses=True, - should_mock=lambda request: request.url.host not in unintercepted_hosts, + should_mock=lambda request: str(request.url).endswith("/health"), ), ] -def wkvs_method_mock(value: str): - """Dummy to patch WVKS method""" +@pytest.fixture(scope="function", autouse=True) +def set_env_vars(monkeypatch): + """Set environment variables""" + for name, value in ENVIRON_DEFAULTS.items(): + monkeypatch.setenv(name, value) + + +@pytest.fixture(scope="function", autouse=True) +def apply_test_config(): + """Apply default test config""" + with patch("ghga_connector.cli.CONFIG", get_test_config()): + yield + + +@pytest.fixture(scope="function") +def apply_common_download_mocks(monkeypatch): + """Monkeypatch download-specific functions and values""" + monkeypatch.setattr("ghga_connector.cli.get_wps_token", mock_wps_token) + monkeypatch.setattr( + "ghga_connector.core.work_package._decrypt", + lambda data, key: data, + ) + monkeypatch.setenv("FAKE_ENVELOPE", FAKE_ENVELOPE) + + +def set_presigned_url_update_endpoint( + monkeypatch, + s3_fixture: S3Fixture, # noqa: F811 + *, + bucket_id: str, + object_id: str, + expires_after: int, +): + """Temporarily assign the S3 download URL update endpoint in the mock app. + + Since creating the URL requires access to the S3 fixture, this behavior is + defined here instead of with the rest of the mock api. 
+ """ + + async def update_presigned_url_actual(): + """Create a new presigned download URL for S3.""" + download_url = await s3_fixture.storage.get_object_download_url( + bucket_id=bucket_id, + object_id=object_id, + expires_after=expires_after, + ) + + monkeypatch.setenv("S3_DOWNLOAD_URL", download_url) - async def inner(self): - return value + # Monkeypatch the placeholder endpoint function with the above + monkeypatch.setattr( + "tests.fixtures.mock_api.app.update_presigned_url_placeholder", + update_presigned_url_actual, + ) - return inner + # Override the app dependency so it uses the new cache lifespan + mock_external_app.dependency_overrides[url_expires_after] = lambda: expires_after @pytest.mark.parametrize( @@ -102,70 +157,57 @@ async def inner(self): (20 * 1024 * 1024, 1 * 1024 * 1024), (20 * 1024 * 1024, 64 * 1024), (1 * 1024 * 1024, DEFAULT_PART_SIZE), - (20 * 1024 * 1024, DEFAULT_PART_SIZE), + (75 * 1024 * 1024, 10 * 1024 * 1024), ], ) async def test_multipart_download( - httpx_mock: HTTPXMock, # noqa: F811 file_size: int, part_size: int, + httpx_mock: HTTPXMock, # noqa: F811 s3_fixture: S3Fixture, # noqa F811 tmp_path: pathlib.Path, monkeypatch, + mock_external_calls, # noqa: F811 + apply_common_download_mocks, ): """Test the multipart download of a file""" - httpx_mock.add_callback(callback=router.handle_request) - for name, value in ENVIRON_DEFAULTS.items(): - monkeypatch.setenv(name, value) + # override the default config fixture with updated part size + monkeypatch.setattr( + "ghga_connector.cli.CONFIG", get_test_config(part_size=part_size) + ) big_object = await get_big_s3_object(s3_fixture, object_size=file_size) - # The download function will ask the user for input. - monkeypatch.setattr("ghga_connector.cli.get_wps_token", mock_wps_token) - monkeypatch.setattr( - "ghga_connector.core.work_package.WorkPackageAccessor.get_package_files", - AsyncMock(return_value=dict(zip([big_object.object_id], [""]))), - ) + # The intercepted health check API calls will return the following mock response + httpx_mock.add_response(json={"status": "OK"}) + + # Patch get_package_files monkeypatch.setattr( - "ghga_connector.core.work_package._decrypt", - lambda data, key: data, + GET_PACKAGE_FILES_ATTR, + AsyncMock(return_value={big_object.object_id: ""}), ) # right now the desired file size is only # approximately met by the provided big file: actual_file_size = len(big_object.content) + monkeypatch.setenv("S3_DOWNLOAD_FIELD_SIZE", str(actual_file_size)) - # get s3 download url - download_url = await s3_fixture.storage.get_object_download_url( + set_presigned_url_update_endpoint( + monkeypatch, + s3_fixture, bucket_id=big_object.bucket_id, object_id=big_object.object_id, - expires_after=180, + expires_after=SHORT_LIFESPAN, ) - unintercepted_hosts.append(httpx.URL(download_url).host) - - fake_envelope = "Thisisafakeenvelope" - - monkeypatch.setenv("S3_DOWNLOAD_URL", download_url) - monkeypatch.setenv("S3_DOWNLOAD_FIELD_SIZE", str(actual_file_size)) - monkeypatch.setenv("FAKE_ENVELOPE", fake_envelope) - big_file_content = str.encode(fake_envelope) + big_file_content = str.encode(FAKE_ENVELOPE) big_file_content += big_object.content - api_url = "http://127.0.0.1" - - with patch( - "ghga_connector.cli.CONFIG", - get_test_config( - download_api=api_url, - part_size=part_size, - wps_api_url=api_url, - ), - ): - await async_download( - output_dir=tmp_path, - my_public_key_path=Path(PUBLIC_KEY_FILE), - my_private_key_path=Path(PRIVATE_KEY_FILE), - ) + + await async_download( + 
output_dir=tmp_path,
+        my_public_key_path=Path(PUBLIC_KEY_FILE),
+        my_private_key_path=Path(PRIVATE_KEY_FILE),
+    )
 
     with open(tmp_path / f"{big_object.object_id}.c4gh", "rb") as file:
         observed_content = file.read()
@@ -175,179 +217,177 @@ async def test_multipart_download(
 
 
 @pytest.mark.parametrize(
-    "bad_url,bad_outdir,file_name,expected_exception,proceed_on_missing",
+    "bad_outdir,file_name,expected_exception",
     [
-        (True, False, "file_downloadable", exceptions.ApiNotReachableError, True),
-        (False, False, "file_downloadable", None, True),
-        (False, False, "file_not_downloadable", None, True),
+        (False, "file_downloadable", nullcontext()),
+        (False, "file_retry", pytest.raises(exceptions.MaxWaitTimeExceededError)),
         (
-            False,
-            False,
-            "file_not_downloadable",
-            exceptions.AbortBatchProcessError,
-            False,
-        ),
-        (False, False, "file_retry", exceptions.MaxWaitTimeExceededError, True),
-        (False, True, "file_downloadable", exceptions.DirectoryDoesNotExistError, True),
-        (
-            False,
-            False,
-            "file_envelope_missing",
-            exceptions.GetEnvelopeError,
-            True,
+            True,
+            "file_downloadable",
+            pytest.raises(exceptions.DirectoryDoesNotExistError),
         ),
+        (False, "file_envelope_missing", pytest.raises(exceptions.GetEnvelopeError)),
     ],
 )
 async def test_download(
-    httpx_mock: HTTPXMock,  # noqa: F811
-    bad_url: bool,
     bad_outdir: bool,
     file_name: str,
-    expected_exception: type[Union[Exception, None]],
+    expected_exception: Any,
+    httpx_mock: HTTPXMock,  # noqa: F811
     s3_fixture: S3Fixture,  # noqa: F811
     tmp_path: pathlib.Path,
-    proceed_on_missing: bool,
     monkeypatch,
+    mock_external_calls,  # noqa: F811
+    apply_common_download_mocks,
 ):
     """Test the download of a file"""
     output_dir = Path("/non/existing/path") if bad_outdir else tmp_path
 
+    # Patch get_package_files
     file = state.FILES[file_name]
-
-    # Intercept requests sent with httpx
-    httpx_mock.add_callback(callback=router.handle_request, url=URL_PATTERN)
-    for name, value in ENVIRON_DEFAULTS.items():
-        monkeypatch.setenv(name, value)
-
-    # The download function will ask the user for input.
-    monkeypatch.setattr("ghga_connector.cli.get_wps_token", mock_wps_token)
-    monkeypatch.setattr(
-        "ghga_connector.core.work_package.WorkPackageAccessor.get_package_files",
-        AsyncMock(return_value=dict(zip([file.file_id], [""]))),
-    )
     monkeypatch.setattr(
-        "ghga_connector.core.work_package._decrypt",
-        lambda data, key: data,
+        GET_PACKAGE_FILES_ATTR,
+        AsyncMock(return_value={file.file_id: ""}),
    )
 
     if file.populate_storage:
-        download_url = await s3_fixture.storage.get_object_download_url(
+        set_presigned_url_update_endpoint(
+            monkeypatch,
+            s3_fixture,
             bucket_id=file.grouping_label,
             object_id=file.file_id,
-            expires_after=60,
+            expires_after=SHORT_LIFESPAN,
         )
-    else:
-        download_url = ""
 
-    unintercepted_hosts.append(httpx.URL(download_url).host)
+    monkeypatch.setenv("S3_DOWNLOAD_URL", "")
+
+    monkeypatch.setenv("S3_DOWNLOAD_FIELD_SIZE", str(os.path.getsize(file.file_path)))
+
+    # The intercepted health check API calls will return the following mock response
+    httpx_mock.add_response(json={"status": "OK"})
+
+    with expected_exception:
+        await async_download(
+            output_dir=output_dir,
+            my_public_key_path=Path(PUBLIC_KEY_FILE),
+            my_private_key_path=Path(PRIVATE_KEY_FILE),
+        )
+
+    tmp_file = tmp_path / "file_with_envelope"
+
+    # Copy fake envelope into new temp file, then append the test file
+    with tmp_file.open("wb") as file_write:
+        with file.file_path.open("rb") as file_read:
+            buffer = file_read.read()
+            file_write.write(str.encode(FAKE_ENVELOPE))
+            file_write.write(buffer)
+
+    # Only compare file contents when no exception was expected
+    if isinstance(expected_exception, nullcontext):
+        assert cmp(output_dir / f"{file.file_id}.c4gh", tmp_file)
+
+
+async def test_file_not_downloadable(
+    httpx_mock: HTTPXMock,  # noqa: F811
+    s3_fixture: S3Fixture,  # noqa: F811
+    tmp_path: pathlib.Path,
+    monkeypatch,
+    mock_external_calls,  # noqa: F811
+    apply_common_download_mocks,
+):
+    """Test downloading a file that isn't in storage.
+
+    Tests for 403 error behavior as well as the case where an expected file ID is
+    reported missing by the download controller API (and the user chooses not to
+    continue the download).
+ """ + output_dir = tmp_path + + # The intercepted health check API calls will return the following mock response + httpx_mock.add_response(json={"status": "OK"}) - fake_envelope = "Thisisafakeenvelope" + # Patch get_package_files + file = state.FILES["file_not_downloadable"] + monkeypatch.setattr( + GET_PACKAGE_FILES_ATTR, + AsyncMock(return_value={file.file_id: ""}), + ) - monkeypatch.setenv("S3_DOWNLOAD_URL", download_url) monkeypatch.setenv("S3_DOWNLOAD_FIELD_SIZE", str(os.path.getsize(file.file_path))) - monkeypatch.setenv("FAKE_ENVELOPE", fake_envelope) - api_url = "http://bad_url" if bad_url else "http://127.0.0.1" - for wkvs_method in ["get_wps_api_url", "get_dcs_api_url"]: - monkeypatch.setattr( - f"ghga_connector.core.api_calls.well_knowns.WKVSCaller.{wkvs_method}", - wkvs_method_mock(api_url), + # 403 caused by an invalid auth token + with ( + patch( + "ghga_connector.core.work_package._decrypt", + lambda data, key: "authfail_normal", + ), + pytest.raises( + exceptions.UnauthorizedAPICallError, + match="This is not the token you're looking for.", + ), + ): + await async_download( + output_dir=output_dir, + my_public_key_path=Path(PUBLIC_KEY_FILE), + my_private_key_path=Path(PRIVATE_KEY_FILE), ) - with patch( - "ghga_connector.cli.CONFIG", - get_test_config(), + + # 403 caused by requesting file ID that's not part of the work order token + with ( + patch( + "ghga_connector.core.work_package._decrypt", + lambda data, key: "file_id_mismatch", + ), + pytest.raises( + exceptions.UnauthorizedAPICallError, + match="Endpoint file ID did not match file ID" + " announced in work order token", + ), ): - # needed to mock user input - with patch( + await async_download( + output_dir=output_dir, + my_public_key_path=Path(PUBLIC_KEY_FILE), + my_private_key_path=Path(PRIVATE_KEY_FILE), + ) + + # Exception arising when the file ID is valid, but not found in the DCS (and the + # user inputs 'no' instead of 'yes' when prompted if they want to continue anyway) + with ( + patch( "ghga_connector.core.downloading.batch_processing.CliInputHandler.get_input", - return_value="yes" if proceed_on_missing else "no", - ): - if file_name == "file_not_downloadable": - # check both 403 scenarios - with patch( - "ghga_connector.core.work_package._decrypt", - lambda data, key: "authfail_normal", - ): - with pytest.raises( - exceptions.UnauthorizedAPICallError, - match="This is not the token you're looking for.", - ): - await async_download( - output_dir=output_dir, - my_public_key_path=Path(PUBLIC_KEY_FILE), - my_private_key_path=Path(PRIVATE_KEY_FILE), - ) - with patch( - "ghga_connector.core.work_package._decrypt", - lambda data, key: "file_id_mismatch", - ): - with pytest.raises( - exceptions.UnauthorizedAPICallError, - match="Endpoint file ID did not match file ID" - " announced in work order token", - ): - await async_download( - output_dir=output_dir, - my_public_key_path=Path(PUBLIC_KEY_FILE), - my_private_key_path=Path(PRIVATE_KEY_FILE), - ) - else: - with ( - pytest.raises( - expected_exception # type: ignore - ) - if expected_exception - else nullcontext() - ): - await async_download( - output_dir=output_dir, - my_public_key_path=Path(PUBLIC_KEY_FILE), - my_private_key_path=Path(PRIVATE_KEY_FILE), - ) - - # BadResponseCode is no longer propagated and file at path does not exist - if file_name == "file_not_downloadable": - return - - tmp_file = tmp_path / "file_with_envelope" - - # Copy fake envelope into new temp file, then append the test file - with tmp_file.open("wb") as file_write: - with 
file.file_path.open("rb") as file_read: - buffer = file_read.read() - file_write.write(str.encode(fake_envelope)) - file_write.write(buffer) - - if not expected_exception: - assert cmp(output_dir / f"{file.file_id}.c4gh", tmp_file) + return_value="no", + ), + pytest.raises(exceptions.AbortBatchProcessError), + ): + await async_download( + output_dir=output_dir, + my_public_key_path=Path(PUBLIC_KEY_FILE), + my_private_key_path=Path(PRIVATE_KEY_FILE), + ) @pytest.mark.parametrize( - "bad_url,file_name,expected_exception", + "file_name,expected_exception", [ - (True, "file_uploadable", exceptions.ApiNotReachableError), - (False, "file_uploadable", None), - (False, "file_not_uploadable", exceptions.StartUploadError), - (False, "file_with_bad_path", exceptions.FileDoesNotExistError), - (False, "encrypted_file", exceptions.FileAlreadyEncryptedError), + ("file_uploadable", nullcontext()), + ("file_not_uploadable", pytest.raises(exceptions.StartUploadError)), + ("file_with_bad_path", pytest.raises(exceptions.FileDoesNotExistError)), + ("encrypted_file", pytest.raises(exceptions.FileAlreadyEncryptedError)), ], ) async def test_upload( - httpx_mock: HTTPXMock, # noqa: F811 - bad_url: bool, file_name: str, - expected_exception: type[Union[Exception, None]], + expected_exception: Any, + httpx_mock: HTTPXMock, # noqa: F811 s3_fixture: S3Fixture, # noqa F811 monkeypatch, + mock_external_calls, # noqa: F811 tmpdir, ): """Test the upload of a file, expects Abort, if the file was not found""" uploadable_file = state.FILES[file_name] - # Intercept requests sent with httpx - httpx_mock.add_callback(callback=router.handle_request, url=URL_PATTERN) - for name, value in ENVIRON_DEFAULTS.items(): - monkeypatch.setenv(name, value) + # The intercepted health check API calls will return the following mock response + httpx_mock.add_response(json={"status": "OK"}) if file_name == "encrypted_file": # encrypt test file on the fly @@ -381,20 +421,10 @@ async def test_upload( upload_id=upload_id, part_number=1, ) - unintercepted_hosts.append(httpx.URL(upload_url).host) monkeypatch.setenv("S3_UPLOAD_URL_1", upload_url) - api_url = "http://bad_url" if bad_url else "http://127.0.0.1" - monkeypatch.setattr( - "ghga_connector.core.api_calls.well_knowns.WKVSCaller.get_ucs_api_url", - wkvs_method_mock(api_url), - ) - - with ( - patch("ghga_connector.cli.CONFIG", get_test_config()), - pytest.raises(expected_exception) if expected_exception else nullcontext(), # type: ignore - ): + with expected_exception: message_display = init_message_display(debug=True) async with async_client() as client: parameters = await retrieve_upload_parameters(client=client) @@ -430,20 +460,19 @@ async def test_upload( ], ) async def test_multipart_upload( - httpx_mock: HTTPXMock, # noqa: F811 file_size: int, anticipated_part_size: int, + httpx_mock: HTTPXMock, # noqa: F811 s3_fixture: S3Fixture, # noqa F811 monkeypatch, + mock_external_calls, # noqa: F811 ): """Test the upload of a file, expects Abort, if the file was not found""" bucket_id = s3_fixture.existing_buckets[0] file_id = "uploadable-" + str(anticipated_part_size) - # Intercept requests sent with httpx - httpx_mock.add_callback(callback=router.handle_request, url=URL_PATTERN) - for name, value in ENVIRON_DEFAULTS.items(): - monkeypatch.setenv(name, value) + # The intercepted health check API calls will return the following mock response + httpx_mock.add_response(json={"status": "OK"}) anticipated_part_size = anticipated_part_size * 1024 * 1024 @@ -465,7 +494,6 @@ async def 
test_multipart_upload( object_id=file_id, part_number=1, ) - unintercepted_hosts.append(httpx.URL(upload_url_1).host) # create presigned url for upload part 2 upload_url_2 = await s3_fixture.storage.get_part_upload_url( @@ -474,25 +502,12 @@ async def test_multipart_upload( object_id=file_id, part_number=2, ) - unintercepted_hosts.append(httpx.URL(upload_url_2).host) monkeypatch.setenv("S3_UPLOAD_URL_1", upload_url_1) monkeypatch.setenv("S3_UPLOAD_URL_2", upload_url_2) - api_url = "http://127.0.0.1" - # create big temp file - monkeypatch.setattr( - "ghga_connector.core.api_calls.well_knowns.WKVSCaller.get_ucs_api_url", - wkvs_method_mock(api_url), - ) - with ( - big_temp_file(file_size) as file, - patch( - "ghga_connector.cli.CONFIG", - get_test_config(), - ), - ): + with big_temp_file(file_size) as file: message_display = init_message_display(debug=True) async with async_client() as client: parameters = await retrieve_upload_parameters(client=client) @@ -520,3 +535,53 @@ async def test_multipart_upload( bucket_id=bucket_id, object_id=file_id, ) + + +async def test_upload_bad_url(httpx_mock: HTTPXMock, mock_external_calls): # noqa: F811 + """Check that the right error is raised for a bad URL in the upload logic.""" + # The intercepted health check API call will return the following mock response + httpx_mock.add_exception(httpx.RequestError("")) + + uploadable_file = state.FILES["file_uploadable"] + file_path = uploadable_file.file_path.resolve() + + with pytest.raises(exceptions.ApiNotReachableError): + message_display = init_message_display(debug=True) + async with async_client() as client: + parameters = await retrieve_upload_parameters(client=client) + await upload_file( + api_url=parameters.ucs_api_url, + client=client, + file_id=uploadable_file.file_id, + file_path=file_path, + message_display=message_display, + server_public_key=parameters.server_pubkey, + my_public_key_path=Path(PUBLIC_KEY_FILE), + my_private_key_path=Path(PRIVATE_KEY_FILE), + part_size=DEFAULT_PART_SIZE, + ) + + +async def test_download_bad_url( + httpx_mock: HTTPXMock, # noqa: F811 + tmp_path: pathlib.Path, + monkeypatch, + mock_external_calls, # noqa: F811 + apply_common_download_mocks, +): + """Check that the right error is raised for a bad URL in the download logic.""" + httpx_mock.add_exception(httpx.RequestError("")) + + # Patch get_package_files + file = state.FILES["file_downloadable"] + monkeypatch.setattr( + GET_PACKAGE_FILES_ATTR, + AsyncMock(return_value={file.file_id: ""}), + ) + + with pytest.raises(exceptions.ApiNotReachableError): + await async_download( + output_dir=tmp_path, + my_public_key_path=Path(PUBLIC_KEY_FILE), + my_private_key_path=Path(PRIVATE_KEY_FILE), + ) diff --git a/tests/integration/test_file_operations.py b/tests/integration/test_file_operations.py index 019e3e5..c89169c 100644 --- a/tests/integration/test_file_operations.py +++ b/tests/integration/test_file_operations.py @@ -17,7 +17,7 @@ """Test file operations""" from asyncio import create_task -from unittest.mock import Mock +from unittest.mock import AsyncMock, Mock import pytest @@ -30,6 +30,7 @@ ) from ghga_connector.core.downloading.downloader import Downloader, TaskHandler from ghga_connector.core.downloading.progress_bar import ProgressBar +from ghga_connector.core.downloading.structs import URLResponse from ghga_connector.core.exceptions import DownloadError from tests.fixtures.s3 import ( # noqa: F401 S3Fixture, @@ -109,6 +110,8 @@ async def test_download_file_parts( download_url = await 
s3_fixture.storage.get_object_download_url( object_id=big_object.object_id, bucket_id=big_object.bucket_id ) + url_response = URLResponse(download_url, total_file_size) + mock_fetch = AsyncMock(return_value=url_response) async with async_client() as client: # no work package accessor calls in download_file_parts, just mock for correct type @@ -122,12 +125,11 @@ async def test_download_file_parts( work_package_accessor=dummy_accessor, message_display=message_display, ) + downloader.fetch_download_url = mock_fetch # type: ignore task_handler = TaskHandler() for part_range in part_ranges: - task_handler.schedule( - downloader.download_to_queue(url=download_url, part_range=part_range) - ) + task_handler.schedule(downloader.download_to_queue(part_range=part_range)) file_path = tmp_path / "test.file" with ( @@ -157,18 +159,17 @@ async def test_download_file_parts( work_package_accessor=dummy_accessor, message_display=message_display, ) + downloader.fetch_download_url = mock_fetch # type: ignore task_handler = TaskHandler() part_ranges = calc_part_ranges( part_size=part_size, total_file_size=total_file_size ) task_handler.schedule( - downloader.download_to_queue( - url=download_url, part_range=PartRange(-10000, -1) - ) + downloader.download_to_queue(part_range=PartRange(-10000, -1)) ) task_handler.schedule( - downloader.download_to_queue(url=download_url, part_range=next(part_ranges)) + downloader.download_to_queue(part_range=next(part_ranges)) ) file_path = tmp_path / "test2.file" @@ -185,8 +186,13 @@ async def test_download_file_parts( ) ) with pytest.raises(DownloadError): - await task_handler.gather() - await dl_task + try: + await task_handler.gather() + except: + dl_task.cancel() + raise + else: + await dl_task # test exception at the end downloader = Downloader( @@ -197,6 +203,7 @@ async def test_download_file_parts( work_package_accessor=dummy_accessor, message_display=message_display, ) + downloader.fetch_download_url = mock_fetch # type: ignore task_handler = TaskHandler() part_ranges = calc_part_ranges( part_size=part_size, total_file_size=total_file_size @@ -205,15 +212,11 @@ async def test_download_file_parts( for idx, part_range in enumerate(part_ranges): if idx == len(part_ranges) - 1: # type: ignore task_handler.schedule( - downloader.download_to_queue( - url=download_url, part_range=PartRange(-10000, -1) - ) + downloader.download_to_queue(part_range=PartRange(-10000, -1)) ) else: task_handler.schedule( - downloader.download_to_queue( - url=download_url, part_range=part_range - ) + downloader.download_to_queue(part_range=part_range) ) file_path = tmp_path / "test3.file" @@ -230,5 +233,10 @@ async def test_download_file_parts( ) ) with pytest.raises(DownloadError): - await task_handler.gather() - await dl_task + try: + await task_handler.gather() + except: + dl_task.cancel() + raise + else: + await dl_task diff --git a/tests/unit/test_api_calls.py b/tests/unit/test_api_calls.py index af805f1..4f804e1 100644 --- a/tests/unit/test_api_calls.py +++ b/tests/unit/test_api_calls.py @@ -16,12 +16,15 @@ """Tests for API Calls""" +import asyncio +import base64 from contextlib import nullcontext from functools import partial from pathlib import Path from typing import Union from unittest.mock import Mock +import httpx import pytest from pytest_httpx import HTTPXMock @@ -29,10 +32,104 @@ from ghga_connector.core.api_calls.well_knowns import WKVSCaller from ghga_connector.core.uploading.structs import UploadStatus from ghga_connector.core.uploading.uploader import Uploader +from 
tests.fixtures.mock_api.app import create_caching_headers
 from tests.fixtures.utils import mock_wps_token
 
 
+pytestmark = [
+    pytest.mark.asyncio,
+    pytest.mark.httpx_mock(
+        assert_all_responses_were_requested=False,
+        can_send_already_matched_responses=True,
+        should_mock=lambda request: True,
+    ),
+]
+API_URL = "http://127.0.0.1"
+
+
+class RecordingClient(httpx.AsyncClient):
+    """An `AsyncClient` wrapper that records responses."""
+
+    calls: list[httpx.Response]
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.calls = []
+
+    async def _do_request(self, method: str, *args, **kwargs) -> httpx.Response:
+        """Wrap actual client calls so we can see which calls were cached vs not."""
+        method_func = getattr(super(), method)
+        response = await method_func(*args, **kwargs)
+        self.calls.append(response)
+        return response
+
+    def assert_last_call_from_cache(self):
+        """Assert that the last call was from the cache."""
+        assert self.calls[-1].extensions["from_cache"]
+
+    def assert_last_call_not_from_cache(self):
+        """Assert that the last call was not from the cache."""
+        assert not self.calls[-1].extensions["from_cache"]
+
+    async def get(self, *args, **kwargs) -> httpx.Response:
+        """Record GET calls."""
+        return await self._do_request("get", *args, **kwargs)
+
+    async def post(self, *args, **kwargs) -> httpx.Response:
+        """Record POST calls."""
+        return await self._do_request("post", *args, **kwargs)
+
+
+async def test_get_work_order_token_caching(monkeypatch, httpx_mock: HTTPXMock):
+    """Test the caching of the call to the WPS to get a work order token.
+
+    The `mock_external_calls` fixture will route HTTP requests to the mock API.
+ """ + # Patch the decrypt function so we don't need an actual token + monkeypatch.setattr( + "ghga_connector.core.work_package._decrypt", lambda data, key: data + ) + + # Patch the client to record calls + monkeypatch.setattr("ghga_connector.core.client.httpx.AsyncClient", RecordingClient) + async with async_client() as client: + assert isinstance(client, RecordingClient) + accessor = WorkPackageAccessor( + api_url=API_URL, + client=client, + dcs_api_url="", + my_private_key=b"", + my_public_key=b"", + access_token="", + package_id="wp_1", + ) + file_id = "file-id-1" + add_httpx_response = partial( + httpx_mock.add_response, + status_code=201, + json=base64.b64encode(b"1234567890" * 5).decode(), + headers=create_caching_headers(3), + ) + add_httpx_response() + await accessor.get_work_order_token(file_id=file_id) + + # Verify that the call was made + assert client.calls + client.assert_last_call_not_from_cache() + + # Make same call and verify that the response came from the cache instead + await accessor.get_work_order_token(file_id=file_id) + client.assert_last_call_from_cache() + + # Wait for the cache entry to expire, then make the call again + await asyncio.sleep(1) + add_httpx_response() + await accessor.get_work_order_token(file_id=file_id) + client.assert_last_call_not_from_cache() + -@pytest.mark.asyncio @pytest.mark.parametrize( "bad_url,upload_id,upload_status,expected_exception", [ @@ -107,7 +204,6 @@ async def test_patch_multipart_upload( await uploader.patch_multipart_upload(upload_status=upload_status) -@pytest.mark.asyncio @pytest.mark.parametrize( "from_part, end_part, expected_exception", [ @@ -160,10 +256,6 @@ async def test_get_part_upload_urls( break -@pytest.mark.httpx_mock( - assert_all_responses_were_requested=False, can_send_already_matched_responses=True -) -@pytest.mark.asyncio async def test_get_wps_file_info(httpx_mock: HTTPXMock): """Test response handling with some mock - just make sure code paths work""" files = {"file_1": ".tar.gz"} @@ -218,7 +310,6 @@ async def test_get_wps_file_info(httpx_mock: HTTPXMock): response = await work_package_accessor.get_package_files() -@pytest.mark.asyncio async def test_wkvs_calls(httpx_mock: HTTPXMock): """Test handling of responses for WKVS api calls""" wkvs_url = "https://127.0.0.1"