From 9b78811ff7c2326cb8242f07e360e13bdeae55e5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 16 Sep 2024 11:26:26 -0400 Subject: [PATCH 01/50] setup --- .github/workflows/deploy-tests.yml | 7 +- CHANGELOG.md | 5 + src/neuroconv/tools/aws/__init__.py | 3 +- .../tools/aws/_rclone_transfer_batch_job.py | 113 ++++++++++++++ tests/test_minimal/test_tools/aws_tools.py | 145 +++++++++++++++++- 5 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 src/neuroconv/tools/aws/_rclone_transfer_batch_job.py diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 9cbf6ba94..d592e7e59 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,9 +1,10 @@ name: Deploy tests on: - pull_request: - types: [synchronize, opened, reopened, ready_for_review] # defaults + ready_for_review - merge_group: + # TODO: disabled to save resources +# pull_request: +# types: [synchronize, opened, reopened, ready_for_review] # defaults + ready_for_review +# merge_group: workflow_dispatch: concurrency: # Cancel previous workflows on the same pull request diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa9612aa..053791a45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Upcoming +## Features +* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #]() + + + ## v0.6.4 ## Bug Fixes diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index d40ddb2dd..88144fb01 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -1,3 +1,4 @@ from ._submit_aws_batch_job import submit_aws_batch_job +from ._rclone_transfer_batch_job import rclone_transfer_batch_job -__all__ = ["submit_aws_batch_job"] +__all__ = ["submit_aws_batch_job", "rclone_transfer_batch_job"] diff --git a/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py new file mode 100644 index 000000000..65bef7824 --- /dev/null +++ b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py @@ -0,0 +1,113 @@ +"""Collection of helper functions for assessing and performing automated data transfers related to AWS.""" + +import warnings +from typing import Optional + +from pydantic import FilePath, validate_call + +from ._submit_aws_batch_job import submit_aws_batch_job + + +@validate_call +def rclone_transfer_batch_job( + *, + rclone_command: str, + job_name: str, + efs_volume_name: str, + rclone_config_file_path: Optional[FilePath] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gib: int = 4, + minimum_worker_cpus: int = 4, + submission_id: Optional[str] = None, + region: Optional[str] = None, +) -> dict[str, str]: + """ + Submit a job to AWS Batch for processing. + + Requires AWS credentials saved to files in the `~/.aws/` folder or set as environment variables. + + Parameters + ---------- + rclone_command : str + The command to pass directly to Rclone running on the EC2 instance. + E.g.: "rclone copy my_drive:testing_rclone /mnt/efs" + Must move data from or to '/mnt/efs'. + job_name : str + The name of the job to submit. + efs_volume_name : str + The name of an EFS volume to be created and attached to the job. + The path exposed to the container will always be `/mnt/efs`. 
+ rclone_config_file_path : FilePath, optional + The path to the Rclone configuration file to use for the job. + If unspecified, method will attempt to find the file in `~/.rclone` and will raise an error if it cannot. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. + If unspecified, a name starting with 'neuroconv_batch_' will be generated. + minimum_worker_ram_in_gib : int, default: 4 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GiB to allow comfortable buffer space for data chunk iterators. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. + submission_id : str, optional + The unique ID to pair with this job submission when tracking the status via DynamoDB. + Defaults to a random UUID4. + region : str, optional + The AWS region to use for the job. + If not provided, we will attempt to load the region from your local AWS configuration. + If that file is not found on your system, we will default to "us-east-2", the location of the DANDI Archive. + + Returns + ------- + info : dict + A dictionary containing information about this AWS Batch job. + + info["job_submission_info"] is the return value of `boto3.client.submit_job` which contains the job ID. + info["table_submission_info"] is the initial row data inserted into the DynamoDB status tracking table. + """ + docker_image = "ghcr.io/catalystneuro/rclone_with_config:latest" + + if "/mnt/efs" not in rclone_command: + message = ( + f"The Rclone command '{rclone_command}' does not contain a reference to '/mnt/efs'. " + "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space." + ) + warnings.warn(message=message, stacklevel=2) + + rclone_config_file_path = rclone_config_file_path or pathlib.Path.home() / ".rclone" / "rclone.conf" + if not rclone_config_file_path.exists(): + raise FileNotFoundError( + f"Rclone configuration file not found at: {rclone_config_file_path}! " + "Please check that `rclone config` successfully created the file." 
+ ) + with open(file=rclone_config_file_path, mode="r") as io: + rclone_config_file_stream = io.read() + + region = region or "us-east-2" + + info = submit_aws_batch_job( + job_name=job_name, + docker_image=docker_image, + environment_variables={"RCLONE_CONFIG": rclone_config_file_stream, "RCLONE_COMMAND": rclone_command}, + efs_volume_name=efs_volume_name, + status_tracker_table_name=status_tracker_table_name, + compute_environment_name=compute_environment_name, + job_queue_name=job_queue_name, + job_definition_name=job_definition_name, + minimum_worker_ram_in_gib=minimum_worker_ram_in_gib, + minimum_worker_cpus=minimum_worker_cpus, + submission_id=submission_id, + region=region, + ) + + return info diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 2e7598178..efa624e87 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -4,7 +4,7 @@ import boto3 -from neuroconv.tools.aws import submit_aws_batch_job +from neuroconv.tools.aws import rclone_transfer_batch_job, submit_aws_batch_job _RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] @@ -297,3 +297,146 @@ def test_submit_aws_batch_job_with_efs_mount(): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) + + +class TestRcloneTransferBatchJob(TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikeglx' with the following structure: + + testing_rclone_spikeglx + ├── ci_tests + ├────── Noise4Sam_g0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset. + + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. 
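    For reference, a minimal sketch of the config section that the setUp below recreates (the token values
    are placeholders, not real credentials):

        [test_google_drive_remote]
        type = drive
        scope = drive
        token = {"access_token": "...", "token_type": "Bearer", "refresh_token": "...", "expiry": "..."}
        team_drive =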
+ """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_rclone_transfer_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_config_file_path = self.test_config_file_path + + info = rclone_transfer_batch_job( + rclone_command=rclone_command, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(60) + + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." 
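        # Flip the tracker row to an intermediate status so that a failure during the EFS teardown below can be
        # distinguished from a failure of the Batch job itself; it is set to "Test passed." once cleanup finishes.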
+ + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? (since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 380b57a5527aac2826953c54db513c41c3c7828d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 16 Sep 2024 12:08:17 -0400 Subject: [PATCH 02/50] setup --- CHANGELOG.md | 1 + src/neuroconv/tools/aws/__init__.py | 7 +- .../tools/aws/_deploy_neuroconv_batch_job.py | 131 ++++++++++++++++++ .../tools/aws/_rclone_transfer_batch_job.py | 2 +- tests/test_minimal/test_tools/aws_tools.py | 123 +++++++++++++++- 5 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 053791a45..24a4e41f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Features * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #]() +* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #]() diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index 88144fb01..70a42cbf5 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -1,4 +1,9 @@ from ._submit_aws_batch_job import submit_aws_batch_job from ._rclone_transfer_batch_job import rclone_transfer_batch_job +from ._deploy_neuroconv_batch_job import deploy_neuroconv_batch_job -__all__ = ["submit_aws_batch_job", "rclone_transfer_batch_job"] +__all__ = [ + "submit_aws_batch_job", + "rclone_transfer_batch_job", + "deploy_neuroconv_batch_job", +] diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py new file mode 100644 index 000000000..440518f29 --- /dev/null +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -0,0 +1,131 @@ +"""Collection of helper functions for deploying NeuroConv in EC2 Batch jobs on AWS.""" + +import uuid +from typing import Optional + +from pydantic import FilePath, validate_call + +from ._rclone_transfer_batch_job import rclone_transfer_batch_job +from ._submit_aws_batch_job import submit_aws_batch_job + + +@validate_call +def deploy_neuroconv_batch_job( + *, + rclone_command: str, + yaml_specification_file_path: FilePath, + job_name: str, + efs_volume_name: str, + rclone_config_file_path: Optional[FilePath] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gib: int = 16, # Higher than previous recommendations for safer buffering room + minimum_worker_cpus: int = 4, + region: Optional[str] = None, +) -> dict[str, str]: + """ + Submit a job to AWS Batch for processing. 
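    In practice this submits two chained Batch jobs: an Rclone transfer of the source data into an EFS volume,
    followed by a NeuroConv YAML-specification run submitted with a dependency on that transfer job.
    For illustration, a typical call might look like the following (all names and paths are placeholders):

        deploy_neuroconv_batch_job(
            rclone_command="rclone copy my_drive:my_dataset /mnt/efs/source",
            yaml_specification_file_path="/path/to/conversion_specification.yml",
            job_name="my_conversion",
            efs_volume_name="my_conversion_volume",
        )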
+ + Requires AWS credentials saved to files in the `~/.aws/` folder or set as environment variables. + + Parameters + ---------- + rclone_command : str + The command to pass directly to Rclone running on the EC2 instance. + E.g.: "rclone copy my_drive:testing_rclone /mnt/efs/source" + Must move data from or to '/mnt/efs/source'. + yaml_specification_file_path : FilePath + The path to the YAML file containing the NeuroConv specification. + job_name : str + The name of the job to submit. + efs_volume_name : str + The name of an EFS volume to be created and attached to the job. + The path exposed to the container will always be `/mnt/efs`. + rclone_config_file_path : FilePath, optional + The path to the Rclone configuration file to use for the job. + If unspecified, method will attempt to find the file in `~/.rclone` and will raise an error if it cannot. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. + If unspecified, a name starting with 'neuroconv_batch_' will be generated. + minimum_worker_ram_in_gib : int, default: 4 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GiB to allow comfortable buffer space for data chunk iterators. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. + region : str, optional + The AWS region to use for the job. + If not provided, we will attempt to load the region from your local AWS configuration. + If that file is not found on your system, we will default to "us-east-2", the location of the DANDI Archive. + + Returns + ------- + info : dict + A dictionary containing information about this AWS Batch job. + + info["rclone_job_submission_info"] is the return value of `neuroconv.tools.aws.rclone_transfer_batch_job`. + info["neuroconv_job_submission_info"] is the return value of `neuroconv.tools.aws.submit_job`. + """ + efs_volume_name = efs_volume_name or f"neuroconv_batch_efs_volume_{uuid.uuid4().hex[:4]}" + region = region or "us-east-2" + + if "/mnt/efs/source" not in rclone_command: + message = ( + f"The Rclone command '{rclone_command}' does not contain a reference to '/mnt/efs/source'. " + "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space. " + "The subfolder 'source' is also required to eliminate ambiguity in the transfer process." 
+ ) + raise ValueError(message=message) + + rclone_job_name = f"{job_name}_rclone_transfer" + rclone_job_submission_info = rclone_transfer_batch_job( + rclone_command=rclone_command, + job_name=rclone_job_name, + efs_volume_name=efs_volume_name, + rclone_config_file_path=rclone_config_file_path, + region=region, + ) + rclone_job_id = rclone_job_submission_info["job_submission_info"]["jobId"] + + docker_image = "ghcr.io/catalystneuro/neuroconv_latest_yaml_variable:latest" + + with open(file=yaml_specification_file_path, mode="r") as io: + yaml_specification_file_stream = io.read() + + neuroconv_job_name = f"{job_name}_neuroconv_deployment" + neuroconv_job_submission_info = submit_aws_batch_job( + job_name=neuroconv_job_name, + docker_image=docker_image, + environment_variables={ + "NEUROCONV_YAML": yaml_specification_file_stream, + "NEUROCONV_DATA_PATH": "/mnt/efs/source", + "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", + }, + efs_volume_name=efs_volume_name, + job_dependencies=[rclone_job_id], + status_tracker_table_name=status_tracker_table_name, + compute_environment_name=compute_environment_name, + job_queue_name=job_queue_name, + job_definition_name=job_definition_name, + minimum_worker_ram_in_gib=minimum_worker_ram_in_gib, + minimum_worker_cpus=minimum_worker_cpus, + region=region, + ) + + # TODO: spinup third dependent job to clean up EFS volume after neuroconv job is complete? + + info = { + "rclone_job_submission_info": rclone_job_submission_info, + "neuroconv_job_submission_info": neuroconv_job_submission_info, + } + + return info diff --git a/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py index 65bef7824..989c00d8c 100644 --- a/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py +++ b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py @@ -1,4 +1,4 @@ -"""Collection of helper functions for assessing and performing automated data transfers related to AWS.""" +"""Collection of helper functions for performing Rclone data transfers in EC2 Batch jobs on AWS.""" import warnings from typing import Optional diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index efa624e87..c1dd3c1e4 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,10 +1,15 @@ import datetime import os +import pathlib import time import boto3 -from neuroconv.tools.aws import rclone_transfer_batch_job, submit_aws_batch_job +from neuroconv.tools.aws import ( + deploy_neuroconv_batch_job, + rclone_transfer_batch_job, + submit_aws_batch_job, +) _RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] @@ -440,3 +445,119 @@ def test_rclone_transfer_batch_job(self): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) + + def test_deploy_neuroconv_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + 
aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + + testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent + yaml_specification_file_path = ( + testing_base_folder_path + / "test_on_data" + / "test_yaml" + / "conversion_specifications" + / "GIN_conversion_specification.yml" + ) + + rclone_config_file_path = self.test_config_file_path + + job_name = "test_deploy_neuroconv_batch_job" + all_info = deploy_neuroconv_batch_job( + rclone_command=rclone_command, + yaml_specification_file_path=yaml_specification_file_path, + job_name=job_name, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(120) + + info = all_info["neuroconv_job_submission_info"] + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." + + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? 
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 3f7e2e375e5a9e61f8dbe29d705c2529cb6c153e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 17 Sep 2024 11:51:46 -0400 Subject: [PATCH 03/50] enhance test docs --- tests/test_minimal/test_tools/aws_tools.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index efa624e87..7e861be2e 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -302,13 +302,17 @@ def test_submit_aws_batch_job_with_efs_mount(): class TestRcloneTransferBatchJob(TestCase): """ To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive - called 'testing_rclone_spikeglx' with the following structure: + called 'testing_rclone_spikegl_and_phy' with the following structure: - testing_rclone_spikeglx + testing_rclone_spikeglx_and_phy ├── ci_tests + ├──── spikeglx ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 - Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset. + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. The developer can easily find the location of the config file on their system using `rclone config file`. 
From b49a70232e43ca0576bda99f2cd1abd1674441d4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:32:32 -0400 Subject: [PATCH 04/50] fix import --- tests/test_minimal/test_tools/aws_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 7e861be2e..0193f85e3 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,6 +1,7 @@ import datetime import os import time +import unittest import boto3 @@ -299,7 +300,7 @@ def test_submit_aws_batch_job_with_efs_mount(): ) -class TestRcloneTransferBatchJob(TestCase): +class TestRcloneTransferBatchJob(unittest.TestCase): """ To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive called 'testing_rclone_spikegl_and_phy' with the following structure: From 68c8e945173c013939b25f3927ebcf7ac108efb4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:39:22 -0400 Subject: [PATCH 05/50] split tests --- .github/workflows/aws_tests.yml | 4 + tests/test_minimal/test_tools/aws_tools.py | 150 +---------------- .../test_on_data/test_yaml/yaml_aws_tools.py | 158 ++++++++++++++++++ 3 files changed, 163 insertions(+), 149 deletions(-) create mode 100644 tests/test_on_data/test_yaml/yaml_aws_tools.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 0ecbb4d7b..66ca08c9f 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -39,5 +39,9 @@ jobs: - name: Install full requirements run: pip install .[aws,test] + # TODO: when passing, reverse order of tests from minimal to data + - name: Run subset of tests that use S3 live services + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools.py + - name: Run subset of tests that use S3 live services run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 0193f85e3..2e7598178 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,11 +1,10 @@ import datetime import os import time -import unittest import boto3 -from neuroconv.tools.aws import rclone_transfer_batch_job, submit_aws_batch_job +from neuroconv.tools.aws import submit_aws_batch_job _RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] @@ -298,150 +297,3 @@ def test_submit_aws_batch_job_with_efs_mount(): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) - - -class TestRcloneTransferBatchJob(unittest.TestCase): - """ - To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive - called 'testing_rclone_spikegl_and_phy' with the following structure: - - testing_rclone_spikeglx_and_phy - ├── ci_tests - ├──── spikeglx - ├────── Noise4Sam_g0 - ├──── phy - ├────── phy_example_0 - - Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the - 'phy' folder of the same dataset. - - Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. - The developer can easily find the location of the config file on their system using `rclone config file`. 
- """ - - test_folder = OUTPUT_PATH / "aws_rclone_tests" - test_config_file_path = test_folder / "rclone.conf" - - def setUp(self): - self.test_folder.mkdir(exist_ok=True) - - # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) - token_dictionary = dict( - access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], - token_type="Bearer", - refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], - expiry=os.environ["RCLONE_EXPIRY_TOKEN"], - ) - token_string = str(token_dictionary).replace("'", '"').replace(" ", "") - rclone_config_contents = [ - "[test_google_drive_remote]\n", - "type = drive\n", - "scope = drive\n", - f"token = {token_string}\n", - "team_drive = \n", - "\n", - ] - with open(file=self.test_config_file_path, mode="w") as io: - io.writelines(rclone_config_contents) - - def test_rclone_transfer_batch_job(self): - region = "us-east-2" - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) - aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) - - dynamodb_resource = boto3.resource( - service_name="dynamodb", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - batch_client = boto3.client( - service_name="batch", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - efs_client = boto3.client( - service_name="efs", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" - rclone_config_file_path = self.test_config_file_path - - info = rclone_transfer_batch_job( - rclone_command=rclone_command, - rclone_config_file_path=rclone_config_file_path, - ) - - # Wait for AWS to process the job - time.sleep(60) - - job_id = info["job_submission_info"]["jobId"] - job = None - max_retries = 10 - retry = 0 - while retry < max_retries: - job_description_response = batch_client.describe_jobs(jobs=[job_id]) - assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - jobs = job_description_response["jobs"] - assert len(jobs) == 1 - - job = jobs[0] - - if job["status"] in _RETRY_STATES: - retry += 1 - time.sleep(60) - else: - break - - # Check EFS specific details - efs_volumes = efs_client.describe_file_systems() - matching_efs_volumes = [ - file_system - for file_system in efs_volumes["FileSystems"] - for tag in file_system["Tags"] - if tag["Key"] == "Name" and tag["Value"] == efs_volume_name - ] - assert len(matching_efs_volumes) == 1 - efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] - - # Check normal job completion - assert job["jobName"] == job_name - assert "neuroconv_batch_queue" in job["jobQueue"] - assert "fs-" in job["jobDefinition"] - assert job["status"] == "SUCCEEDED" - - status_tracker_table_name = "neuroconv_batch_status_tracker" - table = dynamodb_resource.Table(name=status_tracker_table_name) - table_submission_id = info["table_submission_info"]["id"] - - table_item_response = table.get_item(Key={"id": table_submission_id}) - assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - table_item = table_item_response["Item"] - assert table_item["job_name"] == job_name - assert table_item["job_id"] == job_id - assert table_item["status"] == "Job submitted..." 
- - table.update_item( - Key={"id": table_submission_id}, - AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, - ) - - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? (since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) - - table.update_item( - Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} - ) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py new file mode 100644 index 000000000..d3ad42bd1 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -0,0 +1,158 @@ +import os +import time +import unittest + +import boto3 + +from neuroconv.tools.aws import rclone_transfer_batch_job + +from ..setup_paths import OUTPUT_PATH + +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + + +class TestRcloneTransferBatchJob(unittest.TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikegl_and_phy' with the following structure: + + testing_rclone_spikeglx_and_phy + ├── ci_tests + ├──── spikeglx + ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. + + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. 
+ """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_rclone_transfer_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_config_file_path = self.test_config_file_path + + info = rclone_transfer_batch_job( + rclone_command=rclone_command, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(60) + + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." 
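        # At this point the tracker row still holds the initial "Job submitted..." entry written at submission
        # time; the test itself records the pass and cleanup statuses below.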
+ + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? (since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 830ca3dfbfe580b9ea1e6dcd32b9cc7c228b38f4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:42:54 -0400 Subject: [PATCH 06/50] pass secrets --- .github/workflows/aws_tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 66ca08c9f..a66addf3a 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -11,6 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: From ca48752ba9897bd635fa84c461af8f815702fee6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:44:25 +0000 Subject: [PATCH 07/50] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_minimal/test_tools/aws_tools.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index c7fd48c42..159b7b6a3 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,13 +1,10 @@ import datetime import os -import pathlib import time import boto3 from neuroconv.tools.aws import ( - deploy_neuroconv_batch_job, - rclone_transfer_batch_job, submit_aws_batch_job, ) From 03e7a98571b3d5f5606a4cac961fb31aa41e2de2 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:45:07 -0400 Subject: [PATCH 08/50] restore --- .../test_on_data/test_yaml/yaml_aws_tools.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py index d3ad42bd1..3cc7477b8 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -156,3 +156,119 @@ def test_rclone_transfer_batch_job(self): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) + + def test_deploy_neuroconv_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + 
aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + + testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent + yaml_specification_file_path = ( + testing_base_folder_path + / "test_on_data" + / "test_yaml" + / "conversion_specifications" + / "GIN_conversion_specification.yml" + ) + + rclone_config_file_path = self.test_config_file_path + + job_name = "test_deploy_neuroconv_batch_job" + all_info = deploy_neuroconv_batch_job( + rclone_command=rclone_command, + yaml_specification_file_path=yaml_specification_file_path, + job_name=job_name, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(120) + + info = all_info["neuroconv_job_submission_info"] + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." + + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? 
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 2f5c6c20d1ffd2f0521ede39276ba0a8d23d9f17 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 20:30:10 -0400 Subject: [PATCH 09/50] fix test --- tests/test_on_data/test_yaml/yaml_aws_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py index d3ad42bd1..200368af8 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -82,8 +82,14 @@ def test_rclone_transfer_batch_job(self): rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" rclone_config_file_path = self.test_config_file_path + now = datetime.datetime.now().isoformat() + job_name = f"test_rclone_transfer_batch_job_{now}" + efs_volume_name = "test_rclone_transfer_batch_efs" + info = rclone_transfer_batch_job( rclone_command=rclone_command, + job_name=job_name, + efs_volume_name=efs_volume_name, rclone_config_file_path=rclone_config_file_path, ) From eb631f9d2cf63ed57e3ce95057fc13efb09c1145 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 20:33:03 -0400 Subject: [PATCH 10/50] fix import --- tests/test_on_data/test_yaml/yaml_aws_tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py index 200368af8..8cd001689 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -1,3 +1,4 @@ +import datetime import os import time import unittest From f1cfbd080de0931bcac8e2af1906c271d559d02b Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:28:00 -0400 Subject: [PATCH 11/50] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 9e3ba0488..d2d0c1f6f 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -530,7 +530,9 @@ def _generate_job_definition_name( """ docker_tags = docker_image.split(":")[1:] docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - parsed_docker_image_name = docker_image.replace(":", "-") # AWS Batch does not allow colons in job definition names + + # AWS Batch does not allow colons or slashes in job definition names + parsed_docker_image_name = docker_image.replace(":", "-").replace("/", "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" @@ -641,7 +643,7 @@ def _ensure_job_definition_exists_and_get_arn( ] mountPoints = [{"containerPath": "/mnt/efs/", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] - # batch_client.register_job_definition() is not synchronous and so we need to wait a bit afterwards + # batch_client.register_job_definition is not synchronous and so we need to wait a bit afterwards batch_client.register_job_definition( 
jobDefinitionName=job_definition_name, type="container", From 08ba1e140f12980fb3b6a2d76c2bcf7b0eb3e6e5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:59:09 -0400 Subject: [PATCH 12/50] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index d2d0c1f6f..2d9a683a5 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -531,8 +531,9 @@ def _generate_job_definition_name( docker_tags = docker_image.split(":")[1:] docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - # AWS Batch does not allow colons or slashes in job definition names - parsed_docker_image_name = docker_image.replace(":", "-").replace("/", "-") + # AWS Batch does not allow colons, slashes, or periods in job definition names + for disallowed_character in [":", "/", "."]: + docker_image = docker_image.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From bd7cb1e7f393399679ed748c94da1e3583af6c85 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:13:44 -0400 Subject: [PATCH 13/50] fix copilot assigned name --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 2d9a683a5..07dce9070 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -533,7 +533,7 @@ def _generate_job_definition_name( # AWS Batch does not allow colons, slashes, or periods in job definition names for disallowed_character in [":", "/", "."]: - docker_image = docker_image.replace(disallowed_character, "-") + parsed_docker_image_name = docker_image.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From 6ddb351dae72312ff7231409ed8ae239fae2eb8c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:18:27 -0400 Subject: [PATCH 14/50] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 07dce9070..55130b543 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -532,8 +532,9 @@ def _generate_job_definition_name( docker_tag = docker_tags[0] if len(docker_tags) > 1 else None # AWS Batch does not allow colons, slashes, or periods in job definition names - for disallowed_character in [":", "/", "."]: - parsed_docker_image_name = docker_image.replace(disallowed_character, "-") + parsed_docker_image_name = str(docker_image) + for disallowed_character in [":", r"/", "."]: + parsed_docker_image_name = parsed_docker_image_name.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From beeabffac1fc564aab403877c90f2c21a6cc0fab Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:25:17 -0400 Subject: [PATCH 15/50] fix environment variable passing --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 4 +++- 1 
file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 55130b543..caaced56f 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -171,7 +171,9 @@ def submit_aws_batch_job( job_dependencies = job_dependencies or [] container_overrides = dict() if environment_variables is not None: - container_overrides["environment"] = [{key: value} for key, value in environment_variables.items()] + container_overrides["environment"] = [ + {"name": key, "value": value} for key, value in environment_variables.items() + ] if commands is not None: container_overrides["command"] = commands From daeabc9a9864fd05b472fc759138787754b81748 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:30:47 -0400 Subject: [PATCH 16/50] fix job name in test --- .github/workflows/aws_tests.yml | 4 ++-- .../test_tools/{aws_tools.py => aws_tools_tests.py} | 0 .../test_yaml/{yaml_aws_tools.py => yaml_aws_tools_tests.py} | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/test_minimal/test_tools/{aws_tools.py => aws_tools_tests.py} (100%) rename tests/test_on_data/test_yaml/{yaml_aws_tools.py => yaml_aws_tools_tests.py} (98%) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index a66addf3a..331486ec1 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -44,7 +44,7 @@ jobs: # TODO: when passing, reverse order of tests from minimal to data - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools.py + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py + run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools_tests.py similarity index 100% rename from tests/test_minimal/test_tools/aws_tools.py rename to tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py similarity index 98% rename from tests/test_on_data/test_yaml/yaml_aws_tools.py rename to tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8cd001689..8a0e958b3 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -83,8 +83,8 @@ def test_rclone_transfer_batch_job(self): rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" rclone_config_file_path = self.test_config_file_path - now = datetime.datetime.now().isoformat() - job_name = f"test_rclone_transfer_batch_job_{now}" + today = datetime.datetime.now().date().isoformat() + job_name = f"test_rclone_transfer_batch_job_{today}" efs_volume_name = "test_rclone_transfer_batch_efs" info = rclone_transfer_batch_job( From 5a9eaff514d07ec239f66c11e240b3f0a9b0b601 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 10:45:19 -0400 Subject: [PATCH 17/50] try Cody creds --- .github/workflows/aws_tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 331486ec1..e87a21aaf 100644 --- a/.github/workflows/aws_tests.yml +++ 
b/.github/workflows/aws_tests.yml @@ -11,9 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} - RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} - RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN_CODY }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN_CODY }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN_CODY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: From d20ca09cf5574af39228b93aa0db9c3d25b41af1 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:03:45 -0400 Subject: [PATCH 18/50] some adjustments --- .../test_yaml/yaml_aws_tools_tests.py | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8a0e958b3..d871237f5 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -33,6 +33,9 @@ class TestRcloneTransferBatchJob(unittest.TestCase): test_folder = OUTPUT_PATH / "aws_rclone_tests" test_config_file_path = test_folder / "rclone.conf" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = "us-east-2" def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -56,10 +59,29 @@ def setUp(self): with open(file=self.test_config_file_path, mode="w") as io: io.writelines(rclone_config_contents) + self.efs_client = boto3.client( + service_name="efs", + region_name=self.region, + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + ) + + def tearDown(self): + efs_client = self.efs_client + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? 
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + def test_rclone_transfer_batch_job(self): - region = "us-east-2" - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) - aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = self.region + aws_access_key_id = self.aws_access_key_id + aws_secret_access_key = self.aws_secret_access_key dynamodb_resource = boto3.resource( service_name="dynamodb", @@ -73,14 +95,9 @@ def test_rclone_transfer_batch_job(self): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) - efs_client = boto3.client( - service_name="efs", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) + efs_client = self.efs_client - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs --config ./rclone.conf" rclone_config_file_path = self.test_config_file_path today = datetime.datetime.now().date().isoformat() @@ -151,15 +168,6 @@ def test_rclone_transfer_batch_job(self): AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, ) - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? (since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) - table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) From 8d0bb4cb7d3685b5ac00e3941ac0478cffdc930b Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:21:59 -0400 Subject: [PATCH 19/50] fix path and add verbosity --- .github/workflows/aws_tests.yml | 2 +- tests/docker_rclone_with_config_cli.py | 3 ++- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index e87a21aaf..189edf502 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -39,7 +39,7 @@ jobs: git config --global user.email "CI@example.com" git config --global user.name "CI Almighty" - - name: Install full requirements + - name: Install AWS requirements run: pip install .[aws,test] # TODO: when passing, reverse order of tests from minimal to data diff --git a/tests/docker_rclone_with_config_cli.py b/tests/docker_rclone_with_config_cli.py index ed472bdf2..9b1e265dd 100644 --- a/tests/docker_rclone_with_config_cli.py +++ b/tests/docker_rclone_with_config_cli.py @@ -61,7 +61,8 @@ def test_direct_usage_of_rclone_with_config(self): os.environ["RCLONE_CONFIG"] = rclone_config_file_stream os.environ["RCLONE_COMMAND"] = ( - f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} --verbose --progress --config ./rclone.conf" + f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} " + "--verbose --progress --config ./rclone.conf" ) 
command = ( diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index d871237f5..9fd5aee45 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -97,7 +97,10 @@ def test_rclone_transfer_batch_job(self): ) efs_client = self.efs_client - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs --config ./rclone.conf" + rclone_command = ( + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy /mnt/efs " + "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? + ) rclone_config_file_path = self.test_config_file_path today = datetime.datetime.now().date().isoformat() From a388bd1b8e5836943a1d02e44beec69a703511d5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:28:58 -0400 Subject: [PATCH 20/50] fix teardown --- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 9fd5aee45..8812feb78 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -71,7 +71,7 @@ def tearDown(self): # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume # TODO: cleanup job definitions? (since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) for mount_target in mount_targets["MountTargets"]: efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) @@ -146,7 +146,7 @@ def test_rclone_transfer_batch_job(self): ] assert len(matching_efs_volumes) == 1 efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] + self.efs_id = efs_volume["FileSystemId"] # Check normal job completion assert job["jobName"] == job_name From 8fb3811885d08c8dc959c963264232521d9891ef Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:38:27 -0400 Subject: [PATCH 21/50] fix teardown --- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8812feb78..7ea49e644 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -76,7 +76,7 @@ def tearDown(self): efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) + efs_client.delete_file_system(FileSystemId=self.efs_id) def test_rclone_transfer_batch_job(self): region = self.region From 41f315522731ac7380b34917ec8068840c55d698 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:43:43 -0400 Subject: [PATCH 22/50] remove creation date on job definition --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index caaced56f..748f25399 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -296,7 +296,7 @@ def 
_ensure_compute_environment_exists( The AWS Batch client to use for the job. max_retries : int, default: 12 If the compute environment does not already exist, then this is the maximum number of times to synchronously - check for its successful creation before erroring. + check for its successful creation before raising an error. This is essential for a clean setup of the entire pipeline, or else later steps might error because they tried to launch before the compute environment was ready. """ @@ -546,7 +546,6 @@ def _generate_job_definition_name( job_definition_name += f"_{efs_id}" if docker_tag is None or docker_tag == "latest": date = datetime.now().strftime("%Y-%m-%d") - job_definition_name += f"_created-on-{date}" return job_definition_name From ff030dff55c6016e468b5ab8098a295fb896aefd Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:50:29 -0400 Subject: [PATCH 23/50] try normal credentials --- .github/workflows/aws_tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 189edf502..97a97c552 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -11,9 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN_CODY }} - RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN_CODY }} - RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN_CODY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: @@ -43,8 +43,8 @@ jobs: run: pip install .[aws,test] # TODO: when passing, reverse order of tests from minimal to data - - name: Run subset of tests that use S3 live services + - name: Run RClone on AWS tests run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - - name: Run subset of tests that use S3 live services + - name: Run generic AWS tests run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py From cdb7f5f355e11da6a8082571dbc2f68dfba63b43 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:11:08 -0400 Subject: [PATCH 24/50] split workflows --- .github/workflows/generic_aws_tests.yml | 42 +++++++++++++++++++ .../{aws_tests.yml => rclone_aws_tests.yml} | 6 +-- 2 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/generic_aws_tests.yml rename .github/workflows/{aws_tests.yml => rclone_aws_tests.yml} (87%) diff --git a/.github/workflows/generic_aws_tests.yml b/.github/workflows/generic_aws_tests.yml new file mode 100644 index 000000000..20886a178 --- /dev/null +++ b/.github/workflows/generic_aws_tests.yml @@ -0,0 +1,42 @@ +name: AWS Tests +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon on Monday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + 
python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended way + git config --global user.email "CI@example.com" + git config --global user.name "CI Almighty" + + - name: Install AWS requirements + run: pip install .[aws,test] + + - name: Run generic AWS tests + run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/rclone_aws_tests.yml similarity index 87% rename from .github/workflows/aws_tests.yml rename to .github/workflows/rclone_aws_tests.yml index 97a97c552..5003ec710 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -1,4 +1,4 @@ -name: AWS Tests +name: Rclone AWS Tests on: schedule: - cron: "0 16 * * 1" # Weekly at noon on Monday @@ -42,9 +42,5 @@ jobs: - name: Install AWS requirements run: pip install .[aws,test] - # TODO: when passing, reverse order of tests from minimal to data - name: Run RClone on AWS tests run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - - - name: Run generic AWS tests - run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py From 750034ddb6df78894bb821633a057ed35dc7207e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:11:38 -0400 Subject: [PATCH 25/50] add initial push trigger --- .github/workflows/rclone_aws_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rclone_aws_tests.yml b/.github/workflows/rclone_aws_tests.yml index 5003ec710..fa704a9ab 100644 --- a/.github/workflows/rclone_aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -3,6 +3,7 @@ on: schedule: - cron: "0 16 * * 1" # Weekly at noon on Monday workflow_dispatch: + push: concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 860ad7ff654bf2d3ae662234e3297aed0098c5bb Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:24:36 -0400 Subject: [PATCH 26/50] undo push --- .github/workflows/rclone_aws_tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/rclone_aws_tests.yml b/.github/workflows/rclone_aws_tests.yml index fa704a9ab..bcfbeb5c7 100644 --- a/.github/workflows/rclone_aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -1,9 +1,8 @@ name: Rclone AWS Tests on: schedule: - - cron: "0 16 * * 1" # Weekly at noon on Monday + - cron: "0 16 * * 2" # Weekly at noon on Tuesday workflow_dispatch: - push: concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 7af57d40cf8977654c92236c0159ed154cd12a16 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:26:07 -0400 Subject: [PATCH 27/50] restore normal trigger --- .github/workflows/deploy-tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 85d7546d8..606cd4910 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -2,11 +2,11 @@ name: Deploy tests on: # TODO: disabled to save resources -# pull_request: -# types: [synchronize, opened, reopened, ready_for_review] -# # Synchronize, 
open and reopened are the default types for pull request -# # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked -# merge_group: + pull_request: + types: [synchronize, opened, reopened, ready_for_review] + # Synchronize, open and reopened are the default types for pull request + # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked + merge_group: workflow_dispatch: concurrency: From c58649aac9f6027f993fbce10c26b51f30bb5ab8 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:26:40 -0400 Subject: [PATCH 28/50] remove comment --- .github/workflows/deploy-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 606cd4910..4f67d15de 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,7 +1,6 @@ name: Deploy tests on: - # TODO: disabled to save resources pull_request: types: [synchronize, opened, reopened, ready_for_review] # Synchronize, open and reopened are the default types for pull request From 6bac79f917ecbbda3ece92550659b2a497643119 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:45:24 -0400 Subject: [PATCH 29/50] split workflows and tests; initial push trigger --- .../neuroconv_deployment_aws_tests.yml | 47 +++++ .../test_tools/aws_tools_tests.py | 4 +- .../neuroconv_deployment_aws_tools_tests.py | 185 ++++++++++++++++++ .../test_yaml/yaml_aws_tools_tests.py | 116 ----------- 4 files changed, 233 insertions(+), 119 deletions(-) create mode 100644 .github/workflows/neuroconv_deployment_aws_tests.yml create mode 100644 tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml new file mode 100644 index 000000000..b9e12b0d8 --- /dev/null +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -0,0 +1,47 @@ +name: NeuroConv Deployment AWS Tests +on: + schedule: + - cron: "0 16 * * 2" # Weekly at noon on Tuesday + workflow_dispatch: + push: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} + DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended way + git config --global user.email "CI@example.com" + git config --global user.name "CI Almighty" + + - name: Install AWS requirements + run: pip install .[aws,test] + + - name: Run RClone on AWS tests + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py diff --git a/tests/test_minimal/test_tools/aws_tools_tests.py 
b/tests/test_minimal/test_tools/aws_tools_tests.py
index 159b7b6a3..2e7598178 100644
--- a/tests/test_minimal/test_tools/aws_tools_tests.py
+++ b/tests/test_minimal/test_tools/aws_tools_tests.py
@@ -4,9 +4,7 @@

 import boto3

-from neuroconv.tools.aws import (
-    submit_aws_batch_job,
-)
+from neuroconv.tools.aws import submit_aws_batch_job

 _RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"]

diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py
new file mode 100644
index 000000000..2784c5f0f
--- /dev/null
+++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py
@@ -0,0 +1,185 @@
+import os
+import time
+import unittest
+
+import boto3
+
+from neuroconv.tools.aws import deploy_neuroconv_batch_job
+
+from ..setup_paths import OUTPUT_PATH
+
+_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"]
+
+
+class TestNeuroConvDeploymentBatchJob(unittest.TestCase):
+    """
+    To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive
+    called 'testing_rclone_spikeglx_and_phy' with the following structure:
+
+    testing_rclone_spikeglx_and_phy
+    ├── ci_tests
+    ├──── spikeglx
+    ├────── Noise4Sam_g0
+    ├──── phy
+    ├────── phy_example_0
+
+    Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the
+    'phy' folder of the same dataset.
+
+    Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file.
+    The developer can easily find the location of the config file on their system using `rclone config file`.
+    """
+
+    test_folder = OUTPUT_PATH / "aws_rclone_tests"
+    test_config_file_path = test_folder / "rclone.conf"
+    aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None)
+    aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
+    region = "us-east-2"
+
+    def setUp(self):
+        self.test_folder.mkdir(exist_ok=True)
+
+        # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command)
+        token_dictionary = dict(
+            access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"],
+            token_type="Bearer",
+            refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"],
+            expiry=os.environ["RCLONE_EXPIRY_TOKEN"],
+        )
+        token_string = str(token_dictionary).replace("'", '"').replace(" ", "")
+        rclone_config_contents = [
+            "[test_google_drive_remote]\n",
+            "type = drive\n",
+            "scope = drive\n",
+            f"token = {token_string}\n",
+            "team_drive = \n",
+            "\n",
+        ]
+        with open(file=self.test_config_file_path, mode="w") as io:
+            io.writelines(rclone_config_contents)
+
+        self.efs_client = boto3.client(
+            service_name="efs",
+            region_name=self.region,
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+        )
+
+    def tearDown(self):
+        efs_client = self.efs_client
+
+        # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume
+        # TODO: cleanup job definitions?
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=self.efs_id) + + def test_deploy_neuroconv_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs" + + testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent + yaml_specification_file_path = ( + testing_base_folder_path + / "test_on_data" + / "test_yaml" + / "conversion_specifications" + / "GIN_conversion_specification.yml" + ) + + rclone_config_file_path = self.test_config_file_path + + job_name = "test_deploy_neuroconv_batch_job" + all_info = deploy_neuroconv_batch_job( + rclone_command=rclone_command, + yaml_specification_file_path=yaml_specification_file_path, + job_name=job_name, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(120) + + info = all_info["neuroconv_job_submission_info"] + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." 
+ + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 53418a452..7ea49e644 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -174,119 +174,3 @@ def test_rclone_transfer_batch_job(self): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) - - def test_deploy_neuroconv_batch_job(self): - region = "us-east-2" - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) - aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) - - dynamodb_resource = boto3.resource( - service_name="dynamodb", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - batch_client = boto3.client( - service_name="batch", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - efs_client = boto3.client( - service_name="efs", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" - - testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent - yaml_specification_file_path = ( - testing_base_folder_path - / "test_on_data" - / "test_yaml" - / "conversion_specifications" - / "GIN_conversion_specification.yml" - ) - - rclone_config_file_path = self.test_config_file_path - - job_name = "test_deploy_neuroconv_batch_job" - all_info = deploy_neuroconv_batch_job( - rclone_command=rclone_command, - yaml_specification_file_path=yaml_specification_file_path, - job_name=job_name, - rclone_config_file_path=rclone_config_file_path, - ) - - # Wait for AWS to process the job - time.sleep(120) - - info = all_info["neuroconv_job_submission_info"] - job_id = info["job_submission_info"]["jobId"] - job = None - max_retries = 10 - retry = 0 - while retry < max_retries: - job_description_response = batch_client.describe_jobs(jobs=[job_id]) - assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - jobs = job_description_response["jobs"] - assert len(jobs) == 1 - - job = jobs[0] - - if job["status"] in _RETRY_STATES: - retry += 1 - time.sleep(60) - else: - break - - # Check EFS specific details - efs_volumes = efs_client.describe_file_systems() - matching_efs_volumes = [ - file_system - for file_system in efs_volumes["FileSystems"] - for tag in file_system["Tags"] - if tag["Key"] == "Name" and tag["Value"] == efs_volume_name - ] - assert len(matching_efs_volumes) == 1 - efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] - - # Check normal job completion - assert job["jobName"] == job_name - assert "neuroconv_batch_queue" in job["jobQueue"] - assert "fs-" in job["jobDefinition"] - assert job["status"] == "SUCCEEDED" - - status_tracker_table_name = "neuroconv_batch_status_tracker" - table = dynamodb_resource.Table(name=status_tracker_table_name) - table_submission_id = info["table_submission_info"]["id"] - - table_item_response = table.get_item(Key={"id": table_submission_id}) - assert 
table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - table_item = table_item_response["Item"] - assert table_item["job_name"] == job_name - assert table_item["job_id"] == job_id - assert table_item["status"] == "Job submitted..." - - table.update_item( - Key={"id": table_submission_id}, - AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, - ) - - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? (since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) - - table.update_item( - Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} - ) From 867cac4d199959b9613d189a534eb51cd8ceb633 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:46:47 -0400 Subject: [PATCH 30/50] fix workflow; suppress normal tests --- .github/workflows/deploy-tests.yml | 10 +++++----- .github/workflows/neuroconv_deployment_aws_tests.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 4f67d15de..5c44d1544 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,11 +1,11 @@ name: Deploy tests on: - pull_request: - types: [synchronize, opened, reopened, ready_for_review] - # Synchronize, open and reopened are the default types for pull request - # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked - merge_group: +# pull_request: +# types: [synchronize, opened, reopened, ready_for_review] +# # Synchronize, open and reopened are the default types for pull request +# # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked +# merge_group: workflow_dispatch: concurrency: diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml index b9e12b0d8..002d647f6 100644 --- a/.github/workflows/neuroconv_deployment_aws_tests.yml +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -44,4 +44,4 @@ jobs: run: pip install .[aws,test] - name: Run RClone on AWS tests - run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py + run: pytest -rsx -n auto tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py From e40f8ef58f49656007acdcdcf4736d2163a3fe9b Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:49:28 -0400 Subject: [PATCH 31/50] debug --- .github/workflows/neuroconv_deployment_aws_tests.yml | 2 +- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml index 002d647f6..5da63894d 100644 --- a/.github/workflows/neuroconv_deployment_aws_tests.yml +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -1,7 +1,7 @@ name: NeuroConv Deployment AWS Tests on: schedule: - - cron: "0 16 * * 2" # Weekly at noon on Tuesday + - cron: "0 16 * * 3" # Weekly at noon on Wednesday workflow_dispatch: push: diff --git 
a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 2784c5f0f..5b27f54d9 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -1,4 +1,5 @@ import os +import pathlib import time import unittest @@ -155,7 +156,7 @@ def test_deploy_neuroconv_batch_job(self): ] assert len(matching_efs_volumes) == 1 efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] + self.efs_id = efs_volume["FileSystemId"] # Check normal job completion assert job["jobName"] == job_name From afeec8dbecf09597d7f59ebc95ff37d2af958624 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:52:08 -0400 Subject: [PATCH 32/50] debug --- .github/workflows/neuroconv_deployment_aws_tests.yml | 2 +- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml index 5da63894d..e7f7eb12a 100644 --- a/.github/workflows/neuroconv_deployment_aws_tests.yml +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -43,5 +43,5 @@ jobs: - name: Install AWS requirements run: pip install .[aws,test] - - name: Run RClone on AWS tests + - name: Run NeuroConv Deployment on AWS tests run: pytest -rsx -n auto tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 5b27f54d9..2b62b57c6 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -116,10 +116,12 @@ def test_deploy_neuroconv_batch_job(self): rclone_config_file_path = self.test_config_file_path job_name = "test_deploy_neuroconv_batch_job" + efs_volume_name = "test_deploy_neuroconv_batch_job" all_info = deploy_neuroconv_batch_job( rclone_command=rclone_command, yaml_specification_file_path=yaml_specification_file_path, job_name=job_name, + efs_volume_name=efs_volume_name, rclone_config_file_path=rclone_config_file_path, ) From f688e993e51e366e8727e7a1bb5adb6f29c7c8d8 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:57:48 -0400 Subject: [PATCH 33/50] debug --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 2 +- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index 440518f29..cc064aede 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -84,7 +84,7 @@ def deploy_neuroconv_batch_job( "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space. " "The subfolder 'source' is also required to eliminate ambiguity in the transfer process." 
) - raise ValueError(message=message) + raise ValueError(message) rclone_job_name = f"{job_name}_rclone_transfer" rclone_job_submission_info = rclone_transfer_batch_job( diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 2b62b57c6..db098fcd6 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -102,7 +102,7 @@ def test_deploy_neuroconv_batch_job(self): aws_secret_access_key=aws_secret_access_key, ) - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs" + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs/source" testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent yaml_specification_file_path = ( From 7b8500eeb3d6c64d8c4c9f5a13faad6bc5392766 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 13:15:47 -0400 Subject: [PATCH 34/50] fix job dependencies syntax --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index cc064aede..3eafa1ae2 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -102,6 +102,7 @@ def deploy_neuroconv_batch_job( yaml_specification_file_stream = io.read() neuroconv_job_name = f"{job_name}_neuroconv_deployment" + job_dependencies = [{"jobId": rclone_job_id, "type": "SEQUENTIAL"}] neuroconv_job_submission_info = submit_aws_batch_job( job_name=neuroconv_job_name, docker_image=docker_image, @@ -111,7 +112,7 @@ def deploy_neuroconv_batch_job( "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", }, efs_volume_name=efs_volume_name, - job_dependencies=[rclone_job_id], + job_dependencies=job_dependencies, status_tracker_table_name=status_tracker_table_name, compute_environment_name=compute_environment_name, job_queue_name=job_queue_name, From 4054b4cca92e26617bf0fbe8426735606cb1fa7a Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 13:22:30 -0400 Subject: [PATCH 35/50] fix rclone command --- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index db098fcd6..a782f8a2e 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -102,7 +102,10 @@ def test_deploy_neuroconv_batch_job(self): aws_secret_access_key=aws_secret_access_key, ) - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs/source" + rclone_command = ( + "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs/source" + "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? 
+ ) testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent yaml_specification_file_path = ( From 9faf04a648b59f441a08307da46ef0a7c286f5fd Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 14:43:59 -0400 Subject: [PATCH 36/50] add sleep to avoid race condition --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 5 +++++ src/neuroconv/tools/aws/_submit_aws_batch_job.py | 7 +------ .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 4 ++++ tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 6 +++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index 3eafa1ae2..d0003d251 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -1,5 +1,6 @@ """Collection of helper functions for deploying NeuroConv in EC2 Batch jobs on AWS.""" +import time import uuid from typing import Optional @@ -96,6 +97,10 @@ def deploy_neuroconv_batch_job( ) rclone_job_id = rclone_job_submission_info["job_submission_info"]["jobId"] + # Give the EFS and other aspects time to spin up before submitting next dependent job + # (Otherwise, good chance that duplicate EFS will be created) + time.sleep(30) + docker_image = "ghcr.io/catalystneuro/neuroconv_latest_yaml_variable:latest" with open(file=yaml_specification_file_path, mode="r") as io: diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 748f25399..cf9b40593 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -530,12 +530,9 @@ def _generate_job_definition_name( The minimum number of CPUs required to run this job. A minimum of 4 is required, even if only one will be used in the actual process. 
""" - docker_tags = docker_image.split(":")[1:] - docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - # AWS Batch does not allow colons, slashes, or periods in job definition names parsed_docker_image_name = str(docker_image) - for disallowed_character in [":", r"/", "."]: + for disallowed_character in [":", "/", "."]: parsed_docker_image_name = parsed_docker_image_name.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" @@ -544,8 +541,6 @@ def _generate_job_definition_name( job_definition_name += f"_{minimum_worker_cpus}-CPU" if efs_id is not None: job_definition_name += f"_{efs_id}" - if docker_tag is None or docker_tag == "latest": - date = datetime.now().strftime("%Y-%m-%d") return job_definition_name diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index a782f8a2e..c45f60d20 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -36,6 +36,7 @@ class TestNeuroConvDeploymentBatchJob(unittest.TestCase): aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) region = "us-east-2" + efs_id = None def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -67,6 +68,9 @@ def setUp(self): ) def tearDown(self): + if self.efs_id is None: + return None + efs_client = self.efs_client # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 7ea49e644..2ff7282bc 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -36,6 +36,7 @@ class TestRcloneTransferBatchJob(unittest.TestCase): aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) region = "us-east-2" + efs_id = None def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -66,7 +67,10 @@ def setUp(self): aws_secret_access_key=self.aws_secret_access_key, ) - def tearDown(self): + def tearDown(self) -> None: + if self.efs_id is None: + return None + efs_client = self.efs_client # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume From 4463a7a1f76c08481014315408c4de3ebd689f83 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 14:58:59 -0400 Subject: [PATCH 37/50] add proper wait check --- .../tools/aws/_deploy_neuroconv_batch_job.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index d0003d251..ac2f059c2 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -99,7 +99,28 @@ def deploy_neuroconv_batch_job( # Give the EFS and other aspects time to spin up before submitting next dependent job # (Otherwise, good chance that duplicate EFS will be created) - time.sleep(30) + matching_efs_volumes = [ + file_system + for file_system in available_efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + max_iterations = 10 + iteration = 0 + while 
len(matching_efs_volumes) == 0 and iteration < max_iterations: + iteration += 1 + time.sleep(30) + + matching_efs_volumes = [ + file_system + for file_system in available_efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + + if len(matching_efs_volumes) == 0: + message = f"Unable to create EFS volume '{efs_volume_name}' after {max_iterations} attempts!" + raise ValueError(message) docker_image = "ghcr.io/catalystneuro/neuroconv_latest_yaml_variable:latest" From 77c25e4ddb6954b14ebf341672ab8ead12336a66 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 16:18:09 -0400 Subject: [PATCH 38/50] fix --- .../tools/aws/_deploy_neuroconv_batch_job.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index ac2f059c2..9fe274325 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -1,9 +1,11 @@ """Collection of helper functions for deploying NeuroConv in EC2 Batch jobs on AWS.""" +import os import time import uuid from typing import Optional +import boto3 from pydantic import FilePath, validate_call from ._rclone_transfer_batch_job import rclone_transfer_batch_job @@ -99,6 +101,17 @@ def deploy_neuroconv_batch_job( # Give the EFS and other aspects time to spin up before submitting next dependent job # (Otherwise, good chance that duplicate EFS will be created) + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + available_efs_volumes = efs_client.describe_file_systems() matching_efs_volumes = [ file_system for file_system in available_efs_volumes["FileSystems"] From b119181ff3ce3968cc976836a1a8376e53d31cd0 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 17:01:07 -0400 Subject: [PATCH 39/50] try to pin down source --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 5 ++++- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index cf9b40593..2cae11bdd 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -464,11 +464,14 @@ def _create_or_get_efs_id( if tag["Key"] == "Name" and tag["Value"] == efs_volume_name ] - if len(matching_efs_volumes) > 1: + if len(matching_efs_volumes) == 1: efs_volume = matching_efs_volumes[0] efs_id = efs_volume["FileSystemId"] return efs_id + elif len(matching_efs_volumes) > 1: + message = f"Multiple EFS volumes with the name '{efs_volume_name}' were found!\n\n{matching_efs_volumes=}\n" + raise ValueError(message) # Existing volume not found - must create a fresh one and set mount targets on it efs_volume = efs_client.create_file_system( diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index c45f60d20..e29565b20 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -107,7 +107,7 @@ 
def test_deploy_neuroconv_batch_job(self): ) rclone_command = ( - "rclone copy test_google_drive_remote:testing_rclone_spikeglx/ci_tests /mnt/efs/source" + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy/ci_tests /mnt/efs/source" "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? ) From 6bd2999c08ac3919d1b33b9a95bad38d31165b4d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 17:34:29 -0400 Subject: [PATCH 40/50] fix dockerfile publishing --- .../build_and_upload_docker_image_yaml_variable.yml | 2 +- dockerfiles/neuroconv_latest_yaml_variable | 2 +- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 4 +++- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 2 +- .../_yaml_conversion_specification.py | 8 +++++++- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 2 +- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml index 3c2a06d93..d5c6da058 100644 --- a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml +++ b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml @@ -31,7 +31,7 @@ jobs: uses: docker/build-push-action@v5 with: push: true # Push is a shorthand for --output=type=registry - tags: ghcr.io/catalystneuro/neuroconv:yaml_variable + tags: ghcr.io/catalystneuro/neuroconv_yaml_variable:latest context: . file: dockerfiles/neuroconv_latest_yaml_variable provenance: false diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable index ea411ee44..903857244 100644 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -1,4 +1,4 @@ FROM ghcr.io/catalystneuro/neuroconv:latest LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." -CMD echo "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite +CMD printf "$NEUROCONV_YAML" > ./run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index 9fe274325..8e57a684f 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -135,7 +135,7 @@ def deploy_neuroconv_batch_job( message = f"Unable to create EFS volume '{efs_volume_name}' after {max_iterations} attempts!" 
raise ValueError(message) - docker_image = "ghcr.io/catalystneuro/neuroconv_latest_yaml_variable:latest" + docker_image = "ghcr.io/catalystneuro/neuroconv_yaml_variable:latest" with open(file=yaml_specification_file_path, mode="r") as io: yaml_specification_file_stream = io.read() @@ -147,6 +147,8 @@ def deploy_neuroconv_batch_job( docker_image=docker_image, environment_variables={ "NEUROCONV_YAML": yaml_specification_file_stream, + # TODO: would prefer this to use subfolders for source and output, but need logic for YAML + # related code to create them if missing (hard to send EFS this command directly) "NEUROCONV_DATA_PATH": "/mnt/efs/source", "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", }, diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 2cae11bdd..650d16c32 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -642,7 +642,7 @@ def _ensure_job_definition_exists_and_get_arn( }, }, ] - mountPoints = [{"containerPath": "/mnt/efs/", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] + mountPoints = [{"containerPath": "/mnt/efs", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] # batch_client.register_job_definition is not synchronous and so we need to wait a bit afterwards batch_client.register_job_definition( diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 10e33cbc8..0e2f05f74 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -73,10 +73,16 @@ def run_conversion_from_yaml( if data_folder_path is None: data_folder_path = Path(specification_file_path).parent + else: + data_folder_path = Path(data_folder_path) + data_folder_path.mkdir(exist_ok=True) + if output_folder_path is None: - output_folder_path = Path(specification_file_path).parent + output_folder_path = specification_file_path.parent else: output_folder_path = Path(output_folder_path) + output_folder_path.mkdir(exist_ok=True) + specification = load_dict_from_file(file_path=specification_file_path) schema_folder = Path(__file__).parent.parent.parent / "schemas" specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index e29565b20..55c9d65f2 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -168,7 +168,7 @@ def test_deploy_neuroconv_batch_job(self): self.efs_id = efs_volume["FileSystemId"] # Check normal job completion - assert job["jobName"] == job_name + assert job["jobName"] == f"{job_name}_neuroconv_deployment" assert "neuroconv_batch_queue" in job["jobQueue"] assert "fs-" in job["jobDefinition"] assert job["status"] == "SUCCEEDED" From 359c5935c32eeae31ec9ba0b9376f5c20ec40dee Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 17:47:49 -0400 Subject: [PATCH 41/50] restore commented paths; try again --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index 8e57a684f..a2565c921 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -149,8 +149,12 @@ def deploy_neuroconv_batch_job( "NEUROCONV_YAML": yaml_specification_file_stream, # TODO: would prefer this to use subfolders for source and output, but need logic for YAML # related code to create them if missing (hard to send EFS this command directly) - "NEUROCONV_DATA_PATH": "/mnt/efs/source", - "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", + # (the code was included in this PR, but a release cycle needs to complete for the docker images before + # it can be used here) + # "NEUROCONV_DATA_PATH": "/mnt/efs/source", + # "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", + "NEUROCONV_DATA_PATH": "/mnt/efs", + "NEUROCONV_OUTPUT_PATH": "/mnt/efs", }, efs_volume_name=efs_volume_name, job_dependencies=job_dependencies, From fe9bca7b2fef84d481c7822bcfc4002566c613aa Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 18:00:58 -0400 Subject: [PATCH 42/50] try different CLI call pattern --- dockerfiles/neuroconv_latest_yaml_variable | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable index 903857244..5500f14f0 100644 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -1,4 +1,4 @@ FROM ghcr.io/catalystneuro/neuroconv:latest LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." -CMD printf "$NEUROCONV_YAML" > ./run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite +CMD printf "$NEUROCONV_YAML" > ./run.yml && neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite From b5e2ef4d5279917e9e1969e0b232af2149df7702 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 18:31:26 -0400 Subject: [PATCH 43/50] disable efs cleanup to tap in manually --- .../neuroconv_deployment_aws_tools_tests.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 55c9d65f2..bc5e6368b 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -67,20 +67,20 @@ def setUp(self): aws_secret_access_key=self.aws_secret_access_key, ) - def tearDown(self): - if self.efs_id is None: - return None - - efs_client = self.efs_client - - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? 
(since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=self.efs_id) + # def tearDown(self): + # if self.efs_id is None: + # return None + # + # efs_client = self.efs_client + # + # # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # # TODO: cleanup job definitions? (since built daily) + # mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) + # for mount_target in mount_targets["MountTargets"]: + # efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + # + # time.sleep(60) + # efs_client.delete_file_system(FileSystemId=self.efs_id) def test_deploy_neuroconv_batch_job(self): region = "us-east-2" From 69093231527959186fc34410712caa81d91f418f Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 18:43:30 -0400 Subject: [PATCH 44/50] silly bug fix --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 3 +-- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index a2565c921..39a0bb90e 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -147,13 +147,12 @@ def deploy_neuroconv_batch_job( docker_image=docker_image, environment_variables={ "NEUROCONV_YAML": yaml_specification_file_stream, + "NEUROCONV_DATA_PATH": "/mnt/efs/source", # TODO: would prefer this to use subfolders for source and output, but need logic for YAML # related code to create them if missing (hard to send EFS this command directly) # (the code was included in this PR, but a release cycle needs to complete for the docker images before # it can be used here) - # "NEUROCONV_DATA_PATH": "/mnt/efs/source", # "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", - "NEUROCONV_DATA_PATH": "/mnt/efs", "NEUROCONV_OUTPUT_PATH": "/mnt/efs", }, efs_volume_name=efs_volume_name, diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index bc5e6368b..0032edcda 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -107,7 +107,7 @@ def test_deploy_neuroconv_batch_job(self): ) rclone_command = ( - "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy/ci_tests /mnt/efs/source" + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy/ci_tests /mnt/efs/source " "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? 
) From 236be55bc8f90f85651f6e6c23c70e56428ea807 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 21:21:53 -0400 Subject: [PATCH 45/50] final fixes --- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 0032edcda..e3ffa2f99 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -168,7 +168,8 @@ def test_deploy_neuroconv_batch_job(self): self.efs_id = efs_volume["FileSystemId"] # Check normal job completion - assert job["jobName"] == f"{job_name}_neuroconv_deployment" + expected_job_name = f"{job_name}_neuroconv_deployment" + assert job["jobName"] == expected_job_name assert "neuroconv_batch_queue" in job["jobQueue"] assert "fs-" in job["jobDefinition"] assert job["status"] == "SUCCEEDED" @@ -181,7 +182,7 @@ def test_deploy_neuroconv_batch_job(self): assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 table_item = table_item_response["Item"] - assert table_item["job_name"] == job_name + assert table_item["job_name"] == expected_job_name assert table_item["job_id"] == job_id assert table_item["status"] == "Job submitted..." From 1ace016e025e1a1bdebcd235f887c4b0370534fa Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 23:39:42 -0400 Subject: [PATCH 46/50] add auto efs cleanup attempt --- .../tools/aws/_deploy_neuroconv_batch_job.py | 69 ++++++++++++++++++- .../neuroconv_deployment_aws_tools_tests.py | 40 ++--------- 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index 39a0bb90e..d4d9a1fe0 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -3,6 +3,7 @@ import os import time import uuid +import warnings from typing import Optional import boto3 @@ -11,6 +12,8 @@ from ._rclone_transfer_batch_job import rclone_transfer_batch_job from ._submit_aws_batch_job import submit_aws_batch_job +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + @validate_call def deploy_neuroconv_batch_job( @@ -104,6 +107,12 @@ def deploy_neuroconv_batch_job( aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) efs_client = boto3.client( service_name="efs", region_name=region, @@ -166,11 +175,67 @@ def deploy_neuroconv_batch_job( region=region, ) - # TODO: spinup third dependent job to clean up EFS volume after neuroconv job is complete? 
- info = { "rclone_job_submission_info": rclone_job_submission_info, "neuroconv_job_submission_info": neuroconv_job_submission_info, } + # TODO: would be better to spin up third dependent job to clean up EFS volume after neuroconv job completes + neuroconv_job_id = neuroconv_job_submission_info["jobId"] + job = None + max_retries = 60 * 12 # roughly 12 hours max runtime (aside from internet loss) for checking cleanup + sleep_time = 60 # 1 minute + retry = 0.0 + time.sleep(sleep_time) + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[neuroconv_job_id]) + if job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200: + # sleep but only increment retry by a small amount + # (really should only apply if internet connection is temporarily lost) + retry += 0.1 + time.sleep(sleep_time) + + job = job_description_response["jobs"][0] + if job["status"] in _RETRY_STATES: + retry += 1.0 + time.sleep(sleep_time) + elif job["status"] == "SUCCEEDED": + break + + if retry >= max_retries: + message = ( + "Maximum retries reached for checking job completion for automatic EFS cleanup! " + "Please delete the EFS volume manually." + ) + warnings.warn(message=message, stacklevel=2) + + return info + + # Cleanup EFS after job is complete - must clear mount targets first, then wait before deleting the volume + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + if len(matching_efs_volumes) != 1: + message = ( + f"Expected to find exactly one EFS volume with name '{efs_volume_name}', " + f"but found {len(matching_efs_volumes)}\n\n{matching_efs_volumes=}\n\n!" + "You will have to delete these manually." + ) + warnings.warn(message=message, stacklevel=2) + + return info + + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(sleep_time) + efs_client.delete_file_system(FileSystemId=efs_id) + return info diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index e3ffa2f99..94aae391b 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -36,7 +36,6 @@ class TestNeuroConvDeploymentBatchJob(unittest.TestCase): aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) region = "us-east-2" - efs_id = None def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -60,28 +59,6 @@ def setUp(self): with open(file=self.test_config_file_path, mode="w") as io: io.writelines(rclone_config_contents) - self.efs_client = boto3.client( - service_name="efs", - region_name=self.region, - aws_access_key_id=self.aws_access_key_id, - aws_secret_access_key=self.aws_secret_access_key, - ) - - # def tearDown(self): - # if self.efs_id is None: - # return None - # - # efs_client = self.efs_client - # - # # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # # TODO: cleanup job definitions? 
(since built daily) - # mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) - # for mount_target in mount_targets["MountTargets"]: - # efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - # - # time.sleep(60) - # efs_client.delete_file_system(FileSystemId=self.efs_id) - def test_deploy_neuroconv_batch_job(self): region = "us-east-2" aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) @@ -105,6 +82,7 @@ def test_deploy_neuroconv_batch_job(self): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) + efs_volumes_before = efs_client.describe_file_systems() rclone_command = ( "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy/ci_tests /mnt/efs/source " @@ -132,7 +110,7 @@ def test_deploy_neuroconv_batch_job(self): rclone_config_file_path=rclone_config_file_path, ) - # Wait for AWS to process the job + # Wait additional time for AWS to clean up resources time.sleep(120) info = all_info["neuroconv_job_submission_info"] @@ -155,17 +133,9 @@ def test_deploy_neuroconv_batch_job(self): else: break - # Check EFS specific details - efs_volumes = efs_client.describe_file_systems() - matching_efs_volumes = [ - file_system - for file_system in efs_volumes["FileSystems"] - for tag in file_system["Tags"] - if tag["Key"] == "Name" and tag["Value"] == efs_volume_name - ] - assert len(matching_efs_volumes) == 1 - efs_volume = matching_efs_volumes[0] - self.efs_id = efs_volume["FileSystemId"] + # Check EFS cleaned up automatically + efs_volumes_after = efs_client.describe_file_systems() + assert len(efs_volumes_after["FileSystems"]) == len(efs_volumes_before["FileSystems"]) # Check normal job completion expected_job_name = f"{job_name}_neuroconv_deployment" From 6a8d552e43acd2152dacaf1056b34d00513935ad Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 23:45:42 -0400 Subject: [PATCH 47/50] fix info structure --- src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py index d4d9a1fe0..1df86d957 100644 --- a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -181,7 +181,7 @@ def deploy_neuroconv_batch_job( } # TODO: would be better to spin up third dependent job to clean up EFS volume after neuroconv job completes - neuroconv_job_id = neuroconv_job_submission_info["jobId"] + neuroconv_job_id = neuroconv_job_submission_info["job_submission_info"]["jobId"] job = None max_retries = 60 * 12 # roughly 12 hours max runtime (aside from internet loss) for checking cleanup sleep_time = 60 # 1 minute From 7e303302e97dec4faf0240ac4c6e1c0e5e3a760c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 23:59:36 -0400 Subject: [PATCH 48/50] add comment --- .../test_yaml/neuroconv_deployment_aws_tools_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py index 94aae391b..f58865d26 100644 --- a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -82,6 +82,7 @@ def test_deploy_neuroconv_batch_job(self): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) + # Assume no other tests of EFS 
volumes are fluctuating at the same time, otherwise make this more specific efs_volumes_before = efs_client.describe_file_systems() rclone_command = ( From bac35102a479a69a9055e5ab3c7a07aaf4478d4c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 29 Sep 2024 00:16:54 -0400 Subject: [PATCH 49/50] restore test triggers --- .github/workflows/deploy-tests.yml | 10 +++++----- .github/workflows/neuroconv_deployment_aws_tests.yml | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 5c44d1544..4f67d15de 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,11 +1,11 @@ name: Deploy tests on: -# pull_request: -# types: [synchronize, opened, reopened, ready_for_review] -# # Synchronize, open and reopened are the default types for pull request -# # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked -# merge_group: + pull_request: + types: [synchronize, opened, reopened, ready_for_review] + # Synchronize, open and reopened are the default types for pull request + # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked + merge_group: workflow_dispatch: concurrency: diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml index e7f7eb12a..64aae5ec9 100644 --- a/.github/workflows/neuroconv_deployment_aws_tests.yml +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -3,7 +3,6 @@ on: schedule: - cron: "0 16 * * 3" # Weekly at noon on Wednesday workflow_dispatch: - push: concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 253855becfd65d15015088efc30c23f9c589044f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Nov 2024 20:03:53 +0000 Subject: [PATCH 50/50] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/aws/__init__.py | 1 - src/neuroconv/tools/aws/_submit_aws_batch_job.py | 4 +++- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index d2b5e4d45..70a42cbf5 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -7,4 +7,3 @@ "rclone_transfer_batch_job", "deploy_neuroconv_batch_job", ] - diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 9e0c8857e..cae25f3ce 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -508,6 +508,7 @@ def _create_or_get_efs_id( return efs_id + def generate_job_definition_name( *, docker_image: str, @@ -545,7 +546,8 @@ def generate_job_definition_name( if docker_tag is None or docker_tag == "latest": date = datetime.now().strftime("%Y-%m-%d") return job_definition_name - + + def _ensure_job_definition_exists_and_get_arn( *, job_definition_name: str, diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index ae229174a..e767e516b 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -38,7 +38,6 @@ class 
TestRcloneTransferBatchJob(unittest.TestCase): region = "us-east-2" efs_id = None - def setUp(self): self.test_folder.mkdir(exist_ok=True)
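
For reference, a minimal usage sketch of the `deploy_neuroconv_batch_job` helper that this patch series introduces. Only `rclone_command`, `efs_volume_name`, and `rclone_config_file_path` appear verbatim as call arguments in the patches above; the remaining keyword names and all remote/path/name values are assumptions for illustration, not the definitive API.

from neuroconv.tools.aws import deploy_neuroconv_batch_job

# Hypothetical remote, paths, and names; replace with your own.
rclone_command = (
    "rclone copy my_remote:my_dataset /mnt/efs/source "  # data should land under /mnt/efs
    "--verbose --progress --config ./rclone.conf"
)

all_info = deploy_neuroconv_batch_job(
    rclone_command=rclone_command,
    yaml_specification_file_path="/path/to/conversion_specification.yml",  # assumed keyword name
    job_name="my_neuroconv_deployment",  # assumed keyword name
    efs_volume_name="my_efs_volume",
    rclone_config_file_path="/path/to/rclone.conf",
)

# The helper submits two dependent AWS Batch jobs (Rclone transfer, then the
# NeuroConv YAML conversion) and returns the submission info for each.
rclone_info = all_info["rclone_job_submission_info"]
neuroconv_info = all_info["neuroconv_job_submission_info"]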