From 9b78811ff7c2326cb8242f07e360e13bdeae55e5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 16 Sep 2024 11:26:26 -0400 Subject: [PATCH 01/25] setup --- .github/workflows/deploy-tests.yml | 7 +- CHANGELOG.md | 5 + src/neuroconv/tools/aws/__init__.py | 3 +- .../tools/aws/_rclone_transfer_batch_job.py | 113 ++++++++++++++ tests/test_minimal/test_tools/aws_tools.py | 145 +++++++++++++++++- 5 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 src/neuroconv/tools/aws/_rclone_transfer_batch_job.py diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 9cbf6ba94..d592e7e59 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,9 +1,10 @@ name: Deploy tests on: - pull_request: - types: [synchronize, opened, reopened, ready_for_review] # defaults + ready_for_review - merge_group: + # TODO: disabled to save resources +# pull_request: +# types: [synchronize, opened, reopened, ready_for_review] # defaults + ready_for_review +# merge_group: workflow_dispatch: concurrency: # Cancel previous workflows on the same pull request diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa9612aa..053791a45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Upcoming +## Features +* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. 
[PR #]() + + + ## v0.6.4 ## Bug Fixes diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index d40ddb2dd..88144fb01 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -1,3 +1,4 @@ from ._submit_aws_batch_job import submit_aws_batch_job +from ._rclone_transfer_batch_job import rclone_transfer_batch_job -__all__ = ["submit_aws_batch_job"] +__all__ = ["submit_aws_batch_job", "rclone_transfer_batch_job"] diff --git a/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py new file mode 100644 index 000000000..65bef7824 --- /dev/null +++ b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py @@ -0,0 +1,114 @@ +"""Collection of helper functions for assessing and performing automated data transfers related to AWS.""" + +import pathlib +import warnings +from typing import Optional + +from pydantic import FilePath, validate_call + +from ._submit_aws_batch_job import submit_aws_batch_job + + +@validate_call +def rclone_transfer_batch_job( + *, + rclone_command: str, + job_name: str, + efs_volume_name: str, + rclone_config_file_path: Optional[FilePath] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gib: int = 4, + minimum_worker_cpus: int = 4, + submission_id: Optional[str] = None, + region: Optional[str] = None, +) -> dict[str, str]: + """ + Submit a job to AWS Batch for processing. + + Requires AWS credentials saved to files in the `~/.aws/` folder or set as environment variables. + + Parameters + ---------- + rclone_command : str + The command to pass directly to Rclone running on the EC2 instance. + E.g.: "rclone copy my_drive:testing_rclone /mnt/efs" + Must move data from or to '/mnt/efs'. + job_name : str + The name of the job to submit. 
+ efs_volume_name : str + The name of an EFS volume to be created and attached to the job. + The path exposed to the container will always be `/mnt/efs`. + rclone_config_file_path : FilePath, optional + The path to the Rclone configuration file to use for the job. + If unspecified, method will attempt to find the file in `~/.rclone` and will raise an error if it cannot. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. + If unspecified, a name starting with 'neuroconv_batch_' will be generated. + minimum_worker_ram_in_gib : int, default: 4 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GiB to allow comfortable buffer space for data chunk iterators. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. + submission_id : str, optional + The unique ID to pair with this job submission when tracking the status via DynamoDB. + Defaults to a random UUID4. + region : str, optional + The AWS region to use for the job. + If not provided, we will attempt to load the region from your local AWS configuration. + If that file is not found on your system, we will default to "us-east-2", the location of the DANDI Archive. + + Returns + ------- + info : dict + A dictionary containing information about this AWS Batch job. 
+ + info["job_submission_info"] is the return value of `boto3.client.submit_job` which contains the job ID. + info["table_submission_info"] is the initial row data inserted into the DynamoDB status tracking table. + """ + docker_image = "ghcr.io/catalystneuro/rclone_with_config:latest" + + if "/mnt/efs" not in rclone_command: + message = ( + f"The Rclone command '{rclone_command}' does not contain a reference to '/mnt/efs'. " + "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space." + ) + warnings.warn(message=message, stacklevel=2) + + rclone_config_file_path = rclone_config_file_path or pathlib.Path.home() / ".rclone" / "rclone.conf" + if not rclone_config_file_path.exists(): + raise FileNotFoundError( + f"Rclone configuration file not found at: {rclone_config_file_path}! " + "Please check that `rclone config` successfully created the file." + ) + with open(file=rclone_config_file_path, mode="r") as io: + rclone_config_file_stream = io.read() + + region = region or "us-east-2" + + info = submit_aws_batch_job( + job_name=job_name, + docker_image=docker_image, + environment_variables={"RCLONE_CONFIG": rclone_config_file_stream, "RCLONE_COMMAND": rclone_command}, + efs_volume_name=efs_volume_name, + status_tracker_table_name=status_tracker_table_name, + compute_environment_name=compute_environment_name, + job_queue_name=job_queue_name, + job_definition_name=job_definition_name, + minimum_worker_ram_in_gib=minimum_worker_ram_in_gib, + minimum_worker_cpus=minimum_worker_cpus, + submission_id=submission_id, + region=region, + ) + + return info diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 2e7598178..efa624e87 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -4,7 +4,7 @@ import boto3 -from neuroconv.tools.aws import submit_aws_batch_job +from neuroconv.tools.aws import rclone_transfer_batch_job, submit_aws_batch_job 
_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] @@ -297,3 +297,146 @@ def test_submit_aws_batch_job_with_efs_mount(): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) + + +class TestRcloneTransferBatchJob(TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikeglx' with the following structure: + + testing_rclone_spikeglx + ├── ci_tests + ├────── Noise4Sam_g0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset. + + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. + """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_rclone_transfer_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + 
aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_config_file_path = self.test_config_file_path + + info = rclone_transfer_batch_job( + rclone_command=rclone_command, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(60) + + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = 
table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." + + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? (since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 3f7e2e375e5a9e61f8dbe29d705c2529cb6c153e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 17 Sep 2024 11:51:46 -0400 Subject: [PATCH 02/25] enhance test docs --- tests/test_minimal/test_tools/aws_tools.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index efa624e87..7e861be2e 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -302,13 +302,17 @@ def test_submit_aws_batch_job_with_efs_mount(): class TestRcloneTransferBatchJob(TestCase): """ To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive - called 'testing_rclone_spikeglx' with the following structure: + called 'testing_rclone_spikegl_and_phy' with the following structure: - testing_rclone_spikeglx + testing_rclone_spikeglx_and_phy ├── ci_tests + ├──── 
spikeglx ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 - Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset. + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. The developer can easily find the location of the config file on their system using `rclone config file`. From b49a70232e43ca0576bda99f2cd1abd1674441d4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:32:32 -0400 Subject: [PATCH 03/25] fix import --- tests/test_minimal/test_tools/aws_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 7e861be2e..0193f85e3 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,6 +1,7 @@ import datetime import os import time +import unittest import boto3 @@ -299,7 +300,7 @@ def test_submit_aws_batch_job_with_efs_mount(): ) -class TestRcloneTransferBatchJob(TestCase): +class TestRcloneTransferBatchJob(unittest.TestCase): """ To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive called 'testing_rclone_spikegl_and_phy' with the following structure: From 68c8e945173c013939b25f3927ebcf7ac108efb4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:39:22 -0400 Subject: [PATCH 04/25] split tests --- .github/workflows/aws_tests.yml | 4 + tests/test_minimal/test_tools/aws_tools.py | 150 +---------------- .../test_on_data/test_yaml/yaml_aws_tools.py | 158 ++++++++++++++++++ 3 files changed, 163 insertions(+), 149 deletions(-) create mode 100644 tests/test_on_data/test_yaml/yaml_aws_tools.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 
0ecbb4d7b..66ca08c9f 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -39,5 +39,9 @@ jobs: - name: Install full requirements run: pip install .[aws,test] + # TODO: when passing, reverse order of tests from minimal to data + - name: Run subset of tests that use S3 live services + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools.py + - name: Run subset of tests that use S3 live services run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index 0193f85e3..2e7598178 100644 --- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -1,11 +1,10 @@ import datetime import os import time -import unittest import boto3 -from neuroconv.tools.aws import rclone_transfer_batch_job, submit_aws_batch_job +from neuroconv.tools.aws import submit_aws_batch_job _RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] @@ -298,150 +297,3 @@ def test_submit_aws_batch_job_with_efs_mount(): table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) - - -class TestRcloneTransferBatchJob(unittest.TestCase): - """ - To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive - called 'testing_rclone_spikegl_and_phy' with the following structure: - - testing_rclone_spikeglx_and_phy - ├── ci_tests - ├──── spikeglx - ├────── Noise4Sam_g0 - ├──── phy - ├────── phy_example_0 - - Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the - 'phy' folder of the same dataset. - - Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. - The developer can easily find the location of the config file on their system using `rclone config file`. 
- """ - - test_folder = OUTPUT_PATH / "aws_rclone_tests" - test_config_file_path = test_folder / "rclone.conf" - - def setUp(self): - self.test_folder.mkdir(exist_ok=True) - - # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) - token_dictionary = dict( - access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], - token_type="Bearer", - refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], - expiry=os.environ["RCLONE_EXPIRY_TOKEN"], - ) - token_string = str(token_dictionary).replace("'", '"').replace(" ", "") - rclone_config_contents = [ - "[test_google_drive_remote]\n", - "type = drive\n", - "scope = drive\n", - f"token = {token_string}\n", - "team_drive = \n", - "\n", - ] - with open(file=self.test_config_file_path, mode="w") as io: - io.writelines(rclone_config_contents) - - def test_rclone_transfer_batch_job(self): - region = "us-east-2" - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) - aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) - - dynamodb_resource = boto3.resource( - service_name="dynamodb", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - batch_client = boto3.client( - service_name="batch", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - efs_client = boto3.client( - service_name="efs", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) - - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" - rclone_config_file_path = self.test_config_file_path - - info = rclone_transfer_batch_job( - rclone_command=rclone_command, - rclone_config_file_path=rclone_config_file_path, - ) - - # Wait for AWS to process the job - time.sleep(60) - - job_id = info["job_submission_info"]["jobId"] - job = None - max_retries = 10 - retry = 0 - while retry < 
max_retries: - job_description_response = batch_client.describe_jobs(jobs=[job_id]) - assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - jobs = job_description_response["jobs"] - assert len(jobs) == 1 - - job = jobs[0] - - if job["status"] in _RETRY_STATES: - retry += 1 - time.sleep(60) - else: - break - - # Check EFS specific details - efs_volumes = efs_client.describe_file_systems() - matching_efs_volumes = [ - file_system - for file_system in efs_volumes["FileSystems"] - for tag in file_system["Tags"] - if tag["Key"] == "Name" and tag["Value"] == efs_volume_name - ] - assert len(matching_efs_volumes) == 1 - efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] - - # Check normal job completion - assert job["jobName"] == job_name - assert "neuroconv_batch_queue" in job["jobQueue"] - assert "fs-" in job["jobDefinition"] - assert job["status"] == "SUCCEEDED" - - status_tracker_table_name = "neuroconv_batch_status_tracker" - table = dynamodb_resource.Table(name=status_tracker_table_name) - table_submission_id = info["table_submission_info"]["id"] - - table_item_response = table.get_item(Key={"id": table_submission_id}) - assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - - table_item = table_item_response["Item"] - assert table_item["job_name"] == job_name - assert table_item["job_id"] == job_id - assert table_item["status"] == "Job submitted..." - - table.update_item( - Key={"id": table_submission_id}, - AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, - ) - - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? 
(since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) - - table.update_item( - Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} - ) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py new file mode 100644 index 000000000..d3ad42bd1 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -0,0 +1,158 @@ +import os +import time +import unittest + +import boto3 + +from neuroconv.tools.aws import rclone_transfer_batch_job + +from ..setup_paths import OUTPUT_PATH + +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + + +class TestRcloneTransferBatchJob(unittest.TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikegl_and_phy' with the following structure: + + testing_rclone_spikeglx_and_phy + ├── ci_tests + ├──── spikeglx + ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. + + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. 
+ """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_rclone_transfer_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_config_file_path = self.test_config_file_path + + info = rclone_transfer_batch_job( + rclone_command=rclone_command, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(60) + + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < 
max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." + + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? 
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 830ca3dfbfe580b9ea1e6dcd32b9cc7c228b38f4 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:42:54 -0400 Subject: [PATCH 05/25] pass secrets --- .github/workflows/aws_tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 66ca08c9f..a66addf3a 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -11,6 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: From 2f5c6c20d1ffd2f0521ede39276ba0a8d23d9f17 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 20:30:10 -0400 Subject: [PATCH 06/25] fix test --- tests/test_on_data/test_yaml/yaml_aws_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py index d3ad42bd1..200368af8 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -82,8 +82,14 @@ def test_rclone_transfer_batch_job(self): rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" rclone_config_file_path = 
self.test_config_file_path + now = datetime.datetime.now().isoformat() + job_name = f"test_rclone_transfer_batch_job_{now}" + efs_volume_name = "test_rclone_transfer_batch_efs" + info = rclone_transfer_batch_job( rclone_command=rclone_command, + job_name=job_name, + efs_volume_name=efs_volume_name, rclone_config_file_path=rclone_config_file_path, ) From eb631f9d2cf63ed57e3ce95057fc13efb09c1145 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 20:33:03 -0400 Subject: [PATCH 07/25] fix import --- tests/test_on_data/test_yaml/yaml_aws_tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools.py index 200368af8..8cd001689 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools.py @@ -1,3 +1,4 @@ +import datetime import os import time import unittest From f1cfbd080de0931bcac8e2af1906c271d559d02b Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:28:00 -0400 Subject: [PATCH 08/25] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 9e3ba0488..d2d0c1f6f 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -530,7 +530,9 @@ def _generate_job_definition_name( """ docker_tags = docker_image.split(":")[1:] docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - parsed_docker_image_name = docker_image.replace(":", "-") # AWS Batch does not allow colons in job definition names + + # AWS Batch does not allow colons or slashes in job definition names + parsed_docker_image_name = docker_image.replace(":", "-").replace("/", "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" @@ 
-641,7 +643,7 @@ def _ensure_job_definition_exists_and_get_arn( ] mountPoints = [{"containerPath": "/mnt/efs/", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] - # batch_client.register_job_definition() is not synchronous and so we need to wait a bit afterwards + # batch_client.register_job_definition is not synchronous and so we need to wait a bit afterwards batch_client.register_job_definition( jobDefinitionName=job_definition_name, type="container", From 08ba1e140f12980fb3b6a2d76c2bcf7b0eb3e6e5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:59:09 -0400 Subject: [PATCH 09/25] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index d2d0c1f6f..2d9a683a5 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -531,8 +531,9 @@ def _generate_job_definition_name( docker_tags = docker_image.split(":")[1:] docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - # AWS Batch does not allow colons or slashes in job definition names - parsed_docker_image_name = docker_image.replace(":", "-").replace("/", "-") + # AWS Batch does not allow colons, slashes, or periods in job definition names + for disallowed_character in [":", "/", "."]: + docker_image = docker_image.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From bd7cb1e7f393399679ed748c94da1e3583af6c85 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:13:44 -0400 Subject: [PATCH 10/25] fix copilot assigned name --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py 
b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 2d9a683a5..07dce9070 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -533,7 +533,7 @@ def _generate_job_definition_name( # AWS Batch does not allow colons, slashes, or periods in job definition names for disallowed_character in [":", "/", "."]: - docker_image = docker_image.replace(disallowed_character, "-") + parsed_docker_image_name = docker_image.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From 6ddb351dae72312ff7231409ed8ae239fae2eb8c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:18:27 -0400 Subject: [PATCH 11/25] fix job definition logic --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 07dce9070..55130b543 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -532,8 +532,9 @@ def _generate_job_definition_name( docker_tag = docker_tags[0] if len(docker_tags) > 1 else None # AWS Batch does not allow colons, slashes, or periods in job definition names - for disallowed_character in [":", "/", "."]: - parsed_docker_image_name = docker_image.replace(disallowed_character, "-") + parsed_docker_image_name = str(docker_image) + for disallowed_character in [":", r"/", "."]: + parsed_docker_image_name = parsed_docker_image_name.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" From beeabffac1fc564aab403877c90f2c21a6cc0fab Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:25:17 -0400 Subject: [PATCH 12/25] fix environment variable passing --- src/neuroconv/tools/aws/_submit_aws_batch_job.py 
| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 55130b543..caaced56f 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -171,7 +171,9 @@ def submit_aws_batch_job( job_dependencies = job_dependencies or [] container_overrides = dict() if environment_variables is not None: - container_overrides["environment"] = [{key: value} for key, value in environment_variables.items()] + container_overrides["environment"] = [ + {"name": key, "value": value} for key, value in environment_variables.items() + ] if commands is not None: container_overrides["command"] = commands From daeabc9a9864fd05b472fc759138787754b81748 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 22:30:47 -0400 Subject: [PATCH 13/25] fix job name in test --- .github/workflows/aws_tests.yml | 4 ++-- .../test_tools/{aws_tools.py => aws_tools_tests.py} | 0 .../test_yaml/{yaml_aws_tools.py => yaml_aws_tools_tests.py} | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/test_minimal/test_tools/{aws_tools.py => aws_tools_tests.py} (100%) rename tests/test_on_data/test_yaml/{yaml_aws_tools.py => yaml_aws_tools_tests.py} (98%) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index a66addf3a..331486ec1 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -44,7 +44,7 @@ jobs: # TODO: when passing, reverse order of tests from minimal to data - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools.py + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py + run: pytest -rsx -n auto 
tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools_tests.py similarity index 100% rename from tests/test_minimal/test_tools/aws_tools.py rename to tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py similarity index 98% rename from tests/test_on_data/test_yaml/yaml_aws_tools.py rename to tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8cd001689..8a0e958b3 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -83,8 +83,8 @@ def test_rclone_transfer_batch_job(self): rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" rclone_config_file_path = self.test_config_file_path - now = datetime.datetime.now().isoformat() - job_name = f"test_rclone_transfer_batch_job_{now}" + today = datetime.datetime.now().date().isoformat() + job_name = f"test_rclone_transfer_batch_job_{today}" efs_volume_name = "test_rclone_transfer_batch_efs" info = rclone_transfer_batch_job( From 5a9eaff514d07ec239f66c11e240b3f0a9b0b601 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 10:45:19 -0400 Subject: [PATCH 14/25] try Cody creds --- .github/workflows/aws_tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 331486ec1..e87a21aaf 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -11,9 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} - RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} - 
RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN_CODY }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN_CODY }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN_CODY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: From d20ca09cf5574af39228b93aa0db9c3d25b41af1 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:03:45 -0400 Subject: [PATCH 15/25] some adjustments --- .../test_yaml/yaml_aws_tools_tests.py | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8a0e958b3..d871237f5 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -33,6 +33,9 @@ class TestRcloneTransferBatchJob(unittest.TestCase): test_folder = OUTPUT_PATH / "aws_rclone_tests" test_config_file_path = test_folder / "rclone.conf" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = "us-east-2" def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -56,10 +59,29 @@ def setUp(self): with open(file=self.test_config_file_path, mode="w") as io: io.writelines(rclone_config_contents) + self.efs_client = boto3.client( + service_name="efs", + region_name=self.region, + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + ) + + def tearDown(self): + efs_client = self.efs_client + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? 
(since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=efs_id) + def test_rclone_transfer_batch_job(self): - region = "us-east-2" - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) - aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = self.region + aws_access_key_id = self.aws_access_key_id + aws_secret_access_key = self.aws_secret_access_key dynamodb_resource = boto3.resource( service_name="dynamodb", @@ -73,14 +95,9 @@ def test_rclone_transfer_batch_job(self): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) - efs_client = boto3.client( - service_name="efs", - region_name=region, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - ) + efs_client = self.efs_client - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs" + rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs --config ./rclone.conf" rclone_config_file_path = self.test_config_file_path today = datetime.datetime.now().date().isoformat() @@ -151,15 +168,6 @@ def test_rclone_transfer_batch_job(self): AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, ) - # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume - # TODO: cleanup job definitions? 
(since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) - for mount_target in mount_targets["MountTargets"]: - efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) - - time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) - table.update_item( Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} ) From 8d0bb4cb7d3685b5ac00e3941ac0478cffdc930b Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:21:59 -0400 Subject: [PATCH 16/25] fix path and add verbosity --- .github/workflows/aws_tests.yml | 2 +- tests/docker_rclone_with_config_cli.py | 3 ++- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index e87a21aaf..189edf502 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -39,7 +39,7 @@ jobs: git config --global user.email "CI@example.com" git config --global user.name "CI Almighty" - - name: Install full requirements + - name: Install AWS requirements run: pip install .[aws,test] # TODO: when passing, reverse order of tests from minimal to data diff --git a/tests/docker_rclone_with_config_cli.py b/tests/docker_rclone_with_config_cli.py index ed472bdf2..9b1e265dd 100644 --- a/tests/docker_rclone_with_config_cli.py +++ b/tests/docker_rclone_with_config_cli.py @@ -61,7 +61,8 @@ def test_direct_usage_of_rclone_with_config(self): os.environ["RCLONE_CONFIG"] = rclone_config_file_stream os.environ["RCLONE_COMMAND"] = ( - f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} --verbose --progress --config ./rclone.conf" + f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} " + "--verbose --progress --config ./rclone.conf" ) command = ( diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py 
b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index d871237f5..9fd5aee45 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -97,7 +97,10 @@ def test_rclone_transfer_batch_job(self): ) efs_client = self.efs_client - rclone_command = "rclone copy test_google_drive_remote:testing_rclone_spikeglx /mnt/efs --config ./rclone.conf" + rclone_command = ( + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy /mnt/efs " + "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? + ) rclone_config_file_path = self.test_config_file_path today = datetime.datetime.now().date().isoformat() From a388bd1b8e5836943a1d02e44beec69a703511d5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:28:58 -0400 Subject: [PATCH 17/25] fix teardown --- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 9fd5aee45..8812feb78 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -71,7 +71,7 @@ def tearDown(self): # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume # TODO: cleanup job definitions? 
(since built daily) - mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) for mount_target in mount_targets["MountTargets"]: efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) @@ -146,7 +146,7 @@ def test_rclone_transfer_batch_job(self): ] assert len(matching_efs_volumes) == 1 efs_volume = matching_efs_volumes[0] - efs_id = efs_volume["FileSystemId"] + self.efs_id = efs_volume["FileSystemId"] # Check normal job completion assert job["jobName"] == job_name From 8fb3811885d08c8dc959c963264232521d9891ef Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:38:27 -0400 Subject: [PATCH 18/25] fix teardown --- tests/test_on_data/test_yaml/yaml_aws_tools_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 8812feb78..7ea49e644 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -76,7 +76,7 @@ def tearDown(self): efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) time.sleep(60) - efs_client.delete_file_system(FileSystemId=efs_id) + efs_client.delete_file_system(FileSystemId=self.efs_id) def test_rclone_transfer_batch_job(self): region = self.region From 41f315522731ac7380b34917ec8068840c55d698 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:43:43 -0400 Subject: [PATCH 19/25] remove creation date on job definition --- src/neuroconv/tools/aws/_submit_aws_batch_job.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index caaced56f..748f25399 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -296,7 +296,7 
@@ def _ensure_compute_environment_exists( The AWS Batch client to use for the job. max_retries : int, default: 12 If the compute environment does not already exist, then this is the maximum number of times to synchronously - check for its successful creation before erroring. + check for its successful creation before raising an error. This is essential for a clean setup of the entire pipeline, or else later steps might error because they tried to launch before the compute environment was ready. """ @@ -546,7 +546,6 @@ def _generate_job_definition_name( job_definition_name += f"_{efs_id}" if docker_tag is None or docker_tag == "latest": date = datetime.now().strftime("%Y-%m-%d") - job_definition_name += f"_created-on-{date}" return job_definition_name From ff030dff55c6016e468b5ab8098a295fb896aefd Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 11:50:29 -0400 Subject: [PATCH 20/25] try normal credentials --- .github/workflows/aws_tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 189edf502..97a97c552 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -11,9 +11,9 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN_CODY }} - RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN_CODY }} - RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN_CODY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: @@ -43,8 +43,8 @@ jobs: run: pip install .[aws,test] # TODO: when passing, reverse order of tests from minimal 
to data - - name: Run subset of tests that use S3 live services + - name: Run RClone on AWS tests run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - - name: Run subset of tests that use S3 live services + - name: Run generic AWS tests run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py From cdb7f5f355e11da6a8082571dbc2f68dfba63b43 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:11:08 -0400 Subject: [PATCH 21/25] split workflows --- .github/workflows/generic_aws_tests.yml | 42 +++++++++++++++++++ .../{aws_tests.yml => rclone_aws_tests.yml} | 6 +-- 2 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/generic_aws_tests.yml rename .github/workflows/{aws_tests.yml => rclone_aws_tests.yml} (87%) diff --git a/.github/workflows/generic_aws_tests.yml b/.github/workflows/generic_aws_tests.yml new file mode 100644 index 000000000..20886a178 --- /dev/null +++ b/.github/workflows/generic_aws_tests.yml @@ -0,0 +1,42 @@ +name: AWS Tests +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon on Monday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended way + git config --global user.email "CI@example.com" + git config --global user.name "CI 
Almighty" + + - name: Install AWS requirements + run: pip install .[aws,test] + + - name: Run generic AWS tests + run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/rclone_aws_tests.yml similarity index 87% rename from .github/workflows/aws_tests.yml rename to .github/workflows/rclone_aws_tests.yml index 97a97c552..5003ec710 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -1,4 +1,4 @@ -name: AWS Tests +name: Rclone AWS Tests on: schedule: - cron: "0 16 * * 1" # Weekly at noon on Monday @@ -42,9 +42,5 @@ jobs: - name: Install AWS requirements run: pip install .[aws,test] - # TODO: when passing, reverse order of tests from minimal to data - name: Run RClone on AWS tests run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py - - - name: Run generic AWS tests - run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py From 750034ddb6df78894bb821633a057ed35dc7207e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:11:38 -0400 Subject: [PATCH 22/25] add initial push trigger --- .github/workflows/rclone_aws_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rclone_aws_tests.yml b/.github/workflows/rclone_aws_tests.yml index 5003ec710..fa704a9ab 100644 --- a/.github/workflows/rclone_aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -3,6 +3,7 @@ on: schedule: - cron: "0 16 * * 1" # Weekly at noon on Monday workflow_dispatch: + push: concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 860ad7ff654bf2d3ae662234e3297aed0098c5bb Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:24:36 -0400 Subject: [PATCH 23/25] undo push --- .github/workflows/rclone_aws_tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/rclone_aws_tests.yml 
b/.github/workflows/rclone_aws_tests.yml index fa704a9ab..bcfbeb5c7 100644 --- a/.github/workflows/rclone_aws_tests.yml +++ b/.github/workflows/rclone_aws_tests.yml @@ -1,9 +1,8 @@ name: Rclone AWS Tests on: schedule: - - cron: "0 16 * * 1" # Weekly at noon on Monday + - cron: "0 16 * * 2" # Weekly at noon on Tuesday workflow_dispatch: - push: concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 7af57d40cf8977654c92236c0159ed154cd12a16 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:26:07 -0400 Subject: [PATCH 24/25] restore normal trigger --- .github/workflows/deploy-tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 85d7546d8..606cd4910 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -2,11 +2,11 @@ name: Deploy tests on: # TODO: disabled to save resources -# pull_request: -# types: [synchronize, opened, reopened, ready_for_review] -# # Synchronize, open and reopened are the default types for pull request -# # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked -# merge_group: + pull_request: + types: [synchronize, opened, reopened, ready_for_review] + # Synchronize, open and reopened are the default types for pull request + # We add ready_for_review to trigger the check for changelog and full tests when ready for review is clicked + merge_group: workflow_dispatch: concurrency: From c58649aac9f6027f993fbce10c26b51f30bb5ab8 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 28 Sep 2024 12:26:40 -0400 Subject: [PATCH 25/25] remove comment --- .github/workflows/deploy-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 606cd4910..4f67d15de 100644 --- a/.github/workflows/deploy-tests.yml 
+++ b/.github/workflows/deploy-tests.yml @@ -1,7 +1,6 @@ name: Deploy tests on: - # TODO: disabled to save resources pull_request: types: [synchronize, opened, reopened, ready_for_review] # Synchronize, open and reopened are the default types for pull request