From f96652bed917aa1ba35b43bd7f66ac602ef3e9e7 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 27 Mar 2023 05:57:15 -0400 Subject: [PATCH 01/54] added helper function --- src/neuroconv/tools/data_transfers.py | 154 ++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/neuroconv/tools/data_transfers.py b/src/neuroconv/tools/data_transfers.py index 66dc42e88..cc1607369 100644 --- a/src/neuroconv/tools/data_transfers.py +++ b/src/neuroconv/tools/data_transfers.py @@ -9,6 +9,8 @@ from time import sleep, time from typing import Dict, List, Optional, Tuple, Union from warnings import warn +from uuid import uuid4 +from datetime import datetime import psutil from dandi.download import download as dandi_download @@ -17,6 +19,7 @@ from pynwb import NWBHDF5IO from tqdm import tqdm +from ..tools import get_package from ..utils import FolderPathType, OptionalFolderPathType try: # pragma: no cover @@ -370,3 +373,154 @@ def automatic_dandi_upload( rmtree(path=nwb_folder_path) except PermissionError: # pragma: no cover warn("Unable to clean up source files and dandiset! Please manually delete them.") + + +def submit_aws_batch_job( + job_name: str, + docker_container: str, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + iam_role_name: str = "neuroconv_batch_role", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", +) -> None: + assert ( + "RCLONE_CREDENTIALS" in os.environ + ), "You must set your rclone credentials as the environment variable 'RCLONE_CREDENTIALS' to submit this job!" + assert ( + "DANDI_API_KEY" in os.environ + ), "You must set your DANDI API key as the environment variable 'DANDI_API_KEY' to submit this job!" + + import boto3 + + region = "us-east-2" # TODO, maybe control AWS region? 
Technically not required if user has it set in credentials + + dynamodb_client = boto3.client("dynamodb", region) + dynamodb_resource = boto3.resource("dynamodb", region) + iam_client = boto3.client("iam", region) + batch_client = boto3.client("batch", region) + + # It is extremely useful to have a status tracker that is separate from the job environment + # Technically detailed logs of inner workings are given in the CloudWatch, but that can only really be + # analyzed from the AWS web console + current_tables = dynamodb_client.list_tables()["TableNames"] + if status_tracker_table_name not in current_tables: + table = dynamodb_resource.create_table( + TableName=status_tracker_table_name, + KeySchema=[dict(AttributeName="id", KeyType="HASH")], + AttributeDefinitions=[dict(AttributeName="id", AttributeType="S")], + ProvisionedThroughput=dict(ReadCapacityUnits=1, WriteCapacityUnits=1), + ) + else: + table = dynamodb_resource.Table(name=status_tracker_table_name) + + # Ensure role policy is set + current_roles = [role["RoleName"] for role in iam_client.list_roles()["Roles"]] + if iam_role_name not in current_roles: + assume_role_policy = dict( + Version="2012-10-17", + Statement=[ + dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole") + ], + ) + + role = iam_client.create_role(RoleName=iam_role_name, AssumeRolePolicyDocument=json.dumps(assume_role_policy)) + iam_client.attach_role_policy( + RoleName=role["Role"]["RoleName"], PolicyArn="arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess" + ) + iam_client.attach_role_policy( + RoleName=role["Role"]["RoleName"], PolicyArn="arn:aws:iam::aws:policy/CloudWatchFullAccess" + ) + else: + role = iam_client.get_role(RoleName=iam_role_name) + + # Ensure compute environment is setup + current_compute_environments = [ + environment["computeEnvironmentName"] + for environment in batch_client.describe_compute_environments()["computeEnvironments"] + ] + if compute_environment_name not in current_compute_environments: + batch_client.create_compute_environment( + computeEnvironmentName=compute_environment_name, + type="MANAGED", + state="ENABLED", + computeResources={ + "type": "EC2", + "allocationStrategy": "BEST_FIT", + "minvCpus": 0, # TODO, control + "maxvCpus": 256, # TODO, control + "subnets": ["subnet-0be50d51", "subnet-3fd16f77", "subnet-0092132b"], + "instanceRole": "ecsInstanceRole", + "securityGroupIds": ["sg-851667c7"], + "instanceTypes": ["optimal"], + }, + ) + + # Ensure job queue exists + current_job_queues = [queue["jobQueueName"] for queue in batch_client.describe_job_queues()["jobQueues"]] + if job_queue_name not in current_job_queues: + batch_client.create_job_queue( + jobQueueName=job_queue_name, + state="ENABLED", + priority=1, + computeEnvironmentOrder=[ + dict(order=100, computeEnvironment="dynamodb_import_environment"), + ], + ) + + # Ensure job definition exists + job_definition = f"neuroconv_batch_{docker_container}" # Keep unique by incorporating name of container + current_job_definitions = [ + definition["jobDefinitionName"] for definition in batch_client.describe_job_queues()["jobDefinitions"] + ] + if job_definition not in current_job_definitions: + batch_client.register_job_definition( + jobDefinitionName=job_definition, + type="container", + containerProperties=dict( + image=docker_container, + memory=256, # TODO, control + vcpus=16, # TODO, control + jobRoleArn=role["Role"]["Arn"], + executionRoleArn=role["Role"]["Arn"], + environment=[ + dict( + name="AWS_DEFAULT_REGION", + 
value=region, + ) + ], + ), + ) + else: + # TODO: would also need to check that memory/vcpu values resolve with previously defined name + pass + + # Submit job and update status tracker + currently_running_jobs = batch_client.list_jobs(jobQueue=job_queue_name) + if job_name not in currently_running_jobs: + batch_client.submit_job( + jobQueue=job_queue_name, + jobDefinition=job_definition, + jobName=job_name, + containerOverrides=dict( + environment=[ # These are environment variables + dict( # The burden is on the calling script to update the table status to finished + name="STATUS_TRACKER_TABLE_NAME", + value=status_tracker_table_name, + ), + dict( # For rclone transfers + name="RCLONE_CREDENTIALS", + value=os.environ["RCLONE_CREDENTIALS"], + ), + dict( # For rclone transfers + name="DANDI_API_KEY", + value=os.environ["DANDI_API_KEY"], + ), + ] + ), + ) + table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) + else: + raise ValueError( + f"There is already a job named '{job_name}' running in the queue! " + "If you are submitting multiple jobs, each will need a unique name." + ) From da3ee441eb88167c46ea8a219c00c11131df06c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 20:01:45 +0000 Subject: [PATCH 02/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/data_transfers.py b/src/neuroconv/tools/data_transfers.py index cc1607369..708a934d4 100644 --- a/src/neuroconv/tools/data_transfers.py +++ b/src/neuroconv/tools/data_transfers.py @@ -3,14 +3,14 @@ import os import re import subprocess +from datetime import datetime from pathlib import Path from shutil import rmtree from tempfile import mkdtemp from time import sleep, time from typing import Dict, List, Optional, Tuple, Union -from warnings import warn from uuid import uuid4 -from datetime import datetime +from warnings import warn import psutil from dandi.download import download as dandi_download From aa3619ad0e5eb49b43f53140f2b97b1ec3bec273 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 29 Mar 2023 16:10:04 -0400 Subject: [PATCH 03/54] remake dockerfile; add dandi upload to YAML --- dockerfiles/dockerfile_neuroconv_with_rclone | 9 ++++ src/neuroconv/tools/data_transfers.py | 18 +------ .../yaml_conversion_specification/__init__.py | 2 +- .../yaml_conversion_specification.py | 54 ++++++++++++++++++- 4 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 dockerfiles/dockerfile_neuroconv_with_rclone diff --git a/dockerfiles/dockerfile_neuroconv_with_rclone b/dockerfiles/dockerfile_neuroconv_with_rclone new file mode 100644 index 000000000..8fa339dbc --- /dev/null +++ b/dockerfiles/dockerfile_neuroconv_with_rclone @@ -0,0 +1,9 @@ +FROM rclone/rclone:latest +RUN $CONFIG_STREAM > ./rclone.conf +RUN $RCLONE_COMMANNDS + +FROM ghcr.io/catalystneuro/neuroconv:latest +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv_with_rclone +ENV YAML_FILE /usr/local/bin/this_image_yaml_conversion.yml +RUN $YAML_STREAM > $YAML_FILE +ENTRYPOINT neuroconv $YAML_FILE --upload-to-dandiset $DANDISET_ID diff --git a/src/neuroconv/tools/data_transfers.py b/src/neuroconv/tools/data_transfers.py index 708a934d4..991d037e4 100644 --- a/src/neuroconv/tools/data_transfers.py +++ 
b/src/neuroconv/tools/data_transfers.py @@ -378,6 +378,7 @@ def automatic_dandi_upload( def submit_aws_batch_job( job_name: str, docker_container: str, + environment_variables: List[Dict[str, str]], status_tracker_table_name: str = "neuroconv_batch_status_tracker", iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", @@ -501,22 +502,7 @@ def submit_aws_batch_job( jobQueue=job_queue_name, jobDefinition=job_definition, jobName=job_name, - containerOverrides=dict( - environment=[ # These are environment variables - dict( # The burden is on the calling script to update the table status to finished - name="STATUS_TRACKER_TABLE_NAME", - value=status_tracker_table_name, - ), - dict( # For rclone transfers - name="RCLONE_CREDENTIALS", - value=os.environ["RCLONE_CREDENTIALS"], - ), - dict( # For rclone transfers - name="DANDI_API_KEY", - value=os.environ["DANDI_API_KEY"], - ), - ] - ), + containerOverrides=dict(environment=environment_variables), # TODO - auto inject status tracker? ) table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) else: diff --git a/src/neuroconv/tools/yaml_conversion_specification/__init__.py b/src/neuroconv/tools/yaml_conversion_specification/__init__.py index 4cad63e60..eae42cc48 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/__init__.py +++ b/src/neuroconv/tools/yaml_conversion_specification/__init__.py @@ -1 +1 @@ -from .yaml_conversion_specification import run_conversion_from_yaml +from .yaml_conversion_specification import run_conversion_from_yaml, deploy_conversion_from_yaml diff --git a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py index b73e1e3ee..89df98072 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py @@ -1,13 +1,17 @@ import sys +import os from importlib import import_module from pathlib import Path -from typing import Optional +from typing import Optional, Literal +from uuid import uuid4 import click from dandi.metadata import _get_pynwb_metadata from dandi.organize import create_unique_filenames_from_metadata from jsonschema import RefResolver, validate +from pydantic import FilePath +from ..data_transfers import submit_aws_batch_job, automatic_dandi_upload from ...nwbconverter import NWBConverter from ...utils import FilePathType, FolderPathType, dict_deep_update, load_dict_from_file @@ -25,11 +29,17 @@ help="Path to folder where you want to save the output NWBFile.", type=click.Path(writable=True), ) +@click.option( + "--upload-to-dandiset", + help="Do you want to upload the result to DANDI? Specify the dandiset ID if so. 
" + "Also ensure you have your DANDI_API_KEY set as an environment variable.", +) @click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) def run_conversion_from_yaml_cli( specification_file_path: str, data_folder_path: Optional[str] = None, output_folder_path: Optional[str] = None, + upload_to_dandiset: Optional[str] = None, overwrite: bool = False, ): """ @@ -42,6 +52,7 @@ def run_conversion_from_yaml_cli( specification_file_path=specification_file_path, data_folder_path=data_folder_path, output_folder_path=output_folder_path, + upload_to_dandiset=upload_to_dandiset, overwrite=overwrite, ) @@ -50,6 +61,7 @@ def run_conversion_from_yaml( specification_file_path: FilePathType, data_folder_path: Optional[FolderPathType] = None, output_folder_path: Optional[FolderPathType] = None, + upload_to_dandiset: Optional[str] = None, overwrite: bool = False, ): """ @@ -69,6 +81,13 @@ def run_conversion_from_yaml( If True, replaces any existing NWBFile at the nwbfile_path location, if save_to_file is True. If False, appends the existing NWBFile at the nwbfile_path location, if save_to_file is True. """ + # This check is technically a part of the automatic dandi upload, but good to check as early as possible + # to avoid wasting time. + if upload_to_dandiset: + assert os.getenv("DANDI_API_KEY"), ( + "Unable to find environment variable 'DANDI_API_KEY'. " + "Please retrieve your token from DANDI and set this environment variable." + ) if data_folder_path is None: data_folder_path = Path(specification_file_path).parent @@ -140,3 +159,36 @@ def run_conversion_from_yaml( dandi_filename != ".nwb" ), f"Not enough metadata available to assign name to {str(named_dandi_metadata['path'])}!" named_dandi_metadata["path"].rename(str(output_folder_path / dandi_filename)) + + if upload_to_dandiset: + staging = int(upload_to_dandiset) < 100000 # temporary heuristic + automatic_dandi_upload(dandiset_id=upload_to_dandiset, nwb_folder_path=output_folder_path, staging=staging) + + +def deploy_conversion_from_yaml_and_upload_to_dandi( # the 'and upload to DANDI' part should ultimately be embedded in the YAML syntax + specification_file_path: FilePath, + dandiset_id: str, + transfer_method: Literal["rclone"], + transfer_commands: str, + transfer_config_file_path: FilePath, +): + job_name = Path(specification_file_path).stem + uuid4() + with open(file=specification_file_path) as file: + yaml_as_string = "".join(file.readlines()) # Loaded as raw string, not as dict + + if transfer_method == "rclone": + docker_container = "ghcr.io/catalystneuro/neuroconv:with_rclone" + with open(file=transfer_config_file_path) as file: + config_as_string = "".join(file.readlines()) # TODO - check newlines are OK cross-platform + environment_variables = [ + dict(name="CONFIG_STREAM", value=config_as_string), + dict(name="RCLONE_COMMANDS", value=transfer_commands), + dict(name="YAML_STREAM", value=yaml_as_string), + dict(name="DANDI_API_KEY", value=os.envion["DANDI_API_KEY"]), + dict(name="DANDISET_ID", value=dandiset_id), + ] + else: + raise NotImplementedError(f"The transfer method {transfer_method} is not yet supported!") + submit_aws_batch_job( + job_name=job_name, docker_container=docker_container, environment_variables=environment_variables + ) # TODO - setup something here with status tracker table - might have to be on YAML level From 1ae80346a11f63cbab9510b1ec8adcea2b828a20 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 2 Apr 2023 17:21:32 -0400 Subject: [PATCH 04/54] debugged --- 
src/neuroconv/tools/data_transfers.py | 93 ++++++++++--------- .../yaml_conversion_specification/__init__.py | 2 +- .../yaml_conversion_specification.py | 10 +- 3 files changed, 58 insertions(+), 47 deletions(-) diff --git a/src/neuroconv/tools/data_transfers.py b/src/neuroconv/tools/data_transfers.py index 991d037e4..554efb48b 100644 --- a/src/neuroconv/tools/data_transfers.py +++ b/src/neuroconv/tools/data_transfers.py @@ -378,19 +378,13 @@ def automatic_dandi_upload( def submit_aws_batch_job( job_name: str, docker_container: str, + efs_name: str, environment_variables: List[Dict[str, str]], status_tracker_table_name: str = "neuroconv_batch_status_tracker", iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", ) -> None: - assert ( - "RCLONE_CREDENTIALS" in os.environ - ), "You must set your rclone credentials as the environment variable 'RCLONE_CREDENTIALS' to submit this job!" - assert ( - "DANDI_API_KEY" in os.environ - ), "You must set your DANDI API key as the environment variable 'DANDI_API_KEY' to submit this job!" - import boto3 region = "us-east-2" # TODO, maybe control AWS region? Technically not required if user has it set in credentials @@ -399,6 +393,7 @@ def submit_aws_batch_job( dynamodb_resource = boto3.resource("dynamodb", region) iam_client = boto3.client("iam", region) batch_client = boto3.client("batch", region) + efs_client = boto3.client("efs", region) # It is extremely useful to have a status tracker that is separate from the job environment # Technically detailed logs of inner workings are given in the CloudWatch, but that can only really be @@ -435,6 +430,7 @@ def submit_aws_batch_job( role = iam_client.get_role(RoleName=iam_role_name) # Ensure compute environment is setup + # Note that it's easier to do this through the UI current_compute_environments = [ environment["computeEnvironmentName"] for environment in batch_client.describe_compute_environments()["computeEnvironments"] @@ -445,14 +441,17 @@ def submit_aws_batch_job( type="MANAGED", state="ENABLED", computeResources={ - "type": "EC2", - "allocationStrategy": "BEST_FIT", - "minvCpus": 0, # TODO, control - "maxvCpus": 256, # TODO, control - "subnets": ["subnet-0be50d51", "subnet-3fd16f77", "subnet-0092132b"], - "instanceRole": "ecsInstanceRole", - "securityGroupIds": ["sg-851667c7"], + "type": "SPOT", "instanceTypes": ["optimal"], + "allocationStrategy": "BEST_FIT_PROGRESSIVE", + "minvCpus": 0, + "maxvCpus": 256, + "instanceRole": "ecsInstanceRole", + "subnets": ["subnet-0890a93aedb42e73e", "subnet-0680e07980538b786", "subnet-0e20bbcfb951b5387"], + "securityGroupIds": [ + # "sg-0ad453a713c5b5580", + "sg-001699e5b7496b226", + ], }, ) @@ -464,47 +463,55 @@ def submit_aws_batch_job( state="ENABLED", priority=1, computeEnvironmentOrder=[ - dict(order=100, computeEnvironment="dynamodb_import_environment"), + dict(order=100, computeEnvironment=compute_environment_name), ], ) - # Ensure job definition exists - job_definition = f"neuroconv_batch_{docker_container}" # Keep unique by incorporating name of container - current_job_definitions = [ - definition["jobDefinitionName"] for definition in batch_client.describe_job_queues()["jobDefinitions"] - ] - if job_definition not in current_job_definitions: - batch_client.register_job_definition( - jobDefinitionName=job_definition, - type="container", - containerProperties=dict( - image=docker_container, - memory=256, # TODO, control - vcpus=16, # TODO, control - 
jobRoleArn=role["Role"]["Arn"], - executionRoleArn=role["Role"]["Arn"], - environment=[ - dict( - name="AWS_DEFAULT_REGION", - value=region, - ) - ], - ), - ) - else: - # TODO: would also need to check that memory/vcpu values resolve with previously defined name - pass + # Create unique EFS volume - having some trouble with automatically doing this ATM + # efs_client.create_file_system(name=job_name) # TODO, decide how best to perform cleanup + # efs_name = "test_efs" + + # Always re-register the job; if the name already exists, it will count as a 'revision' + batch_client.register_job_definition( + jobDefinitionName=job_name, + type="container", + containerProperties=dict( + image=docker_container, + # instanceType="t2.small", + memory=16 * 1024, # TODO, also conflicting info on if its MiB or GiB; confirmed, via boto3 it's in MiB + vcpus=4, + jobRoleArn=role["Role"]["Arn"], + executionRoleArn=role["Role"]["Arn"], + environment=[ + dict( + name="AWS_DEFAULT_REGION", + value=region, + ) + ], + volumes=[ + # {"name": efs_name, "host": {"sourcePath": "/mnt/data"}}, + { + "name": efs_name, + # "host": {"sourcePath": "/mnt/data"}, + "efsVolumeConfiguration": {"fileSystemId": "fs-0ab4d92c222097625"}, # "rootDirectory": ""}, + } + ], + mountPoints=[{"sourceVolume": efs_name, "containerPath": "/mnt/data", "readOnly": False}], + ), + ) # Submit job and update status tracker currently_running_jobs = batch_client.list_jobs(jobQueue=job_queue_name) if job_name not in currently_running_jobs: batch_client.submit_job( jobQueue=job_queue_name, - jobDefinition=job_definition, + jobDefinition=job_name, jobName=job_name, containerOverrides=dict(environment=environment_variables), # TODO - auto inject status tracker? ) - table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) + table.put_item( + Item=dict(id=str(uuid4()), job_name=job_name, submitted_on=str(datetime.now()), status="submitted") + ) else: raise ValueError( f"There is already a job named '{job_name}' running in the queue! 
" diff --git a/src/neuroconv/tools/yaml_conversion_specification/__init__.py b/src/neuroconv/tools/yaml_conversion_specification/__init__.py index eae42cc48..aca3b40e9 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/__init__.py +++ b/src/neuroconv/tools/yaml_conversion_specification/__init__.py @@ -1 +1 @@ -from .yaml_conversion_specification import run_conversion_from_yaml, deploy_conversion_from_yaml +from .yaml_conversion_specification import run_conversion_from_yaml, deploy_conversion_from_yaml_and_upload_to_dandi diff --git a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py index 89df98072..3f19c697e 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py @@ -167,12 +167,13 @@ def run_conversion_from_yaml( def deploy_conversion_from_yaml_and_upload_to_dandi( # the 'and upload to DANDI' part should ultimately be embedded in the YAML syntax specification_file_path: FilePath, + efs_name: str, dandiset_id: str, transfer_method: Literal["rclone"], transfer_commands: str, transfer_config_file_path: FilePath, ): - job_name = Path(specification_file_path).stem + uuid4() + job_name = Path(specification_file_path).stem + "-" + str(uuid4()) with open(file=specification_file_path) as file: yaml_as_string = "".join(file.readlines()) # Loaded as raw string, not as dict @@ -184,11 +185,14 @@ def deploy_conversion_from_yaml_and_upload_to_dandi( # the 'and upload to DANDI dict(name="CONFIG_STREAM", value=config_as_string), dict(name="RCLONE_COMMANDS", value=transfer_commands), dict(name="YAML_STREAM", value=yaml_as_string), - dict(name="DANDI_API_KEY", value=os.envion["DANDI_API_KEY"]), + dict(name="DANDI_API_KEY", value=os.environ["DANDI_API_KEY"]), dict(name="DANDISET_ID", value=dandiset_id), ] else: raise NotImplementedError(f"The transfer method {transfer_method} is not yet supported!") submit_aws_batch_job( - job_name=job_name, docker_container=docker_container, environment_variables=environment_variables + job_name=job_name, + docker_container=docker_container, + efs_name=efs_name, + environment_variables=environment_variables, ) # TODO - setup something here with status tracker table - might have to be on YAML level From 8f50c80cc7f25ede65d9ebbc5669f3c1ce08c268 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 2 Apr 2023 17:52:54 -0400 Subject: [PATCH 05/54] Create aws_batch_deployment.rst --- docs/developer_guide/aws_batch_deployment.rst | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 docs/developer_guide/aws_batch_deployment.rst diff --git a/docs/developer_guide/aws_batch_deployment.rst b/docs/developer_guide/aws_batch_deployment.rst new file mode 100644 index 000000000..cb4d5539b --- /dev/null +++ b/docs/developer_guide/aws_batch_deployment.rst @@ -0,0 +1,168 @@ +One way of deploying items on AWS Batch is to manually setup the entire workflow through AWS web UI, and to manually submit each jobs in that manner. + +Deploying hundreds of jobs in this way would be cumbersome. 
+ +Here are two other methods that allow simpler deployment by using `boto3` + + +Semi-automated Deployment of NeuroConv on AWS Batch +--------------------------------------------------- + +Step 1: Transfer data to Elastic File System (EFS) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The nice thing about using EFS is that we are only ever billed for our literal amount of disk storage over time, and do not need to specify a particular fixed allocation or scaling stategy. + +It is also relatively easy to mount across multiple AWS Batch jobs simultaneously. + +Unfortunately, the one downside is that it's pricing per GB-month is significantly higher than either S3 or EBS. + +To easily transfer data from a Google Drive (or theoretically any backend supported by `rclone`), set the following environment variables for rclone credentials: `DRIVE_NAME`, `TOKEN`, `REFRESH_TOKEN`, and `EXPIRY`. + +.. note: + + I eventually hope to just be able to read and pass these directly from a local `rclone.conf` file, but + +.. note: + + All path references must point to `/mnt/data/` as the base in order to persist across jobs. + +.. code: + + import os + from datetime import datetime + + from neuroconv.tools.data_transfers import submit_aws_batch_job + + job_name = "" + docker_container = "ghcr.io/catalystneuro/rclone_auto_config:latest" + efs_name = "" + + log_datetime = str(datetime.now()).replace(" ", ":") # no spaces in CLI + RCLONE_COMMAND = f"{os.environ['RCLONE_COMMAND']} -v --config /mnt/data/rclone.conf --log-file /mnt/data/submit-{log_datetime}.txt" + + environment_variables = [ + dict(name="DRIVE_NAME", value=os.environ["DRIVE_NAME"]), + dict(name="TOKEN", value=os.environ["TOKEN"]), + dict(name="REFRESH_TOKEN", value=os.environ["REFRESH_TOKEN"]), + dict(name="EXPIRY", value=os.environ["EXPIRY"]), + dict(name="RCLONE_COMMAND", value=RCLONE_COMMAND), + ] + + submit_aws_batch_job( + job_name=job_name, + docker_container=docker_container, + efs_name=efs_name, + environment_variables=environment_variables, + ) + + +An example `RCLONE_COMMAND` for a drive named 'MyDrive' and the GIN testing data stored under `/ephy_testing_data/spikeglx/Noise4Sam_g0/` of that drive would be + +.. code: + + RCLONE_COMMAND = f"sync MyDrive:/ephy_testing_data/spikeglx/Noise4Sam_g0 /mnt/data/Noise4Sam_g0" + + +Step 2: Run the YAML Conversion Specificattion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Continuing the example above, if we have the YAML file `test_batch.yml` + +.. code: + + metadata: + NWBFile: + lab: My Lab + institution: My Institution + + conversion_options: + stub_test: True + + data_interfaces: + ap: SpikeGLXRecordingInterface + lf: SpikeGLXRecordingInterface + + experiments: + ymaze: + metadata: + NWBFile: + session_description: Testing batch deployment. + + sessions: + - nwbfile_name: /mnt/data/test_batch_deployment.nwb + source_data: + ap: + file_path: /mnt/data/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + lf: + file_path: /mnt/data/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + metadata: + NWBFile: + session_id: test_batch_deployment + Subject: + subject_id: "1" + sex: F + age: P35D + species: Mus musculus + +then we can run the following stand-alone script to deploy the conversion after confirming Step 1 completed successfully. + +.. 
code: + + from neuroconv.tools.data_transfers import submit_aws_batch_job + + job_name = "" + docker_container = "ghcr.io/catalystneuro/neuroconv:dev_auto_yaml" + efs_name = "" + + yaml_file_path = "/path/to/test_batch.yml" + + with open(file=yaml_file_path) as file: + YAML_STREAM = "".join(file.readlines()).replace('"', "'") + + environment_variables = [dict(name="YAML_STREAM", value=YAML_STREAM)] + + submit_aws_batch_job( + job_name=job_name, + docker_container=docker_container, + efs_name=efs_name, + environment_variables=environment_variables, + ) + + +Step 3: Ensure File Cleanup +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TODO: write a dockerfile to perform this step with the API + +It's a good idea to confirm that you have access to your EFS from on-demand resources in case you ever need to go in and perform a manual cleanup operation. + +Boot up an EC2 t2.micro instance using an Amazon Linux 2 image with minimal resources. + +Create 2 new security groups, `EFS Target` (no policies set) and `EFS Mount` (set inbound policy to NFS with the `EFS Target` as the source). + +On the EC2 instance, change the security group to the `EFS Target`. On the EFS Network settings, add the `EFS Mount` group. + +Connect to the EC2 instance and run + +.. code: + + mkdir ~/efs-mount-point # or any other name you want; I do recommend keeping this in the home directory (~) for ease of access though + sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport fs-.efs.us-east-2.amazonaws.com:/ ~/efs-mount-point # Note that any operations performed on contents of the mounted volume must utilize sudo + +and it _should_ work, though this step is known to have various issues. If you did everything exactly as illustrated above, it should succeed; at least it did on 4/2/2023. + +You can now read, write, and, importantly, delete any contents on the EFS. + +Until the automated DANDI upload is implemented in the YAML functionality, you will need to use this method to manually remove the NWB file. + +Even after that, you should double-check that the `cleanup=True` flag of that function executed properly. + + + +Fully Automated Deployment of NeuroConv on AWS Batch +---------------------------------------------------- + +Coming soon... + +The approach is essentially the same as the semi-automated one; I just submit all jobs at the same time, with each job dependent on the completion of the one before it.
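+
+A rough sketch of what the chained submission could look like, written directly against the `boto3` Batch client, is shown below. The job names and job definition names are placeholders, and it assumes the compute environment, job queue, and job definitions from the semi-automated workflow above already exist.
+
+.. code:
+
+    import boto3
+
+    # Assumes AWS credentials are configured locally (e.g., via `aws configure` or environment variables).
+    batch_client = boto3.client("batch", region_name="us-east-2")
+
+    # Step 1: transfer the source data onto the EFS volume (e.g., an rclone-based job definition).
+    transfer_job = batch_client.submit_job(
+        jobName="example-transfer",  # placeholder
+        jobQueue="neuroconv_batch_queue",  # the default queue name used by `submit_aws_batch_job`
+        jobDefinition="example-rclone-transfer",  # placeholder
+    )
+
+    # Step 2: run the YAML conversion, released only after the transfer job completes successfully.
+    conversion_job = batch_client.submit_job(
+        jobName="example-conversion",  # placeholder
+        jobQueue="neuroconv_batch_queue",
+        jobDefinition="example-yaml-conversion",  # placeholder
+        dependsOn=[dict(jobId=transfer_job["jobId"])],
+    )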
From 901f1e17105fed9d2f08a10723d00feadd4f5982 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 2 Apr 2023 17:53:26 -0400 Subject: [PATCH 06/54] Delete dockerfile_neuroconv_with_rclone --- dockerfiles/dockerfile_neuroconv_with_rclone | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 dockerfiles/dockerfile_neuroconv_with_rclone diff --git a/dockerfiles/dockerfile_neuroconv_with_rclone b/dockerfiles/dockerfile_neuroconv_with_rclone deleted file mode 100644 index 8fa339dbc..000000000 --- a/dockerfiles/dockerfile_neuroconv_with_rclone +++ /dev/null @@ -1,9 +0,0 @@ -FROM rclone/rclone:latest -RUN $CONFIG_STREAM > ./rclone.conf -RUN $RCLONE_COMMANNDS - -FROM ghcr.io/catalystneuro/neuroconv:latest -LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv_with_rclone -ENV YAML_FILE /usr/local/bin/this_image_yaml_conversion.yml -RUN $YAML_STREAM > $YAML_FILE -ENTRYPOINT neuroconv $YAML_FILE --upload-to-dandiset $DANDISET_ID From d4ae252006387f2db66c4280f7f7c2538953c5bd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 2 Apr 2023 21:56:58 +0000 Subject: [PATCH 07/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/developer_guide/aws_batch_deployment.rst | 2 +- .../tools/yaml_conversion_specification/__init__.py | 5 ++++- .../yaml_conversion_specification.py | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/developer_guide/aws_batch_deployment.rst b/docs/developer_guide/aws_batch_deployment.rst index cb4d5539b..393b81eae 100644 --- a/docs/developer_guide/aws_batch_deployment.rst +++ b/docs/developer_guide/aws_batch_deployment.rst @@ -21,7 +21,7 @@ To easily transfer data from a Google Drive (or theoretically any backend suppor .. note: - I eventually hope to just be able to read and pass these directly from a local `rclone.conf` file, but + I eventually hope to just be able to read and pass these directly from a local `rclone.conf` file, but .. 
note: diff --git a/src/neuroconv/tools/yaml_conversion_specification/__init__.py b/src/neuroconv/tools/yaml_conversion_specification/__init__.py index aca3b40e9..6f1daf35b 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/__init__.py +++ b/src/neuroconv/tools/yaml_conversion_specification/__init__.py @@ -1 +1,4 @@ -from .yaml_conversion_specification import run_conversion_from_yaml, deploy_conversion_from_yaml_and_upload_to_dandi +from .yaml_conversion_specification import ( + deploy_conversion_from_yaml_and_upload_to_dandi, + run_conversion_from_yaml, +) diff --git a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py index 3f19c697e..6eaa6be59 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/yaml_conversion_specification.py @@ -1,8 +1,8 @@ -import sys import os +import sys from importlib import import_module from pathlib import Path -from typing import Optional, Literal +from typing import Literal, Optional from uuid import uuid4 import click @@ -11,7 +11,7 @@ from jsonschema import RefResolver, validate from pydantic import FilePath -from ..data_transfers import submit_aws_batch_job, automatic_dandi_upload +from ..data_transfers import automatic_dandi_upload, submit_aws_batch_job from ...nwbconverter import NWBConverter from ...utils import FilePathType, FolderPathType, dict_deep_update, load_dict_from_file From f1f7b9fc8110c8a411972365c373d106b45a7426 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sun, 18 Feb 2024 18:44:42 +0000 Subject: [PATCH 08/54] typos and formatting --- docs/developer_guide/aws_batch_deployment.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/developer_guide/aws_batch_deployment.rst b/docs/developer_guide/aws_batch_deployment.rst index 393b81eae..5c2ad68de 100644 --- a/docs/developer_guide/aws_batch_deployment.rst +++ b/docs/developer_guide/aws_batch_deployment.rst @@ -11,7 +11,7 @@ Semi-automated Deployment of NeuroConv on AWS Batch Step 1: Transfer data to Elastic File System (EFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The nice thing about using EFS is that we are only ever billed for our literal amount of disk storage over time, and do not need to specify a particular fixed allocation or scaling stategy. +The nice thing about using EFS is that we are only ever billed for our literal amount of disk storage over time, and do not need to specify a particular fixed allocation or scaling strategy. It is also relatively easy to mount across multiple AWS Batch jobs simultaneously. @@ -27,7 +27,7 @@ To easily transfer data from a Google Drive (or theoretically any backend suppor All path references must point to `/mnt/data/` as the base in order to persist across jobs. -.. code: +.. code: python import os from datetime import datetime @@ -61,10 +61,10 @@ An example `RCLONE_COMMAND` for a drive named 'MyDrive' and the GIN testing data .. 
code: - RCLONE_COMMAND = f"sync MyDrive:/ephy_testing_data/spikeglx/Noise4Sam_g0 /mnt/data/Noise4Sam_g0" + RCLONE_COMMAND = "sync MyDrive:/ephy_testing_data/spikeglx/Noise4Sam_g0 /mnt/data/Noise4Sam_g0" -Step 2: Run the YAML Conversion Specificattion +Step 2: Run the YAML Conversion Specification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Continuing the example above, if we have the YAML file `test_batch.yml` From 9213391b580d079bb1e73e6e4c153d1f936220d5 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 13:32:28 -0400 Subject: [PATCH 09/54] add changelog --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 509797542..735fe0a82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,9 @@ * The usage of `compression_options` directly through the `neuroconv.tools.ophys` submodule is now deprecated - users should refer to the new `configure_backend` method for a general approach for setting compression. [PR #940](https://github.com/catalystneuro/neuroconv/pull/940) * Removed the option of running `interface.run_conversion` without `nwbfile_path` argument . [PR #951](https://github.com/catalystneuro/neuroconv/pull/951) - - ### Features * Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902) +* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for performing cloud-based conversions on data that is accessible via `rclone`, and whose cost can be estimated with `neuroconv.tools.data_transfers.estimate_aws_batch_job_cost`. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. [PR #922](https://github.com/catalystneuro/neuroconv/pull/922) From a476ba79456c63dff569f1e64a40701847eb7e1c Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 13:56:36 -0400 Subject: [PATCH 10/54] correct merge conflict and changelog + imports --- CHANGELOG.md | 2 +- .../tools/data_transfers/__init__.py | 4 +- src/neuroconv/tools/data_transfers/_aws.py | 54 +++++++++---------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 735fe0a82..d7d87138f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Features * Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902) -* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for performing cloud-based conversions on data that is accessible via `rclone`, and whose cost can be estimated with `neuroconv.tools.data_transfers.estimate_aws_batch_job_cost`. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) +* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for performing cloud-based conversions on data that is accessible via `rclone`, and runtime can be estimated with `neuroconv.tools.data_transfers.estimate_total_conversion_runtime`. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. 
[PR #922](https://github.com/catalystneuro/neuroconv/pull/922) diff --git a/src/neuroconv/tools/data_transfers/__init__.py b/src/neuroconv/tools/data_transfers/__init__.py index ee8ef8b63..5521b153e 100644 --- a/src/neuroconv/tools/data_transfers/__init__.py +++ b/src/neuroconv/tools/data_transfers/__init__.py @@ -1,12 +1,12 @@ """Collection of helper functions for assessing and performing automated data transfers.""" -from ._aws import estimate_s3_conversion_cost, submit_aws_batch_job, estimate_aws_batch_job_cost +from ._aws import estimate_s3_conversion_cost, submit_aws_batch_job, estimate_total_conversion_runtime from ._dandi import automatic_dandi_upload from ._globus import get_globus_dataset_content_sizes, transfer_globus_content from ._helpers import estimate_total_conversion_runtime __all__ = [ - "estimate_aws_batch_job_cost", + "estimate_total_conversion_runtime", "submit_aws_batch_job", "estimate_s3_conversion_cost", "automatic_dandi_upload", diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 89f317c28..410477bd4 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -1,7 +1,6 @@ """Collection of helper functions for assessing and performing automated data transfers related to AWS.""" import json -import os from datetime import datetime from uuid import uuid4 @@ -37,16 +36,15 @@ def estimate_s3_conversion_cost( return cost_mb_s * total_mb_s -def estimate_s3_conversion_cost( +def estimate_total_conversion_runtime( total_mb: float, transfer_rate_mb: float = 20.0, conversion_rate_mb: float = 17.0, - upload_rate_mb: float = 40.0, + upload_rate_mb: float = 40, compression_ratio: float = 1.7, ): """ - Estimate potential cost of performing an entire conversion on S3 using full automation. - + Estimate how long the combined process of data transfer, conversion, and upload is expected to take. Parameters ---------- total_mb: float @@ -57,36 +55,37 @@ def estimate_s3_conversion_cost( Estimate of the conversion rate for the data. Can vary widely depending on conversion options and type of data. Figure of 17MB/s is based on extensive compression of high-volume, high-resolution ecephys. upload_rate_mb : float, default: 40.0 - Estimate of the upload rate of a single file to the DANDI Archive. + Estimate of the upload rate of a single file to the DANDI archive. compression_ratio : float, default: 1.7 Estimate of the final average compression ratio for datasets in the file. Can vary widely. """ c = 1 / compression_ratio # compressed_size = total_size * c - total_mb_s = total_mb**2 / 2 * (1 / transfer_rate_mb + (2 * c + 1) / conversion_rate_mb + 2 * c**2 / upload_rate_mb) - cost_gb_m = 0.08 / 1e3 # $0.08 / GB Month - cost_mb_s = cost_gb_m / (1e3 * 2.628e6) # assuming 30 day month; unsure how amazon weights shorter months? - return cost_mb_s * total_mb_s + return total_mb * (1 / transfer_rate_mb + 1 / conversion_rate_mb + c / upload_rate_mb) def submit_aws_batch_job( job_name: str, docker_container: str, + # rclone_config_file_path: Optional[str] = None, + region: str = "us-east-2", status_tracker_table_name: str = "neuroconv_batch_status_tracker", iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", ) -> None: - assert ( - "RCLONE_CREDENTIALS" in os.environ - ), "You must set your rclone credentials as the environment variable 'RCLONE_CREDENTIALS' to submit this job!" 
- assert ( - "DANDI_API_KEY" in os.environ - ), "You must set your DANDI API key as the environment variable 'DANDI_API_KEY' to submit this job!" + # assert ( + # "DANDI_API_KEY" in os.environ + # ), "You must set your DANDI API key as the environment variable 'DANDI_API_KEY' to submit this job!" + # + # default_rclone_config_file_path = pathlib.Path.home() / ".config" / "rclone" / "rclone.conf" + # rclone_config_file_path = rclone_config_file_path or default_rclone_config_file_path + # if not rclone_config_file_path.exists(): + # raise ValueError("You must configure rclone on your local system to submit this job!") + # with open(file=rclone_config_file_path, mode="r") as io: + # rclone_config_file_stream = io.read() import boto3 - region = "us-east-2" # TODO, maybe control AWS region? Technically not required if user has it set in credentials - dynamodb_client = boto3.client("dynamodb", region) dynamodb_resource = boto3.resource("dynamodb", region) iam_client = boto3.client("iam", region) @@ -195,19 +194,20 @@ def submit_aws_batch_job( jobDefinition=job_definition, jobName=job_name, containerOverrides=dict( - environment=[ # These are environment variables + # Set environment variables + environment=[ dict( # The burden is on the calling script to update the table status to finished name="STATUS_TRACKER_TABLE_NAME", value=status_tracker_table_name, ), - dict( # For rclone transfers - name="RCLONE_CREDENTIALS", - value=os.environ["RCLONE_CREDENTIALS"], - ), - dict( # For rclone transfers - name="DANDI_API_KEY", - value=os.environ["DANDI_API_KEY"], - ), + # dict( # For rclone transfers + # name="RCLONE_CREDENTIALS", + # value=os.environ["RCLONE_CREDENTIALS"], + # ), + # dict( # For rclone transfers + # name="DANDI_API_KEY", + # value=os.environ["DANDI_API_KEY"], + # ), ] ), ) From 4f6489d8ceabfdbd41a2930c136e514bed74ad25 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:06:10 -0400 Subject: [PATCH 11/54] format docstring --- src/neuroconv/tools/data_transfers/_aws.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 410477bd4..4df26fce1 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -45,6 +45,7 @@ def estimate_total_conversion_runtime( ): """ Estimate how long the combined process of data transfer, conversion, and upload is expected to take. + Parameters ---------- total_mb: float @@ -60,6 +61,7 @@ def estimate_total_conversion_runtime( Estimate of the final average compression ratio for datasets in the file. Can vary widely. """ c = 1 / compression_ratio # compressed_size = total_size * c + return total_mb * (1 / transfer_rate_mb + 1 / conversion_rate_mb + c / upload_rate_mb) From 766185f0f7da215e57ac7af2b0cad11acdbe782f Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:15:42 -0400 Subject: [PATCH 12/54] add changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7d87138f..cee0a8e8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Features * Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). 
[PR #902](https://github.com/catalystneuro/neuroconv/pull/902) -* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for performing cloud-based conversions on data that is accessible via `rclone`, and runtime can be estimated with `neuroconv.tools.data_transfers.estimate_total_conversion_runtime`. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) +* Added helper function `neuroconv.tools.yaml_conversion_specification.deploy_conversion_from_yaml_and_upload_to_dandi` for performing cloud-based conversions on data that is accessible via `rclone`, and total runtime can be estimated with `neuroconv.tools.data_transfers.estimate_total_conversion_runtime`. [PR #393](https://github.com/catalystneuro/neuroconv/pull/393) ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. [PR #922](https://github.com/catalystneuro/neuroconv/pull/922) From 9ae7ace238d38ca433d5467837dbd7b68275dc4c Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:16:38 -0400 Subject: [PATCH 13/54] adjust changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7d87138f..d79097dd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Features * Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902) -* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for performing cloud-based conversions on data that is accessible via `rclone`, and runtime can be estimated with `neuroconv.tools.data_transfers.estimate_total_conversion_runtime`. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) +* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for basic automated submission of AWS batch jobs. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. 
[PR #922](https://github.com/catalystneuro/neuroconv/pull/922) From c7fb810f27bddb0eda07edf0c866654c0186f5c0 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:18:20 -0400 Subject: [PATCH 14/54] split estimator to different PR --- .../tools/data_transfers/__init__.py | 3 +- src/neuroconv/tools/data_transfers/_aws.py | 29 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/__init__.py b/src/neuroconv/tools/data_transfers/__init__.py index 5521b153e..dd653cb05 100644 --- a/src/neuroconv/tools/data_transfers/__init__.py +++ b/src/neuroconv/tools/data_transfers/__init__.py @@ -1,12 +1,11 @@ """Collection of helper functions for assessing and performing automated data transfers.""" -from ._aws import estimate_s3_conversion_cost, submit_aws_batch_job, estimate_total_conversion_runtime +from ._aws import estimate_s3_conversion_cost, submit_aws_batch_job from ._dandi import automatic_dandi_upload from ._globus import get_globus_dataset_content_sizes, transfer_globus_content from ._helpers import estimate_total_conversion_runtime __all__ = [ - "estimate_total_conversion_runtime", "submit_aws_batch_job", "estimate_s3_conversion_cost", "automatic_dandi_upload", diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 4df26fce1..94bf1011b 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -36,35 +36,6 @@ def estimate_s3_conversion_cost( return cost_mb_s * total_mb_s -def estimate_total_conversion_runtime( - total_mb: float, - transfer_rate_mb: float = 20.0, - conversion_rate_mb: float = 17.0, - upload_rate_mb: float = 40, - compression_ratio: float = 1.7, -): - """ - Estimate how long the combined process of data transfer, conversion, and upload is expected to take. - - Parameters - ---------- - total_mb: float - The total amount of data (in MB) that will be transferred, converted, and uploaded to dandi. - transfer_rate_mb : float, default: 20.0 - Estimate of the transfer rate for the data. - conversion_rate_mb : float, default: 17.0 - Estimate of the conversion rate for the data. Can vary widely depending on conversion options and type of data. - Figure of 17MB/s is based on extensive compression of high-volume, high-resolution ecephys. - upload_rate_mb : float, default: 40.0 - Estimate of the upload rate of a single file to the DANDI archive. - compression_ratio : float, default: 1.7 - Estimate of the final average compression ratio for datasets in the file. Can vary widely. 
- """ - c = 1 / compression_ratio # compressed_size = total_size * c - - return total_mb * (1 / transfer_rate_mb + 1 / conversion_rate_mb + c / upload_rate_mb) - - def submit_aws_batch_job( job_name: str, docker_container: str, From 7fedcdda06753728503fbb2f2a795716b5b01129 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:45:46 -0400 Subject: [PATCH 15/54] expose extra options and add tests --- src/neuroconv/tools/data_transfers/_aws.py | 103 +++++++++++++-------- tests/test_minimal/test_tools/s3_tools.py | 35 +++++++ 2 files changed, 100 insertions(+), 38 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 94bf1011b..b52fe4abd 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -39,26 +39,53 @@ def estimate_s3_conversion_cost( def submit_aws_batch_job( job_name: str, docker_container: str, - # rclone_config_file_path: Optional[str] = None, + command: Optional[str] = None, + job_dependencies: Optional[List[Dict[str, str]]] = None, region: str = "us-east-2", status_tracker_table_name: str = "neuroconv_batch_status_tracker", iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", -) -> None: - # assert ( - # "DANDI_API_KEY" in os.environ - # ), "You must set your DANDI API key as the environment variable 'DANDI_API_KEY' to submit this job!" - # - # default_rclone_config_file_path = pathlib.Path.home() / ".config" / "rclone" / "rclone.conf" - # rclone_config_file_path = rclone_config_file_path or default_rclone_config_file_path - # if not rclone_config_file_path.exists(): - # raise ValueError("You must configure rclone on your local system to submit this job!") - # with open(file=rclone_config_file_path, mode="r") as io: - # rclone_config_file_stream = io.read() +) -> Dict[str, str]: + """ + Submit a job to AWS Batch for processing. + + Parameters + ---------- + job_name : str + The name of the job to submit. + docker_container : str + The name of the Docker container to use for the job. + command : str, optional + The command to run in the Docker container. + Current syntax only supports a single line; consecutive actions should be chained with the '&&' operator. + job_dependencies : list of dict + A list of job dependencies for this job to trigger. Structured as follows: + [ + {"jobId": "job_id_1", "type": "N_TO_N"}, + {"jobId": "job_id_2", "type": "SEQUENTIAL"}, + ... + ] + region : str, default: "us-east-2" + The AWS region to use for the job. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + iam_role_name : str, default: "neuroconv_batch_role" + The name of the IAM role to use for the job. + compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + Returns + ------- + job_submission_info : dict + A dictionary containing the job ID and other relevant information. 
+ """ import boto3 + job_dependencies = job_dependencies or [] + dynamodb_client = boto3.client("dynamodb", region) dynamodb_resource = boto3.resource("dynamodb", region) iam_client = boto3.client("iam", region) @@ -156,37 +183,37 @@ def submit_aws_batch_job( ), ) else: - # TODO: would also need to check that memory/vcpu values resolve with previously defined name + # TODO: do I also need to check that memory/vcpu values resolve with previously defined name? pass # Submit job and update status tracker currently_running_jobs = batch_client.list_jobs(jobQueue=job_queue_name) - if job_name not in currently_running_jobs: - batch_client.submit_job( - jobQueue=job_queue_name, - jobDefinition=job_definition, - jobName=job_name, - containerOverrides=dict( - # Set environment variables - environment=[ - dict( # The burden is on the calling script to update the table status to finished - name="STATUS_TRACKER_TABLE_NAME", - value=status_tracker_table_name, - ), - # dict( # For rclone transfers - # name="RCLONE_CREDENTIALS", - # value=os.environ["RCLONE_CREDENTIALS"], - # ), - # dict( # For rclone transfers - # name="DANDI_API_KEY", - # value=os.environ["DANDI_API_KEY"], - # ), - ] - ), - ) - table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) - else: + if job_name in currently_running_jobs: raise ValueError( f"There is already a job named '{job_name}' running in the queue! " "If you are submitting multiple jobs, each will need a unique name." ) + + # Set environment variables to the docker container + # as well as optional command to run + container_overrides = dict( + # Set environment variables + environment=[ + dict( # The burden is on the calling script to update the table status to finished + name="STATUS_TRACKER_TABLE_NAME", + value=status_tracker_table_name, + ), + ] + ) + if command is not None: + container_overrides["command"] = [command] + job_submission_info = batch_client.submit_job( + jobQueue=job_queue_name, + dependsOn=job_dependencies, + jobDefinition=job_definition, + jobName=job_name, + containerOverrides=container_overrides, + ) + table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) + + return job_submission_info diff --git a/tests/test_minimal/test_tools/s3_tools.py b/tests/test_minimal/test_tools/s3_tools.py index 4998cc0c9..3a61b47d3 100644 --- a/tests/test_minimal/test_tools/s3_tools.py +++ b/tests/test_minimal/test_tools/s3_tools.py @@ -1,6 +1,7 @@ from neuroconv.tools.data_transfers import ( estimate_s3_conversion_cost, estimate_total_conversion_runtime, + submit_aws_batch_job, ) @@ -46,3 +47,37 @@ def test_estimate_total_conversion_runtime(): 1235294.1176470588, 12352941.176470589, ] + + +def test_submit_aws_batch_job(): + job_name = "test_submit_aws_batch_job" + docker_container = "ubuntu:latest" + command = "echo 'Testing NeuroConv AWS Batch submission." + + submit_aws_batch_job( + job_name=job_name, + docker_container=docker_container, + command=command, + ) + + +def test_submit_aws_batch_job_with_dependencies(): + job_name_1 = "test_submit_aws_batch_job_with_dependencies_1" + docker_container = "ubuntu:latest" + command_1 = "echo 'Testing NeuroConv AWS Batch submission." + + job_submission_info = submit_aws_batch_job( + job_name=job_name_1, + docker_container=docker_container, + command=command_1, + ) + + job_name_2 = "test_submit_aws_batch_job_with_dependencies_1" + command_2 = "echo 'Testing NeuroConv AWS Batch submission with dependencies." 
+ job_dependencies = [{"jobId": job_submission_info["jobId"], "type": "SEQUENTIAL"}] + submit_aws_batch_job( + job_name=job_name_2, + docker_container=docker_container, + command=command_2, + job_dependencies=job_dependencies, + ) From 935f03834ee3809a2d8d2c32fbb3fe2cc972b3af Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:52:05 -0400 Subject: [PATCH 16/54] debug import --- src/neuroconv/tools/data_transfers/_aws.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index b52fe4abd..361ed5e71 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -2,6 +2,7 @@ import json from datetime import datetime +from typing import Dict, List, Optional from uuid import uuid4 From 7e8ef72b770c3c8aa64dd1e48225a2f0636368af Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 14:58:20 -0400 Subject: [PATCH 17/54] fix bad conflict --- src/neuroconv/tools/data_transfers/_globus.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/neuroconv/tools/data_transfers/_globus.py b/src/neuroconv/tools/data_transfers/_globus.py index 8ee195e29..3429127f1 100644 --- a/src/neuroconv/tools/data_transfers/_globus.py +++ b/src/neuroconv/tools/data_transfers/_globus.py @@ -6,8 +6,12 @@ from time import sleep, time from typing import Dict, List, Tuple, Union +from pydantic import DirectoryPath from tqdm import tqdm +from ..importing import is_package_installed +from ..processes import deploy_process + def get_globus_dataset_content_sizes( globus_endpoint_id: str, path: str, recursive: bool = True, timeout: float = 120.0 From f2be0081d90d49d9ecbd7230ea5099c6d023845a Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 15:14:14 -0400 Subject: [PATCH 18/54] add boto3 to requirements --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index e06c90077..1e57f1295 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,9 @@ extras_require = defaultdict(list) +extras_require["aws"].append("boto3") +extras_require["full"].extend(extras_require["aws"]) + extras_require["dandi"].append("dandi>=0.58.1") extras_require["full"].extend(extras_require["dandi"]) From a4e7bf532a46743c643615706996f6e3770059e9 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 15 Jul 2024 15:23:15 -0400 Subject: [PATCH 19/54] pass AWS credentials in function and actions --- .github/workflows/live-service-testing.yml | 6 ++++ src/neuroconv/tools/data_transfers/_aws.py | 36 +++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index 1f7cda4da..c5bc056ab 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -4,10 +4,16 @@ on: - cron: "0 16 * * *" # Daily at noon EST workflow_call: secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true DANDI_API_KEY: required: true env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 361ed5e71..fce90d5ca 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -1,6 +1,7 @@ """Collection of helper functions for assessing and 
performing automated data transfers related to AWS.""" import json +import os from datetime import datetime from typing import Dict, List, Optional from uuid import uuid4 @@ -87,10 +88,37 @@ def submit_aws_batch_job( job_dependencies = job_dependencies or [] - dynamodb_client = boto3.client("dynamodb", region) - dynamodb_resource = boto3.resource("dynamodb", region) - iam_client = boto3.client("iam", region) - batch_client = boto3.client("batch", region) + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + if aws_access_key_id is None or aws_secret_access_key is None: + raise EnvironmentError( + "'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' must both be set in the environment to use this function." + ) + + dynamodb_client = boto3.client( + service_name="dynamodb", + region=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + iam_client = boto3.client( + service_name="iam", + region=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) # It is extremely useful to have a status tracker that is separate from the job environment # Technically detailed logs of inner workings are given in the CloudWatch, but that can only really be From 4939c60430a478e75d766bf5f7c6107749abccd3 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 09:32:07 -0400 Subject: [PATCH 20/54] pass secrets --- .github/workflows/deploy-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index a1a6ae790..586659441 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -43,6 +43,8 @@ jobs: if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} run-dev-tests: From 7c66c82cb257ffc985dce381282e0b2370f5f133 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 09:37:30 -0400 Subject: [PATCH 21/54] correct keyword name --- src/neuroconv/tools/data_transfers/_aws.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index fce90d5ca..709d71d13 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -97,25 +97,25 @@ def submit_aws_batch_job( dynamodb_client = boto3.client( service_name="dynamodb", - region=region, + region_name=region, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) dynamodb_resource = boto3.resource( service_name="dynamodb", - region=region, + region_name=region, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) iam_client = boto3.client( service_name="iam", - region=region, + region_name=region, 
aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) batch_client = boto3.client( service_name="batch", - region=region, + region_name=region, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, ) From b115adb2afea140b29a808bcc98924569c06c9f6 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:03:42 -0400 Subject: [PATCH 22/54] debug role fetching --- src/neuroconv/tools/data_transfers/_aws.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 709d71d13..2b69dfb16 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -140,7 +140,8 @@ def submit_aws_batch_job( assume_role_policy = dict( Version="2012-10-17", Statement=[ - dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole") + dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole"), + dict(Effect="Allow", Action=["iam:GetRole", "iam:PassRole"], Resource=f"arn:aws:iam::account-id:role/{iam_role_name}", ], ) From dfcb148086c1f547a34f0eb07211e3adbc9b0c81 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:09:21 -0400 Subject: [PATCH 23/54] fix syntax --- src/neuroconv/tools/data_transfers/_aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 2b69dfb16..a1b07befd 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -141,7 +141,7 @@ def submit_aws_batch_job( Version="2012-10-17", Statement=[ dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole"), - dict(Effect="Allow", Action=["iam:GetRole", "iam:PassRole"], Resource=f"arn:aws:iam::account-id:role/{iam_role_name}", + dict(Effect="Allow", Action=["iam:GetRole", "iam:PassRole"], Resource=f"arn:aws:iam::account-id:role/{iam_role_name}"), ], ) From 57f65ce9032812a761fc96aa56f32918e245a840 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:09:36 +0000 Subject: [PATCH 24/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers/_aws.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index a1b07befd..601f8d810 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -141,7 +141,11 @@ def submit_aws_batch_job( Version="2012-10-17", Statement=[ dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole"), - dict(Effect="Allow", Action=["iam:GetRole", "iam:PassRole"], Resource=f"arn:aws:iam::account-id:role/{iam_role_name}"), + dict( + Effect="Allow", + Action=["iam:GetRole", "iam:PassRole"], + Resource=f"arn:aws:iam::account-id:role/{iam_role_name}", + ), ], ) From 38327f7340c63d2318751a57f8d86d7a620dcdee Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:18:16 -0400 Subject: [PATCH 25/54] splinter out aws tests to reduce costs --- 
.github/workflows/aws_tests.py | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/aws_tests.py diff --git a/.github/workflows/aws_tests.py b/.github/workflows/aws_tests.py new file mode 100644 index 000000000..fe21d6712 --- /dev/null +++ b/.github/workflows/aws_tests.py @@ -0,0 +1,39 @@ +name: AWS Tests +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon on Monday + pull_request: # TODO, remove when done with this PR + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, macos-13, windows-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended way + git config --global user.email "CI@example.com" + git config --global user.name "CI Almighty" + + - name: Install full requirements + run: pip install .[aws,test] + + - name: Run subset of tests that use S3 live services + run: pytest -rsx -n auto tests/test_minimal/test_tools/s3_tools.py From 90deef6bb77ff2334f016cb97c50d20bc7d6efc2 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:18:36 -0400 Subject: [PATCH 26/54] splinter out aws tests to reduce costs --- .github/workflows/live-service-testing.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index c5bc056ab..28be2d94a 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -4,16 +4,10 @@ on: - cron: "0 16 * * *" # Daily at noon EST workflow_call: secrets: - AWS_ACCESS_KEY_ID: - required: true - AWS_SECRET_ACCESS_KEY: - required: true DANDI_API_KEY: required: true env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: @@ -42,8 +36,6 @@ jobs: - name: Install full requirements run: pip install .[test,full] - - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_minimal/test_tools/s3_tools.py - name: Run subset of tests that use DANDI live services run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py - name: Run subset of tests that use Globus live services From 0b6e42954a9516e136bd51e8a9b11650e080a673 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:19:09 -0400 Subject: [PATCH 27/54] temporarily disable --- .github/workflows/deploy-tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 586659441..e50f385c3 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,7 +1,7 @@ name: Deploy tests on: - pull_request: + #pull_request: # TODO: temporarily disabled merge_group: workflow_dispatch: @@ -43,8 +43,6 @@ jobs: if: ${{ 
needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} run-dev-tests: From 06e9bdb532f8ccddfdafc1c811d610e4bc0bfe0f Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:20:06 -0400 Subject: [PATCH 28/54] fix suffix --- .github/workflows/{aws_tests.py => aws_tests.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{aws_tests.py => aws_tests.yml} (100%) diff --git a/.github/workflows/aws_tests.py b/.github/workflows/aws_tests.yml similarity index 100% rename from .github/workflows/aws_tests.py rename to .github/workflows/aws_tests.yml From fe16ddea919a224de55cd1d4824352aadd4406f5 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:21:42 -0400 Subject: [PATCH 29/54] limit matrix to reduce costs --- .github/workflows/aws_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index fe21d6712..4115ab6af 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -16,8 +16,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] - os: [ubuntu-latest, macos-13, windows-latest] + python-version: ["3.12"] + os: [ubuntu-latest] steps: - uses: actions/checkout@v4 - run: git fetch --prune --unshallow --tags From 7f408856a2d51815a7a24394b9fe0b899df86169 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:26:12 -0400 Subject: [PATCH 30/54] cancel previous --- .github/workflows/aws_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 4115ab6af..cd8e00876 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -4,6 +4,10 @@ on: - cron: "0 16 * * 1" # Weekly at noon on Monday pull_request: # TODO, remove when done with this PR +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} From 34328cf1c49d8cb6e6ded8e6cd360f4565197f66 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:57:46 -0400 Subject: [PATCH 31/54] remove iam role stuff; has to be set on user --- src/neuroconv/tools/data_transfers/_aws.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 601f8d810..e4e590ef4 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -141,11 +141,6 @@ def submit_aws_batch_job( Version="2012-10-17", Statement=[ dict(Effect="Allow", Principal=dict(Service="ecs-tasks.amazonaws.com"), Action="sts:AssumeRole"), - dict( - Effect="Allow", - Action=["iam:GetRole", "iam:PassRole"], - Resource=f"arn:aws:iam::account-id:role/{iam_role_name}", - ), ], ) From 17898f4deefa4cb850eae53cadf9e36138563c3c Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 
22 Jul 2024 11:07:14 -0400 Subject: [PATCH 32/54] fix API call --- src/neuroconv/tools/data_transfers/_aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index e4e590ef4..8e5a2ca52 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -191,7 +191,7 @@ def submit_aws_batch_job( # Ensure job definition exists job_definition = f"neuroconv_batch_{docker_container}" # Keep unique by incorporating name of container current_job_definitions = [ - definition["jobDefinitionName"] for definition in batch_client.describe_job_queues()["jobDefinitions"] + definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] if job_definition not in current_job_definitions: batch_client.register_job_definition( From de4e18f424f69ff22d6628b5954a67793751140d Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:00:24 -0400 Subject: [PATCH 33/54] update to modern standard; expose extra options; rename argument --- src/neuroconv/tools/data_transfers/_aws.py | 63 ++++++++++++++++------ 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 8e5a2ca52..c48f20ffb 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -40,7 +40,7 @@ def estimate_s3_conversion_cost( def submit_aws_batch_job( job_name: str, - docker_container: str, + docker_image: str, command: Optional[str] = None, job_dependencies: Optional[List[Dict[str, str]]] = None, region: str = "us-east-2", @@ -48,6 +48,9 @@ def submit_aws_batch_job( iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", + job_definition_name : Optional[str] = None, + minimum_worker_ram_in_gb : float = 4.0, + minimum_worker_cpus : int = 4, ) -> Dict[str, str]: """ Submit a job to AWS Batch for processing. @@ -56,8 +59,8 @@ def submit_aws_batch_job( ---------- job_name : str The name of the job to submit. - docker_container : str - The name of the Docker container to use for the job. + docker_image : str + The name of the Docker image to use for the job. command : str, optional The command to run in the Docker container. Current syntax only supports a single line; consecutive actions should be chained with the '&&' operator. @@ -78,11 +81,24 @@ def submit_aws_batch_job( The name of the compute environment to use for the job. job_queue_name : str, default: "neuroconv_batch_queue" The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. + Defaults to f"neuroconv_batch_{ name of docker image }", + but replaces any colons from tags in the docker image name with underscores. + minimum_worker_ram_in_gb : int, default: 4.0 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. Returns ------- - job_submission_info : dict - A dictionary containing the job ID and other relevant information. 
+ info : dict + A dictionary containing information about this AWS Batch job. + + info["job_submission_info"] is the return value of `boto3.client.submit_job` which contains the job ID. + info["table_submission_info"] is the initial row data inserted into the DynamoDB status tracking table. """ import boto3 @@ -120,7 +136,7 @@ def submit_aws_batch_job( aws_secret_access_key=aws_secret_access_key, ) - # It is extremely useful to have a status tracker that is separate from the job environment + # It is very useful to have a status tracker that is separate from the job environment # Technically detailed logs of inner workings are given in the CloudWatch, but that can only really be # analyzed from the AWS web console current_tables = dynamodb_client.list_tables()["TableNames"] @@ -167,8 +183,8 @@ def submit_aws_batch_job( computeResources={ "type": "EC2", "allocationStrategy": "BEST_FIT", - "minvCpus": 0, # TODO, control - "maxvCpus": 256, # TODO, control + "minvCpus": 0, + "maxvCpus": 256, "subnets": ["subnet-0be50d51", "subnet-3fd16f77", "subnet-0092132b"], "instanceRole": "ecsInstanceRole", "securityGroupIds": ["sg-851667c7"], @@ -189,18 +205,29 @@ def submit_aws_batch_job( ) # Ensure job definition exists - job_definition = f"neuroconv_batch_{docker_container}" # Keep unique by incorporating name of container + # By default, keep name unique by incorporating the name of the container + job_definition_docker_name = docker_image.replace(":", "_") + job_definition_name = job_definition_name or f"neuroconv_batch_{job_definition_docker_name}" current_job_definitions = [ definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] + resource_requirements = [ + { + 'value': str(minimum_worker_ram_in_gb * 1e3 / 1.024**2), # boto3 expects memory in MiB + 'type': 'MEMORY' + }, + { + 'value': str(minimum_worker_cpus), + 'type': 'VCPU' + }, + ] if job_definition not in current_job_definitions: batch_client.register_job_definition( - jobDefinitionName=job_definition, + jobDefinitionName=job_definition_name, type="container", containerProperties=dict( - image=docker_container, - memory=256, # TODO, control - vcpus=16, # TODO, control + image=docker_image, + resourceRequirements=resource_requirements, jobRoleArn=role["Role"]["Arn"], executionRoleArn=role["Role"]["Arn"], environment=[ @@ -212,7 +239,7 @@ def submit_aws_batch_job( ), ) else: - # TODO: do I also need to check that memory/vcpu values resolve with previously defined name? + # Should we also check that memory/vcpu values resolve with any previously defined job definitions in this event? 
pass # Submit job and update status tracker @@ -243,6 +270,10 @@ def submit_aws_batch_job( jobName=job_name, containerOverrides=container_overrides, ) - table.put_item(Item=dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted")) - return job_submission_info + # Update DynamoDB status tracking table + table_submission_info = dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted") + table.put_item(Item=table_submission_info) + + info = dict(job_submission_info=job_submission_info, table_submission_info=table_submission_info) + return info From 47cc91736b89e465f5e71ada96ce62ee202fae2b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:00:40 +0000 Subject: [PATCH 34/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers/_aws.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index c48f20ffb..04856ba24 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -48,9 +48,9 @@ def submit_aws_batch_job( iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", - job_definition_name : Optional[str] = None, - minimum_worker_ram_in_gb : float = 4.0, - minimum_worker_cpus : int = 4, + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gb: float = 4.0, + minimum_worker_cpus: int = 4, ) -> Dict[str, str]: """ Submit a job to AWS Batch for processing. @@ -212,14 +212,8 @@ def submit_aws_batch_job( definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] resource_requirements = [ - { - 'value': str(minimum_worker_ram_in_gb * 1e3 / 1.024**2), # boto3 expects memory in MiB - 'type': 'MEMORY' - }, - { - 'value': str(minimum_worker_cpus), - 'type': 'VCPU' - }, + {"value": str(minimum_worker_ram_in_gb * 1e3 / 1.024**2), "type": "MEMORY"}, # boto3 expects memory in MiB + {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] if job_definition not in current_job_definitions: batch_client.register_job_definition( From 4eea2db8f1a05d8ae44d59a5781c15b85af57a7a Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:01:04 -0400 Subject: [PATCH 35/54] fix keyword argument in tests --- tests/test_minimal/test_tools/s3_tools.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_minimal/test_tools/s3_tools.py b/tests/test_minimal/test_tools/s3_tools.py index 3a61b47d3..fb1930e86 100644 --- a/tests/test_minimal/test_tools/s3_tools.py +++ b/tests/test_minimal/test_tools/s3_tools.py @@ -51,24 +51,24 @@ def test_estimate_total_conversion_runtime(): def test_submit_aws_batch_job(): job_name = "test_submit_aws_batch_job" - docker_container = "ubuntu:latest" + docker_image = "ubuntu:latest" command = "echo 'Testing NeuroConv AWS Batch submission." 
submit_aws_batch_job( job_name=job_name, - docker_container=docker_container, + docker_image=docker_image, command=command, ) def test_submit_aws_batch_job_with_dependencies(): job_name_1 = "test_submit_aws_batch_job_with_dependencies_1" - docker_container = "ubuntu:latest" + docker_image = "ubuntu:latest" command_1 = "echo 'Testing NeuroConv AWS Batch submission." job_submission_info = submit_aws_batch_job( job_name=job_name_1, - docker_container=docker_container, + docker_image=docker_image, command=command_1, ) @@ -77,7 +77,7 @@ def test_submit_aws_batch_job_with_dependencies(): job_dependencies = [{"jobId": job_submission_info["jobId"], "type": "SEQUENTIAL"}] submit_aws_batch_job( job_name=job_name_2, - docker_container=docker_container, + docker_image=docker_image, command=command_2, job_dependencies=job_dependencies, ) From 4b229036a58385c39ff798ccabc7b8590dcb3a55 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:16:10 -0400 Subject: [PATCH 36/54] add status helper --- src/neuroconv/tools/data_transfers/_aws.py | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 04856ba24..8d220b8a4 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -39,6 +39,7 @@ def estimate_s3_conversion_cost( def submit_aws_batch_job( + *, job_name: str, docker_image: str, command: Optional[str] = None, @@ -271,3 +272,36 @@ def submit_aws_batch_job( info = dict(job_submission_info=job_submission_info, table_submission_info=table_submission_info) return info + + +def update_table_status(*, submission_id: str, status: str, status_tracker_table_name: str = "neuroconv_batch_status_tracker") -> None: + """ + Helper function for updating a status value on a DynamoDB table tracking the status of EC2 jobs. + + Intended for use by the running job to indicate its completion. + + Parameters + ---------- + submission_id : str + The random hash that was assigned on submission of this job to the status tracker table. + status : str + The new status value to update. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. 
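For reference, a minimal sketch of how the script running inside the container might report completion back to the tracking table, assuming this helper is importable from neuroconv.tools.data_transfers like the other functions in this module, that AWS credentials are set in the environment, and that the submitting script forwarded the submission ID under an illustrative variable name (only STATUS_TRACKER_TABLE_NAME is set by submit_aws_batch_job itself):

import os

from neuroconv.tools.data_transfers import update_table_status

update_table_status(
    submission_id=os.environ["SUBMISSION_ID"],  # hypothetical variable forwarded by the submitting script
    status="finished",
    status_tracker_table_name=os.environ["STATUS_TRACKER_TABLE_NAME"],  # set by submit_aws_batch_job
)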
+ """ + import boto3 + + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + table = dynamodb_resource.Table(name=status_tracker_table_name) + + item = table.update_item(Key={"id": submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": status}}) + + return From e16551d224229a2c3f3060cfceee5b1e9d1593e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:16:21 +0000 Subject: [PATCH 37/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers/_aws.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 8d220b8a4..7a9e03d02 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -274,7 +274,9 @@ def submit_aws_batch_job( return info -def update_table_status(*, submission_id: str, status: str, status_tracker_table_name: str = "neuroconv_batch_status_tracker") -> None: +def update_table_status( + *, submission_id: str, status: str, status_tracker_table_name: str = "neuroconv_batch_status_tracker" +) -> None: """ Helper function for updating a status value on a DynamoDB table tracking the status of EC2 jobs. @@ -290,7 +292,7 @@ def update_table_status(*, submission_id: str, status: str, status_tracker_table The name of the DynamoDB table to use for tracking job status. """ import boto3 - + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") From 29aa19be935f483c7cc6112125cb1176f29da8d2 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:18:26 -0400 Subject: [PATCH 38/54] debug --- src/neuroconv/tools/data_transfers/_aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 7a9e03d02..5f62deb49 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -216,7 +216,7 @@ def submit_aws_batch_job( {"value": str(minimum_worker_ram_in_gb * 1e3 / 1.024**2), "type": "MEMORY"}, # boto3 expects memory in MiB {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] - if job_definition not in current_job_definitions: + if job_definition_name not in current_job_definitions: batch_client.register_job_definition( jobDefinitionName=job_definition_name, type="container", From 37223c99558ddf4955af3c509e5f18f1eecb51cd Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:19:52 -0400 Subject: [PATCH 39/54] enhance doc --- src/neuroconv/tools/data_transfers/_aws.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 5f62deb49..d358b2674 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -89,6 +89,7 @@ def submit_aws_batch_job( minimum_worker_ram_in_gb : int, default: 4.0 The minimum amount of base worker memory required to run this job. 
Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GB to allow comfortable buffer space for data chunk iterators. minimum_worker_cpus : int, default: 4 The minimum number of CPUs required to run this job. A minimum of 4 is required, even if only one will be used in the actual process. From 1b4d88f160f958f7c5b859a9ffae5ce5f82e4310 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:20:53 -0400 Subject: [PATCH 40/54] try not casting as strings --- src/neuroconv/tools/data_transfers/_aws.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index d358b2674..6c844c86f 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -214,8 +214,8 @@ def submit_aws_batch_job( definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] resource_requirements = [ - {"value": str(minimum_worker_ram_in_gb * 1e3 / 1.024**2), "type": "MEMORY"}, # boto3 expects memory in MiB - {"value": str(minimum_worker_cpus), "type": "VCPU"}, + {"value": minimum_worker_ram_in_gb * 1e3 / 1.024**2, "type": "MEMORY"}, # boto3 expects memory in MiB + {"value": minimum_worker_cpus, "type": "VCPU"}, ] if job_definition_name not in current_job_definitions: batch_client.register_job_definition( From 829e5f208c1b10974810f7a89ca89953f18bc238 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:25:09 -0400 Subject: [PATCH 41/54] fix deserialization type --- src/neuroconv/tools/data_transfers/_aws.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 6c844c86f..3b10b3064 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -214,8 +214,8 @@ def submit_aws_batch_job( definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] resource_requirements = [ - {"value": minimum_worker_ram_in_gb * 1e3 / 1.024**2, "type": "MEMORY"}, # boto3 expects memory in MiB - {"value": minimum_worker_cpus, "type": "VCPU"}, + {"value": str(int(minimum_worker_ram_in_gb * 1e3 / 1.024**2)), "type": "MEMORY"}, # boto3 expects memory in round MiB + {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] if job_definition_name not in current_job_definitions: batch_client.register_job_definition( From e76897f948ae10a776dee44a5d56358f95c09f11 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:25:48 +0000 Subject: [PATCH 42/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers/_aws.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 3b10b3064..5ecab6233 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -214,7 +214,10 @@ def submit_aws_batch_job( definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] ] resource_requirements = [ - {"value": str(int(minimum_worker_ram_in_gb * 1e3 
/ 1.024**2)), "type": "MEMORY"}, # boto3 expects memory in round MiB + { + "value": str(int(minimum_worker_ram_in_gb * 1e3 / 1.024**2)), + "type": "MEMORY", + }, # boto3 expects memory in round MiB {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] if job_definition_name not in current_job_definitions: From df8cb106c8a8bb92882012997c5064f20539025e Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:33:57 -0400 Subject: [PATCH 43/54] debug --- src/neuroconv/tools/data_transfers/_aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 5ecab6233..a9c1cc43e 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -265,7 +265,7 @@ def submit_aws_batch_job( job_submission_info = batch_client.submit_job( jobQueue=job_queue_name, dependsOn=job_dependencies, - jobDefinition=job_definition, + jobDefinition=job_definition_name, jobName=job_name, containerOverrides=container_overrides, ) From 67e8405da79eb34dcf2ecbd357129041e79a8270 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:38:00 -0400 Subject: [PATCH 44/54] expose submission ID --- src/neuroconv/tools/data_transfers/_aws.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index a9c1cc43e..4c1cd93b0 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -52,6 +52,7 @@ def submit_aws_batch_job( job_definition_name: Optional[str] = None, minimum_worker_ram_in_gb: float = 4.0, minimum_worker_cpus: int = 4, + submission_id: Optional[str] = None, ) -> Dict[str, str]: """ Submit a job to AWS Batch for processing. @@ -93,6 +94,9 @@ def submit_aws_batch_job( minimum_worker_cpus : int, default: 4 The minimum number of CPUs required to run this job. A minimum of 4 is required, even if only one will be used in the actual process. + submission_id : str, optional + The unique ID to pair with this job submission when tracking the status via DynamoDB. + Defaults to a sampled UUID4. Returns ------- @@ -104,8 +108,6 @@ def submit_aws_batch_job( """ import boto3 - job_dependencies = job_dependencies or [] - aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") if aws_access_key_id is None or aws_secret_access_key is None: @@ -249,8 +251,8 @@ def submit_aws_batch_job( "If you are submitting multiple jobs, each will need a unique name." 
) - # Set environment variables to the docker container - # as well as optional command to run + # Set environment variables to the docker container as well as optional commands to run + job_dependencies = job_dependencies or [] container_overrides = dict( # Set environment variables environment=[ @@ -271,7 +273,8 @@ def submit_aws_batch_job( ) # Update DynamoDB status tracking table - table_submission_info = dict(id=uuid4(), job_name=job_name, submitted_on=datetime.now(), status="submitted") + submission_id = submission_id or str(uuid4()) + table_submission_info = dict(id=submission_id, job_name=job_name, submitted_on=datetime.now(), status="submitted") table.put_item(Item=table_submission_info) info = dict(job_submission_info=job_submission_info, table_submission_info=table_submission_info) From 297476f7bd6edd1f5eb57a996583ee4e511bc94f Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:43:19 -0400 Subject: [PATCH 45/54] fix datetime typing --- src/neuroconv/tools/data_transfers/_aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 4c1cd93b0..d1c8980f5 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -274,7 +274,7 @@ def submit_aws_batch_job( # Update DynamoDB status tracking table submission_id = submission_id or str(uuid4()) - table_submission_info = dict(id=submission_id, job_name=job_name, submitted_on=datetime.now(), status="submitted") + table_submission_info = dict(id=submission_id, job_name=job_name, submitted_on=datetime.now().isoformat(), status="submitted") table.put_item(Item=table_submission_info) info = dict(job_submission_info=job_submission_info, table_submission_info=table_submission_info) From 2cfaf5848315337530f467264f30c536a387cc55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:43:35 +0000 Subject: [PATCH 46/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/data_transfers/_aws.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index d1c8980f5..c60710ec0 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -274,7 +274,9 @@ def submit_aws_batch_job( # Update DynamoDB status tracking table submission_id = submission_id or str(uuid4()) - table_submission_info = dict(id=submission_id, job_name=job_name, submitted_on=datetime.now().isoformat(), status="submitted") + table_submission_info = dict( + id=submission_id, job_name=job_name, submitted_on=datetime.now().isoformat(), status="submitted" + ) table.put_item(Item=table_submission_info) info = dict(job_submission_info=job_submission_info, table_submission_info=table_submission_info) From 9ad5ef2fbeb0a5c0b0075d5134af2decf55e841a Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:46:02 -0400 Subject: [PATCH 47/54] update test to new structure --- tests/test_minimal/test_tools/s3_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/s3_tools.py b/tests/test_minimal/test_tools/s3_tools.py index fb1930e86..fc46e7d84 100644 --- 
a/tests/test_minimal/test_tools/s3_tools.py +++ b/tests/test_minimal/test_tools/s3_tools.py @@ -66,11 +66,12 @@ def test_submit_aws_batch_job_with_dependencies(): docker_image = "ubuntu:latest" command_1 = "echo 'Testing NeuroConv AWS Batch submission." - job_submission_info = submit_aws_batch_job( + info = submit_aws_batch_job( job_name=job_name_1, docker_image=docker_image, command=command_1, ) + job_submission_info = info["job_submission_info"] job_name_2 = "test_submit_aws_batch_job_with_dependencies_1" command_2 = "echo 'Testing NeuroConv AWS Batch submission with dependencies." From 4db6141afc69aeaebc8b06e056a2aa711b563b17 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:54:22 -0400 Subject: [PATCH 48/54] remove trigger --- .github/workflows/aws_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index cd8e00876..1b3a93a66 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -2,7 +2,6 @@ name: AWS Tests on: schedule: - cron: "0 16 * * 1" # Weekly at noon on Monday - pull_request: # TODO, remove when done with this PR concurrency: # Cancel previous workflows on the same pull request group: ${{ github.workflow }}-${{ github.ref }} From 6949be0413d3be5b6532c6b05ca0e5acb8f017b8 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:54:40 -0400 Subject: [PATCH 49/54] restore trigger --- .github/workflows/deploy-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index e50f385c3..a1a6ae790 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -1,7 +1,7 @@ name: Deploy tests on: - #pull_request: # TODO: temporarily disabled + pull_request: merge_group: workflow_dispatch: From 26c5f697a7d04abdc12c6c4a70013541464e7523 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 22 Jul 2024 15:05:07 -0400 Subject: [PATCH 50/54] resolve conflict --- src/neuroconv/tools/data_transfers/_aws.py | 6 +++--- .../_yaml_conversion_specification.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index c60710ec0..4f58923b8 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -217,9 +217,9 @@ def submit_aws_batch_job( ] resource_requirements = [ { - "value": str(int(minimum_worker_ram_in_gb * 1e3 / 1.024**2)), + "value": str(int(minimum_worker_ram_in_gb * 1e3 / 1.024**2)), # boto3 expects memory in round MiB "type": "MEMORY", - }, # boto3 expects memory in round MiB + }, {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] if job_definition_name not in current_job_definitions: @@ -313,6 +313,6 @@ def update_table_status( ) table = dynamodb_resource.Table(name=status_tracker_table_name) - item = table.update_item(Key={"id": submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": status}}) + table.update_item(Key={"id": submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": status}}) return diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 6eaa6be59..832f83f44 100644 --- 
a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -21,7 +21,7 @@ @click.option( "--data-folder-path", help="Path to folder where the source data may be found.", - type=click.Path(writable=True), + type=click.Path(writable=False), ) @click.option( "--output-folder-path", @@ -45,8 +45,10 @@ def run_conversion_from_yaml_cli( """ Run the tool function 'run_conversion_from_yaml' via the command line. - specification-file-path : - Path to the .yml specification file. + Parameters + ---------- + specification_file_path : str + Path to the .yml specification file. """ run_conversion_from_yaml( specification_file_path=specification_file_path, @@ -173,7 +175,7 @@ def deploy_conversion_from_yaml_and_upload_to_dandi( # the 'and upload to DANDI transfer_commands: str, transfer_config_file_path: FilePath, ): - job_name = Path(specification_file_path).stem + "-" + str(uuid4()) + job_name = Path(specification_file_path).stem + "_" + str(uuid4()) with open(file=specification_file_path) as file: yaml_as_string = "".join(file.readlines()) # Loaded as raw string, not as dict From 37d5be44991367e468b60c207927c1468af7b74f Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Wed, 24 Jul 2024 16:25:17 -0400 Subject: [PATCH 51/54] finish initial structure for deployment helper --- dockerfiles/neuroconv_latest_yaml_variable | 2 +- src/neuroconv/tools/data_transfers/_aws.py | 239 +++++++++++++++--- .../_yaml_conversion_specification.py | 70 ++--- 3 files changed, 220 insertions(+), 91 deletions(-) diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable index ea411ee44..84c382738 100644 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -1,4 +1,4 @@ FROM ghcr.io/catalystneuro/neuroconv:latest LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." -CMD echo "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite +CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 4f58923b8..322140daf 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -3,9 +3,12 @@ import json import os from datetime import datetime -from typing import Dict, List, Optional +from pathlib import Path +from typing import Dict, List, Literal, Optional from uuid import uuid4 +from pydantic import FilePath + def estimate_s3_conversion_cost( total_mb: float, @@ -35,23 +38,26 @@ def estimate_s3_conversion_cost( total_mb_s = total_mb**2 / 2 * (1 / transfer_rate_mb + (2 * c + 1) / conversion_rate_mb + 2 * c**2 / upload_rate_mb) cost_gb_m = 0.08 / 1e3 # $0.08 / GB Month cost_mb_s = cost_gb_m / (1e3 * 2.628e6) # assuming 30 day month; unsure how amazon weights shorter months? 
+ return cost_mb_s * total_mb_s def submit_aws_batch_job( *, + region: str = "us-east-2", job_name: str, docker_image: str, command: Optional[str] = None, job_dependencies: Optional[List[Dict[str, str]]] = None, - region: str = "us-east-2", - status_tracker_table_name: str = "neuroconv_batch_status_tracker", iam_role_name: str = "neuroconv_batch_role", compute_environment_name: str = "neuroconv_batch_environment", job_queue_name: str = "neuroconv_batch_queue", job_definition_name: Optional[str] = None, minimum_worker_ram_in_gb: float = 4.0, minimum_worker_cpus: int = 4, + efs_volume_name: Optional[str] = None, + environment_variables: Optional[Dict[str, str]] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", submission_id: Optional[str] = None, ) -> Dict[str, str]: """ @@ -59,6 +65,10 @@ def submit_aws_batch_job( Parameters ---------- + region : str, default: "us-east-2" + The AWS region to use for the job. + us-east-2 (Ohio) is the location of the DANDI Archive, and we recommend all operations be run in that region to + remove cross-region transfer costs. job_name : str The name of the job to submit. docker_image : str @@ -73,10 +83,6 @@ def submit_aws_batch_job( {"jobId": "job_id_2", "type": "SEQUENTIAL"}, ... ] - region : str, default: "us-east-2" - The AWS region to use for the job. - status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" - The name of the DynamoDB table to use for tracking job status. iam_role_name : str, default: "neuroconv_batch_role" The name of the IAM role to use for the job. compute_environment_name : str, default: "neuroconv_batch_environment" @@ -94,6 +100,12 @@ def submit_aws_batch_job( minimum_worker_cpus : int, default: 4 The minimum number of CPUs required to run this job. A minimum of 4 is required, even if only one will be used in the actual process. + efs_volume_name : str, optional + The name of the EFS volume to attach to the jobs used by the operation. + environment_variables : dict of str, optional + A dictionary of key-value pairs to pass to the docker image. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. submission_id : str, optional The unique ID to pair with this job submission when tracking the status via DynamoDB. Defaults to a sampled UUID4. 
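Taken together, the keyword-only signature documented above implies call sites along the following lines. This is a usage sketch against the revised helper rather than part of the patch itself, and it assumes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are already exported in the environment; the volume name and environment variable values are illustrative:

from neuroconv.tools.data_transfers import submit_aws_batch_job

info = submit_aws_batch_job(
    job_name="example_nwb_conversion",  # must be unique among jobs currently in the queue
    docker_image="ubuntu:latest",
    command="echo 'step one' && echo 'step two'",  # single line; chain consecutive actions with '&&'
    minimum_worker_ram_in_gb=4.0,  # converted internally to whole MiB for boto3
    minimum_worker_cpus=4,
    efs_volume_name="example_efs_volume",  # optional; attaches the named EFS volume to the job
    environment_variables={"EXAMPLE_KEY": "example_value"},
)
job_id = info["job_submission_info"]["jobId"]  # can seed 'job_dependencies' of a follow-up submission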
@@ -212,9 +224,7 @@ def submit_aws_batch_job( # By default, keep name unique by incorporating the name of the container job_definition_docker_name = docker_image.replace(":", "_") job_definition_name = job_definition_name or f"neuroconv_batch_{job_definition_docker_name}" - current_job_definitions = [ - definition["jobDefinitionName"] for definition in batch_client.describe_job_definitions()["jobDefinitions"] - ] + resource_requirements = [ { "value": str(int(minimum_worker_ram_in_gb * 1e3 / 1.024**2)), # boto3 expects memory in round MiB @@ -222,26 +232,51 @@ def submit_aws_batch_job( }, {"value": str(minimum_worker_cpus), "type": "VCPU"}, ] + + container_properties = dict( + image=docker_image, + resourceRequirements=resource_requirements, + jobRoleArn=role["Role"]["Arn"], + executionRoleArn=role["Role"]["Arn"], + # environment=[ + # dict( + # name="AWS_DEFAULT_REGION", + # value=region, + # ) + # ], + ) + + if efs_volume_name is not None: + # Connect the job definition reference to the EFS mount + job_definition_name += f"_{efs_volume_name}" + + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + all_efs_volumes = efs_client.describe_file_systems() + all_efs_volumes_by_name = {efs_volume["Name"]: efs_volume for efs_volume in all_efs_volumes["FileSystems"]} + efs_volume = all_efs_volumes_by_name[efs_volume_name] + + volumes = [ + dict( + name=efs_volume_name, + efsVolumeConfiguration=dict(fileSystemId=efs_volume["FileSystemId"]), + ) + ] + + container_properties.update(volumes) + + current_job_definitions = [ + job_definition["jobDefinitionName"] + for job_definition in batch_client.describe_job_definitions()["jobDefinitions"] + ] if job_definition_name not in current_job_definitions: batch_client.register_job_definition( - jobDefinitionName=job_definition_name, - type="container", - containerProperties=dict( - image=docker_image, - resourceRequirements=resource_requirements, - jobRoleArn=role["Role"]["Arn"], - executionRoleArn=role["Role"]["Arn"], - environment=[ - dict( - name="AWS_DEFAULT_REGION", - value=region, - ) - ], - ), + jobDefinitionName=job_definition_name, type="container", containerProperties=container_properties ) - else: - # Should we also check that memory/vcpu values resolve with any previously defined job definitions in this event? 
- pass # Submit job and update status tracker currently_running_jobs = batch_client.list_jobs(jobQueue=job_queue_name) @@ -253,17 +288,20 @@ def submit_aws_batch_job( # Set environment variables to the docker container as well as optional commands to run job_dependencies = job_dependencies or [] - container_overrides = dict( - # Set environment variables - environment=[ - dict( # The burden is on the calling script to update the table status to finished - name="STATUS_TRACKER_TABLE_NAME", - value=status_tracker_table_name, - ), - ] - ) + + environment_variables_per_job = [ + dict( # The burden is on the calling script to update the table status to finished + name="STATUS_TRACKER_TABLE_NAME", + value=status_tracker_table_name, + ), + ] + if environment_variables is not None: + environment_variables_per_job.extend([{key: value for key, value in environment_variables.items()}]) + + container_overrides = dict(environment=environment_variables_per_job) if command is not None: container_overrides["command"] = [command] + job_submission_info = batch_client.submit_job( jobQueue=job_queue_name, dependsOn=job_dependencies, @@ -284,7 +322,11 @@ def submit_aws_batch_job( def update_table_status( - *, submission_id: str, status: str, status_tracker_table_name: str = "neuroconv_batch_status_tracker" + *, + region: str = "us-east-2", + submission_id: str, + status: str, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", ) -> None: """ Helper function for updating a status value on a DynamoDB table tracking the status of EC2 jobs. @@ -293,6 +335,10 @@ def update_table_status( Parameters ---------- + region : str, default: "us-east-2" + The AWS region to use for the job. + us-east-2 (Ohio) is the location of the DANDI Archive, and we recommend all operations be run in that region to + remove cross-region transfer costs. submission_id : str The random hash that was assigned on submission of this job to the status tracker table. status : str @@ -316,3 +362,122 @@ def update_table_status( table.update_item(Key={"id": submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": status}}) return + + +def deploy_conversion_on_ec2( + specification_file_path: FilePath, + transfer_commands: str, + efs_volume_name: str, + dandiset_id: str, + region: str = "us-east-2", + transfer_method: Literal["rclone"] = "rclone", + transfer_config_file_path: Optional[FilePath] = None, + efs_volume_creation_options: Optional[dict] = None, +) -> None: + """ + Helper function for deploying a YAML-based NeuroConv data conversion in the cloud on AWS EC2 Batch. + + Parameters + ---------- + specification_file_path : FilePathType + File path leading to .yml specification file for NWB conversion. + transfer_commands : str + The syntax command to send to the transfer method. + E.g., `transfer_command="rclone copy YOUR_REMOTE:YOUR_SOURCE"` + efs_volume_name : str + The name of the EFS volume to attach to the jobs used by the operation. + dandiset_id : str + The six-digit Dandiset ID to use when uploading the data. + region : str, default: "us-east-2" + The AWS region to use for the job. + us-east-2 (Ohio) is the location of the DANDI Archive, and we recommend all operations be run in that region to + remove cross-region transfer costs. + transfer_method : Literal["rclone"] + The type of transfer used to move the data from the cloud source to the EFS volume. + Currently only supports Rclone. + transfer_config_file_path : FilePath, optional + Explicit path to the config file used by the transfer method. 
+ When using `transfer_method = "rclone"`, this defaults to `~/.config/rclone/rclone.conf`. + efs_volume_creation_options : dict, optional + The dictionary of keyword arguments to pass to `boto3.client.EFS.create_file_system` when the volmume does not + already exist. + These are ignored if the volume already exists. + """ + import boto3 + + dandi_api_token = os.getenv("DANDI_API_KEY") + assert dandi_api_token is not None, ( + "Unable to find environment variable 'DANDI_API_KEY'. " + "Please retrieve your token from DANDI and set this environment variable." + ) + + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + + transfer_config_file_path = transfer_config_file_path or Path().home() / ".config" / "rclone" / "rclone.conf" + efs_volume_creation_options = efs_volume_creation_options or dict() + + if transfer_method != "rclone": + raise NotImplementedError(f"The transfer method '{transfer_method}' is not yet supported!") + if not transfer_config_file_path.exists(): + raise ValueError(f"The `transfer_config_file_path` located at '{transfer_config_file_path}' does not exist!") + + # Make EFS volume if it doesn't already exist + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + all_efs_volumes = efs_client.describe_file_systems() + all_efs_volumes_by_name = {efs_volume["Name"]: efs_volume for efs_volume in all_efs_volumes["FileSystems"]} + + efs_volume = all_efs_volumes_by_name.get(efs_volume_name, None) + if efs_volume is None: + efs_volume_default_creation_kwargs = dict( + PerformanceMode="generalPurpose", + # Setting AvailabilityZoneName sets the volume as 'One-Zone', which is cheaper + AvailabilityZoneName="us-east-2b", + Tags=[dict(Key="Name", Value=efs_volume_name)], + ) + efs_volume_creation_kwargs = dict(efs_volume_default_creation_kwargs) + efs_volume_creation_kwargs.update(**efs_volume_creation_options) + + efs_volume = efs_client.create_file_system(**efs_volume_creation_kwargs) + + # To avoid errors related to name collisions, append all job names with a small unique reference + unique_job_reference = str(uuid4())[:8] + + # Job 1: Transfer data from source to EFS + with open(file=transfer_config_file_path) as io: + rclone_config_file_content = io.read() + + transfer_job_submission_info = submit_aws_batch_job( + transfer_job_name=Path(specification_file_path).stem + "_transfer_" + unique_job_reference, + docker_container="ghcr.io/catalystneuro/rclone_with_config:latest", + efs_volume_name=efs_volume_name, + environment_variables=[ + dict(name="RCLONE_CONFIG", value=rclone_config_file_content), + dict(name="RCLONE_COMMANDS", value=transfer_commands), + ], + ) + + # Job 2: Run YAML specification on transferred data and upload to DANDI + with open(file=specification_file_path) as io: + specification_file_content = io.read() + + submit_aws_batch_job( + conversion_job_name=Path(specification_file_path).stem + "_conversion_" + unique_job_reference, + job_dependencies=[{"jobId": transfer_job_submission_info["jobId"], "type": "SEQUENTIAL"}], + docker_container="ghcr.io/catalystneuro/neuroconv:yaml_variable", + efs_volume_name=efs_volume_name, + environment_variables=[ + dict(name="NEUROCONV_YAML", value=specification_file_content), + dict(name="NEUROCONV_DATA_PATH", value=""), + dict(name="NEUROCONV_OUTPUT_PATH", value=""), + dict(name="DANDI_API_KEY", value=dandi_api_token), + dict(name="DANDISET_ID", 
value=dandiset_id), + ], + ) + + return None diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 832f83f44..fbb9438cb 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -2,16 +2,14 @@ import sys from importlib import import_module from pathlib import Path -from typing import Literal, Optional -from uuid import uuid4 +from typing import Optional import click from dandi.metadata import _get_pynwb_metadata from dandi.organize import create_unique_filenames_from_metadata from jsonschema import RefResolver, validate -from pydantic import FilePath -from ..data_transfers import automatic_dandi_upload, submit_aws_batch_job +from ..data_transfers import automatic_dandi_upload from ...nwbconverter import NWBConverter from ...utils import FilePathType, FolderPathType, dict_deep_update, load_dict_from_file @@ -29,27 +27,22 @@ help="Path to folder where you want to save the output NWBFile.", type=click.Path(writable=True), ) +@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) @click.option( - "--upload-to-dandiset", - help="Do you want to upload the result to DANDI? Specify the dandiset ID if so. " - "Also ensure you have your DANDI_API_KEY set as an environment variable.", + "--upload-to-dandiset-id", + help=( + "Do you want to upload the result to DANDI? If so, specify the six-digit Dandiset ID. " + "Also ensure you have your DANDI_API_KEY set as an environment variable." + ), ) -@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) def run_conversion_from_yaml_cli( specification_file_path: str, data_folder_path: Optional[str] = None, output_folder_path: Optional[str] = None, - upload_to_dandiset: Optional[str] = None, overwrite: bool = False, + upload_to_dandiset: Optional[str] = None, ): - """ - Run the tool function 'run_conversion_from_yaml' via the command line. - - Parameters - ---------- - specification_file_path : str - Path to the .yml specification file. - """ + """Run the tool function 'run_conversion_from_yaml' via the command line.""" run_conversion_from_yaml( specification_file_path=specification_file_path, data_folder_path=data_folder_path, @@ -63,8 +56,8 @@ def run_conversion_from_yaml( specification_file_path: FilePathType, data_folder_path: Optional[FolderPathType] = None, output_folder_path: Optional[FolderPathType] = None, - upload_to_dandiset: Optional[str] = None, overwrite: bool = False, + upload_to_dandiset: Optional[str] = None, ): """ Run conversion to NWB given a yaml specification file. @@ -82,11 +75,15 @@ def run_conversion_from_yaml( overwrite : bool, default: False If True, replaces any existing NWBFile at the nwbfile_path location, if save_to_file is True. If False, appends the existing NWBFile at the nwbfile_path location, if save_to_file is True. + upload_to_dandiset : str, optional + If you wish to upload the resulting NWB file to a particular Dandiset, specify the six-digit ID here. + When using this feature, the `DANDI_API_KEY` environment variable must be set. """ # This check is technically a part of the automatic dandi upload, but good to check as early as possible # to avoid wasting time. 
if upload_to_dandiset: - assert os.getenv("DANDI_API_KEY"), ( + dandi_api_token = os.getenv("DANDI_API_KEY") + assert dandi_api_token is not None, ( "Unable to find environment variable 'DANDI_API_KEY'. " "Please retrieve your token from DANDI and set this environment variable." ) @@ -163,38 +160,5 @@ def run_conversion_from_yaml( named_dandi_metadata["path"].rename(str(output_folder_path / dandi_filename)) if upload_to_dandiset: - staging = int(upload_to_dandiset) < 100000 # temporary heuristic + staging = int(upload_to_dandiset) >= 200_000 automatic_dandi_upload(dandiset_id=upload_to_dandiset, nwb_folder_path=output_folder_path, staging=staging) - - -def deploy_conversion_from_yaml_and_upload_to_dandi( # the 'and upload to DANDI' part should ultimately be embedded in the YAML syntax - specification_file_path: FilePath, - efs_name: str, - dandiset_id: str, - transfer_method: Literal["rclone"], - transfer_commands: str, - transfer_config_file_path: FilePath, -): - job_name = Path(specification_file_path).stem + "_" + str(uuid4()) - with open(file=specification_file_path) as file: - yaml_as_string = "".join(file.readlines()) # Loaded as raw string, not as dict - - if transfer_method == "rclone": - docker_container = "ghcr.io/catalystneuro/neuroconv:with_rclone" - with open(file=transfer_config_file_path) as file: - config_as_string = "".join(file.readlines()) # TODO - check newlines are OK cross-platform - environment_variables = [ - dict(name="CONFIG_STREAM", value=config_as_string), - dict(name="RCLONE_COMMANDS", value=transfer_commands), - dict(name="YAML_STREAM", value=yaml_as_string), - dict(name="DANDI_API_KEY", value=os.environ["DANDI_API_KEY"]), - dict(name="DANDISET_ID", value=dandiset_id), - ] - else: - raise NotImplementedError(f"The transfer method {transfer_method} is not yet supported!") - submit_aws_batch_job( - job_name=job_name, - docker_container=docker_container, - efs_name=efs_name, - environment_variables=environment_variables, - ) # TODO - setup something here with status tracker table - might have to be on YAML level From 9af5b9951ac0205e6c06d93bfdc88814a260ab79 Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Thu, 25 Jul 2024 13:23:44 -0400 Subject: [PATCH 52/54] separate base code; add new entrypoint; adjust dockerfiles; add EFS cleanup; begin tests --- .github/workflows/aws_tests.yml | 4 +- ...d_docker_image_dev_for_ec2_deployment.yml} | 20 +- ...and_upload_docker_image_latest_release.yml | 5 +- ...mage_latest_release_for_ec2_deployment.yml | 48 +++++ ...upload_docker_image_rclone_with_config.yml | 1 + dockerfiles/neuroconv_dev_for_ec2_deployment | 6 + dockerfiles/neuroconv_latest_yaml_variable | 4 - ...ockerfile => neuroconv_release_dockerfile} | 2 +- .../neuroconv_release_for_ec2_deployment | 4 + setup.py | 3 +- .../tools/data_transfers/__init__.py | 11 +- src/neuroconv/tools/data_transfers/_aws.py | 62 +++++- .../yaml_conversion_specification/__init__.py | 7 +- .../_yaml_conversion_specification.py | 181 +++++++++++------- .../_yaml_conversion_specification_cli.py | 111 +++++++++++ tests/test_minimal/test_tools/aws_tools.py | 153 +++++++++++++++ tests/test_minimal/test_tools/s3_tools.py | 84 -------- 17 files changed, 516 insertions(+), 190 deletions(-) rename .github/workflows/{build_and_upload_docker_image_yaml_variable.yml => build_and_upload_docker_image_dev_for_ec2_deployment.yml} (60%) create mode 100644 .github/workflows/build_and_upload_docker_image_latest_release_for_ec2_deployment.yml create mode 100644 dockerfiles/neuroconv_dev_for_ec2_deployment 
delete mode 100644 dockerfiles/neuroconv_latest_yaml_variable rename dockerfiles/{neuroconv_latest_release_dockerfile => neuroconv_release_dockerfile} (63%) create mode 100644 dockerfiles/neuroconv_release_for_ec2_deployment create mode 100644 src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py create mode 100644 tests/test_minimal/test_tools/aws_tools.py delete mode 100644 tests/test_minimal/test_tools/s3_tools.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/aws_tests.yml index 1b3a93a66..94abcb46b 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/aws_tests.yml @@ -38,5 +38,5 @@ jobs: - name: Install full requirements run: pip install .[aws,test] - - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_minimal/test_tools/s3_tools.py + - name: Run subset of tests that use AWS live services + run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py diff --git a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml b/.github/workflows/build_and_upload_docker_image_dev_for_ec2_deployment.yml similarity index 60% rename from .github/workflows/build_and_upload_docker_image_yaml_variable.yml rename to .github/workflows/build_and_upload_docker_image_dev_for_ec2_deployment.yml index 7ff2dc63c..b24d5e5ed 100644 --- a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml +++ b/.github/workflows/build_and_upload_docker_image_dev_for_ec2_deployment.yml @@ -1,9 +1,8 @@ -name: Build and Upload Docker Image of latest with YAML variable to GHCR +name: Build and Upload Docker Image of Current Dev Branch to GHCR on: - workflow_run: - workflows: [build_and_upload_docker_image_latest_release] - types: [completed] + schedule: + - cron: "0 16 * * 1" # Weekly at noon EST on Monday workflow_dispatch: concurrency: # Cancel previous workflows on the same pull request @@ -12,7 +11,7 @@ concurrency: # Cancel previous workflows on the same pull request jobs: release-image: - name: Build and Upload Docker Image of latest with YAML variable to GHCR + name: Build and Upload Docker Image of Current Dev Branch to GHCR runs-on: ubuntu-latest steps: - name: Checkout @@ -27,11 +26,16 @@ jobs: registry: ghcr.io username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} - - name: Build and push YAML variable image based on latest + - name: Get current date + id: date + run: | + date_tag="$(date +'%Y-%m-%d')" + echo "date_tag=$date_tag" >> $GITHUB_OUTPUT + - name: Build and push uses: docker/build-push-action@v5 with: push: true # Push is a shorthand for --output=type=registry - tags: ghcr.io/catalystneuro/neuroconv:yaml_variable + tags: ghcr.io/catalystneuro/neuroconv:dev,ghcr.io/catalystneuro/neuroconv:${{ steps.date.outputs.date_tag }} context: . 
- file: dockerfiles/neuroconv_latest_yaml_variable + file: dockerfiles/neuroconv_dev_for_ec2_deployment provenance: false diff --git a/.github/workflows/build_and_upload_docker_image_latest_release.yml b/.github/workflows/build_and_upload_docker_image_latest_release.yml index 423686ec9..947369787 100644 --- a/.github/workflows/build_and_upload_docker_image_latest_release.yml +++ b/.github/workflows/build_and_upload_docker_image_latest_release.yml @@ -17,6 +17,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + - name: Parse the version from the GitHub latest release tag id: parsed_version run: | @@ -26,6 +27,7 @@ jobs: echo "version_tag=$version_tag" >> $GITHUB_OUTPUT - name: Printout parsed version for GitHub Action log run: echo ${{ steps.parsed_version.outputs.version_tag }} + - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -36,11 +38,12 @@ jobs: registry: ghcr.io username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Build and push uses: docker/build-push-action@v5 with: push: true # Push is a shorthand for --output=type=registry tags: ghcr.io/catalystneuro/neuroconv:latest,ghcr.io/catalystneuro/neuroconv:${{ steps.parsed_version.outputs.version_tag }} context: . - file: dockerfiles/neuroconv_latest_release_dockerfile + file: dockerfiles/neuroconv_release_dockerfile provenance: false diff --git a/.github/workflows/build_and_upload_docker_image_latest_release_for_ec2_deployment.yml b/.github/workflows/build_and_upload_docker_image_latest_release_for_ec2_deployment.yml new file mode 100644 index 000000000..b7f62d980 --- /dev/null +++ b/.github/workflows/build_and_upload_docker_image_latest_release_for_ec2_deployment.yml @@ -0,0 +1,48 @@ +name: Build and Upload Docker Image of Latest Release for EC2 Deployment to GHCR + +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon EST on Monday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-image: + name: Build and Upload Docker Image + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Parse the version from the GitHub latest release tag + id: parsed_version + run: | + git fetch --prune --unshallow --tags + tags="$(git tag --list)" + version_tag=${tags: -6 : 6} + echo "version_tag=$version_tag" >> $GITHUB_OUTPUT + - name: Printout parsed version for GitHub Action log + run: echo ${{ steps.parsed_version.outputs.version_tag }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} + password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true # Push is a shorthand for --output=type=registry + tags: ghcr.io/catalystneuro/neuroconv_for_ec2_deployment:dev + context: . 
+ file: dockerfiles/neuroconv_release_for_ec2_deployment + provenance: false diff --git a/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml b/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml index 7ff197bdc..73ab0e5f7 100644 --- a/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml +++ b/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml @@ -26,6 +26,7 @@ jobs: registry: ghcr.io username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Build and push uses: docker/build-push-action@v5 with: diff --git a/dockerfiles/neuroconv_dev_for_ec2_deployment b/dockerfiles/neuroconv_dev_for_ec2_deployment new file mode 100644 index 000000000..64f37037a --- /dev/null +++ b/dockerfiles/neuroconv_dev_for_ec2_deployment @@ -0,0 +1,6 @@ +FROM python:3.11.7-slim +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A docker image extending the dev branch of the NeuroConv package with modifications related to deployment on EC2 Batch." +ADD ./ neuroconv +RUN cd neuroconv && pip install .[full] +CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv_ec2 run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" --update-tracking-table "$TRACKING_TABLE" --tracking-table-submission-id "$SUBMISSION_ID" --efs-volume-name-to-cleanup "$EFS_VOLUME" diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable deleted file mode 100644 index 84c382738..000000000 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ /dev/null @@ -1,4 +0,0 @@ -FROM ghcr.io/catalystneuro/neuroconv:latest -LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv -LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." -CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" diff --git a/dockerfiles/neuroconv_latest_release_dockerfile b/dockerfiles/neuroconv_release_dockerfile similarity index 63% rename from dockerfiles/neuroconv_latest_release_dockerfile rename to dockerfiles/neuroconv_release_dockerfile index a4a532b58..a2364d57e 100644 --- a/dockerfiles/neuroconv_latest_release_dockerfile +++ b/dockerfiles/neuroconv_release_dockerfile @@ -1,6 +1,6 @@ FROM python:3.11.7-slim LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv -LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package." +LABEL org.opencontainers.image.description="A docker image for an official release of the full NeuroConv package." 
RUN apt update && apt install musl-dev python3-dev -y RUN pip install "neuroconv[full]" CMD ["python -m"] diff --git a/dockerfiles/neuroconv_release_for_ec2_deployment b/dockerfiles/neuroconv_release_for_ec2_deployment new file mode 100644 index 000000000..723e4e631 --- /dev/null +++ b/dockerfiles/neuroconv_release_for_ec2_deployment @@ -0,0 +1,4 @@ +FROM ghcr.io/catalystneuro/neuroconv:latest +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A docker image extending the official release of the NeuroConv package with modifications related to deployment on EC2 Batch." +CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv_ec2 run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" --update-tracking-table "$TRACKING_TABLE" --tracking-table-submission-id "$SUBMISSION_ID" --efs-volume-name-to-cleanup "$EFS_VOLUME" diff --git a/setup.py b/setup.py index fbe6ff142..5ba404ccc 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,8 @@ extras_require=extras_require, entry_points={ "console_scripts": [ - "neuroconv = neuroconv.tools.yaml_conversion_specification._yaml_conversion_specification:run_conversion_from_yaml_cli", + "neuroconv = neuroconv.tools.yaml_conversion_specification._yaml_conversion_specification_cli:run_conversion_from_yaml_cli", + "neuroconv_ec2 = neuroconv.tools.yaml_conversion_specification._yaml_conversion_specification_cli:run_ec2_conversion_from_yaml_cli", ], }, license="BSD-3-Clause", diff --git a/src/neuroconv/tools/data_transfers/__init__.py b/src/neuroconv/tools/data_transfers/__init__.py index dd653cb05..a6a90b848 100644 --- a/src/neuroconv/tools/data_transfers/__init__.py +++ b/src/neuroconv/tools/data_transfers/__init__.py @@ -1,15 +1,24 @@ """Collection of helper functions for assessing and performing automated data transfers.""" -from ._aws import estimate_s3_conversion_cost, submit_aws_batch_job +from ._aws import ( + estimate_s3_conversion_cost, + submit_aws_batch_job, + update_table_status, + deploy_conversion_on_ec2, + delete_efs_volume, +) from ._dandi import automatic_dandi_upload from ._globus import get_globus_dataset_content_sizes, transfer_globus_content from ._helpers import estimate_total_conversion_runtime __all__ = [ "submit_aws_batch_job", + "delete_efs_volume", + "deploy_conversion_on_ec2", "estimate_s3_conversion_cost", "automatic_dandi_upload", "get_globus_dataset_content_sizes", "transfer_globus_content", "estimate_total_conversion_runtime", + "update_table_status", ] diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 322140daf..4a4ce07fe 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -323,10 +323,10 @@ def submit_aws_batch_job( def update_table_status( *, - region: str = "us-east-2", + status_tracker_table_name: str, submission_id: str, status: str, - status_tracker_table_name: str = "neuroconv_batch_status_tracker", + region: str = "us-east-2", ) -> None: """ Helper function for updating a status value on a DynamoDB table tracking the status of EC2 jobs. @@ -335,16 +335,16 @@ def update_table_status( Parameters ---------- - region : str, default: "us-east-2" - The AWS region to use for the job. - us-east-2 (Ohio) is the location of the DANDI Archive, and we recommend all operations be run in that region to - remove cross-region transfer costs. 
+ status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. submission_id : str The random hash that was assigned on submission of this job to the status tracker table. status : str The new status value to update. - status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" - The name of the DynamoDB table to use for tracking job status. + region : str, default: "us-east-2" + The AWS region to use for the job. + us-east-2 (Ohio) is the location of the DANDI Archive, and we recommend all operations be run in that region to + remove cross-region transfer costs. """ import boto3 @@ -364,6 +364,36 @@ def update_table_status( return +def delete_efs_volume(efs_volume_name: str) -> None: + """ + Delete an EFS volume of a particular name. + + Parameters + ---------- + efs_volume_name : str + The name of the EFS volume to delete. + """ + import boto3 + + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + + efs_client = boto3.client( + service_name="efs", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + all_efs_volumes = efs_client.describe_file_systems() + all_efs_volumes_by_name = {efs_volume["Name"]: efs_volume for efs_volume in all_efs_volumes["FileSystems"]} + + efs_volume = all_efs_volumes_by_name[efs_volume_name] + file_system_id = efs_volume["FileSystemId"] + efs_client.delete_file_system(FileSystemId=file_system_id) + + return None + + def deploy_conversion_on_ec2( specification_file_path: FilePath, transfer_commands: str, @@ -373,6 +403,8 @@ def deploy_conversion_on_ec2( transfer_method: Literal["rclone"] = "rclone", transfer_config_file_path: Optional[FilePath] = None, efs_volume_creation_options: Optional[dict] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + cleanup_efs_volume: bool = True, ) -> None: """ Helper function for deploying a YAML-based NeuroConv data conversion in the cloud on AWS EC2 Batch. @@ -402,6 +434,12 @@ def deploy_conversion_on_ec2( The dictionary of keyword arguments to pass to `boto3.client.EFS.create_file_system` when the volmume does not already exist. These are ignored if the volume already exists. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + cleanup_efs_volume : bool, default: True + Whether or not to schedule the deletion of the associated `efs_volume_name` when the deployment is complete. + This is recommended to avoid unnecessary costs from leaving unused resources hanging indefinitely. + It is also recommended to manually ensure periodically that this cleanup was successful. 
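    Examples
    --------
    A minimal sketch of a typical call; the remote name, source path, mount destination, volume name, and
    Dandiset ID below are illustrative placeholders rather than values defined anywhere in this package:

    >>> deploy_conversion_on_ec2(
    ...     specification_file_path="conversion_specification.yml",
    ...     transfer_commands="rclone copy my_remote:my_source_folder /mnt/efs --verbose --config ./rclone.conf",
    ...     efs_volume_name="my_conversion_volume",
    ...     dandiset_id="000123",
    ... )

    The call assumes the `DANDI_API_KEY` environment variable is already set and that AWS credentials are
    available, as described above.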
""" import boto3 @@ -460,6 +498,7 @@ def deploy_conversion_on_ec2( dict(name="RCLONE_CONFIG", value=rclone_config_file_content), dict(name="RCLONE_COMMANDS", value=transfer_commands), ], + status_tracker_table_name=status_tracker_table_name, ) # Job 2: Run YAML specification on transferred data and upload to DANDI @@ -469,7 +508,7 @@ def deploy_conversion_on_ec2( submit_aws_batch_job( conversion_job_name=Path(specification_file_path).stem + "_conversion_" + unique_job_reference, job_dependencies=[{"jobId": transfer_job_submission_info["jobId"], "type": "SEQUENTIAL"}], - docker_container="ghcr.io/catalystneuro/neuroconv:yaml_variable", + docker_container="ghcr.io/catalystneuro/neuroconv_for_ec2_deployment:dev", efs_volume_name=efs_volume_name, environment_variables=[ dict(name="NEUROCONV_YAML", value=specification_file_content), @@ -477,6 +516,11 @@ def deploy_conversion_on_ec2( dict(name="NEUROCONV_OUTPUT_PATH", value=""), dict(name="DANDI_API_KEY", value=dandi_api_token), dict(name="DANDISET_ID", value=dandiset_id), + dict(name="AWS_ACCESS_KEY_ID", value=aws_access_key_id), + dict(name="AWS_SECRET_ACCESS_KEY", value=aws_secret_access_key), + dict(name="TRACKING_TABLE", value=status_tracker_table_name), + dict(name="SUBMISSION_ID", value=transfer_job_submission_info["table_submission_info"]["id"]), + dict(name="EFS_VOLUME", value=efs_volume_name), ], ) diff --git a/src/neuroconv/tools/yaml_conversion_specification/__init__.py b/src/neuroconv/tools/yaml_conversion_specification/__init__.py index 35e4cb3d8..ef43b5306 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/__init__.py +++ b/src/neuroconv/tools/yaml_conversion_specification/__init__.py @@ -1,6 +1,3 @@ -from .yaml_conversion_specification import ( - deploy_conversion_from_yaml_and_upload_to_dandi, - run_conversion_from_yaml, -) +from ._yaml_conversion_specification import run_conversion_from_yaml, run_ec2_conversion_from_yaml -__all__ = ["run_conversion_from_yaml", "deploy_conversion_from_yaml_and_upload_to_dandi"] +__all__ = ["run_conversion_from_yaml", "run_ec2_conversion_from_yaml"] diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index fbb9438cb..c0a958e4e 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -4,60 +4,23 @@ from pathlib import Path from typing import Optional -import click -from dandi.metadata import _get_pynwb_metadata -from dandi.organize import create_unique_filenames_from_metadata from jsonschema import RefResolver, validate -from ..data_transfers import automatic_dandi_upload +from ..data_transfers import ( + automatic_dandi_upload, + delete_efs_volume, + update_table_status, +) +from ..importing import get_package from ...nwbconverter import NWBConverter from ...utils import FilePathType, FolderPathType, dict_deep_update, load_dict_from_file -@click.command() -@click.argument("specification-file-path") -@click.option( - "--data-folder-path", - help="Path to folder where the source data may be found.", - type=click.Path(writable=False), -) -@click.option( - "--output-folder-path", - default=None, - help="Path to folder where you want to save the output NWBFile.", - type=click.Path(writable=True), -) -@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) -@click.option( - 
"--upload-to-dandiset-id", - help=( - "Do you want to upload the result to DANDI? If so, specify the six-digit Dandiset ID. " - "Also ensure you have your DANDI_API_KEY set as an environment variable." - ), -) -def run_conversion_from_yaml_cli( - specification_file_path: str, - data_folder_path: Optional[str] = None, - output_folder_path: Optional[str] = None, - overwrite: bool = False, - upload_to_dandiset: Optional[str] = None, -): - """Run the tool function 'run_conversion_from_yaml' via the command line.""" - run_conversion_from_yaml( - specification_file_path=specification_file_path, - data_folder_path=data_folder_path, - output_folder_path=output_folder_path, - upload_to_dandiset=upload_to_dandiset, - overwrite=overwrite, - ) - - def run_conversion_from_yaml( specification_file_path: FilePathType, data_folder_path: Optional[FolderPathType] = None, output_folder_path: Optional[FolderPathType] = None, overwrite: bool = False, - upload_to_dandiset: Optional[str] = None, ): """ Run conversion to NWB given a yaml specification file. @@ -73,20 +36,11 @@ def run_conversion_from_yaml( Folder path leading to the desired output location of the .nwb files. The default is the parent directory of the specification_file_path. overwrite : bool, default: False - If True, replaces any existing NWBFile at the nwbfile_path location, if save_to_file is True. - If False, appends the existing NWBFile at the nwbfile_path location, if save_to_file is True. - upload_to_dandiset : str, optional - If you wish to upload the resulting NWB file to a particular Dandiset, specify the six-digit ID here. - When using this feature, the `DANDI_API_KEY` environment variable must be set. + If True, replaces the existing corresponding NWBFile at the `output_folder_path`. + If False, appends the existing corresponding NWBFile at the `output_folder_path`. """ - # This check is technically a part of the automatic dandi upload, but good to check as early as possible - # to avoid wasting time. - if upload_to_dandiset: - dandi_api_token = os.getenv("DANDI_API_KEY") - assert dandi_api_token is not None, ( - "Unable to find environment variable 'DANDI_API_KEY'. " - "Please retrieve your token from DANDI and set this environment variable." - ) + from dandi.organize import create_unique_filenames_from_metadata + from dandi.pynwb_utils import _get_pynwb_metadata if data_folder_path is None: data_folder_path = Path(specification_file_path).parent @@ -143,22 +97,101 @@ def run_conversion_from_yaml( ) # To properly mimic a true dandi organization, the full directory must be populated with NWBFiles. 
all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"] - if any(["temp_nwbfile_name_" in nwbfile_path.stem for nwbfile_path in all_nwbfile_paths]): - dandi_metadata_list = [] - for nwbfile_path in all_nwbfile_paths: - dandi_metadata = _get_pynwb_metadata(path=nwbfile_path) - dandi_metadata.update(path=nwbfile_path) + nwbfile_paths_to_set = [ + nwbfile_path for nwbfile_path in all_nwbfile_paths if "temp_nwbfile_name_" in nwbfile_path.stem + ] + if any(nwbfile_paths_to_set): + dandi_metadata_list = list() + for nwbfile_path_to_set in nwbfile_paths_to_set: + dandi_metadata = _get_pynwb_metadata(path=nwbfile_path_to_set) + dandi_metadata.update(path=nwbfile_path_to_set) dandi_metadata_list.append(dandi_metadata) - named_dandi_metadata_list = create_unique_filenames_from_metadata(metadata=dandi_metadata_list) - - for named_dandi_metadata in named_dandi_metadata_list: - if "temp_nwbfile_name_" in named_dandi_metadata["path"].stem: - dandi_filename = named_dandi_metadata["dandi_filename"].replace(" ", "_") - assert ( - dandi_filename != ".nwb" - ), f"Not enough metadata available to assign name to {str(named_dandi_metadata['path'])}!" - named_dandi_metadata["path"].rename(str(output_folder_path / dandi_filename)) - - if upload_to_dandiset: - staging = int(upload_to_dandiset) >= 200_000 - automatic_dandi_upload(dandiset_id=upload_to_dandiset, nwb_folder_path=output_folder_path, staging=staging) + dandi_metadata_with_set_paths = create_unique_filenames_from_metadata(metadata=dandi_metadata_list) + + for nwbfile_path_to_set, dandi_metadata_with_set_path in zip( + nwbfile_paths_to_set, dandi_metadata_with_set_paths + ): + dandi_filename = dandi_metadata_with_set_path["dandi_filename"] + + assert ( + dandi_filename != ".nwb" + ), f"Not enough metadata available to assign name to {str(nwbfile_path_to_set)}!" + + # Rename file on system + nwbfile_path_to_set.rename(str(output_folder_path / dandi_filename)) + + +def run_ec2_conversion_from_yaml( + specification_file_path: FilePathType, + upload_to_dandiset: str, + update_tracking_table: str, + tracking_table_submission_id: str, + efs_volume_name_to_cleanup: str, +): + """ + Run conversion to NWB given a yaml specification file. + + Parameters + ---------- + specification_file_path : FilePathType + File path leading to .yml specification file for NWB conversion. + upload_to_dandiset : str + If you wish to upload the resulting NWB file to a particular Dandiset, specify the six-digit ID here. + When using this feature, the `DANDI_API_KEY` environment variable must be set. + update_tracking_table : str + The name of the DynamoDB status tracking table to send a completion update to when the conversion is finished. + tracking_table_submission_id : str + The unique submission ID specifying the row (job) of the DynamoDB status tracking table to update the status of. + efs_volume_name_to_cleanup : str + The name of any associated EFS volume to cleanup upon successful conversion or upload. + This is only intended for use when running in EC2 Batch, but is necessary to include here in order to ensure + synchronicity. + """ + # Ensure boto3 is installed before beginning procedure + get_package(package_name="boto3") + + # This check is technically a part of the automatic dandi upload, but good to check as early as possible + # to avoid wasting time. 
+ dandi_api_token = os.getenv("DANDI_API_KEY") + assert dandi_api_token is not None and dandi_api_token != "", ( + "Unable to read environment variable 'DANDI_API_KEY'. " + "Please retrieve your token from DANDI and set this environment variable." + ) + + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + assert aws_access_key_id is not None and aws_access_key_id != "", ( + "Unable to read environment variable 'AWS_ACCESS_KEY_ID'. " + "Please create and set AWS credentials if you wish to update a tracking table." + ) + assert aws_secret_access_key is not None and aws_secret_access_key != "", ( + "Unable to read environment variable 'AWS_SECRET_ACCESS_KEY'. " + "Please create and set AWS credentials if you wish to update a tracking table." + ) + + if update_tracking_table is not None and tracking_table_submission_id is None: + raise ValueError( + f"The table '{update_tracking_table}' was specified to be updated but no submission ID was specified! " + "Please specify the `tracking_table_submission_id` keyword argument." + ) + if update_tracking_table is None and tracking_table_submission_id is not None: + raise ValueError( + f"The submission ID '{tracking_table_submission_id}' was specified to be updated but no table name was " + "specified! Please specify the `update_tracking_table` keyword argument." + ) + + # Convert + run_conversion_from_yaml(specification_file_path=specification_file_path) + + # Upload + output_folder_path = Path(specification_file_path).parent + staging = int(upload_to_dandiset) >= 200_000 + automatic_dandi_upload(dandiset_id=upload_to_dandiset, nwb_folder_path=output_folder_path, staging=staging) + + # Update tracker + update_table_status( + status_tracker_table_name=update_tracking_table, submission_id=tracking_table_submission_id, status="Uploaded" + ) + + # Cleanup + delete_efs_volume(efs_volume_name=efs_volume_name_to_cleanup) diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py new file mode 100644 index 000000000..498c2dcd0 --- /dev/null +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py @@ -0,0 +1,111 @@ +from typing import Optional + +import click + +from ._yaml_conversion_specification import ( + run_conversion_from_yaml, + run_ec2_conversion_from_yaml, +) + + +@click.command() +@click.argument("specification-file-path") +@click.option( + "--data-folder-path", + help="Path to folder where the source data may be found.", + type=click.Path(writable=True), +) +@click.option( + "--output-folder-path", + default=None, + help="Path to folder where you want to save the output NWBFile.", + type=click.Path(writable=True), +) +@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) +def run_conversion_from_yaml_cli( + specification_file_path: str, + data_folder_path: Optional[str] = None, + output_folder_path: Optional[str] = None, + overwrite: bool = False, +): + """ + Run the tool function 'run_conversion_from_yaml' via the command line. + + specification-file-path : + Path to the .yml specification file. 
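    Example invocation of the `neuroconv` console script defined in setup.py (the file and folder paths are
    illustrative placeholders):

        neuroconv my_conversion_specification.yml --data-folder-path /path/to/source_data --output-folder-path /path/to/output --overwrite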
+ """ + run_conversion_from_yaml( + specification_file_path=specification_file_path, + data_folder_path=data_folder_path, + output_folder_path=output_folder_path, + overwrite=overwrite, + ) + + +@click.command() +@click.argument("specification-file-path") +@click.option( + "--data-folder-path", + help="Path to folder where the source data may be found.", + type=click.Path(writable=False), +) +@click.option( + "--output-folder-path", + default=None, + help="Path to folder where you want to save the output NWBFile.", + type=click.Path(writable=True), +) +@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) +@click.option( + "--upload-to-dandiset-id", + help=( + "Do you want to upload the result to DANDI? If so, specify the six-digit Dandiset ID. " + "Also ensure you have your DANDI_API_KEY set as an environment variable." + ), + type=str, + required=False, +) +@click.option( + "--update-tracking-table", + help=( + "The name of the DynamoDB status tracking table to send a completion update to when the conversion is finished." + ), + type=str, + required=False, +) +@click.option( + "--tracking-table-submission-id", + help=( + "The unique submission ID specifying the row (job) of the DynamoDB status tracking table " + "to update the status of." + ), + type=str, + required=False, +) +@click.option( + "--efs-volume-name-to-cleanup", + help="The name of any associated EFS volume to cleanup upon successful conversion or upload.", + type=str, + required=False, +) +def run_ec2_conversion_from_yaml_cli( + specification_file_path: str, + data_folder_path: Optional[str] = None, + output_folder_path: Optional[str] = None, + overwrite: bool = False, + upload_to_dandiset: Optional[str] = None, + update_tracking_table: Optional[str] = None, + tracking_table_submission_id: Optional[str] = None, + efs_volume_name_to_cleanup: Optional[str] = None, +): + """Run the tool function 'run_conversion_from_yaml' via the command line.""" + run_ec2_conversion_from_yaml( + specification_file_path=specification_file_path, + data_folder_path=data_folder_path, + output_folder_path=output_folder_path, + overwrite=overwrite, + upload_to_dandiset=upload_to_dandiset, + update_tracking_table=update_tracking_table, + tracking_table_submission_id=tracking_table_submission_id, + efs_volume_name_to_cleanup=efs_volume_name_to_cleanup, + ) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py new file mode 100644 index 000000000..b6cadcfa1 --- /dev/null +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -0,0 +1,153 @@ +import os +from datetime import datetime +from pathlib import Path +from unittest import TestCase + +from neuroconv.tools.data_transfers import ( + deploy_conversion_on_ec2, + estimate_s3_conversion_cost, + estimate_total_conversion_runtime, + submit_aws_batch_job, +) + +from .test_on_data.setup_paths import OUTPUT_PATH + +RCLONE_DRIVE_ACCESS_TOKEN = os.environ["RCLONE_DRIVE_ACCESS_TOKEN"] +RCLONE_DRIVE_REFRESH_TOKEN = os.environ["RCLONE_DRIVE_REFRESH_TOKEN"] +RCLONE_EXPIRY_TOKEN = os.environ["RCLONE_EXPIRY_TOKEN"] + + +def test_estimate_s3_conversion_cost_standard(): + test_sizes = [ + 1, + 100, + 1e3, # 1 GB + 1e5, # 100 GB + 1e6, # 1 TB + 1e7, # 10 TB + 1e8, # 100 TB + ] + results = [estimate_s3_conversion_cost(total_mb=total_mb) for total_mb in test_sizes] + assert results == [ + 2.9730398740210563e-15, # 1 MB + 2.973039874021056e-11, # 100 MB + 2.9730398740210564e-09, # 1 GB + 2.9730398740210563e-05, # 100 GB + 
0.002973039874021056, # 1 TB + 0.2973039874021056, # 10 TB + 29.73039874021056, # 100 TB + ] + + +def test_estimate_total_conversion_runtime(): + test_sizes = [ + 1, + 100, + 1e3, # 1 GB + 1e5, # 100 GB + 1e6, # 1 TB + 1e7, # 10 TB + 1e8, # 100 TB + ] + results = [estimate_total_conversion_runtime(total_mb=total_mb) for total_mb in test_sizes] + assert results == [ + 0.12352941176470589, + 12.352941176470589, + 123.52941176470588, + 12352.94117647059, + 123529.41176470589, + 1235294.1176470588, + 12352941.176470589, + ] + + +def test_submit_aws_batch_job(): + job_name = "test_submit_aws_batch_job" + docker_image = "ubuntu:latest" + command = "echo 'Testing NeuroConv AWS Batch submission." + + submit_aws_batch_job( + job_name=job_name, + docker_image=docker_image, + command=command, + ) + + +def test_submit_aws_batch_job_with_dependencies(): + job_name_1 = "test_submit_aws_batch_job_with_dependencies_1" + docker_image = "ubuntu:latest" + command_1 = "echo 'Testing NeuroConv AWS Batch submission." + + info = submit_aws_batch_job( + job_name=job_name_1, + docker_image=docker_image, + command=command_1, + ) + job_submission_info = info["job_submission_info"] + + job_name_2 = "test_submit_aws_batch_job_with_dependencies_1" + command_2 = "echo 'Testing NeuroConv AWS Batch submission with dependencies." + job_dependencies = [{"jobId": job_submission_info["jobId"], "type": "SEQUENTIAL"}] + submit_aws_batch_job( + job_name=job_name_2, + docker_image=docker_image, + command=command_2, + job_dependencies=job_dependencies, + ) + + +class TestRcloneWithConfig(TestCase): + """ + In order to run this test in CI successfully, whoever sets the Rclone credentials must use the following setup. + + 1) On your Google Drive, create a folder named 'test_neuroconv_ec2_batch_deployment' + 2) Create a subfolder there named 'test_rclone_source_data' + 3) Copy the 'spikelgx/Noise4Sam' and 'phy/phy_example_0' folders from the 'ephy_testing_data' into that subfolder + 4) Locally, run `rclone config`, then copy the relevant token values into GitHub Action secrets + """ + + test_folder = OUTPUT_PATH / "rclone_tests" + + # Save the .conf file in a separate folder to avoid the potential of the container using the locally mounted file + adjacent_folder = OUTPUT_PATH / "rclone_conf" + test_config_file = adjacent_folder / "rclone.conf" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + self.adjacent_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=RCLONE_DRIVE_ACCESS_TOKEN, + token_type="Bearer", + refresh_token=RCLONE_DRIVE_REFRESH_TOKEN, + expiry=RCLONE_EXPIRY_TOKEN, + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_deploy_conversion_on_ec2(self): + path_to_test_yml_files = Path(__file__).parent.parent / "test_on_data" / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification.yml" + + date_tag = datetime.now().strftime("%y%m%d") + efs_volume_name = f"neuroconv_ci_tests_{date_tag}" + + deploy_conversion_on_ec2( + specification_file_path=yaml_file_path, + transfer_commands="rclone copy ", + transfer_config_file_path=self.test_config_file, + 
efs_volume_name=efs_volume_name, + dandiset_id="200560", + ) + + # assert that EFS volume was cleaned up after some extendedd wait time diff --git a/tests/test_minimal/test_tools/s3_tools.py b/tests/test_minimal/test_tools/s3_tools.py deleted file mode 100644 index fc46e7d84..000000000 --- a/tests/test_minimal/test_tools/s3_tools.py +++ /dev/null @@ -1,84 +0,0 @@ -from neuroconv.tools.data_transfers import ( - estimate_s3_conversion_cost, - estimate_total_conversion_runtime, - submit_aws_batch_job, -) - - -def test_estimate_s3_conversion_cost_standard(): - test_sizes = [ - 1, - 100, - 1e3, # 1 GB - 1e5, # 100 GB - 1e6, # 1 TB - 1e7, # 10 TB - 1e8, # 100 TB - ] - results = [estimate_s3_conversion_cost(total_mb=total_mb) for total_mb in test_sizes] - assert results == [ - 2.9730398740210563e-15, # 1 MB - 2.973039874021056e-11, # 100 MB - 2.9730398740210564e-09, # 1 GB - 2.9730398740210563e-05, # 100 GB - 0.002973039874021056, # 1 TB - 0.2973039874021056, # 10 TB - 29.73039874021056, # 100 TB - ] - - -def test_estimate_total_conversion_runtime(): - test_sizes = [ - 1, - 100, - 1e3, # 1 GB - 1e5, # 100 GB - 1e6, # 1 TB - 1e7, # 10 TB - 1e8, # 100 TB - ] - results = [estimate_total_conversion_runtime(total_mb=total_mb) for total_mb in test_sizes] - assert results == [ - 0.12352941176470589, - 12.352941176470589, - 123.52941176470588, - 12352.94117647059, - 123529.41176470589, - 1235294.1176470588, - 12352941.176470589, - ] - - -def test_submit_aws_batch_job(): - job_name = "test_submit_aws_batch_job" - docker_image = "ubuntu:latest" - command = "echo 'Testing NeuroConv AWS Batch submission." - - submit_aws_batch_job( - job_name=job_name, - docker_image=docker_image, - command=command, - ) - - -def test_submit_aws_batch_job_with_dependencies(): - job_name_1 = "test_submit_aws_batch_job_with_dependencies_1" - docker_image = "ubuntu:latest" - command_1 = "echo 'Testing NeuroConv AWS Batch submission." - - info = submit_aws_batch_job( - job_name=job_name_1, - docker_image=docker_image, - command=command_1, - ) - job_submission_info = info["job_submission_info"] - - job_name_2 = "test_submit_aws_batch_job_with_dependencies_1" - command_2 = "echo 'Testing NeuroConv AWS Batch submission with dependencies." 
- job_dependencies = [{"jobId": job_submission_info["jobId"], "type": "SEQUENTIAL"}] - submit_aws_batch_job( - job_name=job_name_2, - docker_image=docker_image, - command=command_2, - job_dependencies=job_dependencies, - ) From 4022b60ce4eaf5a504188c371f3380bc7c1b733c Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Thu, 25 Jul 2024 16:54:52 -0400 Subject: [PATCH 53/54] fix tests; make deletion safe --- src/neuroconv/tools/data_transfers/_aws.py | 16 +++++++++++---- .../_yaml_conversion_specification_cli.py | 20 +------------------ tests/imports.py | 4 ++-- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/neuroconv/tools/data_transfers/_aws.py b/src/neuroconv/tools/data_transfers/_aws.py index 4a4ce07fe..3339b6dcd 100644 --- a/src/neuroconv/tools/data_transfers/_aws.py +++ b/src/neuroconv/tools/data_transfers/_aws.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import Dict, List, Literal, Optional from uuid import uuid4 +from warnings import warn from pydantic import FilePath @@ -361,7 +362,7 @@ def update_table_status( table.update_item(Key={"id": submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": status}}) - return + return None def delete_efs_volume(efs_volume_name: str) -> None: @@ -387,7 +388,16 @@ def delete_efs_volume(efs_volume_name: str) -> None: all_efs_volumes = efs_client.describe_file_systems() all_efs_volumes_by_name = {efs_volume["Name"]: efs_volume for efs_volume in all_efs_volumes["FileSystems"]} - efs_volume = all_efs_volumes_by_name[efs_volume_name] + efs_volume = all_efs_volumes_by_name.get(efs_volume_name, None) + if efs_volume is None: + warn( + message=( + f"The specified EFS volume '{efs_volume_name}' was not found, and so there is nothing to delete. " + "Please manually check your current EFS volumes to ensure none are hanging." 
+ ), + stacklevel=2, + ) + file_system_id = efs_volume["FileSystemId"] efs_client.delete_file_system(FileSystemId=file_system_id) @@ -512,8 +522,6 @@ def deploy_conversion_on_ec2( efs_volume_name=efs_volume_name, environment_variables=[ dict(name="NEUROCONV_YAML", value=specification_file_content), - dict(name="NEUROCONV_DATA_PATH", value=""), - dict(name="NEUROCONV_OUTPUT_PATH", value=""), dict(name="DANDI_API_KEY", value=dandi_api_token), dict(name="DANDISET_ID", value=dandiset_id), dict(name="AWS_ACCESS_KEY_ID", value=aws_access_key_id), diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py index 498c2dcd0..bd1d50f63 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification_cli.py @@ -44,18 +44,6 @@ def run_conversion_from_yaml_cli( @click.command() @click.argument("specification-file-path") -@click.option( - "--data-folder-path", - help="Path to folder where the source data may be found.", - type=click.Path(writable=False), -) -@click.option( - "--output-folder-path", - default=None, - help="Path to folder where you want to save the output NWBFile.", - type=click.Path(writable=True), -) -@click.option("--overwrite", help="Overwrite an existing NWBFile at the location.", is_flag=True) @click.option( "--upload-to-dandiset-id", help=( @@ -90,20 +78,14 @@ def run_conversion_from_yaml_cli( ) def run_ec2_conversion_from_yaml_cli( specification_file_path: str, - data_folder_path: Optional[str] = None, - output_folder_path: Optional[str] = None, - overwrite: bool = False, upload_to_dandiset: Optional[str] = None, update_tracking_table: Optional[str] = None, tracking_table_submission_id: Optional[str] = None, efs_volume_name_to_cleanup: Optional[str] = None, ): - """Run the tool function 'run_conversion_from_yaml' via the command line.""" + """Run the tool function `run_ec2_conversion_from_yaml` via the command line.""" run_ec2_conversion_from_yaml( specification_file_path=specification_file_path, - data_folder_path=data_folder_path, - output_folder_path=output_folder_path, - overwrite=overwrite, upload_to_dandiset=upload_to_dandiset, update_tracking_table=update_tracking_table, tracking_table_submission_id=tracking_table_submission_id, diff --git a/tests/imports.py b/tests/imports.py index 5f8b65e72..4ad279153 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -54,9 +54,9 @@ def test_tools(self): current_structure = _strip_magic_module_attributes(ls=tools.__dict__) expected_structure = [ - # Sub-Packages - "yaml_conversion_specification", # Attached to namespace by top __init__ call of NWBConverter # Sub-modules + "yaml_conversion_specification", # Attached to namespace by top __init__ call of NWBConverter + "data_transfers", # Attached indirectly by 'yaml_conversion_specification' "importing", # Attached to namespace by importing get_package "hdmf", "nwb_helpers", # Attached to namespace by top __init__ call of NWBConverter From 4491a7f20ab3ec061ff7e0d07a08ac98be91731b Mon Sep 17 00:00:00 2001 From: codycbakerphd Date: Thu, 25 Jul 2024 17:02:34 -0400 Subject: [PATCH 54/54] debugs --- tests/test_minimal/test_tools/aws_tools.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools.py index b6cadcfa1..8d88f1df2 100644 
--- a/tests/test_minimal/test_tools/aws_tools.py +++ b/tests/test_minimal/test_tools/aws_tools.py @@ -96,7 +96,7 @@ def test_submit_aws_batch_job_with_dependencies(): ) -class TestRcloneWithConfig(TestCase): +class TestDeployConversionOnEC2(TestCase): """ In order to run this test in CI successfully, whoever sets the Rclone credentials must use the following setup. @@ -106,7 +106,7 @@ class TestRcloneWithConfig(TestCase): 4) Locally, run `rclone config`, then copy the relevant token values into GitHub Action secrets """ - test_folder = OUTPUT_PATH / "rclone_tests" + test_folder = OUTPUT_PATH / "deploy_conversion_on_ec2_tests" # Save the .conf file in a separate folder to avoid the potential of the container using the locally mounted file adjacent_folder = OUTPUT_PATH / "rclone_conf" @@ -139,15 +139,20 @@ def test_deploy_conversion_on_ec2(self): path_to_test_yml_files = Path(__file__).parent.parent / "test_on_data" / "conversion_specifications" yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification.yml" + transfer_commands = ( + "rclone copy test_google_drive_remote:test_neuroconv_ec2_batch_deployment {self.test_folder} " + "--verbose --progress --config ./rclone.conf" + ) + date_tag = datetime.now().strftime("%y%m%d") efs_volume_name = f"neuroconv_ci_tests_{date_tag}" deploy_conversion_on_ec2( specification_file_path=yaml_file_path, - transfer_commands="rclone copy ", + transfer_commands=transfer_commands, transfer_config_file_path=self.test_config_file, efs_volume_name=efs_volume_name, dandiset_id="200560", ) - # assert that EFS volume was cleaned up after some extendedd wait time + # assert that EFS volume was cleaned up after some extended wait time
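The new test above still ends with a placeholder comment rather than an actual assertion that the EFS volume was removed. A minimal sketch of one way such a check could be written is shown below, reusing the same boto3 EFS client calls as `_aws.py` and assuming AWS credentials are configured in the environment; the helper name, the 20 minute wait limit, and the 60 second polling interval are arbitrary choices, not values taken from this patch series.

# Sketch of the missing cleanup assertion: poll until no EFS file system carries the test's volume name.
import time

import boto3


def wait_for_efs_volume_cleanup(efs_volume_name: str, region: str = "us-east-2", timeout_in_seconds: int = 1200) -> None:
    efs_client = boto3.client(service_name="efs", region_name=region)

    start_time = time.time()
    while time.time() - start_time < timeout_in_seconds:
        # Collect the 'Name' tag of every visible file system; absent names resolve to None
        all_volume_names = {
            file_system.get("Name") for file_system in efs_client.describe_file_systems()["FileSystems"]
        }
        if efs_volume_name not in all_volume_names:
            return None  # The deployment cleaned up after itself
        time.sleep(60)

    raise AssertionError(f"The EFS volume '{efs_volume_name}' was not cleaned up within {timeout_in_seconds} seconds!")

A similar polling check against the DynamoDB status tracker table (reading the row for the submission ID and waiting for the "Uploaded" value written by `update_table_status`) could confirm that the conversion job itself finished before the volume check times out.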