From 41576519a0757bee12b193a7aae3f0b48c7ed9cf Mon Sep 17 00:00:00 2001 From: Zachary Susswein Date: Wed, 11 Dec 2024 17:17:27 +0000 Subject: [PATCH] Clean up a bit of the Azure stuff --- Makefile | 8 +++++- azure/job.py | 56 ++++++++++++++++++++++++++++++------------ azure/requirements.txt | 2 ++ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 7d82778..9879004 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,12 @@ build: tag: docker tag $(IMAGE_NAME):$(TAG) $(REGISTRY)$(IMAGE_NAME):$(TAG) +config: + gh workflow run \ + -R cdcgov/cfa-config-generator run-workload.yaml \ + -f disease=all \ + -f state=all + run-batch: docker build -f Dockerfile-batch -t batch . --no-cache docker run --rm \ @@ -37,7 +43,7 @@ run: docker run --mount type=bind,source=$(PWD),target=/cfa-epinow2-pipeline -it \ --env-file .env \ --rm $(REGISTRY)$(IMAGE_NAME):test-$(TAG) \ - Rscript -e "CFAEpiNow2Pipeline::orchestrate_pipeline('$(CONFIG)', config_container = 'zs-test-pipeline-update', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')" + Rscript -e "CFAEpiNow2Pipeline::orchestrate_pipeline('$(CONFIG)', config_container = 'rt-epinow2-config', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')" up: diff --git a/azure/job.py b/azure/job.py index 2259038..2c41b98 100644 --- a/azure/job.py +++ b/azure/job.py @@ -1,11 +1,23 @@ +import datetime import sys import os +import uuid from azure.identity import DefaultAzureCredential from msrest.authentication import BasicTokenAuthentication +from azure.storage.blob import BlobServiceClient from azure.batch import BatchServiceClient import azure.batch.models as batchmodels +blob_account = os.environ["BLOB_ACCOUNT"] +blob_url = f"https://{blob_account}.blob.core.windows.net" +batch_account = os.environ["BATCH_ACCOUNT"] +batch_url = f"https://{batch_account}.eastus.batch.azure.com" +config_container = sys.argv[1] +pool_id = sys.argv[2] +# Re-use Azure Pool name unless otherwise specified +job_id = sys.argv[3] if len(sys.argv) > 3 else pool_id + if __name__ == "__main__": # Authenticate with workaround because Batch is the one remaining # service that doesn't yet support Azure auth flow v2 :) :) @@ -16,13 +28,9 @@ batch_client = BatchServiceClient( credentials=credential_v1, - batch_url=os.environ["az_batch_url"] + batch_url=batch_url ) - # Add job to pool - pool_id = sys.argv[1] - job_id = sys.argv[2] - ############# # Set up job job = batchmodels.JobAddParameter( @@ -38,9 +46,24 @@ else: print("Job already exists. Using job object") - ########### + ########## + # Get tasks + blob_service_client = BlobServiceClient(blob_url, credential_v2) + container_client = blob_service_client.get_container_client(container=config_container) + two_mins_ago = datetime.datetime.now(datetime.UTC) - datetime.timedelta(minutes=2) + task_configs = [] + blobs = container_client.list_blobs() + + for blob in blobs: + if blob.creation_time > two_mins_ago: + task_configs.append(blob.name) + if len(task_configs) == 0: + raise ValueError("No tasks found") + else: + print(f"Creating {len(task_configs)} tasks in job {job_id} on pool {pool_id}") + + ########### # Set up task on job - task_id = 'sampletask' registry = os.environ["AZURE_CONTAINER_REGISTRY"] task_container_settings = batchmodels.TaskContainerSettings( image_name=registry + '/cfa-epinow2-pipeline:test-edit-azure-flow', @@ -51,7 +74,6 @@ batchmodels.EnvironmentSetting(name="az_client_id", value=os.environ["AZURE_CLIENT_ID"]), batchmodels.EnvironmentSetting(name="az_service_principal", value=os.environ["AZURE_CLIENT_SECRET"]) ] - command = "Rscript -e \"CFAEpiNow2Pipeline::orchestrate_pipeline('test-batch.json', config_container = 'zs-test-pipeline-update', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')\"" # Run task at the admin level to be able to read/write to mounted drives user_identity=batchmodels.UserIdentity( @@ -61,12 +83,14 @@ ) ) - task = batchmodels.TaskAddParameter( - id=task_id, - command_line=command, - container_settings=task_container_settings, - environment_settings=task_env_settings, - user_identity=user_identity - ) + for config_path in task_configs: + command = f"Rscript -e \"CFAEpiNow2Pipeline::orchestrate_pipeline('{config_path}', config_container = '{config_container}', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline/output', output_container = 'zs-test-pipeline-update')\"" + task = batchmodels.TaskAddParameter( + id=str(uuid.uuid4()), + command_line=command, + container_settings=task_container_settings, + environment_settings=task_env_settings, + user_identity=user_identity + ) - batch_client.task.add(job_id, task) + batch_client.task.add(job_id, task) diff --git a/azure/requirements.txt b/azure/requirements.txt index eddc9f9..b0970be 100644 --- a/azure/requirements.txt +++ b/azure/requirements.txt @@ -3,6 +3,7 @@ azure-batch==14.2.0 azure-common==1.1.28 azure-core==1.32.0 azure-identity==1.19.0 +azure-storage-blob==12.24.0 certifi==2024.8.30 cffi==1.17.1 charset-normalizer==3.4.0 @@ -23,3 +24,4 @@ requests-oauthlib==2.0.0 six==1.17.0 typing_extensions==4.12.2 urllib3==2.2.3 +uuid==1.30