Skip to content

Commit

Permalink
Clean up a bit of the Azure stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
zsusswein committed Dec 11, 2024
1 parent 9ecce48 commit 4157651
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 17 deletions.
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ build:
tag:
docker tag $(IMAGE_NAME):$(TAG) $(REGISTRY)$(IMAGE_NAME):$(TAG)

config:
gh workflow run \
-R cdcgov/cfa-config-generator run-workload.yaml \
-f disease=all \
-f state=all

run-batch:
docker build -f Dockerfile-batch -t batch . --no-cache
docker run --rm \
Expand All @@ -37,7 +43,7 @@ run:
docker run --mount type=bind,source=$(PWD),target=/cfa-epinow2-pipeline -it \
--env-file .env \
--rm $(REGISTRY)$(IMAGE_NAME):test-$(TAG) \
Rscript -e "CFAEpiNow2Pipeline::orchestrate_pipeline('$(CONFIG)', config_container = 'zs-test-pipeline-update', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')"
Rscript -e "CFAEpiNow2Pipeline::orchestrate_pipeline('$(CONFIG)', config_container = 'rt-epinow2-config', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')"


up:
Expand Down
56 changes: 40 additions & 16 deletions azure/job.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
import datetime
import sys
import os
import uuid

from azure.identity import DefaultAzureCredential
from msrest.authentication import BasicTokenAuthentication
from azure.storage.blob import BlobServiceClient
from azure.batch import BatchServiceClient
import azure.batch.models as batchmodels

blob_account = os.environ["BLOB_ACCOUNT"]
blob_url = f"https://{blob_account}.blob.core.windows.net"
batch_account = os.environ["BATCH_ACCOUNT"]
batch_url = f"https://{batch_account}.eastus.batch.azure.com"
config_container = sys.argv[1]
pool_id = sys.argv[2]
# Re-use Azure Pool name unless otherwise specified
job_id = sys.argv[3] if len(sys.argv) > 3 else pool_id

if __name__ == "__main__":
# Authenticate with workaround because Batch is the one remaining
# service that doesn't yet support Azure auth flow v2 :) :)
Expand All @@ -16,13 +28,9 @@

batch_client = BatchServiceClient(
credentials=credential_v1,
batch_url=os.environ["az_batch_url"]
batch_url=batch_url
)

# Add job to pool
pool_id = sys.argv[1]
job_id = sys.argv[2]

#############
# Set up job
job = batchmodels.JobAddParameter(
Expand All @@ -38,9 +46,24 @@
else:
print("Job already exists. Using job object")

###########
##########
# Get tasks
blob_service_client = BlobServiceClient(blob_url, credential_v2)
container_client = blob_service_client.get_container_client(container=config_container)
two_mins_ago = datetime.datetime.now(datetime.UTC) - datetime.timedelta(minutes=2)
task_configs = []
blobs = container_client.list_blobs()

for blob in blobs:
if blob.creation_time > two_mins_ago:
task_configs.append(blob.name)
if len(task_configs) == 0:
raise ValueError("No tasks found")
else:
print(f"Creating {len(task_configs)} tasks in job {job_id} on pool {pool_id}")

###########
# Set up task on job
task_id = 'sampletask'
registry = os.environ["AZURE_CONTAINER_REGISTRY"]
task_container_settings = batchmodels.TaskContainerSettings(
image_name=registry + '/cfa-epinow2-pipeline:test-edit-azure-flow',
Expand All @@ -51,7 +74,6 @@
batchmodels.EnvironmentSetting(name="az_client_id", value=os.environ["AZURE_CLIENT_ID"]),
batchmodels.EnvironmentSetting(name="az_service_principal", value=os.environ["AZURE_CLIENT_SECRET"])
]
command = "Rscript -e \"CFAEpiNow2Pipeline::orchestrate_pipeline('test-batch.json', config_container = 'zs-test-pipeline-update', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline', output_container = 'zs-test-pipeline-update')\""

# Run task at the admin level to be able to read/write to mounted drives
user_identity=batchmodels.UserIdentity(
Expand All @@ -61,12 +83,14 @@
)
)

task = batchmodels.TaskAddParameter(
id=task_id,
command_line=command,
container_settings=task_container_settings,
environment_settings=task_env_settings,
user_identity=user_identity
)
for config_path in task_configs:
command = f"Rscript -e \"CFAEpiNow2Pipeline::orchestrate_pipeline('{config_path}', config_container = '{config_container}', input_dir = '/cfa-epinow2-pipeline/input', output_dir = '/cfa-epinow2-pipeline/output', output_container = 'zs-test-pipeline-update')\""
task = batchmodels.TaskAddParameter(
id=str(uuid.uuid4()),
command_line=command,
container_settings=task_container_settings,
environment_settings=task_env_settings,
user_identity=user_identity
)

batch_client.task.add(job_id, task)
batch_client.task.add(job_id, task)
2 changes: 2 additions & 0 deletions azure/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ azure-batch==14.2.0
azure-common==1.1.28
azure-core==1.32.0
azure-identity==1.19.0
azure-storage-blob==12.24.0
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0
Expand All @@ -23,3 +24,4 @@ requests-oauthlib==2.0.0
six==1.17.0
typing_extensions==4.12.2
urllib3==2.2.3
uuid==1.30

0 comments on commit 4157651

Please sign in to comment.