# TODO: convert to pydantic model
class JobUploadTemplate:
    """Class to configure and create xlsx job upload template.

    The generated workbook has a single worksheet with a bold header row
    (``HEADERS``), a few example rows (``SAMPLE_JOBS``) and dropdown list
    validations (``VALIDATORS``) attached to fixed cell ranges.
    """

    # File name offered to clients that download the template.
    FILE_NAME = "job_upload_template.xlsx"
    # Column titles written to row 1, in column order (A, B, C, ...).
    HEADERS = [
        "platform",
        "acq_datetime",
        "subject_id",
        "s3_bucket",
        "modality0",
        "modality0.source",
        "modality1",
        "modality1.source",
    ]
    # Example rows written below the header. Values line up with HEADERS by
    # position; a row may be shorter than HEADERS (trailing cells stay empty).
    SAMPLE_JOBS = [
        [
            Platform.BEHAVIOR.abbreviation,
            datetime.datetime(2023, 10, 4, 4, 0, 0),
            "123456",
            "aind-behavior-data",
            Modality.BEHAVIOR_VIDEOS.abbreviation,
            "/allen/aind/stage/fake/dir",
            Modality.BEHAVIOR.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
        [
            Platform.SMARTSPIM.abbreviation,
            datetime.datetime(2023, 3, 4, 16, 30, 0),
            "654321",
            "aind-open-data",
            Modality.SPIM.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
        [
            Platform.ECEPHYS.abbreviation,
            datetime.datetime(2023, 1, 30, 19, 1, 0),
            "654321",
            "aind-ephys-data",
            Modality.ECEPHYS.abbreviation,
            "/allen/aind/stage/fake/dir",
            Modality.BEHAVIOR_VIDEOS.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
    ]
    # Dropdown validators. "options" are the allowed values, "ranges" are the
    # cell ranges they apply to (A = platform, D = s3_bucket, E/G = modality0
    # and modality1, matching the positions in HEADERS).
    # NOTE(review): the second sample job uses "aind-open-data", which is not
    # among the s3_bucket options below -- confirm whether it should be.
    # NOTE(review): options are embedded as one comma-separated formula
    # string; presumably Excel caps such inline lists at 255 characters --
    # confirm the joined option lists stay below that.
    VALIDATORS = [
        {
            "name": "platform",
            "options": [p().abbreviation for p in Platform._ALL],
            "ranges": ["A2:A20"],
        },
        {
            "name": "modality",
            "options": [m().abbreviation for m in Modality._ALL],
            "ranges": ["E2:E20", "G2:G20"],
        },
        {
            "name": "s3_bucket",
            "options": [
                "aind-ephys-data",
                "aind-ophys-data",
                "aind-behavior-data",
                "aind-private-data",
            ],
            "ranges": ["D2:D20"],
        },
    ]

    @classmethod
    def create_job_template(cls):
        """Create job template as xlsx filestream.

        Returns
        -------
        BytesIO
            In-memory xlsx file. The stream is rewound to position 0, so
            both ``read()`` and ``getvalue()`` yield the complete file.
        """
        # job template
        xl_io = BytesIO()
        workbook = Workbook()
        worksheet = workbook.active
        worksheet.append(cls.HEADERS)
        for job in cls.SAMPLE_JOBS:
            worksheet.append(job)
        # data validators
        for validator in cls.VALIDATORS:
            name = validator["name"]
            dv = DataValidation(
                type="list",
                # Inline list formula: the options as one double-quoted,
                # comma-separated string.
                formula1=f'"{",".join(validator["options"])}"',
                allow_blank=True,
                showErrorMessage=True,
                showInputMessage=True,
            )
            dv.promptTitle = name
            dv.prompt = f"Select a {name} from the dropdown"
            dv.error = f"Invalid {name}."
            for cell_range in validator["ranges"]:
                dv.add(cell_range)
            worksheet.add_data_validation(dv)
        # formatting: derive the header span from HEADERS instead of a
        # hard-coded "A1:H1" so the two cannot drift apart.
        bold = Font(bold=True)
        header_rows = worksheet.iter_rows(
            min_row=1, max_row=1, max_col=len(cls.HEADERS)
        )
        for header in header_rows:
            for cell in header:
                cell.font = bold
                worksheet.column_dimensions[
                    get_column_letter(cell.column)
                ].auto_size = True
        # save file
        workbook.save(xl_io)
        workbook.close()
        # Saving leaves the cursor at the end of the buffer; rewind so a
        # caller that read()s the stream does not get empty bytes.
        xl_io.seek(0)
        return xl_io
def download_job_template(request: Request):
    """Serve the job upload template as a downloadable xlsx attachment.

    Parameters
    ----------
    request : Request
        Incoming request. It is not read by this handler.

    Returns
    -------
    StreamingResponse
        Status 200 with the xlsx bytes and a Content-Disposition header
        naming the file ``JobUploadTemplate.FILE_NAME``.
    JSONResponse
        Status 500 with an error message if building the template raised.
    """
    xlsx_media_type = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )
    try:
        template_stream = JobUploadTemplate.create_job_template()
    except Exception as err:
        # Report the exception class and its args back to the client.
        error_body = {
            "message": "Error creating job template",
            "data": {"error": f"{err.__class__.__name__}{err.args}"},
        }
        return JSONResponse(content=error_body, status_code=500)
    # Copy the bytes into a fresh stream positioned at the start.
    payload = io.BytesIO(template_stream.getvalue())
    disposition = f"attachment; filename={JobUploadTemplate.FILE_NAME}"
    return StreamingResponse(
        payload,
        media_type=xlsx_media_type,
        headers={"Content-Disposition": disposition},
        status_code=200,
    )
b/src/aind_data_transfer_service/templates/index.html index 99b90ef..fabc9e9 100644 --- a/src/aind_data_transfer_service/templates/index.html +++ b/src/aind_data_transfer_service/templates/index.html @@ -52,7 +52,7 @@