From 21760c18b9e6fb3f1e72411e72e3ad9498dab070 Mon Sep 17 00:00:00 2001 From: Helen Lin <46795546+helen-m-lin@users.noreply.github.com> Date: Fri, 9 Feb 2024 09:15:03 -0800 Subject: [PATCH] Feat 50: Dynamically generate job template (#73) * generate, save, and download job template xlsx * update UI link, add error handling * unit tests for download job template * update /api/job_upload_template to use StreamingResponse * create job_template_configs.py * create JobUploadTemplate class to handle template creation * remove old upload_template_link * add test suite for JobUploadTemplate and update unit tests for download endpoint * code cleanup * cleanup openpyxl * address PR comments --- .../configs/job_upload_template.py | 116 ++++++++++++++++++ src/aind_data_transfer_service/server.py | 45 ++++++- .../templates/index.html | 2 +- .../templates/job_status.html | 2 +- tests/resources/job_upload_template.xlsx | Bin 0 -> 5564 bytes tests/test_job_upload_template.py | 58 +++++++++ tests/test_server.py | 40 ++++++ 7 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 src/aind_data_transfer_service/configs/job_upload_template.py create mode 100644 tests/resources/job_upload_template.xlsx create mode 100644 tests/test_job_upload_template.py diff --git a/src/aind_data_transfer_service/configs/job_upload_template.py b/src/aind_data_transfer_service/configs/job_upload_template.py new file mode 100644 index 0000000..fc53915 --- /dev/null +++ b/src/aind_data_transfer_service/configs/job_upload_template.py @@ -0,0 +1,116 @@ +"""Module to configure and create xlsx job upload template""" +import datetime +from io import BytesIO + +from aind_data_schema.core.data_description import Modality, Platform +from openpyxl import Workbook +from openpyxl.styles import Font +from openpyxl.utils import get_column_letter +from openpyxl.worksheet.datavalidation import DataValidation + + +# TODO: convert to pydantic model +class JobUploadTemplate: + """Class to configure and create xlsx job upload template""" + + FILE_NAME = "job_upload_template.xlsx" + HEADERS = [ + "platform", + "acq_datetime", + "subject_id", + "s3_bucket", + "modality0", + "modality0.source", + "modality1", + "modality1.source", + ] + SAMPLE_JOBS = [ + [ + Platform.BEHAVIOR.abbreviation, + datetime.datetime(2023, 10, 4, 4, 0, 0), + "123456", + "aind-behavior-data", + Modality.BEHAVIOR_VIDEOS.abbreviation, + "/allen/aind/stage/fake/dir", + Modality.BEHAVIOR.abbreviation, + "/allen/aind/stage/fake/dir", + ], + [ + Platform.SMARTSPIM.abbreviation, + datetime.datetime(2023, 3, 4, 16, 30, 0), + "654321", + "aind-open-data", + Modality.SPIM.abbreviation, + "/allen/aind/stage/fake/dir", + ], + [ + Platform.ECEPHYS.abbreviation, + datetime.datetime(2023, 1, 30, 19, 1, 0), + "654321", + "aind-ephys-data", + Modality.ECEPHYS.abbreviation, + "/allen/aind/stage/fake/dir", + Modality.BEHAVIOR_VIDEOS.abbreviation, + "/allen/aind/stage/fake/dir", + ], + ] + VALIDATORS = [ + { + "name": "platform", + "options": [p().abbreviation for p in Platform._ALL], + "ranges": ["A2:A20"], + }, + { + "name": "modality", + "options": [m().abbreviation for m in Modality._ALL], + "ranges": ["E2:E20", "G2:G20"], + }, + { + "name": "s3_bucket", + "options": [ + "aind-ephys-data", + "aind-ophys-data", + "aind-behavior-data", + "aind-private-data", + ], + "ranges": ["D2:D20"], + }, + ] + + @staticmethod + def create_job_template(): + """Create job template as xlsx filestream""" + # job template + xl_io = BytesIO() + workbook = Workbook() + worksheet = workbook.active + worksheet.append(JobUploadTemplate.HEADERS) + for job in JobUploadTemplate.SAMPLE_JOBS: + worksheet.append(job) + # data validators + for validator in JobUploadTemplate.VALIDATORS: + dv = DataValidation( + type="list", + formula1=f'"{(",").join(validator["options"])}"', + allow_blank=True, + showErrorMessage=True, + showInputMessage=True, + ) + dv.promptTitle = validator["name"] + dv.prompt = f'Select a {validator["name"]} from the dropdown' + dv.error = f'Invalid {validator["name"]}.' + for r in validator["ranges"]: + dv.add(r) + worksheet.add_data_validation(dv) + # formatting + bold = Font(bold=True) + for header in worksheet["A1:H1"]: + for cell in header: + cell.font = bold + worksheet.column_dimensions[ + get_column_letter(cell.column) + ].auto_size = True + # save file + workbook.save(xl_io) + workbook.close() + return xl_io diff --git a/src/aind_data_transfer_service/server.py b/src/aind_data_transfer_service/server.py index 93b57a5..4489cff 100644 --- a/src/aind_data_transfer_service/server.py +++ b/src/aind_data_transfer_service/server.py @@ -7,10 +7,10 @@ from asyncio import sleep from pathlib import PurePosixPath -import openpyxl from fastapi import Request -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, StreamingResponse from fastapi.templating import Jinja2Templates +from openpyxl import load_workbook from pydantic import SecretStr from starlette.applications import Starlette from starlette.routing import Route @@ -19,6 +19,9 @@ BasicUploadJobConfigs, HpcJobConfigs, ) +from aind_data_transfer_service.configs.job_upload_template import ( + JobUploadTemplate, +) from aind_data_transfer_service.hpc.client import HpcClient, HpcClientConfigs from aind_data_transfer_service.hpc.models import ( HpcJobStatusResponse, @@ -32,7 +35,6 @@ templates = Jinja2Templates(directory=template_directory) # TODO: Add server configs model -# UPLOAD_TEMPLATE_LINK # HPC_SIF_LOCATION # HPC_USERNAME # HPC_LOGGING_DIRECTORY @@ -63,11 +65,13 @@ async def validate_csv(request: Request): # byte chars. Adding "utf-8-sig" should remove them. data = content.decode("utf-8-sig") else: - xlsx_sheet = openpyxl.load_workbook(io.BytesIO(content)).active + xlsx_book = load_workbook(io.BytesIO(content), read_only=True) + xlsx_sheet = xlsx_book.active csv_io = io.StringIO() csv_writer = csv.writer(csv_io) for r in xlsx_sheet.rows: csv_writer.writerow([cell.value for cell in r]) + xlsx_book.close() data = csv_io.getvalue() csv_reader = csv.DictReader(io.StringIO(data)) for row in csv_reader: @@ -274,7 +278,6 @@ async def index(request: Request): context=( { "request": request, - "upload_template_link": os.getenv("UPLOAD_TEMPLATE_LINK"), } ), ) @@ -312,12 +315,37 @@ async def jobs(request: Request): "request": request, "job_status_list": job_status_list, "num_of_jobs": len(job_status_list), - "upload_template_link": os.getenv("UPLOAD_TEMPLATE_LINK"), } ), ) +def download_job_template(request: Request): + """Get job template as xlsx filestream for download""" + try: + xl_io = JobUploadTemplate.create_job_template() + except Exception as e: + return JSONResponse( + content={ + "message": "Error creating job template", + "data": {"error": f"{e.__class__.__name__}{e.args}"}, + }, + status_code=500, + ) + return StreamingResponse( + io.BytesIO(xl_io.getvalue()), + media_type=( + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ), + headers={ + "Content-Disposition": ( + f"attachment; filename={JobUploadTemplate.FILE_NAME}" + ) + }, + status_code=200, + ) + + routes = [ Route("/", endpoint=index, methods=["GET", "POST"]), Route("/api/validate_csv", endpoint=validate_csv, methods=["POST"]), @@ -326,6 +354,11 @@ async def jobs(request: Request): ), Route("/api/submit_hpc_jobs", endpoint=submit_hpc_jobs, methods=["POST"]), Route("/jobs", endpoint=jobs, methods=["GET"]), + Route( + "/api/job_upload_template", + endpoint=download_job_template, + methods=["GET"], + ), ] app = Starlette(routes=routes) diff --git a/src/aind_data_transfer_service/templates/index.html b/src/aind_data_transfer_service/templates/index.html index 99b90ef..fabc9e9 100644 --- a/src/aind_data_transfer_service/templates/index.html +++ b/src/aind_data_transfer_service/templates/index.html @@ -52,7 +52,7 @@

Submit Jobs

diff --git a/src/aind_data_transfer_service/templates/job_status.html b/src/aind_data_transfer_service/templates/job_status.html index af2831c..73bb820 100644 --- a/src/aind_data_transfer_service/templates/job_status.html +++ b/src/aind_data_transfer_service/templates/job_status.html @@ -26,7 +26,7 @@

Jobs Submitted: {{num_of_jobs}}

diff --git a/tests/resources/job_upload_template.xlsx b/tests/resources/job_upload_template.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..14d0f79cc2d9f99198dd7d7093399f3e0016547f GIT binary patch literal 5564 zcmZ`-1yodP*B(k3ngOImBm^0{LsB}G8W>6eX$EN!1Z3zOS|lVD5Qa_xsgWM(4hbpg z7LXtBUH8BD@_pZ#b>5k?)_L~4?_PU9d+$eE4GWtB007_tz;VhDMWa^2*Vj+w*Ei|) zZ3(l`c7Zv&@;r5R=Js-Q(1_6>XyYY)bfvmJ>!i1Pm+$UK?n3uVWf9>xOL>pr3tyeq z*lEc*-lzu39!LkXr%*`8`&VS$N`M*y#di7OLK-DjT3g5+nE~%e@=5CsmNL38Smu=K z0t0U^7=BcXuhM@KTI5(St_@C2cT2J3V?Rx?KZpq)|HN1WJ9~=LlJFBp`(OJ7*qXR( zT<^0N0{|fV=YB0^!coR843Z|5Zpxl-+Zh*(N)RZmk>(qbUgpP1-@nns zk_t9&G6zyD5`)Wy(}behB>PZ5as)*=+m#&d2g<%n28(HG-)5bG6xa-B5$W(J_}M)# z+HA;yYR;5B1|nk{S8dNey$G+C@7d^i*QT82_wh?)TRn{PRR1G)WedLL?RMJ>*qUaqnUVzf_Fpq6iP0#AX zZ5^_yl`PHuOu;rXUv(cF!OZSftuP9lQugXbTjgSq8_2TNhgI#LIdoo3%}H6I*Tot5 z1D$y3Ab0%-VDFbTjl#a^wsm~AvzFGk|1vnF9Pql^FA=9f(4i$y7Fd;Q`_*#dc&ZdpP!z4BHGJYX zzxmcC9X}eXHEi!WI~)^^D*5U~)V!w^+FlxtD*o!WY0Gy1veZ6r{kDyq;?kxMh_ZS` zX80}u;_NZjH-wdJ%JI`Zk#>@yaaPuN7qTrDPWB7ci9KdJM3XG2=#mkKLktk<7*yBx zMatGhEVRWB8OM#GIjXg3@zS~ikCa|BbL{-za za$Z2NFEdTsRa)0x_ddU@dlfP&-~f&}Q5vCAT_$0BFTKt$h?|>c2yBAu+hrAn3QB#r z!;g=%SAR!p#m5U@BpLNdyY%%-jH|&Zk+$N~*Pq4wx&6@jJN1VTvxjO$nEk9$_t}nXC1da)@tj|9#ZKN{ywth~sz>#T&T+FxQ)B-DIS?Vq# zdv+gm0M{&`tiWE~VJndcP^KK6)4I-)YSQ=$vYx$861u#O%yyFkqt7JGR;&661mtU7 z)HZ4oo2%JGRS(PioHg(C5=vN0#xvA-zGDzNKQ{4LX3po;Ns+Ikm21XwY{-}q7tKsX z)Z-67iG#JDt#^9SE{aWJ*ZZX!IId0%4xX>JJ}%4MkxXXz61Jbiw4EK2if)$=^?qrV zYMq4A7Nl@=_{aiQtTnG2-vd)DSekRa!wo_{#@v6RXU+#jVR6Rg_^L-KtU0fy7@I8( z#xG45SXDbAIXr0`?>!M~vrQ$pF7BDLDsRo7v3O z+I^fdxb9y~n~prJqI#%!ko#ugPGzX;Tc5f%g^yQ|u{&q{^J31^;&WK0-^s!9d9Pai zA|e9Fik~$%Ahpg|(Xe zi7z?xYlo<;ts~~!R%8ptzY9o=jOpnW768CR0sv6|SwK8tF7~cAR#tAVJb!%sQAMf^ zrehXGNiUPw(YBZ>0Z!P;v;BHTf_TrZ8KG=1!l>>KOc1+WT_$JrVkR605AMn=R%q34 z(3?GICtt!PdkT&oPi1>%sraM-Yx?#pxCM(5xD$8?kNTo4m$2iAaEZbsiYVNwntmvu z$inwU-Vu7R7iSe8kBzKnst{GY*aGT!(7s`)6m;m4ik~u%4ds|x;!5^^4+tnQIRh$3 zk;_7!In&4VeHk$SW(&Ww7qpNd`cg!fUjjv3R8FY)9NG!zIL>zoTzJuePJOZhymw|O zDs+{l%Y>d+I}yl`hS$ntN(=lX0a^<3?`SZ8X6pGcQ#OoACFWi&pY=HVDq2UXZYd2# zOd!VWz4(faC6YkvYm7>~^q{>;pTVGJpof!OBhFM4aEsDrhs~{4#uFxYg70?yvqk0v zCO1#DcJ52FQn9HZAPb5PP0t`K4GFS8%1Ox#3DSxG;8&4E?*m*|7R^^KniZ~M_0kcZ z=84nVVMbgXu*!hcCIp4nR!;`+O7@kZ1F(po{eo~%)Un%pEb1U9!HQv`(XsY z+8~A;tQxE*N`n`-jHp^94uad9^2??dJP159XG&9w#lvUKSdZ^)Ga5CWzdBK!X1i)C zT@`ol^%{bIohq7cdOVIY-WX05if##ig3KAQcaHQ+SA@CuZgdwjS54a=G1AwL54Lcc zzGbuhUJcCl9z_U&&X1y=Ys4WP6WMqecSbsQZpB5~XU#@NWH361w2>DHJUs=gSvIF| zHx|kD(@2Ut#tA=r542u~XMpsUY}@%&1nS%5WbnK*fGCdDs4O}rh=)%OyVx3={w1^P z8~Ra8Mem~;b>e`8?U|~}%@XDh)z8n0}T zOf5eef-j0$uZpRMM`!R2=4igVEqN>h7}6~qv4pJ>Kt*V^@-6PnUI`n6ss=c~1Voh=Hdm@1L-eYAT4 z1;dn+?Vfq4%Uzb5F`vRajfnz&#qk*m>5`fGQN~_j{hKrj+wCesF#&*Q1ONchpQPdH z=IvnR`n%tp)z^r9%S(F68Frq`QpNr;Kpt^V&#pMhgQO{Kf+hJvHrkN&^61DMcDIqH zNPjE;5pc=-(;Jb@a^=D4O7mDHl!IHm+>w=URZXK-{40OU_`@ByFNyL-jx6O)w!|v) zG>SGPEFAIuc1f)9&Xfkhw9A@PQd66bsq5Q}F|AdoEr=X?msh4PvM08hy1g_WQUcVFvqh1R;@4)^d_< z?7sF$Q2LbNs{U6IcfO1w^Pjv=F@WE{`R*Fd9HJLJE)1t*gE*C=H+O<+ezcY)XQ7S? zGqBCSjbGjMO!%O!43ql3_A{oO*2<_TGc9##a_5&`;bXIrz1;*<1rti(qskaC8Z`Ke)`rlZkcJ zJMB45LzBB7R55z{X>uzSFzf*~aT-Cz<@Nv%;peuAi)X@Lxp`uUjP~2Ya#(qONXm5G z!Mu+~V92;);5y=qKfa6EjguQvHbn6=>cOMfGt9(Qc5L|wby)IKx};Xzbt<2*Awe`5 zp3Eb~v>a0U2^_p63^{FfjmyZx(Q~l3H=1?0Wn<&UoK&l{8S(i#Qg2^J?RC%s%v`J- zTzR;EuNm=bPMCazGDwh>nR;OdCf~=2GWjrqy71N9HysFzj>F^ViO+X!$keVfGr5o? zg6}-&U8nOMP`n++llnp>fC_sK^UN1HkOx9)GF;h$abJ%nNWtF^Si88|oyKa_T`yD) z@e!L1u2oi^_!LJNr={&_%&PK6lP>}U)UNtx(D);18OV=6F5V067W5q)-HpppQg8+z zBJ$KDJra>pnX}D#2l*Dys=Vgct`5F_pm=!JlEIrFJ~XZXWP_ER-I$b@%j~y#ucu`8 z>w{jG^xp&F?|S|W2DAU2oY8TEn%8r(f`5Jw?pLSRD(ai2sR1f@r1CG(MxNEl9Z5@0 zWyt#~9O{d~_rHvfbvK!&hmlC+$R9tAK5*sM;~eTgm@(jjY#~2jQ9UOr9|alBt&Rvn zS|f_)UMKh(QB9y2EIP`hnG0p-tS45KxX6ht#EsCOHFvBjj)(zU7c;A+0xIqGSXCZ#i&2#BEDjpDp(hiDtQ$vx<|t6`;qa3yd}qj z73i{Yo5Qhit5H0ATt54z@y`m)yntR18P6@-*vj<;f1iBmP?MQ0gL)yfQe6qt!r|uP zjYGZ-k%QA+A8`D-EBq}fK;>ahZdOiiX1dKRAR2%VD$FV zS~n27J5sE@JC0FPjRNMOZUv-sDGhc~3hu={y_oBFGLNfQULJA8C!4Is_Fv6#STBz3 zl8fsy7F&i}ZcW~Y&%Ge}sV}N^%ADu1i``k)$9UkGb^j7d>rIkzGVdHZkZ9`V0e{3h@w z5B?3Dx~9b&tauZAGtd79i(IRZ*Wmw=^*8O@Or^i=(1ZSKVri@4T#I=C0Py-Fb