Skip to content

Commit

Permalink
feat: adds new fields to template
Browse files Browse the repository at this point in the history
  • Loading branch information
jtyoung84 committed Apr 28, 2024
1 parent e9e8c21 commit 12d1f00
Show file tree
Hide file tree
Showing 17 changed files with 283 additions and 103 deletions.
13 changes: 13 additions & 0 deletions src/aind_data_transfer_service/configs/job_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,19 @@ class BasicUploadJobConfigs(BaseSettings):

aws_param_store_name: Optional[str] = Field(None)

processor_full_name: str = Field(
...,
description="Name of person uploading data",
title="Processor Full Name",
)
project_name: str = Field(
..., description="Name of project", title="Project Name"
)
process_capsule_id: Optional[str] = Field(
None,
description="Use custom codeocean capsule or pipeline id",
title="Process Capsule ID",
)
s3_bucket: Optional[str] = Field(
None,
description="Bucket where data will be uploaded",
Expand Down
102 changes: 69 additions & 33 deletions src/aind_data_transfer_service/configs/job_upload_template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Module to configure and create xlsx job upload template"""
import datetime
from io import BytesIO
from typing import Any, Dict, List, Optional

from aind_data_schema.models.modalities import Modality
from aind_data_schema.models.platforms import Platform
Expand All @@ -14,10 +15,23 @@
class JobUploadTemplate:
"""Class to configure and create xlsx job upload template"""

def __init__(self, project_names: Optional[List[str]] = None):
"""
Class constructor to create template files.
Parameters
----------
project_names : Optional[List[str]]
Optional list of project names. Default is None.
"""
self.project_names = [] if project_names is None else project_names

FILE_NAME = "job_upload_template.xlsx"
NUM_TEMPLATE_ROWS = 20
XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"
HEADERS = [
"processor_full_name",
"project_name",
"process_capsule_id",
"platform",
"acq_datetime",
"subject_id",
Expand All @@ -29,6 +43,9 @@ class JobUploadTemplate:
]
SAMPLE_JOBS = [
[
"Anna Apple",
"Behavior Platform",
"1f999652-00a0-4c4b-99b5-64c2985ad070",
Platform.BEHAVIOR.abbreviation,
datetime.datetime(2023, 10, 4, 4, 0, 0),
"123456",
Expand All @@ -39,6 +56,9 @@ class JobUploadTemplate:
"/allen/aind/stage/fake/dir",
],
[
"John Smith",
"Ophys Platform - SLAP2",
None,
Platform.SMARTSPIM.abbreviation,
datetime.datetime(2023, 3, 4, 16, 30, 0),
"654321",
Expand All @@ -47,6 +67,9 @@ class JobUploadTemplate:
"/allen/aind/stage/fake/dir",
],
[
"Anna Apple",
"Ephys Platform",
None,
Platform.ECEPHYS.abbreviation,
datetime.datetime(2023, 1, 30, 19, 1, 0),
"654321",
Expand All @@ -57,42 +80,57 @@ class JobUploadTemplate:
"/allen/aind/stage/fake/dir",
],
]
VALIDATORS = [
{
"name": "platform",
"type": "list",
"options": [p().abbreviation for p in Platform._ALL],
"column_indexes": [HEADERS.index("platform")],
},
{
"name": "modality",
"type": "list",
"options": [m().abbreviation for m in Modality._ALL],
"column_indexes": [
HEADERS.index("modality0"),
HEADERS.index("modality1"),
],
},
{
"name": "datetime",
"type": "date",
"column_indexes": [HEADERS.index("acq_datetime")],
},
]

@staticmethod
def create_job_template():
@property
def validators(self) -> List[Dict[str, Any]]:
    """
    Describe the data validation applied to template columns.

    Returns
    -------
    List[Dict[str, Any]]
      One entry per validated field, giving its name, its type
      ("list" or "date"), the allowed options for list types, and the
      zero-based positions in HEADERS of the columns it covers.
    """
    column_of = self.HEADERS.index

    platform_rule = {
        "name": "platform",
        "type": "list",
        "options": [p().abbreviation for p in Platform._ALL],
        "column_indexes": [column_of("platform")],
    }
    # Copy the names so callers cannot mutate the instance's list
    # through the returned validator.
    project_rule = {
        "name": "project_name",
        "type": "list",
        "options": list(self.project_names),
        "column_indexes": [column_of("project_name")],
    }
    modality_rule = {
        "name": "modality",
        "type": "list",
        "options": [m().abbreviation for m in Modality._ALL],
        "column_indexes": [
            column_of(header) for header in ("modality0", "modality1")
        ],
    }
    datetime_rule = {
        "name": "datetime",
        "type": "date",
        "column_indexes": [column_of("acq_datetime")],
    }
    return [platform_rule, project_rule, modality_rule, datetime_rule]

@property
def excel_sheet_filestream(self) -> BytesIO:
"""Create job template as xlsx filestream"""
# job template
xl_io = BytesIO()
workbook = Workbook()
workbook.iso_dates = True
worksheet = workbook.active
worksheet.append(JobUploadTemplate.HEADERS)
for job in JobUploadTemplate.SAMPLE_JOBS:
worksheet.append(self.HEADERS)
for job in self.SAMPLE_JOBS:
worksheet.append(job)
# data validators
for validator in JobUploadTemplate.VALIDATORS:
for validator in self.validators:
dv_type = validator["type"]
dv_name = validator["name"]
dv_params = {
Expand All @@ -108,19 +146,17 @@ def create_job_template():
dv_params["prompt"] = f"Select a {dv_name} from the dropdown"
elif dv_type == "date":
dv_params["prompt"] = "Provide a {} using {}".format(
dv_name, JobUploadTemplate.XLSX_DATETIME_FORMAT
dv_name, self.XLSX_DATETIME_FORMAT
)
dv = DataValidation(**dv_params)
for i in validator["column_indexes"]:
col = get_column_letter(i + 1)
col_range = (
f"{col}2:{col}{JobUploadTemplate.NUM_TEMPLATE_ROWS}"
)
col_range = f"{col}2:{col}{self.NUM_TEMPLATE_ROWS}"
dv.add(col_range)
if dv_type != "date":
continue
for (cell,) in worksheet[col_range]:
cell.number_format = JobUploadTemplate.XLSX_DATETIME_FORMAT
cell.number_format = self.XLSX_DATETIME_FORMAT
worksheet.add_data_validation(dv)
# formatting
bold = Font(bold=True)
Expand Down
44 changes: 30 additions & 14 deletions src/aind_data_transfer_service/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
from asyncio import sleep
from pathlib import PurePosixPath

import requests
from fastapi import Request
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.templating import Jinja2Templates
from openpyxl import load_workbook
from pydantic import SecretStr
from starlette.applications import Starlette
from starlette.concurrency import run_in_threadpool
from starlette.routing import Route

from aind_data_transfer_service import OPEN_DATA_BUCKET_NAME
Expand Down Expand Up @@ -48,6 +50,7 @@
# BASIC_JOB_SCRIPT
# OPEN_DATA_AWS_SECRET_ACCESS_KEY
# OPEN_DATA_AWS_ACCESS_KEY_ID
# AIND_PROJECT_NAMES_URL


async def validate_csv(request: Request):
Expand Down Expand Up @@ -329,30 +332,43 @@ async def jobs(request: Request):
)


async def download_job_template(_: Request):
    """Get job template as xlsx filestream for download.

    Project names are fetched from the URL held in the
    AIND_PROJECT_NAMES_URL environment variable and passed to
    JobUploadTemplate. The blocking ``requests.get`` call is run in a
    threadpool so it does not stall the event loop, and it is given a
    timeout so an unresponsive service cannot hold a worker forever.

    Parameters
    ----------
    _ : Request
      Incoming request. Unused.

    Returns
    -------
    StreamingResponse
      The xlsx template as an attachment, with status code 200.
    JSONResponse
      An error payload with status code 500 if the project names could
      not be retrieved or the template could not be created.
    """

    # Seconds to wait for the project names service before giving up.
    project_names_timeout = 30

    # TODO: Cache list of project names
    try:
        smart_sheet_response = await run_in_threadpool(
            requests.get,
            url=os.getenv("AIND_PROJECT_NAMES_URL"),
            timeout=project_names_timeout,
        )
        if smart_sheet_response.status_code == 200:
            project_names = smart_sheet_response.json()["data"]
        else:
            raise Exception("Unable to get project names!")

        job_template = JobUploadTemplate(project_names=project_names)
        xl_io = job_template.excel_sheet_filestream
        return StreamingResponse(
            io.BytesIO(xl_io.getvalue()),
            media_type=(
                "application/"
                "vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            ),
            headers={
                "Content-Disposition": (
                    f"attachment; filename={job_template.FILE_NAME}"
                )
            },
            status_code=200,
        )
    except Exception as e:
        # Top-level boundary for this route: a timeout, a bad status, a
        # malformed payload or a template failure all become a 500.
        logging.error(e)
        return JSONResponse(
            content={
                "message": "Error creating job template",
                "data": {"error": f"{e.__class__.__name__}{e.args}"},
            },
            status_code=500,
        )


routes = [
Expand Down
15 changes: 9 additions & 6 deletions src/aind_data_transfer_service/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,22 @@ <h2>Submit Jobs</h2>
let jobsLength = jobs.length;
var table = document.createElement('table'), tr, td, row;
addTableRow(
[ "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.source" ],
[ "processor_full_name", "project_name", "process_capsule_id", "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.source" ],
table, tr, td, true
);
for (row = 0; row < jobsLength; row++) {
let job = JSON.parse(jobs[row]);
let modalities = job.modalities;
let modalitiesLength = modalities.length;
addTableRow(
[ { value: job.s3_bucket, rowspan: modalitiesLength },
{ value: job.platform.abbreviation, rowspan: modalitiesLength },
{ value: job.subject_id, rowspan: modalitiesLength },
{ value: job.acq_datetime, rowspan: modalitiesLength },
{ value: job.metadata_dir ?? "", rowspan: modalitiesLength },
[ { value: job.processor_full_name, rowspan: modalitiesLength },
{ value: job.project_name, rowspan: modalitiesLength },
{ value: job.process_capsule_id ?? "", rowspan: modalitiesLength },
{ value: job.s3_bucket, rowspan: modalitiesLength },
{ value: job.platform.abbreviation, rowspan: modalitiesLength },
{ value: job.subject_id, rowspan: modalitiesLength },
{ value: job.acq_datetime, rowspan: modalitiesLength },
{ value: job.metadata_dir ?? "", rowspan: modalitiesLength },
modalities ? modalities[0].modality.abbreviation : "",
modalities ? modalities[0].source : ""
], table, tr, td, false
Expand Down
Binary file modified tests/resources/job_upload_template.xlsx
Binary file not shown.
8 changes: 4 additions & 4 deletions tests/resources/sample.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
processor_full_name, project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
Anna Apple, Ephys Platform, , ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
John Smith, Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Anna Apple, Behavior Platform, , BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Binary file modified tests/resources/sample.xlsx
Binary file not shown.
8 changes: 4 additions & 4 deletions tests/resources/sample_alt_modality_case.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ecephys, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
behavior-videos, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
behavior-videos, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
processor_full_name, project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
Anna Apple, Ephys Platform, , ecephys, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
John Smith, Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, behavior-videos, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Anna Apple, Behavior Platform, , behavior-videos, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
8 changes: 4 additions & 4 deletions tests/resources/sample_empty_rows.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
processor_full_name, project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
Anna Apple, Ephys Platform, , ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
John Smith, Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Anna Apple, Behavior Platform, , BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
,,,,,,
,,,,,,
,,,,,,
Binary file modified tests/resources/sample_empty_rows.xlsx
Binary file not shown.
8 changes: 4 additions & 4 deletions tests/resources/sample_invalid_ext.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
processor_full_name, project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
Anna Apple, Ephys Platform, , ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
John Smith, Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Anna Apple, Behavior Platform, , BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
8 changes: 4 additions & 4 deletions tests/resources/sample_malformed.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ECEPHYS, dir/data_set_1, , , some_bucket, 123454, ecephys, 2020-10-10 14:10:10
WRONG_MODALITY_HERE, dir/data_set_2, MRI, dir/data_set_3, some_bucket2, 123456, Other, 10/13/2020 1:10:10 PM
SPIM, dir/data_set_2, SPIM, dir/data_set_3, some_bucket2, 123456, smartSPIM, 10/13/2020 1:10:10 PM
processor_full_name, project_name, process_capsule_id, modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
Anna Apple, Ephys Platform, , ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
John Smith, Behavior Platform, 1f999652-00a0-4c4b-99b5-64c2985ad070, WRONG_MODALITY_HERE, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Anna Apple, Behavior Platform, , BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
Binary file modified tests/resources/sample_malformed.xlsx
Binary file not shown.
Loading

0 comments on commit 12d1f00

Please sign in to comment.