Skip to content

Commit

Permalink
fixes merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
yosefmaru committed Apr 5, 2024
2 parents aad6212 + a43778d commit d0a87df
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 35 deletions.
2 changes: 1 addition & 1 deletion src/aind_data_transfer_service/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Init package"""
import os

__version__ = "0.9.1"
__version__ = "0.9.5"

# Global constants
OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "open")
Expand Down
73 changes: 52 additions & 21 deletions src/aind_data_transfer_service/configs/job_upload_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import datetime
from io import BytesIO

from aind_data_schema.core.data_description import Modality, Platform
from aind_data_schema.models.modalities import Modality
from aind_data_schema.models.platforms import Platform
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
Expand All @@ -14,10 +15,13 @@ class JobUploadTemplate:
"""Class to configure and create xlsx job upload template"""

FILE_NAME = "job_upload_template.xlsx"
NUM_TEMPLATE_ROWS = 20
XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"
HEADERS = [
"platform",
"acq_datetime",
"subject_id",
"metadata_dir",
"modality0",
"modality0.source",
"modality1",
Expand All @@ -28,6 +32,7 @@ class JobUploadTemplate:
Platform.BEHAVIOR.abbreviation,
datetime.datetime(2023, 10, 4, 4, 0, 0),
"123456",
"/allen/aind/stage/fake/metadata_dir",
Modality.BEHAVIOR_VIDEOS.abbreviation,
"/allen/aind/stage/fake/dir",
Modality.BEHAVIOR.abbreviation,
Expand All @@ -37,13 +42,15 @@ class JobUploadTemplate:
Platform.SMARTSPIM.abbreviation,
datetime.datetime(2023, 3, 4, 16, 30, 0),
"654321",
"/allen/aind/stage/fake/Config",
Modality.SPIM.abbreviation,
"/allen/aind/stage/fake/dir",
],
[
Platform.ECEPHYS.abbreviation,
datetime.datetime(2023, 1, 30, 19, 1, 0),
"654321",
None,
Modality.ECEPHYS.abbreviation,
"/allen/aind/stage/fake/dir",
Modality.BEHAVIOR_VIDEOS.abbreviation,
Expand All @@ -53,13 +60,23 @@ class JobUploadTemplate:
VALIDATORS = [
{
"name": "platform",
"type": "list",
"options": [p().abbreviation for p in Platform._ALL],
"ranges": ["A2:A20"],
"column_indexes": [HEADERS.index("platform")],
},
{
"name": "modality",
"type": "list",
"options": [m().abbreviation for m in Modality._ALL],
"ranges": ["E2:E20", "G2:G20"],
"column_indexes": [
HEADERS.index("modality0"),
HEADERS.index("modality1"),
],
},
{
"name": "datetime",
"type": "date",
"column_indexes": [HEADERS.index("acq_datetime")],
},
]

Expand All @@ -69,33 +86,47 @@ def create_job_template():
# job template
xl_io = BytesIO()
workbook = Workbook()
workbook.iso_dates = True
worksheet = workbook.active
worksheet.append(JobUploadTemplate.HEADERS)
for job in JobUploadTemplate.SAMPLE_JOBS:
worksheet.append(job)
# data validators
for validator in JobUploadTemplate.VALIDATORS:
dv = DataValidation(
type="list",
formula1=f'"{(",").join(validator["options"])}"',
allow_blank=True,
showErrorMessage=True,
showInputMessage=True,
)
dv.promptTitle = validator["name"]
dv.prompt = f'Select a {validator["name"]} from the dropdown'
dv.error = f'Invalid {validator["name"]}.'
for r in validator["ranges"]:
dv.add(r)
dv_type = validator["type"]
dv_name = validator["name"]
dv_params = {
"type": dv_type,
"promptTitle": dv_name,
"error": f"Invalid {dv_name}.",
"allow_blank": True,
"showErrorMessage": True,
"showInputMessage": True,
}
if dv_type == "list":
dv_params["formula1"] = f'"{(",").join(validator["options"])}"'
dv_params["prompt"] = f"Select a {dv_name} from the dropdown"
elif dv_type == "date":
dv_params["prompt"] = "Provide a {} using {}".format(
dv_name, JobUploadTemplate.XLSX_DATETIME_FORMAT
)
dv = DataValidation(**dv_params)
for i in validator["column_indexes"]:
col = get_column_letter(i + 1)
col_range = (
f"{col}2:{col}{JobUploadTemplate.NUM_TEMPLATE_ROWS}"
)
dv.add(col_range)
if dv_type != "date":
continue
for (cell,) in worksheet[col_range]:
cell.number_format = JobUploadTemplate.XLSX_DATETIME_FORMAT
worksheet.add_data_validation(dv)
# formatting
bold = Font(bold=True)
for header in worksheet["A1:G1"]:
for cell in header:
cell.font = bold
worksheet.column_dimensions[
get_column_letter(cell.column)
].auto_size = True
for cell in worksheet[1]:
cell.font = bold
worksheet.column_dimensions[cell.column_letter].auto_size = True
# save file
workbook.save(xl_io)
workbook.close()
Expand Down
7 changes: 5 additions & 2 deletions src/aind_data_transfer_service/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,15 @@ async def validate_csv(request: Request):
xlsx_sheet = xlsx_book.active
csv_io = io.StringIO()
csv_writer = csv.writer(csv_io)
for r in xlsx_sheet.rows:
csv_writer.writerow([cell.value for cell in r])
for r in xlsx_sheet.iter_rows(values_only=True):
if any(r):
csv_writer.writerow(r)
xlsx_book.close()
data = csv_io.getvalue()
csv_reader = csv.DictReader(io.StringIO(data))
for row in csv_reader:
if not any(row.values()):
continue
try:
job = BasicUploadJobConfigs.from_csv_row(row=row)
# Construct hpc job setting most of the vars from the env
Expand Down
3 changes: 2 additions & 1 deletion src/aind_data_transfer_service/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ <h2>Submit Jobs</h2>
let jobsLength = jobs.length;
var table = document.createElement('table'), tr, td, row;
addTableRow(
[ "s3_bucket", "platform", "subject_id", "acq_datetime", "modality", "modality.source" ],
[ "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.source" ],
table, tr, td, true
);
for (row = 0; row < jobsLength; row++) {
Expand All @@ -154,6 +154,7 @@ <h2>Submit Jobs</h2>
{ value: job.platform.abbreviation, rowspan: modalitiesLength },
{ value: job.subject_id, rowspan: modalitiesLength },
{ value: job.acq_datetime, rowspan: modalitiesLength },
{ value: job.metadata_dir ?? "", rowspan: modalitiesLength },
modalities ? modalities[0].modality.abbreviation : "",
modalities ? modalities[0].source : ""
], table, tr, td, false
Expand Down
Binary file modified tests/resources/job_upload_template.xlsx
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/resources/sample_empty_rows.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
,,,,,,
,,,,,,
,,,,,,
Binary file added tests/resources/sample_empty_rows.xlsx
Binary file not shown.
30 changes: 20 additions & 10 deletions tests/test_job_upload_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path

from openpyxl import load_workbook
from openpyxl.utils import range_boundaries

from aind_data_transfer_service.configs.job_upload_template import (
JobUploadTemplate,
Expand All @@ -22,19 +23,19 @@ def read_xl_helper(self, source, return_validators=False):
lines = []
workbook = load_workbook(source, read_only=(not return_validators))
worksheet = workbook.active
for row in worksheet.rows:
row_contents = [cell.value for cell in row]
lines.append(row_contents)
for row in worksheet.iter_rows(values_only=True):
lines.append(row) if any(row) else None
if return_validators:
validators = []
for dv in worksheet.data_validations.dataValidation:
validators.append(
{
"name": dv.promptTitle,
"options": dv.formula1.strip('"').split(","),
"ranges": str(dv.cells).split(" "),
}
)
validator = {
"name": dv.promptTitle,
"type": dv.type,
"ranges": str(dv.cells).split(" "),
}
if dv.type == "list":
validator["options"] = dv.formula1.strip('"').split(",")
validators.append(validator)
result = (lines, validators)
else:
result = lines
Expand All @@ -49,6 +50,15 @@ def test_create_job_template(self):
JobUploadTemplate.create_job_template(), True
)
self.assertEqual(expected_lines, template_lines)
for validator in template_validators:
validator["column_indexes"] = []
for r in validator["ranges"]:
rb = (col, *_) = range_boundaries(r)
self.assertTupleEqual(
(col, 2, col, JobUploadTemplate.NUM_TEMPLATE_ROWS), rb
)
validator["column_indexes"].append(col - 1)
del validator["ranges"]
self.assertCountEqual(
JobUploadTemplate.VALIDATORS, template_validators
)
Expand Down
26 changes: 26 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
# Directory containing this test module; every fixture path below is built
# relative to it.
TEST_DIRECTORY = Path(os.path.dirname(os.path.realpath(__file__)))
# Upload fixtures stored in the "resources" directory.
SAMPLE_INVALID_EXT = TEST_DIRECTORY / "resources" / "sample_invalid_ext.txt"
SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample.csv"
# csv fixture used by the empty-rows validation test.
SAMPLE_CSV_EMPTY_ROWS = TEST_DIRECTORY / "resources" / "sample_empty_rows.csv"
MALFORMED_SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample_malformed.csv"
SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample.xlsx"
# xlsx counterpart of SAMPLE_CSV_EMPTY_ROWS.
SAMPLE_XLSX_EMPTY_ROWS = (
TEST_DIRECTORY / "resources" / "sample_empty_rows.xlsx"
)
MALFORMED_SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample_malformed.xlsx"
# JSON file located in the "test_server" directory next to this module.
MOCK_DB_FILE = TEST_DIRECTORY / "test_server" / "db.json"

Expand Down Expand Up @@ -95,6 +99,28 @@ def test_validate_csv_xlsx(self):
self.assertEqual(response.status_code, 200)
self.assertEqual(expected_response, response.json())

@patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True)
def test_validate_csv_xlsx_empty_rows(self):
    """Tests that empty rows are ignored from valid csv and xlsx files.

    Posts each sample file to ``/api/validate_csv`` and checks that the
    response is a 200 whose jobs match ``self.expected_job_configs`` and
    whose error list is empty.
    """
    # The expected payload does not depend on which file is uploaded, so
    # it is built once instead of on every loop iteration.
    expected_response = {
        "message": "Valid Data",
        "data": {
            "jobs": [
                j.model_dump_json() for j in self.expected_job_configs
            ],
            "errors": [],
        },
    }
    for file_path in [SAMPLE_CSV_EMPTY_ROWS, SAMPLE_XLSX_EMPTY_ROWS]:
        # subTest labels a failure with the offending file and still lets
        # the remaining file be exercised.
        with self.subTest(file_path=file_path):
            with TestClient(app) as client:
                with open(file_path, "rb") as f:
                    response = client.post(
                        url="/api/validate_csv", files={"file": f}
                    )
            self.assertEqual(response.status_code, 200)
            self.assertEqual(expected_response, response.json())

@patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True)
@patch("aind_data_transfer_service.server.sleep", return_value=None)
@patch("aind_data_transfer_service.hpc.client.HpcClient.submit_job")
Expand Down

0 comments on commit d0a87df

Please sign in to comment.