Feat 85: validate & format datetime in xlsx job template #87

Merged · 4 commits · Apr 4, 2024
55 changes: 38 additions & 17 deletions src/aind_data_transfer_service/configs/job_upload_template.py
@@ -16,6 +16,7 @@ class JobUploadTemplate:

     FILE_NAME = "job_upload_template.xlsx"
     NUM_TEMPLATE_ROWS = 20
+    XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"
     HEADERS = [
         "platform",
         "acq_datetime",
@@ -55,17 +56,24 @@ class JobUploadTemplate:
     VALIDATORS = [
         {
             "name": "platform",
+            "type": "list",
             "options": [p().abbreviation for p in Platform._ALL],
             "column_indexes": [HEADERS.index("platform")],
         },
         {
             "name": "modality",
+            "type": "list",
             "options": [m().abbreviation for m in Modality._ALL],
             "column_indexes": [
                 HEADERS.index("modality0"),
                 HEADERS.index("modality1"),
             ],
         },
+        {
+            "name": "datetime",
+            "type": "date",
+            "column_indexes": [HEADERS.index("acq_datetime")],
+        },
     ]

     @staticmethod
@@ -74,34 +82,47 @@ def create_job_template():
         # job template
         xl_io = BytesIO()
         workbook = Workbook()
+        workbook.iso_dates = True
         worksheet = workbook.active
         worksheet.append(JobUploadTemplate.HEADERS)
         for job in JobUploadTemplate.SAMPLE_JOBS:
             worksheet.append(job)
         # data validators
         for validator in JobUploadTemplate.VALIDATORS:
-            dv = DataValidation(
-                type="list",
-                formula1=f'"{(",").join(validator["options"])}"',
-                allow_blank=True,
-                showErrorMessage=True,
-                showInputMessage=True,
-            )
-            dv.promptTitle = validator["name"]
-            dv.prompt = f'Select a {validator["name"]} from the dropdown'
-            dv.error = f'Invalid {validator["name"]}.'
+            dv_type = validator["type"]
+            dv_name = validator["name"]
+            dv_params = {
+                "type": dv_type,
+                "promptTitle": dv_name,
+                "error": f"Invalid {dv_name}.",
+                "allow_blank": True,
+                "showErrorMessage": True,
+                "showInputMessage": True,
+            }
+            if dv_type == "list":
+                dv_params["formula1"] = f'"{(",").join(validator["options"])}"'
+                dv_params["prompt"] = f"Select a {dv_name} from the dropdown"
+            elif dv_type == "date":
+                dv_params["prompt"] = "Provide a {} using {}".format(
+                    dv_name, JobUploadTemplate.XLSX_DATETIME_FORMAT
+                )
+            dv = DataValidation(**dv_params)
             for i in validator["column_indexes"]:
                 col = get_column_letter(i + 1)
-                dv.add(f"{col}2:{col}{JobUploadTemplate.NUM_TEMPLATE_ROWS}")
+                col_range = (
+                    f"{col}2:{col}{JobUploadTemplate.NUM_TEMPLATE_ROWS}"
+                )
+                dv.add(col_range)
+                if dv_type != "date":
+                    continue
+                for (cell,) in worksheet[col_range]:
+                    cell.number_format = JobUploadTemplate.XLSX_DATETIME_FORMAT
             worksheet.add_data_validation(dv)
         # formatting
         bold = Font(bold=True)
-        for header in worksheet["A1:G1"]:
-            for cell in header:
-                cell.font = bold
-                worksheet.column_dimensions[
-                    get_column_letter(cell.column)
-                ].auto_size = True
+        for cell in worksheet[1]:
+            cell.font = bold
+            worksheet.column_dimensions[cell.column_letter].auto_size = True
         # save file
         workbook.save(xl_io)
         workbook.close()
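For readers unfamiliar with openpyxl, the sketch below distills what the new date handling in this file does: a DataValidation of type "date" attached to the acq_datetime column, plus a cell number_format, so Excel both rejects non-date entries and displays values in the expected ISO-like pattern. This is a minimal, standalone illustration rather than the service code; the column range, sheet layout, and variable names are illustrative only.

# Minimal sketch of the date validation + formatting idea (not the service code).
# Assumes openpyxl is installed; the column range and names are illustrative.
from io import BytesIO

from openpyxl import Workbook
from openpyxl.worksheet.datavalidation import DataValidation

DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"  # same code the PR stores in XLSX_DATETIME_FORMAT

workbook = Workbook()
workbook.iso_dates = True  # serialize datetimes as ISO 8601 strings on save
worksheet = workbook.active
worksheet.append(["acq_datetime"])

# Reject non-date entries in A2:A20 and prompt the user with the expected format.
dv = DataValidation(
    type="date",
    allow_blank=True,
    showErrorMessage=True,
    showInputMessage=True,
    promptTitle="datetime",
    prompt=f"Provide a datetime using {DATETIME_FORMAT}",
    error="Invalid datetime.",
)
col_range = "A2:A20"
dv.add(col_range)
worksheet.add_data_validation(dv)

# Display any entered value with the same ISO-like pattern.
for (cell,) in worksheet[col_range]:
    cell.number_format = DATETIME_FORMAT

buffer = BytesIO()
workbook.save(buffer)
workbook.close()

One detail worth noting: Excel number-format codes are position-sensitive, so the mm between HH and ss is read as minutes while the MM after YYYY is read as months; the string is an Excel format code, not a Python strftime pattern.
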
7 changes: 5 additions & 2 deletions src/aind_data_transfer_service/server.py
@@ -68,12 +68,15 @@ async def validate_csv(request: Request):
             xlsx_sheet = xlsx_book.active
             csv_io = io.StringIO()
             csv_writer = csv.writer(csv_io)
-            for r in xlsx_sheet.rows:
-                csv_writer.writerow([cell.value for cell in r])
+            for r in xlsx_sheet.iter_rows(values_only=True):
+                if any(r):
+                    csv_writer.writerow(r)
             xlsx_book.close()
             data = csv_io.getvalue()
         csv_reader = csv.DictReader(io.StringIO(data))
         for row in csv_reader:
+            if not any(row.values()):
+                continue
             try:
                 job = BasicUploadJobConfigs.from_csv_row(row=row)
                 # Construct hpc job setting most of the vars from the env
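The server-side change above swaps worksheet.rows for iter_rows(values_only=True) so blank template rows never reach the CSV parser. A rough standalone sketch of that filtering step follows (not the service code; the file name is a placeholder):

# Sketch: read an xlsx sheet as plain values and drop rows that are entirely empty.
# Assumes openpyxl is installed; "jobs.xlsx" is a placeholder path.
import csv
import io

from openpyxl import load_workbook

book = load_workbook("jobs.xlsx", read_only=True)
sheet = book.active
csv_io = io.StringIO()
writer = csv.writer(csv_io)
for row in sheet.iter_rows(values_only=True):  # tuples of values, no Cell objects
    if any(row):  # skip rows where every cell is None/empty
        writer.writerow(row)
book.close()
print(csv_io.getvalue())

One caveat: any(row) also treats a row containing only falsy values such as 0 or empty strings as empty, which should be acceptable here since a meaningful template row always has non-empty text fields.
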
Binary file modified tests/resources/job_upload_template.xlsx
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/resources/sample_empty_rows.csv
@@ -0,0 +1,7 @@
+modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime
+ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10
+BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
+BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM
+,,,,,,
+,,,,,,
+,,,,,,
Binary file added tests/resources/sample_empty_rows.xlsx
Binary file not shown.
20 changes: 10 additions & 10 deletions tests/test_job_upload_template.py
@@ -23,19 +23,19 @@ def read_xl_helper(self, source, return_validators=False):
         lines = []
         workbook = load_workbook(source, read_only=(not return_validators))
         worksheet = workbook.active
-        for row in worksheet.rows:
-            row_contents = [cell.value for cell in row]
-            lines.append(row_contents)
+        for row in worksheet.iter_rows(values_only=True):
+            lines.append(row) if any(row) else None
         if return_validators:
             validators = []
             for dv in worksheet.data_validations.dataValidation:
-                validators.append(
-                    {
-                        "name": dv.promptTitle,
-                        "options": dv.formula1.strip('"').split(","),
-                        "ranges": str(dv.cells).split(" "),
-                    }
-                )
+                validator = {
+                    "name": dv.promptTitle,
+                    "type": dv.type,
+                    "ranges": str(dv.cells).split(" "),
+                }
+                if dv.type == "list":
+                    validator["options"] = dv.formula1.strip('"').split(",")
+                validators.append(validator)
             result = (lines, validators)
         else:
             result = lines
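As context for the helper change above: data validations do not appear to be exposed by openpyxl's read-only worksheets, which is why read_xl_helper only sets read_only when validators are not requested. A minimal standalone sketch of reading validators back from a saved template (not the test code itself; the file path is a placeholder):

# Sketch: inspect the validators stored in a generated template.
# Assumes openpyxl is installed and the template file exists at the given path.
from openpyxl import load_workbook

workbook = load_workbook("job_upload_template.xlsx")  # read_only=False so validations load
worksheet = workbook.active
for dv in worksheet.data_validations.dataValidation:
    info = {
        "name": dv.promptTitle,
        "type": dv.type,
        "ranges": str(dv.cells).split(" "),
    }
    if dv.type == "list":
        info["options"] = dv.formula1.strip('"').split(",")
    print(info)
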
26 changes: 26 additions & 0 deletions tests/test_server.py
@@ -18,8 +18,12 @@
 TEST_DIRECTORY = Path(os.path.dirname(os.path.realpath(__file__)))
 SAMPLE_INVALID_EXT = TEST_DIRECTORY / "resources" / "sample_invalid_ext.txt"
 SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample.csv"
+SAMPLE_CSV_EMPTY_ROWS = TEST_DIRECTORY / "resources" / "sample_empty_rows.csv"
 MALFORMED_SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample_malformed.csv"
 SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample.xlsx"
+SAMPLE_XLSX_EMPTY_ROWS = (
+    TEST_DIRECTORY / "resources" / "sample_empty_rows.xlsx"
+)
 MALFORMED_SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample_malformed.xlsx"
 MOCK_DB_FILE = TEST_DIRECTORY / "test_server" / "db.json"

@@ -95,6 +99,28 @@ def test_validate_csv_xlsx(self):
         self.assertEqual(response.status_code, 200)
         self.assertEqual(expected_response, response.json())

+    @patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True)
+    def test_validate_csv_xlsx_empty_rows(self):
+        """Tests that empty rows are ignored from valid csv and xlsx files."""
+        for file_path in [SAMPLE_CSV_EMPTY_ROWS, SAMPLE_XLSX_EMPTY_ROWS]:
+            with TestClient(app) as client:
+                with open(file_path, "rb") as f:
+                    files = {
+                        "file": f,
+                    }
+                    response = client.post(
+                        url="/api/validate_csv", files=files
+                    )
+            expected_jobs = [
+                j.model_dump_json() for j in self.expected_job_configs
+            ]
+            expected_response = {
+                "message": "Valid Data",
+                "data": {"jobs": expected_jobs, "errors": []},
+            }
+            self.assertEqual(response.status_code, 200)
+            self.assertEqual(expected_response, response.json())
+
     @patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True)
     @patch("aind_data_transfer_service.server.sleep", return_value=None)
     @patch("aind_data_transfer_service.hpc.client.HpcClient.submit_job")