diff --git a/src/aind_data_transfer_service/__init__.py b/src/aind_data_transfer_service/__init__.py index 2715595..efa0c96 100644 --- a/src/aind_data_transfer_service/__init__.py +++ b/src/aind_data_transfer_service/__init__.py @@ -1,7 +1,7 @@ """Init package""" import os -__version__ = "0.9.1" +__version__ = "0.9.5" # Global constants OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "open") diff --git a/src/aind_data_transfer_service/configs/job_upload_template.py b/src/aind_data_transfer_service/configs/job_upload_template.py index 867721b..f49c946 100644 --- a/src/aind_data_transfer_service/configs/job_upload_template.py +++ b/src/aind_data_transfer_service/configs/job_upload_template.py @@ -2,7 +2,8 @@ import datetime from io import BytesIO -from aind_data_schema.core.data_description import Modality, Platform +from aind_data_schema.models.modalities import Modality +from aind_data_schema.models.platforms import Platform from openpyxl import Workbook from openpyxl.styles import Font from openpyxl.utils import get_column_letter @@ -14,10 +15,13 @@ class JobUploadTemplate: """Class to configure and create xlsx job upload template""" FILE_NAME = "job_upload_template.xlsx" + NUM_TEMPLATE_ROWS = 20 + XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss" HEADERS = [ "platform", "acq_datetime", "subject_id", + "metadata_dir", "modality0", "modality0.source", "modality1", @@ -28,6 +32,7 @@ class JobUploadTemplate: Platform.BEHAVIOR.abbreviation, datetime.datetime(2023, 10, 4, 4, 0, 0), "123456", + "/allen/aind/stage/fake/metadata_dir", Modality.BEHAVIOR_VIDEOS.abbreviation, "/allen/aind/stage/fake/dir", Modality.BEHAVIOR.abbreviation, @@ -37,6 +42,7 @@ class JobUploadTemplate: Platform.SMARTSPIM.abbreviation, datetime.datetime(2023, 3, 4, 16, 30, 0), "654321", + "/allen/aind/stage/fake/Config", Modality.SPIM.abbreviation, "/allen/aind/stage/fake/dir", ], @@ -44,6 +50,7 @@ class JobUploadTemplate: Platform.ECEPHYS.abbreviation, datetime.datetime(2023, 1, 30, 19, 1, 0), "654321", + None, Modality.ECEPHYS.abbreviation, "/allen/aind/stage/fake/dir", Modality.BEHAVIOR_VIDEOS.abbreviation, @@ -53,13 +60,23 @@ class JobUploadTemplate: VALIDATORS = [ { "name": "platform", + "type": "list", "options": [p().abbreviation for p in Platform._ALL], - "ranges": ["A2:A20"], + "column_indexes": [HEADERS.index("platform")], }, { "name": "modality", + "type": "list", "options": [m().abbreviation for m in Modality._ALL], - "ranges": ["E2:E20", "G2:G20"], + "column_indexes": [ + HEADERS.index("modality0"), + HEADERS.index("modality1"), + ], + }, + { + "name": "datetime", + "type": "date", + "column_indexes": [HEADERS.index("acq_datetime")], }, ] @@ -69,33 +86,47 @@ def create_job_template(): # job template xl_io = BytesIO() workbook = Workbook() + workbook.iso_dates = True worksheet = workbook.active worksheet.append(JobUploadTemplate.HEADERS) for job in JobUploadTemplate.SAMPLE_JOBS: worksheet.append(job) # data validators for validator in JobUploadTemplate.VALIDATORS: - dv = DataValidation( - type="list", - formula1=f'"{(",").join(validator["options"])}"', - allow_blank=True, - showErrorMessage=True, - showInputMessage=True, - ) - dv.promptTitle = validator["name"] - dv.prompt = f'Select a {validator["name"]} from the dropdown' - dv.error = f'Invalid {validator["name"]}.' - for r in validator["ranges"]: - dv.add(r) + dv_type = validator["type"] + dv_name = validator["name"] + dv_params = { + "type": dv_type, + "promptTitle": dv_name, + "error": f"Invalid {dv_name}.", + "allow_blank": True, + "showErrorMessage": True, + "showInputMessage": True, + } + if dv_type == "list": + dv_params["formula1"] = f'"{(",").join(validator["options"])}"' + dv_params["prompt"] = f"Select a {dv_name} from the dropdown" + elif dv_type == "date": + dv_params["prompt"] = "Provide a {} using {}".format( + dv_name, JobUploadTemplate.XLSX_DATETIME_FORMAT + ) + dv = DataValidation(**dv_params) + for i in validator["column_indexes"]: + col = get_column_letter(i + 1) + col_range = ( + f"{col}2:{col}{JobUploadTemplate.NUM_TEMPLATE_ROWS}" + ) + dv.add(col_range) + if dv_type != "date": + continue + for (cell,) in worksheet[col_range]: + cell.number_format = JobUploadTemplate.XLSX_DATETIME_FORMAT worksheet.add_data_validation(dv) # formatting bold = Font(bold=True) - for header in worksheet["A1:G1"]: - for cell in header: - cell.font = bold - worksheet.column_dimensions[ - get_column_letter(cell.column) - ].auto_size = True + for cell in worksheet[1]: + cell.font = bold + worksheet.column_dimensions[cell.column_letter].auto_size = True # save file workbook.save(xl_io) workbook.close() diff --git a/src/aind_data_transfer_service/server.py b/src/aind_data_transfer_service/server.py index e0c535e..26c871d 100644 --- a/src/aind_data_transfer_service/server.py +++ b/src/aind_data_transfer_service/server.py @@ -68,12 +68,15 @@ async def validate_csv(request: Request): xlsx_sheet = xlsx_book.active csv_io = io.StringIO() csv_writer = csv.writer(csv_io) - for r in xlsx_sheet.rows: - csv_writer.writerow([cell.value for cell in r]) + for r in xlsx_sheet.iter_rows(values_only=True): + if any(r): + csv_writer.writerow(r) xlsx_book.close() data = csv_io.getvalue() csv_reader = csv.DictReader(io.StringIO(data)) for row in csv_reader: + if not any(row.values()): + continue try: job = BasicUploadJobConfigs.from_csv_row(row=row) # Construct hpc job setting most of the vars from the env diff --git a/src/aind_data_transfer_service/templates/index.html b/src/aind_data_transfer_service/templates/index.html index 4a5b3ac..fcb07ac 100644 --- a/src/aind_data_transfer_service/templates/index.html +++ b/src/aind_data_transfer_service/templates/index.html @@ -142,7 +142,7 @@

Submit Jobs

let jobsLength = jobs.length; var table = document.createElement('table'), tr, td, row; addTableRow( - [ "s3_bucket", "platform", "subject_id", "acq_datetime", "modality", "modality.source" ], + [ "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.source" ], table, tr, td, true ); for (row = 0; row < jobsLength; row++) { @@ -154,6 +154,7 @@

Submit Jobs

{ value: job.platform.abbreviation, rowspan: modalitiesLength }, { value: job.subject_id, rowspan: modalitiesLength }, { value: job.acq_datetime, rowspan: modalitiesLength }, + { value: job.metadata_dir ?? "", rowspan: modalitiesLength }, modalities ? modalities[0].modality.abbreviation : "", modalities ? modalities[0].source : "" ], table, tr, td, false diff --git a/tests/resources/job_upload_template.xlsx b/tests/resources/job_upload_template.xlsx index 227cf26..5c0a875 100644 Binary files a/tests/resources/job_upload_template.xlsx and b/tests/resources/job_upload_template.xlsx differ diff --git a/tests/resources/sample_empty_rows.csv b/tests/resources/sample_empty_rows.csv new file mode 100644 index 0000000..0dadca7 --- /dev/null +++ b/tests/resources/sample_empty_rows.csv @@ -0,0 +1,7 @@ +modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime +ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10 +BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +,,,,,, +,,,,,, +,,,,,, diff --git a/tests/resources/sample_empty_rows.xlsx b/tests/resources/sample_empty_rows.xlsx new file mode 100644 index 0000000..8c3c02e Binary files /dev/null and b/tests/resources/sample_empty_rows.xlsx differ diff --git a/tests/test_job_upload_template.py b/tests/test_job_upload_template.py index 0751c06..11c52b3 100644 --- a/tests/test_job_upload_template.py +++ b/tests/test_job_upload_template.py @@ -5,6 +5,7 @@ from pathlib import Path from openpyxl import load_workbook +from openpyxl.utils import range_boundaries from aind_data_transfer_service.configs.job_upload_template import ( JobUploadTemplate, @@ -22,19 +23,19 @@ def read_xl_helper(self, source, return_validators=False): lines = [] workbook = load_workbook(source, read_only=(not return_validators)) worksheet = workbook.active - for row in worksheet.rows: - row_contents = [cell.value for cell in row] - lines.append(row_contents) + for row in worksheet.iter_rows(values_only=True): + lines.append(row) if any(row) else None if return_validators: validators = [] for dv in worksheet.data_validations.dataValidation: - validators.append( - { - "name": dv.promptTitle, - "options": dv.formula1.strip('"').split(","), - "ranges": str(dv.cells).split(" "), - } - ) + validator = { + "name": dv.promptTitle, + "type": dv.type, + "ranges": str(dv.cells).split(" "), + } + if dv.type == "list": + validator["options"] = dv.formula1.strip('"').split(",") + validators.append(validator) result = (lines, validators) else: result = lines @@ -49,6 +50,15 @@ def test_create_job_template(self): JobUploadTemplate.create_job_template(), True ) self.assertEqual(expected_lines, template_lines) + for validator in template_validators: + validator["column_indexes"] = [] + for r in validator["ranges"]: + rb = (col, *_) = range_boundaries(r) + self.assertTupleEqual( + (col, 2, col, JobUploadTemplate.NUM_TEMPLATE_ROWS), rb + ) + validator["column_indexes"].append(col - 1) + del validator["ranges"] self.assertCountEqual( JobUploadTemplate.VALIDATORS, template_validators ) diff --git a/tests/test_server.py b/tests/test_server.py index d2a774e..6d75725 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -18,8 +18,12 @@ TEST_DIRECTORY = Path(os.path.dirname(os.path.realpath(__file__))) SAMPLE_INVALID_EXT = TEST_DIRECTORY / "resources" / "sample_invalid_ext.txt" SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample.csv" +SAMPLE_CSV_EMPTY_ROWS = TEST_DIRECTORY / "resources" / "sample_empty_rows.csv" MALFORMED_SAMPLE_CSV = TEST_DIRECTORY / "resources" / "sample_malformed.csv" SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample.xlsx" +SAMPLE_XLSX_EMPTY_ROWS = ( + TEST_DIRECTORY / "resources" / "sample_empty_rows.xlsx" +) MALFORMED_SAMPLE_XLSX = TEST_DIRECTORY / "resources" / "sample_malformed.xlsx" MOCK_DB_FILE = TEST_DIRECTORY / "test_server" / "db.json" @@ -95,6 +99,28 @@ def test_validate_csv_xlsx(self): self.assertEqual(response.status_code, 200) self.assertEqual(expected_response, response.json()) + @patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True) + def test_validate_csv_xlsx_empty_rows(self): + """Tests that empty rows are ignored from valid csv and xlsx files.""" + for file_path in [SAMPLE_CSV_EMPTY_ROWS, SAMPLE_XLSX_EMPTY_ROWS]: + with TestClient(app) as client: + with open(file_path, "rb") as f: + files = { + "file": f, + } + response = client.post( + url="/api/validate_csv", files=files + ) + expected_jobs = [ + j.model_dump_json() for j in self.expected_job_configs + ] + expected_response = { + "message": "Valid Data", + "data": {"jobs": expected_jobs, "errors": []}, + } + self.assertEqual(response.status_code, 200) + self.assertEqual(expected_response, response.json()) + @patch.dict(os.environ, EXAMPLE_ENV_VAR1, clear=True) @patch("aind_data_transfer_service.server.sleep", return_value=None) @patch("aind_data_transfer_service.hpc.client.HpcClient.submit_job")