Harmonized field headers in the spreadsheet

precisiontox · Aug 1, 2023 · 0a01169 · 0a01169
1 parent 6be1f0e
commit 0a01169
Show file tree

Hide file tree

Showing 15 changed files with 48 additions and 47 deletions.
diff --git a/ptmd/api/queries/samples/core.py b/ptmd/api/queries/samples/core.py
@@ -78,7 +78,7 @@ def get_data(self) -> tuple:
  exposure_info: DataFrame = file.parse("Exposure information").replace({nan: None}).replace({"NA": None})
  general_info["exposure_batch_startdate"] = general_info["exposure_batch_startdate"].astype(str)
  general_info["exposure_batch_enddate"] = general_info["exposure_batch_enddate"].astype(str)
- exposure_info["Shipment identifier"] = exposure_info["Shipment identifier"].astype(str)
+ exposure_info["Shipment_identifier"] = exposure_info["Shipment_identifier"].astype(str)
  return {
  "general_info": general_info.to_dict(orient='records')[0],
  "exposure_info": exposure_info.to_dict(orient='records')
@@ -87,7 +87,7 @@ def get_data(self) -> tuple:
  def save_samples(self) -> None:
  """ Save the samples to the database. """
  for sample_data in self.data["exposure_info"]:
- sample_id: str = sample_data["PrecisionTox short identifier"]
+ sample_id: str = sample_data["PrecisionTox_short_identifier"]
  compound_name: str = sample_data["compound_name"]
 
  if compound_name not in self.compounds:

diff --git a/ptmd/const/spreadsheets.py b/ptmd/const/spreadsheets.py
@@ -38,8 +38,7 @@
 }
 SAMPLE_SHEET_EMPTY_COLUMNS: list[str] = [
  "sampleID_label",
- "Shipment identifier",
- "Label tube / identifier",
+ "Shipment_identifier",
  "box_id",
  "exposure_route",
  "operator",
@@ -48,8 +47,8 @@
  "collection_order",
  "box_row",
  "box_column",
- "Mass including tube (mg)",
- "Mass excluding tube (mg)",
+ "Mass_including_tube_(mg)",
+ "Mass_excluding_tube_(mg)",
  "observations_notes"
 ]
 SAMPLE_SHEET_COLUMNS: list[str] = [
@@ -58,13 +57,13 @@
  "compound_name",
  "dose_code",
  "timepoint_level",
- "timepoint (hours)",
- "PrecisionTox short identifier"
+ "timepoint_(hours)",
+ "PrecisionTox_short_identifier"
 ]
 GENERAL_SHEET_COLUMNS: list[str] = [
  "partner_id",
  "biosystem_name",
- "exposure batch",
+ "exposure_batch",
  "control",
  "replicates",
  "blanks",

diff --git a/ptmd/lib/data_extractor/core.py b/ptmd/lib/data_extractor/core.py
@@ -29,6 +29,6 @@ def extract_data_from_spreadsheet(filepath: str) -> dict | None:
  'timepoints': create_timepoints_hours(timepoints_values),
  'chemicals': get_chemicals_from_name(list(exposure_information['compound_name'].unique())),
  'organism_name': general_information['biosystem_name'],
- 'batch': general_information['exposure batch'],
+ 'batch': general_information['exposure_batch'],
  'organisation_name': general_information['partner_id'],
  }
diff --git a/ptmd/lib/excel/styles.py b/ptmd/lib/excel/styles.py
@@ -33,15 +33,15 @@ def style_sample_sheet(writer: ExcelWriter) -> None:
  worksheet.set_row(0, 50, cell_format=header_format)
 
  worksheet.set_column('A:N', 20, cell_format=empty_cells_format)
- worksheet.set_column('D:D', 15, cell_format=empty_cells_format)
- worksheet.set_column('J:K', 15, cell_format=empty_cells_format)
- worksheet.set_column('G:H', 25, cell_format=empty_cells_format)
- worksheet.set_column('N:N', 25, cell_format=empty_cells_format)
-
- worksheet.set_column('O:O', 15, cell_format=extra_cells_format)
- worksheet.set_column('P:P', 25, cell_format=extra_cells_format)
- worksheet.set_column('Q:S', 15, cell_format=extra_cells_format)
- worksheet.set_column('T:T', 30, cell_format=extra_cells_format)
+ worksheet.set_column('C:C', 15, cell_format=empty_cells_format)
+ worksheet.set_column('I:J', 15, cell_format=empty_cells_format)
+ worksheet.set_column('F:G', 25, cell_format=empty_cells_format)
+ worksheet.set_column('M:M', 25, cell_format=empty_cells_format)
+
+ worksheet.set_column('N:N', 15, cell_format=extra_cells_format)
+ worksheet.set_column('O:O', 25, cell_format=extra_cells_format)
+ worksheet.set_column('P:R', 15, cell_format=extra_cells_format)
+ worksheet.set_column('S:S', 30, cell_format=extra_cells_format)
 
 
 def style_general_sheet(writer: ExcelWriter) -> None:

diff --git a/ptmd/lib/isa/core.py b/ptmd/lib/isa/core.py
@@ -123,7 +123,7 @@ def create_parameter_values(self, values: dict) -> list[ParameterValue]:
  return [
  ParameterValue(category=self.protocol_parameters["collection_order"],
  value=str(values['collection_order'])),
- ParameterValue(category=self.protocol_parameters["exposure batch"], value=values['exposure batch']),
+ ParameterValue(category=self.protocol_parameters["exposure_batch"], value=values['exposure_batch']),
  ParameterValue(category=self.protocol_parameters["exposure_route"], value=values['exposure_route']),
  ParameterValue(category=self.protocol_parameters["operator"], value=values['operator'])
  ]
@@ -182,7 +182,7 @@ def create_samples(self, study: Study) -> None:
 
  parameter_values: dict = {
  "collection_order": sample_info['collection_order'],
- "exposure batch": self.data['general_info']['batch'],
+ "exposure_batch": self.data['general_info']['batch'],
  "exposure_route": sample_info['exposure_route'],
  "operator": sample_info['operator'],
  }

diff --git a/ptmd/lib/isa/ontologies.py b/ptmd/lib/isa/ontologies.py
@@ -113,4 +113,4 @@
  'Daphnia_magna': OntologyAnnotation(term_source=NCBI_TAXON, term='Daphnia magna', term_accession='NCBITaxon:35525'),
  'Drosophila_melanogaster': DROSOPHILA_OA,
 }
-TREATMENT_PARAMETERS = ["collection_order", "exposure batch", "exposure_route", "operator"]
+TREATMENT_PARAMETERS = ["collection_order", "exposure_batch", "exposure_route", "operator"]
diff --git a/ptmd/lib/validator/const.py b/ptmd/lib/validator/const.py
@@ -3,8 +3,8 @@
 @author: D. Batista (Terazus)
 """
 
-PTX_ID_LABEL: str = "PrecisionTox short identifier"
-BATCH_LABEL: str = "exposure batch"
+PTX_ID_LABEL: str = "PrecisionTox_short_identifier"
+BATCH_LABEL: str = "exposure_batch"
 COMPOUND_LABEL: str = "compound_name"
 DOSE_LABEL: str = "dose_code"
 TIMEPOINT_LABEL: str = "timepoint_level"
diff --git a/ptmd/lib/validator/core.py b/ptmd/lib/validator/core.py
@@ -193,7 +193,7 @@ def add_node(self, node: dict) -> None:
  label: str = node['label']
  compound_name: str = node['data'].get('compound_name', None)
  replicate: int = node['data'].get('replicate', None)
- timepoint: int = node['data'].get('timepoint (hours)', None)
+ timepoint: int = node['data'].get('timepoint_(hours)', None)
  dose: int = node['data'].get('dose_code', None)
 
  if compound_name:
@@ -203,13 +203,13 @@ def add_node(self, node: dict) -> None:
 
  if timepoint not in self.timepoints and compound_name != 'EXTRACTION BLANK':
  message = f"Timepoint {timepoint} is not in the list of timepoints {self.timepoints}."
- self.validator.add_error(label, message, 'timepoint (hours)')
+ self.validator.add_error(label, message, 'timepoint_(hours)')
 
  if compound_name == 'EXTRACTION BLANK':
  self.extraction_blanks += 1
  if timepoint != 0:
  message = "Extraction blank must have a timepoint of 0."
- self.validator.add_error(label, message, 'timepoint (hours)')
+ self.validator.add_error(label, message, 'timepoint_(hours)')
 
  if compound_name in self.controls_keys:
  if dose != 0:

diff --git a/ptmd/resources/schemas/exposure_information_sheet_schema.json b/ptmd/resources/schemas/exposure_information_sheet_schema.json
@@ -46,7 +46,7 @@
  "type": "string",
  "description": "The operator."
  },
- "PrecisionTox short identifier": {
+ "PrecisionTox_short_identifier": {
  "type": "string",
  "description": "The PrecisionTox short identifier.",
  "length": 9
@@ -66,7 +66,7 @@
  "description": "The timepoint level.",
  "pattern": "^TP[0-9]+$"
  },
- "timepoint (hours)": {
+ "timepoint_(hours)": {
  "type": "number",
  "description": "The timepoint in hours.",
  "minimum": 0
@@ -81,11 +81,11 @@
  "compound_name",
  "dose_code",
  "exposure_route",
- "PrecisionTox short identifier",
+ "PrecisionTox_short_identifier",
  "quantity_dead_during_exposure",
  "operator",
  "replicate",
  "timepoint_level",
- "timepoint (hours)"
+ "timepoint_(hours)"
  ]
 }
diff --git a/tests/test_api/test_queries/test_samples/test_core.py b/tests/test_api/test_queries/test_samples/test_core.py
@@ -9,11 +9,11 @@
 HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer 123'}
 SAMPLES = [
  {
- "PrecisionTox short identifier": "A",
+ "PrecisionTox_short_identifier": "A",
  "compound_name": "test",
  },
  {
- "PrecisionTox short identifier": "B",
+ "PrecisionTox_short_identifier": "B",
  "compound_name": "CONTROL_test1",
  }
 ]

diff --git a/tests/test_lib/test_creator/test_creator.py b/tests/test_lib/test_creator/test_creator.py
@@ -69,7 +69,7 @@ def test_success(self, mock_chemical_mapping, mock_organism_code, mock_allowed_o
  data['exposure'] = [{"chemicals": ["chemical1", "chemical1", "chemical2"], "dose": 0}]
  creator: DataframeCreator = DataframeCreator(data)
  df1, df2 = creator.to_dataframe()
- self.assertEqual(df1.shape, (25, 20))
+ self.assertEqual(df1.shape, (25, 19))
  self.assertEqual(df2.shape, (1, 10))
 
  output_path = path.join(HERE, "..", "..", "test.xlsx")

diff --git a/tests/test_lib/test_data_extractor/test_core.py b/tests/test_lib/test_data_extractor/test_core.py
@@ -17,7 +17,7 @@ def test_extraction_valid(self, mock_tp, mock_chemicals, mock_excel):
  'compound_vehicle': 'DMSO',
  'timepoints': "[1, 2, 3]",
  'biosystem_name': 'H',
- 'exposure batch': "AA",
+ 'exposure_batch': "AA",
  'partner_id': "UOX"
  }]
  data = extract_data_from_spreadsheet('test.xlsx')

diff --git a/tests/test_lib/test_validator/test_core.py b/tests/test_lib/test_validator/test_core.py
@@ -62,13 +62,13 @@ def download_file(self, *args, **kwargs):
 mocked_session_error = MockSessionError()
 mock_exposure_dataframe = DataFrame(columns=SAMPLE_SHEET_COLUMNS)
 mock_exposure_series = Series([
- "qsd", "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
+ "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
  1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
 ], index=SAMPLE_SHEET_COLUMNS)
 mock_exposure_dataframe = concat([mock_exposure_dataframe, mock_exposure_series.to_frame().T],
  ignore_index=False, sort=False, copy=False)
 mock_exposure_series_error = Series([
- "qsd", "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
+ "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
  1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
 ], index=SAMPLE_SHEET_COLUMNS)
 mock_exposure_dataframe_error = concat([mock_exposure_dataframe, mock_exposure_series_error.to_frame().T],
@@ -150,7 +150,7 @@ def setUp(self) -> None:
  'data': {
  "compound_name": self.organism,
  "replicate": 1,
- "timepoint (hours)": 4
+ "timepoint_(hours)": 4
  },
  'label': 'CP1'
  }
@@ -166,7 +166,7 @@ def test_validate_errors_blanks(self):
  validator = MockValidator()
  self.general_information['blanks'] = 2
  blank_node: dict = deepcopy(self.default_node)
- blank_node['data']['timepoint (hours)'] = 8
+ blank_node['data']['timepoint_(hours)'] = 8
  blank_node['data']['compound_name'] = "EXTRACTION BLANK"
 
  graph = VerticalValidator(self.general_information, validator)
@@ -182,7 +182,7 @@ def test_validate_errors_blanks(self):
 
  def test_validate_timepoints_missing(self):
  validator = MockValidator()
- self.general_information['timepoints'] = [4, 8]
+ self.general_information['timepoints'] = [0, 1]
  graph = VerticalValidator(self.general_information, validator)
  graph.add_node(self.default_node)
  graph.validate()
@@ -193,7 +193,7 @@ def test_validate_timepoints_missing(self):
  def test_validate_timepoints_too_many(self):
  validator = MockValidator()
  extra_node: dict = {**self.default_node}
- extra_node['data']['timepoint (hours)'] = 8
+ extra_node['data']['timepoint_(hours)'] = 8
  self.general_information['timepoints'] = [4]
  graph = VerticalValidator(self.general_information, validator)
  graph.add_node(self.default_node)
@@ -210,6 +210,7 @@ def test_validate_replicate_missing(self):
  graph.add_node(self.default_node)
  graph.validate()
  self.assertFalse(validator.report['valid'])
+ print(validator.report['errors'])
  self.assertEqual(validator.report['errors'][self.organism][0]['message'],
  "Replicate 1 is missing 1 timespoints(s).")
 
@@ -234,6 +235,7 @@ def test_validate_controls_dose_not_zero(self):
  control_node: dict = deepcopy(self.default_node)
  control_node['data']['compound_name'] = "CONTROL (DMSO)"
  control_node['data']['dose_code'] = 1
+ control_node['data']['timepoint_(hours)'] = 4
 
  graph = VerticalValidator(self.general_information, validator)
  graph.add_node(control_node)

diff --git a/tests/test_lib/test_validator/test_validate_file.py b/tests/test_lib/test_validator/test_validate_file.py
@@ -87,13 +87,13 @@ def validate(self, *args, **kwargs):
 
 mock_exposure_dataframe = DataFrame(columns=SAMPLE_SHEET_COLUMNS)
 mock_exposure_series = Series([
- "qsd", "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
+ "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
  1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
 ], index=SAMPLE_SHEET_COLUMNS)
 mock_exposure_dataframe = concat([mock_exposure_dataframe, mock_exposure_series.to_frame().T],
  ignore_index=False, sort=False, copy=False)
 mock_exposure_series_error = Series([
- "qsd", "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
+ "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
  1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
 ], index=SAMPLE_SHEET_COLUMNS)
 mock_exposure_dataframe_error = concat([mock_exposure_dataframe, mock_exposure_series_error.to_frame().T],

diff --git a/tests/test_lib/test_validator/test_validate_identifier.py b/tests/test_lib/test_validator/test_validate_identifier.py
@@ -28,7 +28,7 @@ def __init__(self):
  'label': 'test'
  }
  self.general_info: dict = {
- 'exposure batch': 'AC',
+ 'exposure_batch': 'AC',
  'biosystem_name': 'test',
  'dose_code': 'BMD10',
  'compound_name': 'Compound 1',
@@ -52,7 +52,7 @@ def test_validate_identifier_failure(self):
  self.assertFalse(validator.report['valid'])
  expected_error = {
  'message': 'Record at line 2 (FAC002LA1) is duplicated with record at line 3',
- 'field_concerned': 'PrecisionTox short identifier'
+ 'field_concerned': 'PrecisionTox_short_identifier'
  }
  self.assertEqual(validator.report['errors']['test'][0], expected_error)
 
@@ -93,11 +93,11 @@ def test_validate_species_error_unknown(self):
 
  def test_validate_batch_error_wrong_batch_general_info(self):
  validator = ExcelValidatorMock()
- validator.general_info['exposure batch'] = 'ABC'
+ validator.general_info['exposure_batch'] = 'ABC'
  validate_batch(validator)
  self.assertFalse(validator.report['valid'])
  self.assertEqual(validator.report['errors']['test'][0]['message'], "The batch 'ABC' is not valid.")
- self.assertEqual(validator.report['errors']['test'][0]['field_concerned'], 'exposure batch')
+ self.assertEqual(validator.report['errors']['test'][0]['field_concerned'], 'exposure_batch')
 
  def test_validate_batch_error_wrong_batch_identifier(self):
  validator = ExcelValidatorMock()