Skip to content

Commit

Permalink
Harmonized fields headers in the spreadsheet
Browse files: browse the repository at this point in the history
  • Loading branch information
terazus committed Aug 1, 2023
1 parent 6be1f0e commit 0a01169
Show file tree
Hide file tree
Showing 15 changed files with 48 additions and 47 deletions.
4 changes: 2 additions & 2 deletions ptmd/api/queries/samples/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def get_data(self) -> tuple:
exposure_info: DataFrame = file.parse("Exposure information").replace({nan: None}).replace({"NA": None})
general_info["exposure_batch_startdate"] = general_info["exposure_batch_startdate"].astype(str)
general_info["exposure_batch_enddate"] = general_info["exposure_batch_enddate"].astype(str)
exposure_info["Shipment identifier"] = exposure_info["Shipment identifier"].astype(str)
exposure_info["Shipment_identifier"] = exposure_info["Shipment_identifier"].astype(str)
return {
"general_info": general_info.to_dict(orient='records')[0],
"exposure_info": exposure_info.to_dict(orient='records')
Expand All @@ -87,7 +87,7 @@ def get_data(self) -> tuple:
def save_samples(self) -> None:
""" Save the samples to the database. """
for sample_data in self.data["exposure_info"]:
sample_id: str = sample_data["PrecisionTox short identifier"]
sample_id: str = sample_data["PrecisionTox_short_identifier"]
compound_name: str = sample_data["compound_name"]

if compound_name not in self.compounds:
Expand Down
13 changes: 6 additions & 7 deletions ptmd/const/spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
}
SAMPLE_SHEET_EMPTY_COLUMNS: list[str] = [
"sampleID_label",
"Shipment identifier",
"Label tube / identifier",
"Shipment_identifier",
"box_id",
"exposure_route",
"operator",
Expand All @@ -48,8 +47,8 @@
"collection_order",
"box_row",
"box_column",
"Mass including tube (mg)",
"Mass excluding tube (mg)",
"Mass_including_tube_(mg)",
"Mass_excluding_tube_(mg)",
"observations_notes"
]
SAMPLE_SHEET_COLUMNS: list[str] = [
Expand All @@ -58,13 +57,13 @@
"compound_name",
"dose_code",
"timepoint_level",
"timepoint (hours)",
"PrecisionTox short identifier"
"timepoint_(hours)",
"PrecisionTox_short_identifier"
]
GENERAL_SHEET_COLUMNS: list[str] = [
"partner_id",
"biosystem_name",
"exposure batch",
"exposure_batch",
"control",
"replicates",
"blanks",
Expand Down
2 changes: 1 addition & 1 deletion ptmd/lib/data_extractor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ def extract_data_from_spreadsheet(filepath: str) -> dict | None:
'timepoints': create_timepoints_hours(timepoints_values),
'chemicals': get_chemicals_from_name(list(exposure_information['compound_name'].unique())),
'organism_name': general_information['biosystem_name'],
'batch': general_information['exposure batch'],
'batch': general_information['exposure_batch'],
'organisation_name': general_information['partner_id'],
}
18 changes: 9 additions & 9 deletions ptmd/lib/excel/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ def style_sample_sheet(writer: ExcelWriter) -> None:
worksheet.set_row(0, 50, cell_format=header_format)

worksheet.set_column('A:N', 20, cell_format=empty_cells_format)
worksheet.set_column('D:D', 15, cell_format=empty_cells_format)
worksheet.set_column('J:K', 15, cell_format=empty_cells_format)
worksheet.set_column('G:H', 25, cell_format=empty_cells_format)
worksheet.set_column('N:N', 25, cell_format=empty_cells_format)

worksheet.set_column('O:O', 15, cell_format=extra_cells_format)
worksheet.set_column('P:P', 25, cell_format=extra_cells_format)
worksheet.set_column('Q:S', 15, cell_format=extra_cells_format)
worksheet.set_column('T:T', 30, cell_format=extra_cells_format)
worksheet.set_column('C:C', 15, cell_format=empty_cells_format)
worksheet.set_column('I:J', 15, cell_format=empty_cells_format)
worksheet.set_column('F:G', 25, cell_format=empty_cells_format)
worksheet.set_column('M:M', 25, cell_format=empty_cells_format)

worksheet.set_column('N:N', 15, cell_format=extra_cells_format)
worksheet.set_column('O:O', 25, cell_format=extra_cells_format)
worksheet.set_column('P:R', 15, cell_format=extra_cells_format)
worksheet.set_column('S:S', 30, cell_format=extra_cells_format)


def style_general_sheet(writer: ExcelWriter) -> None:
Expand Down
4 changes: 2 additions & 2 deletions ptmd/lib/isa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def create_parameter_values(self, values: dict) -> list[ParameterValue]:
return [
ParameterValue(category=self.protocol_parameters["collection_order"],
value=str(values['collection_order'])),
ParameterValue(category=self.protocol_parameters["exposure batch"], value=values['exposure batch']),
ParameterValue(category=self.protocol_parameters["exposure_batch"], value=values['exposure_batch']),
ParameterValue(category=self.protocol_parameters["exposure_route"], value=values['exposure_route']),
ParameterValue(category=self.protocol_parameters["operator"], value=values['operator'])
]
Expand Down Expand Up @@ -182,7 +182,7 @@ def create_samples(self, study: Study) -> None:

parameter_values: dict = {
"collection_order": sample_info['collection_order'],
"exposure batch": self.data['general_info']['batch'],
"exposure_batch": self.data['general_info']['batch'],
"exposure_route": sample_info['exposure_route'],
"operator": sample_info['operator'],
}
Expand Down
2 changes: 1 addition & 1 deletion ptmd/lib/isa/ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,4 @@
'Daphnia_magna': OntologyAnnotation(term_source=NCBI_TAXON, term='Daphnia magna', term_accession='NCBITaxon:35525'),
'Drosophila_melanogaster': DROSOPHILA_OA,
}
TREATMENT_PARAMETERS = ["collection_order", "exposure batch", "exposure_route", "operator"]
TREATMENT_PARAMETERS = ["collection_order", "exposure_batch", "exposure_route", "operator"]
4 changes: 2 additions & 2 deletions ptmd/lib/validator/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
@author: D. Batista (Terazus)
"""

PTX_ID_LABEL: str = "PrecisionTox short identifier"
BATCH_LABEL: str = "exposure batch"
PTX_ID_LABEL: str = "PrecisionTox_short_identifier"
BATCH_LABEL: str = "exposure_batch"
COMPOUND_LABEL: str = "compound_name"
DOSE_LABEL: str = "dose_code"
TIMEPOINT_LABEL: str = "timepoint_level"
6 changes: 3 additions & 3 deletions ptmd/lib/validator/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def add_node(self, node: dict) -> None:
label: str = node['label']
compound_name: str = node['data'].get('compound_name', None)
replicate: int = node['data'].get('replicate', None)
timepoint: int = node['data'].get('timepoint (hours)', None)
timepoint: int = node['data'].get('timepoint_(hours)', None)
dose: int = node['data'].get('dose_code', None)

if compound_name:
Expand All @@ -203,13 +203,13 @@ def add_node(self, node: dict) -> None:

if timepoint not in self.timepoints and compound_name != 'EXTRACTION BLANK':
message = f"Timepoint {timepoint} is not in the list of timepoints {self.timepoints}."
self.validator.add_error(label, message, 'timepoint (hours)')
self.validator.add_error(label, message, 'timepoint_(hours)')

if compound_name == 'EXTRACTION BLANK':
self.extraction_blanks += 1
if timepoint != 0:
message = "Extraction blank must have a timepoint of 0."
self.validator.add_error(label, message, 'timepoint (hours)')
self.validator.add_error(label, message, 'timepoint_(hours)')

if compound_name in self.controls_keys:
if dose != 0:
Expand Down
8 changes: 4 additions & 4 deletions ptmd/resources/schemas/exposure_information_sheet_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"type": "string",
"description": "The operator."
},
"PrecisionTox short identifier": {
"PrecisionTox_short_identifier": {
"type": "string",
"description": "The PrecisionTox short identifier.",
"length": 9
Expand All @@ -66,7 +66,7 @@
"description": "The timepoint level.",
"pattern": "^TP[0-9]+$"
},
"timepoint (hours)": {
"timepoint_(hours)": {
"type": "number",
"description": "The timepoint in hours.",
"minimum": 0
Expand All @@ -81,11 +81,11 @@
"compound_name",
"dose_code",
"exposure_route",
"PrecisionTox short identifier",
"PrecisionTox_short_identifier",
"quantity_dead_during_exposure",
"operator",
"replicate",
"timepoint_level",
"timepoint (hours)"
"timepoint_(hours)"
]
}
4 changes: 2 additions & 2 deletions tests/test_api/test_queries/test_samples/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer 123'}
SAMPLES = [
{
"PrecisionTox short identifier": "A",
"PrecisionTox_short_identifier": "A",
"compound_name": "test",
},
{
"PrecisionTox short identifier": "B",
"PrecisionTox_short_identifier": "B",
"compound_name": "CONTROL_test1",
}
]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lib/test_creator/test_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_success(self, mock_chemical_mapping, mock_organism_code, mock_allowed_o
data['exposure'] = [{"chemicals": ["chemical1", "chemical1", "chemical2"], "dose": 0}]
creator: DataframeCreator = DataframeCreator(data)
df1, df2 = creator.to_dataframe()
self.assertEqual(df1.shape, (25, 20))
self.assertEqual(df1.shape, (25, 19))
self.assertEqual(df2.shape, (1, 10))

output_path = path.join(HERE, "..", "..", "test.xlsx")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lib/test_data_extractor/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_extraction_valid(self, mock_tp, mock_chemicals, mock_excel):
'compound_vehicle': 'DMSO',
'timepoints': "[1, 2, 3]",
'biosystem_name': 'H',
'exposure batch': "AA",
'exposure_batch': "AA",
'partner_id': "UOX"
}]
data = extract_data_from_spreadsheet('test.xlsx')
Expand Down
14 changes: 8 additions & 6 deletions tests/test_lib/test_validator/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ def download_file(self, *args, **kwargs):
mocked_session_error = MockSessionError()
mock_exposure_dataframe = DataFrame(columns=SAMPLE_SHEET_COLUMNS)
mock_exposure_series = Series([
"qsd", "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
"qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
], index=SAMPLE_SHEET_COLUMNS)
mock_exposure_dataframe = concat([mock_exposure_dataframe, mock_exposure_series.to_frame().T],
ignore_index=False, sort=False, copy=False)
mock_exposure_series_error = Series([
"qsd", "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
"qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
], index=SAMPLE_SHEET_COLUMNS)
mock_exposure_dataframe_error = concat([mock_exposure_dataframe, mock_exposure_series_error.to_frame().T],
Expand Down Expand Up @@ -150,7 +150,7 @@ def setUp(self) -> None:
'data': {
"compound_name": self.organism,
"replicate": 1,
"timepoint (hours)": 4
"timepoint_(hours)": 4
},
'label': 'CP1'
}
Expand All @@ -166,7 +166,7 @@ def test_validate_errors_blanks(self):
validator = MockValidator()
self.general_information['blanks'] = 2
blank_node: dict = deepcopy(self.default_node)
blank_node['data']['timepoint (hours)'] = 8
blank_node['data']['timepoint_(hours)'] = 8
blank_node['data']['compound_name'] = "EXTRACTION BLANK"

graph = VerticalValidator(self.general_information, validator)
Expand All @@ -182,7 +182,7 @@ def test_validate_errors_blanks(self):

def test_validate_timepoints_missing(self):
validator = MockValidator()
self.general_information['timepoints'] = [4, 8]
self.general_information['timepoints'] = [0, 1]
graph = VerticalValidator(self.general_information, validator)
graph.add_node(self.default_node)
graph.validate()
Expand All @@ -193,7 +193,7 @@ def test_validate_timepoints_missing(self):
def test_validate_timepoints_too_many(self):
validator = MockValidator()
extra_node: dict = {**self.default_node}
extra_node['data']['timepoint (hours)'] = 8
extra_node['data']['timepoint_(hours)'] = 8
self.general_information['timepoints'] = [4]
graph = VerticalValidator(self.general_information, validator)
graph.add_node(self.default_node)
Expand All @@ -210,6 +210,7 @@ def test_validate_replicate_missing(self):
graph.add_node(self.default_node)
graph.validate()
self.assertFalse(validator.report['valid'])
print(validator.report['errors'])
self.assertEqual(validator.report['errors'][self.organism][0]['message'],
"Replicate 1 is missing 1 timespoints(s).")

Expand All @@ -234,6 +235,7 @@ def test_validate_controls_dose_not_zero(self):
control_node: dict = deepcopy(self.default_node)
control_node['data']['compound_name'] = "CONTROL (DMSO)"
control_node['data']['dose_code'] = 1
control_node['data']['timepoint_(hours)'] = 4

graph = VerticalValidator(self.general_information, validator)
graph.add_node(control_node)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_lib/test_validator/test_validate_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ def validate(self, *args, **kwargs):

mock_exposure_dataframe = DataFrame(columns=SAMPLE_SHEET_COLUMNS)
mock_exposure_series = Series([
"qsd", "qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
"qsd", "qsd", "qsd", "qsd", "qsd", 12, 12, 1, "A", 1, None, None, None,
1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
], index=SAMPLE_SHEET_COLUMNS)
mock_exposure_dataframe = concat([mock_exposure_dataframe, mock_exposure_series.to_frame().T],
ignore_index=False, sort=False, copy=False)
mock_exposure_series_error = Series([
"qsd", "qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
"qsd", "qsd", "qsd", None, "qsd", 12, 12, 1, "A", 1, None, None, None,
1, "Ethoprophos", "BMD10", "TP1", 4, "FAC002LA1"
], index=SAMPLE_SHEET_COLUMNS)
mock_exposure_dataframe_error = concat([mock_exposure_dataframe, mock_exposure_series_error.to_frame().T],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_lib/test_validator/test_validate_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self):
'label': 'test'
}
self.general_info: dict = {
'exposure batch': 'AC',
'exposure_batch': 'AC',
'biosystem_name': 'test',
'dose_code': 'BMD10',
'compound_name': 'Compound 1',
Expand All @@ -52,7 +52,7 @@ def test_validate_identifier_failure(self):
self.assertFalse(validator.report['valid'])
expected_error = {
'message': 'Record at line 2 (FAC002LA1) is duplicated with record at line 3',
'field_concerned': 'PrecisionTox short identifier'
'field_concerned': 'PrecisionTox_short_identifier'
}
self.assertEqual(validator.report['errors']['test'][0], expected_error)

Expand Down Expand Up @@ -93,11 +93,11 @@ def test_validate_species_error_unknown(self):

def test_validate_batch_error_wrong_batch_general_info(self):
validator = ExcelValidatorMock()
validator.general_info['exposure batch'] = 'ABC'
validator.general_info['exposure_batch'] = 'ABC'
validate_batch(validator)
self.assertFalse(validator.report['valid'])
self.assertEqual(validator.report['errors']['test'][0]['message'], "The batch 'ABC' is not valid.")
self.assertEqual(validator.report['errors']['test'][0]['field_concerned'], 'exposure batch')
self.assertEqual(validator.report['errors']['test'][0]['field_concerned'], 'exposure_batch')

def test_validate_batch_error_wrong_batch_identifier(self):
validator = ExcelValidatorMock()
Expand Down

0 comments on commit 0a01169

Please sign in to comment.