diff --git a/README.md b/README.md index 145d59a..f819669 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ You can specify ENA sample checklist using the `--checklist` parameter. By defau The command line tool will automatically fetch the correct scientific name based on the taxon ID or fetch the taxon ID based on the scientific name. Both can be given and no overwrite will be done. -- Mandatory: *alias*, *title*, *sample_description*, *collection date*, *geographic location (country and/or sea)* and either *scientific_name* or *taxon_id* (preferred) -- Optional: *common_name* +- Mandatory: *alias*, *title*, *collection date*, *geographic location (country and/or sea)* and either *scientific_name* or *taxon_id* (preferred) +- Optional: *common_name*, *sample_description* | alias | title | taxon_id | scientific_name | common_name | sample_description | collection date | geographic location (country and/or sea) | |----------------|----------------|----------|-------------------------------------------------|-------------|----------------------|-----------------|------------------------------------------| @@ -133,13 +133,14 @@ Currently we refer to the [ENA Webin](https://wwwdev.ebi.ac.uk/ena/submit/webin/ | sample_alias | mandatory | Pick a sample to associate this experiment with. The sample may be an individual or a pool, depending on how it is specified. | | | design_description | mandatory | Goal and setup of the individual library including library was constructed. | | | spot_descriptor | optional | The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files). 
| | -| library_name | mandatory | The submitter's name for this library. | | +| library_name | optional | The submitter's name for this library. | | | library_layout | mandatory | LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified. | yes | | insert_size | mandatory | Relative distance. | | | library_strategy | mandatory | Sequencing technique intended for this library | yes | | library_source | mandatory | The LIBRARY_SOURCE specifies the type of source material that is being sequenced. | yes | | library_selection | mandatory | Method used to enrich the target in the sequence library preparation | yes | | platform | mandatory | The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center. | yes | +| instrument_model | mandatory | Model of the sequencing instrument. | yes | | library_construction_protocol | optional | Free form text describing the protocol by which the sequencing library was constructed. 
| | diff --git a/ena_upload/_version.py b/ena_upload/_version.py index 63af887..364e7ba 100644 --- a/ena_upload/_version.py +++ b/ena_upload/_version.py @@ -1 +1 @@ -__version__ = "0.6.3" +__version__ = "0.6.4" diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index fa601e7..0e5f046 100755 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -55,7 +55,7 @@ def create_dataframe(schema_tables, action, dev, auto_action): schema_dataframe = {} for schema, table in schema_tables.items(): - df = pd.read_csv(table, sep='\t', comment='#', dtype=str) + df = pd.read_csv(table, sep='\t', comment='#', dtype=str, na_values=["NA", "Na", "na", "NaN"]) df = df.dropna(how='all') df = check_columns(df, schema, action, dev, auto_action) schema_dataframe[schema] = df @@ -294,7 +294,7 @@ def run_construct(template_path, schema_targets, center, checklist, tool): template = templates[schema] Template = loader.load(template) stream = generate_stream(schema, targets, Template, center, tool) - + print(f"Constructing XML for '{schema}' schema") schema_xmls[schema] = construct_xml(schema, stream, xsds[schema]) return schema_xmls @@ -315,7 +315,7 @@ def construct_submission(template_path, action, submission_input, center, checkl :return submission_xml: filename of submission XML ''' - print("Constructing submission") + print(f"Constructing XML for submission schema") xsds, templates = actors(template_path, checklist) @@ -325,7 +325,6 @@ def construct_submission(template_path, action, submission_input, center, checkl stream = Template.generate(action=action, input=submission_input, center=center, tool_name=tool['tool_name'], tool_version=tool['tool_version']) - submission_xml = construct_xml('submission', stream, xsds['submission']) return submission_xml @@ -838,9 +837,9 @@ def main(): for schema in SCHEMA_TYPES: if schema in xl_workbook.book.sheetnames: - xl_sheet = xl_workbook.parse(schema, header=0) + xl_sheet = xl_workbook.parse(schema, header=0, na_values=["NA", 
"Na", "na", "NaN"]) elif f"ENA_{schema}" in xl_workbook.book.sheetnames: - xl_sheet = xl_workbook.parse(f"ENA_{schema}", header=0) + xl_sheet = xl_workbook.parse(f"ENA_{schema}", header=0, na_values=["NA", "Na", "na", "NaN"]) else: sys.exit( f"The sheet '{schema}' is not present in the excel sheet {xlsx}") diff --git a/ena_upload/templates/ENA_template_experiments.xml b/ena_upload/templates/ENA_template_experiments.xml index b6aaf18..9f87bab 100755 --- a/ena_upload/templates/ENA_template_experiments.xml +++ b/ena_upload/templates/ENA_template_experiments.xml @@ -1,11 +1,16 @@ + ${row.title} + + + + ${row.design_description} + ${row.spot_descriptor} + + + ${row.library_name} + + + + + + + + + ${row.library_construction_protocol} + + + + SUBMISSION_TOOL diff --git a/ena_upload/templates/ENA_template_runs.xml b/ena_upload/templates/ENA_template_runs.xml index 4c6d2f6..134d017 100755 --- a/ena_upload/templates/ENA_template_runs.xml +++ b/ena_upload/templates/ENA_template_runs.xml @@ -1,11 +1,13 @@ + + diff --git a/ena_upload/templates/ENA_template_samples_ERC000011.xml b/ena_upload/templates/ENA_template_samples_ERC000011.xml index 762acc9..0eba21f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000011.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000011.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -70,14 +81,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) diff --git a/ena_upload/templates/ENA_template_samples_ERC000012.xml b/ena_upload/templates/ENA_template_samples_ERC000012.xml index 2bdefef..f445cbf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000012.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000012.xml @@ -1,31 +1,44 @@ + ${row.title} + + 
${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,47 +208,63 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + altitude ${row['altitude']} m + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000013.xml b/ena_upload/templates/ENA_template_samples_ERC000013.xml index aebff11..c37b980 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000013.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000013.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 +221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local 
environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000014.xml b/ena_upload/templates/ENA_template_samples_ERC000014.xml index 3868fe1..518bb60 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000014.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000014.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -201,42 +214,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000015.xml b/ena_upload/templates/ENA_template_samples_ERC000015.xml index 82e9129..e7e2be0 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000015.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000015.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or 
sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000016.xml b/ena_upload/templates/ENA_template_samples_ERC000016.xml index 6a20547..9003c92 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000016.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000016.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000017.xml b/ena_upload/templates/ENA_template_samples_ERC000017.xml index b0c021a..d731a53 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000017.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000017.xml @@ -1,31 +1,44 @@ + ${row.title} 
+ + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000018.xml b/ena_upload/templates/ENA_template_samples_ERC000018.xml index d1c765a..1852f93 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000018.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000018.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium 
${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000019.xml b/ena_upload/templates/ENA_template_samples_ERC000019.xml index 921a17e..c1bc72f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000019.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000019.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,48 +221,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000020.xml b/ena_upload/templates/ENA_template_samples_ERC000020.xml index 8e932a5..d5ac689 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000020.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000020.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 
+221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000021.xml b/ena_upload/templates/ENA_template_samples_ERC000021.xml index 7ecc03d..92b3f83 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000021.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000021.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,48 +221,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git 
a/ena_upload/templates/ENA_template_samples_ERC000022.xml b/ena_upload/templates/ENA_template_samples_ERC000022.xml index d222394..1d2f41e 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000022.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000022.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -41,10 +52,12 @@ def attributetest(row, column): ${row['profile position']} + project name ${row['project name']} + experimental factor @@ -220,10 +233,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -231,48 +246,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000023.xml b/ena_upload/templates/ENA_template_samples_ERC000023.xml index 4e65e48..ce20952 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000023.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000023.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,24 +208,32 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic 
location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -226,18 +247,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000024.xml b/ena_upload/templates/ENA_template_samples_ERC000024.xml index cb10191..0e4fb55 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000024.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000024.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,43 +221,57 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000025.xml b/ena_upload/templates/ENA_template_samples_ERC000025.xml index f837269..373c61e 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000025.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000025.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 +221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000027.xml b/ena_upload/templates/ENA_template_samples_ERC000027.xml index 49ab6e8..7af98df 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000027.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000027.xml @@ -1,37 +1,52 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + Latitude End @@ -46,19 +61,25 @@ def attributetest(row, column): DD + Depth ${row['Depth']} m + + Protocol Label ${row['Protocol Label']} + + project name ${row['project name']} + experimental factor @@ -216,42 +237,60 @@ def attributetest(row, column): ${row['relevant standard operating procedures']} + environmental package ${row['environmental package']} + + collection date ${row['collection date']} + + geographic 
location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Site ${row['Sampling Site']} + + Sampling Platform ${row['Sampling Platform']} + source material identifiers @@ -335,20 +374,26 @@ def attributetest(row, column): ${row['sample storage location']} + Marine Region ${row['Marine Region']} + + Temperature ${row['Temperature']} ºC + + Salinity ${row['Salinity']} psu + specific host diff --git a/ena_upload/templates/ENA_template_samples_ERC000028.xml b/ena_upload/templates/ENA_template_samples_ERC000028.xml index 3b8e283..dad6a27 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000028.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000028.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + isolation_source ${row['isolation_source']} + lat_lon @@ -38,14 +51,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -70,20 +87,24 @@ def attributetest(row, column): ${row['mating_type']} + host health state ${row['host health state']} + lab_host ${row['lab_host']} + host scientific name ${row['host scientific name']} + bio_material @@ -102,10 +123,12 @@ def attributetest(row, column): ${row['specimen_voucher']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000029.xml b/ena_upload/templates/ENA_template_samples_ERC000029.xml index ae32d8a..6df4506 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000029.xml +++ 
b/ena_upload/templates/ENA_template_samples_ERC000029.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -52,14 +63,18 @@ def attributetest(row, column): ${row['country of travel']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + altitude @@ -67,20 +82,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -107,10 +128,12 @@ def attributetest(row, column): m3 + environmental_sample ${row['environmental_sample']} + mating_type @@ -166,10 +189,12 @@ def attributetest(row, column): ${row['host life stage']} + host health state ${row['host health state']} + host sex @@ -182,10 +207,12 @@ def attributetest(row, column): ${row['lab_host']} + host scientific name ${row['host scientific name']} + passage_history @@ -198,10 +225,12 @@ def attributetest(row, column): ${row['sample storage conditions']} + Is the sequenced pathogen host associated? 
${row['Is the sequenced pathogen host associated?']} + bio_material @@ -220,10 +249,12 @@ def attributetest(row, column): ${row['specimen_voucher']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000030.xml b/ena_upload/templates/ENA_template_samples_ERC000030.xml index ec074fd..bf37fa7 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000030.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000030.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -40,16 +51,20 @@ def attributetest(row, column): ${row['Event Date/Time End']} + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + Latitude End @@ -64,21 +79,25 @@ def attributetest(row, column): DD + Depth ${row['Depth']} m + Sample Collection Device ${row['Sample Collection Device']} + Protocol Label ${row['Protocol Label']} + Size Fraction Lower Threshold @@ -103,55 +122,79 @@ def attributetest(row, column): ${row['Last Update Date']} + project name ${row['project name']} + + environmental package ${row['environmental package']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Station ${row['Sampling Station']} + + Sampling Platform ${row['Sampling Platform']} + + Marine Region ${row['Marine Region']} + + Salinity Sensor ${row['Salinity Sensor']} psu + Oxygen Sensor @@ -166,11 +209,13 @@ def attributetest(row, column): µmol/L + Temperature ${row['Temperature']} ºC + Chlorophyll Sensor diff --git a/ena_upload/templates/ENA_template_samples_ERC000031.xml 
b/ena_upload/templates/ENA_template_samples_ERC000031.xml index 85b7d09..ba34566 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000031.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000031.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + surface material @@ -249,56 +276,78 @@ def attributetest(row, column): ${row['indoor surface']} + indoor space ${row['indoor space']} + + filter type ${row['filter type']} + + heating and cooling system type ${row['heating and cooling system type']} + substructure type ${row['substructure type']} + light type ${row['light type']} + + building setting ${row['building setting']} + + building occupancy type ${row['building occupancy type']} + + space typical state ${row['space typical state']} + + typical occupant density ${row['typical occupant density']} + + occupancy at sampling ${row['occupancy at sampling']} + + occupant density at sampling ${row['occupant density at sampling']} + + ventilation type ${row['ventilation type']} + source material identifiers @@ -336,10 +385,12 @@ def attributetest(row, column): ${row['sample size sorting method']} + organism count ${row['organism count']} + sample 
collection device @@ -364,16 +415,20 @@ def attributetest(row, column): ${row['host scientific name']} + relative air humidity ${row['relative air humidity']} % + + absolute air humidity ${row['absolute air humidity']} kg + surface humidity @@ -381,11 +436,13 @@ def attributetest(row, column): % + air temperature ${row['air temperature']} ºC + surface temperature @@ -413,11 +470,13 @@ def attributetest(row, column): ºC + carbon dioxide ${row['carbon dioxide']} µmol/L + subspecific genetic lineage diff --git a/ena_upload/templates/ENA_template_samples_ERC000032.xml b/ena_upload/templates/ENA_template_samples_ERC000032.xml index 2fdce8d..25ca461 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000032.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000032.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -160,14 +171,18 @@ def attributetest(row, column): ${row['illness symptoms']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -200,14 +215,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + host age @@ -215,34 +234,48 @@ def attributetest(row, column): years + host health state ${row['host health state']} + + host sex ${row['host sex']} + + host scientific name ${row['host scientific name']} + + influenza test method ${row['influenza test method']} + + influenza test result ${row['influenza test result']} + + other pathogens tested ${row['other pathogens tested']} + + other pathogens test result ${row['other pathogens test result']} + influenza virus type @@ -273,14 +306,18 @@ def attributetest(row, column): ${row['lineage:swl (required for H1N1 viruses)']} + collector name ${row['collector name']} + + collecting institution 
${row['collecting institution']} + receipt date diff --git a/ena_upload/templates/ENA_template_samples_ERC000033.xml b/ena_upload/templates/ENA_template_samples_ERC000033.xml index f8acbe2..dc93b8d 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000033.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000033.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -64,14 +75,18 @@ def attributetest(row, column): ${row['illness symptoms']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -104,14 +119,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + host age @@ -119,38 +138,48 @@ def attributetest(row, column): years + host health state ${row['host health state']} + + host sex ${row['host sex']} + lab_host ${row['lab_host']} + host scientific name ${row['host scientific name']} + virus identifier ${row['virus identifier']} + collector name ${row['collector name']} + + collecting institution ${row['collecting institution']} + receipt date @@ -175,10 +204,12 @@ def attributetest(row, column): ${row['serotype (required for a seropositive sample)']} + isolate ${row['isolate']} + strain diff --git a/ena_upload/templates/ENA_template_samples_ERC000034.xml b/ena_upload/templates/ENA_template_samples_ERC000034.xml index 4331d37..2ed7797 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000034.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000034.xml @@ -1,43 +1,62 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + tissue_type ${row['tissue_type']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location 
(country and/or sea)']} + + sex ${row['sex']} + date of birth @@ -50,14 +69,18 @@ def attributetest(row, column): ${row['date of death']} + diagnosis ${row['diagnosis']} + + strain ${row['strain']} + tumor grading (OBI_0600002) diff --git a/ena_upload/templates/ENA_template_samples_ERC000035.xml b/ena_upload/templates/ENA_template_samples_ERC000035.xml index b07af40..167931f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000035.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000035.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -58,14 +69,18 @@ def attributetest(row, column): ${row['protocol']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + sampling time point diff --git a/ena_upload/templates/ENA_template_samples_ERC000036.xml b/ena_upload/templates/ENA_template_samples_ERC000036.xml index ffa958a..42495ab 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000036.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000036.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -40,24 +51,30 @@ def attributetest(row, column): ${row['nucleic acid amplification']} + investigation type ${row['investigation type']} + surveillance target ${row['surveillance target']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -136,10 +153,12 @@ def attributetest(row, column): ${row['receipt date']} + sewage type ${row['sewage type']} + temperature diff --git a/ena_upload/templates/ENA_template_samples_ERC000037.xml b/ena_upload/templates/ENA_template_samples_ERC000037.xml index 7bf0247..a4324bc 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000037.xml +++ 
b/ena_upload/templates/ENA_template_samples_ERC000037.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -59,10 +70,12 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + altitude @@ -70,20 +83,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -140,10 +159,12 @@ def attributetest(row, column): ${row['sample material processing']} + isolation and growth condition ${row['isolation and growth condition']} + propagation @@ -183,14 +204,18 @@ def attributetest(row, column): ${row['sampling time point']} + plant structure ${row['plant structure']} + + plant developmental stage ${row['plant developmental stage']} + sampled age @@ -408,10 +433,12 @@ def attributetest(row, column): ${row['soil pH']} + plant growth medium ${row['plant growth medium']} + rooting conditions diff --git a/ena_upload/templates/ENA_template_samples_ERC000038.xml b/ena_upload/templates/ENA_template_samples_ERC000038.xml index 2842286..e817e3b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000038.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000038.xml @@ -1,84 +1,119 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + + Depth ${row['Depth']} m + Sample Collection Device ${row['Sample Collection Device']} + Protocol Label ${row['Protocol Label']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale 
environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Station ${row['Sampling Station']} + + Sampling Platform ${row['Sampling Platform']} + storage conditions (fresh/frozen/other) @@ -103,38 +138,52 @@ def attributetest(row, column): ${row['Marine Region']} + seabed habitat ${row['seabed habitat']} + + age ${row['age']} + + aquaculture origin ${row['aquaculture origin']} + + shellfish total weight ${row['shellfish total weight']} g + + shellfish soft tissue weight ${row['shellfish soft tissue weight']} g + + shell length ${row['shell length']} g + + shell width ${row['shell width']} g + adductor weight diff --git a/ena_upload/templates/ENA_template_samples_ERC000039.xml b/ena_upload/templates/ENA_template_samples_ERC000039.xml index 4484ca4..2b7d23f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000039.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000039.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + dev_stage ${row['dev_stage']} + subject exposure @@ -56,14 +69,18 @@ def attributetest(row, column): ${row['country of travel']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -133,24 +150,30 @@ def attributetest(row, column): ${row['host scientific name']} + collector name ${row['collector name']} + + collecting institution ${row['collecting institution']} + sample storage conditions ${row['sample storage conditions']} + isolate ${row['isolate']} + strain diff --git a/ena_upload/templates/ENA_template_samples_ERC000040.xml b/ena_upload/templates/ENA_template_samples_ERC000040.xml index 99f30c3..6572dcf 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000040.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000040.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -34,18 +45,24 @@ def attributetest(row, column): ${row['Size Fraction Upper Threshold']} + target gene ${row['target gene']} + + target subfragment ${row['target subfragment']} + + pcr primers ${row['pcr primers']} + isolation_source @@ -58,10 +75,12 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + altitude @@ -69,20 +88,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -96,28 +121,36 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + sample collection device or method ${row['sample collection device or method']} + environmental_sample ${row['environmental_sample']} + Salinity diff --git a/ena_upload/templates/ENA_template_samples_ERC000041.xml b/ena_upload/templates/ENA_template_samples_ERC000041.xml index 4cd3f7e..e9f333f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000041.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000041.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -65,14 +76,18 @@ def attributetest(row, column): ${row['isolation_source']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or 
sea)']} + geographic location (region and locality) @@ -143,10 +158,12 @@ def attributetest(row, column): ${row['links to additional analysis']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000043.xml b/ena_upload/templates/ENA_template_samples_ERC000043.xml index f6348a0..22d0660 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000043.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000043.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -35,14 +46,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -135,10 +150,12 @@ def attributetest(row, column): ${row['culture_collection']} + strain ${row['strain']} + Further Details diff --git a/ena_upload/templates/ENA_template_samples_ERC000044.xml b/ena_upload/templates/ENA_template_samples_ERC000044.xml index 885f325..ce3857b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000044.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000044.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -52,18 +63,24 @@ def attributetest(row, column): ${row['country of travel']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -82,14 +99,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host scientific name ${row['host scientific name']} + + isolate ${row['isolate']} + sub_type diff --git a/ena_upload/templates/ENA_template_samples_ERC000045.xml b/ena_upload/templates/ENA_template_samples_ERC000045.xml index 
6a35949..0e8a8df 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000045.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000045.xml @@ -1,45 +1,62 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + isolation_source ${row['isolation_source']} + collected_by ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -52,10 +69,12 @@ def attributetest(row, column): ${row['receipt date']} + isolate ${row['isolate']} + serotype diff --git a/ena_upload/templates/ENA_template_samples_ERC000047.xml b/ena_upload/templates/ENA_template_samples_ERC000047.xml index 71018dc..d252cc9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000047.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000047.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -158,26 +171,32 @@ def attributetest(row, column): ${row['tRNA extraction software']} + completeness score ${row['completeness score']} % + + completeness software ${row['completeness software']} + completeness approach ${row['completeness approach']} + contamination score ${row['contamination score']} % + contamination screening input @@ -196,10 +215,12 @@ def attributetest(row, column): ${row['decontamination software']} + binning software ${row['binning software']} + reassembly post binning @@ -213,32 +234,42 @@ def attributetest(row, column): ${row['MAG coverage software']} + assembly quality ${row['assembly quality']} + + binning parameters ${row['binning parameters']} + + taxonomic identity marker ${row['taxonomic identity marker']} + taxonomic classification ${row['taxonomic classification']} + isolation_source ${row['isolation_source']} + + collection 
date ${row['collection date']} + altitude @@ -246,20 +277,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -273,18 +310,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -317,14 +360,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000048.xml b/ena_upload/templates/ENA_template_samples_ERC000048.xml index efa3968..3871629 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000048.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000048.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -220,38 +233,48 @@ def attributetest(row, column): ${row['taxonomic classification']} + sorting technology ${row['sorting technology']} + + single cell or viral particle lysis approach ${row['single cell or viral particle lysis approach']} + single cell or viral particle lysis kit protocol ${row['single cell or viral particle lysis kit protocol']} + WGA amplification approach ${row['WGA amplification approach']} + WGA amplification kit ${row['WGA amplification kit']} + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -259,20 +282,26 @@ def attributetest(row, column): m + 
geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -286,18 +315,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -330,14 +365,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000049.xml b/ena_upload/templates/ENA_template_samples_ERC000049.xml index 15b53e1..66ae624 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000049.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000049.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -122,10 +135,12 @@ def attributetest(row, column): ${row['number of standard tRNAs extracted']} + assembly software ${row['assembly software']} + feature prediction @@ -236,30 +251,42 @@ def attributetest(row, column): ${row['WGA amplification kit']} + source of UViGs ${row['source of UViGs']} + + virus enrichment approach ${row['virus enrichment approach']} + + predicted genome type ${row['predicted genome type']} + + predicted genome structure ${row['predicted genome structure']} + + detection type ${row['detection type']} + + viral identification software ${row['viral identification software']} + OTU classification approach @@ -290,18 +317,24 @@ def attributetest(row, column): ${row['host prediction estimated accuracy']} + UViG 
assembly quality ${row['UViG assembly quality']} + + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -309,20 +342,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -336,18 +375,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -380,14 +425,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000050.xml b/ena_upload/templates/ENA_template_samples_ERC000050.xml index 866ee78..0038aca 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000050.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000050.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -86,10 +99,12 @@ def attributetest(row, column): ${row['adapters']} + sequencing method ${row['sequencing method']} + relevant electronic resources @@ -108,10 +123,12 @@ def attributetest(row, column): ${row['number of standard tRNAs extracted']} + assembly software ${row['assembly software']} + 16S recovered @@ -174,10 +191,12 @@ def attributetest(row, column): ${row['decontamination software']} + binning software ${row['binning software']} + reassembly post binning @@ -197,14 +216,18 @@ def attributetest(row, column): 
${row['assembly quality']} + investigation type ${row['investigation type']} + + binning parameters ${row['binning parameters']} + taxonomic identity marker @@ -217,14 +240,18 @@ def attributetest(row, column): ${row['taxonomic classification']} + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -232,20 +259,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + depth @@ -253,18 +286,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -303,14 +342,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + relationship to oxygen diff --git a/ena_upload/templates/ENA_template_samples_ERC000051.xml b/ena_upload/templates/ENA_template_samples_ERC000051.xml index 149e4f3..68608d9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000051.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000051.xml @@ -1,97 +1,134 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + sample origin ${row['sample origin']} + + sample taxon name ${row['sample taxon name']} + + sample material ${row['sample material']} + + engrafted tumor sample passage ${row['engrafted tumor sample passage']} + engrafted tumor collection site ${row['engrafted tumor collection site']} + 
patient tumor site of collection ${row['patient tumor site of collection']} + + patient tumor type ${row['patient tumor type']} + + sample unique ID ${row['sample unique ID']} + engraftment host strain name ${row['engraftment host strain name']} + patient age at collection of tumor ${row['patient age at collection of tumor']} + + patient tumor diagnosis at time of collection ${row['patient tumor diagnosis at time of collection']} + + patient tumor primary site ${row['patient tumor primary site']} + was the PDX model humanised? ${row['was the PDX model humanised?']} + patient sex ${row['patient sex']} + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000052.xml b/ena_upload/templates/ENA_template_samples_ERC000052.xml index c28d696..f5eb87f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000052.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000052.xml @@ -1,36 +1,51 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + + sample volume or weight for DNA extraction ${row['sample volume or weight for DNA extraction']} ng + nucleic acid extraction @@ -49,22 +64,30 @@ def attributetest(row, column): ${row['adapters']} + sequencing method ${row['sequencing method']} + + reference host genome for decontamination ${row['reference host genome for decontamination']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -92,11 +115,13 @@ def attributetest(row, column): years + trial timepoint ${row['trial timepoint']} years + sample storage temperature @@ -128,22 +153,30 @@ def attributetest(row, column): ${row['host disease status']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + + host taxid ${row['host taxid']} + + host body site ${row['host body site']} 
+ host length @@ -189,10 +222,12 @@ def attributetest(row, column): ${row['host diet']} + host diet treatment ${row['host diet treatment']} + host diet treatment concentration diff --git a/ena_upload/templates/ENA_template_samples_ERC000053.xml b/ena_upload/templates/ENA_template_samples_ERC000053.xml index 0a21e87..f2c56bf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000053.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000053.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -50,18 +61,24 @@ def attributetest(row, column): DD + organism part ${row['organism part']} + + lifestage ${row['lifestage']} + + project name ${row['project name']} + tolid @@ -74,18 +91,24 @@ def attributetest(row, column): ${row['barcoding center']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -100,10 +123,12 @@ def attributetest(row, column): DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + identified_by @@ -124,10 +149,12 @@ def attributetest(row, column): m + habitat ${row['habitat']} + identifier_affiliation @@ -196,10 +223,12 @@ def attributetest(row, column): ${row['sample coordinator affiliation']} + sex ${row['sex']} + relationship @@ -212,10 +241,12 @@ def attributetest(row, column): ${row['symbiont']} + collecting institution ${row['collecting institution']} + GAL diff --git a/ena_upload/templates/ENA_template_studies.xml b/ena_upload/templates/ENA_template_studies.xml index 5c46625..374e743 100755 --- a/ena_upload/templates/ENA_template_studies.xml +++ b/ena_upload/templates/ENA_template_studies.xml @@ -1,22 +1,32 @@ - + + ${row.title} + + + + ${row.study_abstract} + ${row.center_project_name} diff --git 
a/ena_upload/templates/jinja_templates/ENA_template_samples.xml b/ena_upload/templates/jinja_templates/ENA_template_samples.xml index ee30272..8b8e978 100755 --- a/ena_upload/templates/jinja_templates/ENA_template_samples.xml +++ b/ena_upload/templates/jinja_templates/ENA_template_samples.xml @@ -1,38 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + {%- for key, value in attributes.items() %} - {%- if value['cardinality'].lower() == 'mandatory' %} - - {{key}} - ${row['{{key}}']} - {%- if value['units'] %} - {{value['units']}} - {%- endif %} - - {%- else %} + {%- if value['cardinality'].lower() != 'mandatory' %} + {%- else %} + + {%- endif %} {{key}} ${row['{{key}}']} @@ -41,7 +47,6 @@ def attributetest(row, column): {%- endif %} - {%- endif %} {%- endfor %} SUBMISSION_TOOL diff --git a/var/xml_converter.py b/var/xml_converter.py index 500ead2..7016b4b 100644 --- a/var/xml_converter.py +++ b/var/xml_converter.py @@ -74,8 +74,8 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # Saving new xml template file - with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "w") as fh: - fh.write(output_from_parsed_template) + with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: + fh.write(output_from_parsed_template.encode('utf-8')) if __name__ == "__main__":