Skip to content

Commit

Permalink
update dada2 reference data
Browse files Browse the repository at this point in the history
  • Loading branch information
bernt-matthias committed Nov 27, 2024
1 parent 685c4b0 commit b417e38
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
14 changes: 13 additions & 1 deletion data_managers/data_manager_dada2/data_manager/dada2_fetcher.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.1.1" profile="23.0">
<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.1.2" profile="23.0">
<description>Download reference databases</description>
<requirements>
<requirement type="package" version="3.7">python</requirement>
Expand All @@ -16,6 +16,7 @@
<param name="db_select" type="select" label="Taxonomic database">
<option value="silva">Silva</option>
<option value="rdp">RDP</option>
<option value="greengenes2">GreenGenes2</option>
<option value="greengenes">GreenGenes</option>
<option value="unite">UNITE Fungi: General Fasta</option>
<!-- UNITE Eukaryotes not yet supported https://github.com/benjjneb/dada2/issues/702 -->
Expand All @@ -27,17 +28,25 @@
</param>
<when value="silva">
<param name="version_select" type="select" label="Database version">
<option value="138.2">138.2</option>
<option value="138.1">138.1</option>
<option value="138">138</option>
<option value="132">132</option>
<option value="128">128</option>
</param>
</when>
<when value="rdp">
<param name="version_select" type="select" label="Database version">
<option value="19">19</option>
<option value="16">16</option>
<option value="14">14</option>
</param>
</when>
<when value="greengenes2">
<param name="version_select" type="select" label="Database version">
<option value="2024.09">2024.09</option>
</param>
</when>
<when value="greengenes">
<param name="version_select" type="select" label="Database version">
<option value="13.84">13.84</option>
Expand All @@ -51,11 +60,14 @@
</when>
<when value="RefSeq_RDP">
<param name="version_select" type="select" label="Database version">
<option value="2023_12">12/2023</option>
<option value="2018_05">05/2018</option>
</param>
</when>
<when value="gtdb">
<param name="version_select" type="select" label="Database version">
<option value="2024_10">10/2024</option>
<option value="2023_12">12/2023</option>
<option value="2018_11">11/2018</option>
</param>
</when>
Expand Down
23 changes: 23 additions & 0 deletions data_managers/data_manager_dada2/data_manager/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,51 @@
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Human-readable labels for the supported reference databases.
# Keys appear to follow the pattern "<database>_<version>" and to mirror the
# keys of the URL tables defined further down in this module.
FILE2NAME = {
    "silva_138.2": "Silva version 138.2",
    "silva_138.1": "Silva version 138.1",
    "silva_138": "Silva version 138",
    "silva_132": "Silva version 132",
    "silva_128": "Silva version 128",
    "rdp_19": "RDP trainset 19",
    "rdp_16": "RDP trainset 16",
    "rdp_14": "RDP trainset 14",
    "greengenes_13.84": "GreenGenes version 13.84",
    # No trailing whitespace in the label, consistent with all other entries.
    "greengenes2_2024.09": "GreenGenes2 release 2024.09",
    "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
    "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",

    # NOTE(review): the v4.5 / v4.3 entries below are disabled, yet the tool
    # form (dada2_fetcher.xml) lists gtdb 10/2024 and 12/2023 as well as
    # RefSeq_RDP 12/2023 as selectable versions. Verify that selecting them
    # cannot fail on a missing key before releasing.

    # # v4.5 https://zenodo.org/records/13984843 (contains no update on RefSeq_RDP)
    # "gtdb_2024_10": "GTDB: Genome Taxonomy Database 220 (Bacteria &amp; Archaea) (10/2024)",

    # # v4.3 https://zenodo.org/records/10403693
    # "gtdb_2023_12": "GTDB: Genome Taxonomy Database 214 (Bacteria &amp; Archaea) (12/2023)",
    # "RefSeq_RDP_2023_12": "NCBI RefSeq 16S rRNA database supplemented by RDP (12/2023)",

    # v1 https://zenodo.org/records/2541239
    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
    "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria &amp; Archaea) (11/2018)",

    "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
    "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
    "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1"
}

FILE2TAXURL = {
"silva_138.2": "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toGenus_trainset.fa.gz?download=1", # using the one wo species info https://github.com/benjjneb/dada2/issues/2053#issuecomment-2478617791
"silva_138.1": "https://zenodo.org/records/4587955/files/silva_nr99_v138.1_train_set.fa.gz?download=1", # - " -
"silva_138": "https://zenodo.org/record/3731176/files/silva_nr_v138_train_set.fa.gz?download=1",
"silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
"silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
"rdp_19": "https://zenodo.org/records/14168771/files/rdp_19_toGenus_trainset.fa.gz?download=1",
"rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
"rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
"unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
"unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
"greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
"greengenes2_2024.09": "https://zenodo.org/records/14169078/files/gg2_2024_09_toGenus_trainset.fa.gz?download=1",
# "gtdb_220_4.5": "https://zenodo.org/records/13984843/files/GTDB_bac120_arc53_ssu_r220_genus.fa.gz?download=1",
# "gtdb_214_4.4": "https://zenodo.org/records/10403693/files/GTDB_bac120_arc53_ssu_r214_genus.fa.gz?download=1",
# "RefSeq_RDP_2023_12": "https://zenodo.org/records/10403693/files/RefSeq_16S_6-11-20_RDPv16_Genus.fa.gz?download=1",
"RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
"gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
"hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
Expand All @@ -43,6 +64,8 @@
}

FILE2SPECIESURL = {
"silva_138.2": "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz?download=1",
"silva_138.1": "https://zenodo.org/records/4587955/files/silva_species_assignment_v138.1.fa.gz?download=1",
"silva_138": "https://zenodo.org/record/3731176/files/silva_species_assignment_v138.fa.gz?download=1",
"silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
"silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
Expand Down

0 comments on commit b417e38

Please sign in to comment.