From e0bcdcc61a3f5d5df8d78e875f668cf9211e22fb Mon Sep 17 00:00:00 2001 From: KatharinaHoff Date: Sat, 30 Mar 2024 16:08:09 +0100 Subject: [PATCH] updated references --- README.md | 70 ++++++++++++++++++++++++++++------------------- scripts/braker.pl | 2 ++ 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 82c32d3..c6b3914 100644 --- a/README.md +++ b/README.md @@ -991,7 +991,7 @@ By default, GeneMark-ES/ET/EP/ETP uses a probability of 0.001 for predicting the Use a species-specific lineage, e.g. arthropoda_odb10 for an arthropod. BRAKER does not support auto-typing of the lineage. -Specifying a BUSCO-lineage invokes two changes in BRAKER: +Specifying a BUSCO-lineage invokes two changes in BRAKER [R28](#f28): 1. BRAKER will run compleasm with the specified lineage in genome mode and convert the detected BUSCO matches into hints for AUGUSTUS. This may increase the number of BUSCOs in the augustus.hints.gtf file slightly. @@ -1301,15 +1301,47 @@ Since BRAKER is a pipeline that calls several Bioinformatics tools, publication - Always cite: + - Stanke, M., Diekhans, M., Baertsch, R. and Haussler, D. (2008). Using native and syntenically mapped cDNA alignments to improve de novo gene finding. Bioinformatics, doi: 10.1093/bioinformatics/btn013. + + - Stanke. M., Schöffmann, O., Morgenstern, B. and Waack, S. (2006). Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources. BMC Bioinformatics 7, 62. + +- If you provided any kind of evidence for BRAKER, cite: + + - Gabriel, L., Bruna, T., Hoff, K. J., Borodovsky, M., Stanke, M. (2021) TSEBRA: transcript selector for BRAKER. BMC Bioinformatics 22, 1-12. + +- If you provided both short read RNA-Seq evidence and a large database of proteins, cite: + + - Gabriel, L., Bruna, T., Hoff, K. J., Ebel, M., Lomsadze, A., Borodovsky, M., Stanke, M. (2023). 
BRAKER3: Fully Automated Genome Annotation Using RNA-Seq and Protein Evidence with GeneMark-ETP, AUGUSTUS and TSEBRA. bioRxiv, doi: 10.1101/2023.06.10.544449. + + - Bruna, T., Lomsadze, A., Borodovsky, M. (2023). GeneMark-ETP: Automatic Gene Finding in Eukaryotic Genomes in Consistence with Extrinsic Data. bioRxiv, doi: 10.1101/2023.01.13.524024. + + - Kovaka, S., Zimin, A. V., Pertea, G. M., Razaghi, R., Salzberg, S. L., & Pertea, M. (2019). Transcriptome assembly from long-read RNA-seq alignments with StringTie2. Genome biology, 20(1):1-13. + + - Pertea, G., & Pertea, M. (2020). GFF utilities: GffRead and GffCompare. F1000Research, 9. + + - Quinlan, A. R. (2014). BEDTools: the Swiss‐army tool for genome feature analysis. Current protocols in bioinformatics, 47(1):11-12. + +- If the only source of evidence for BRAKER was a large database of protein sequences, cite: + - Bruna, T., Hoff, K.J., Lomsadze, A., Stanke, M., & Borodovsky, M. (2021). BRAKER2: Automatic Eukaryotic Genome Annotation with GeneMark-EP+ and AUGUSTUS Supported by a Protein Database. NAR Genomics and Bioinformatics 3(1):lqaa108, doi: 10.1093/nargab/lqaa108. - - Hoff, K.J., Lomsadze, A., Borodovsky, M. and Stanke, M. (2019). Whole-Genome Annotation with BRAKER. Methods Mol Biol. 1962:65-95, doi: 10.1007/978-1-4939-9173-0_5. +- If the only source of evidence for BRAKER was RNA-Seq data, cite: - Hoff, K.J., Lange, S., Lomsadze, A., Borodovsky, M. and Stanke, M. (2016). BRAKER1: unsupervised RNA-Seq-based genome annotation with GeneMark-ET and AUGUSTUS. Bioinformatics, 32(5):767-769. - - Stanke, M., Diekhans, M., Baertsch, R. and Haussler, D. (2008). Using native and syntenically mapped cDNA alignments to improve de novo gene finding. Bioinformatics, doi: 10.1093/bioinformatics/btn013. + - Lomsadze, A., Paul D.B., and Mark B. (2014) Integration of Mapped Rna-Seq Reads into Automatic Training of Eukaryotic Gene Finding Algorithm. 
Nucleic Acids Research 42(15): e119--e119 - - Stanke. M., Schöffmann, O., Morgenstern, B. and Waack, S. (2006). Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources. BMC Bioinformatics 7, 62. +- If you called BRAKER3 with an IsoSeq BAM file, or if you invoked the `--busco_lineage` option, cite: + + - Bruna, T., Gabriel, L., Hoff, K. J. (2024). Navigating Eukaryotic Genome Annotation Pipelines: A Route Map to BRAKER, Galba, and TSEBRA. arXiv, doi: 10.48550/arXiv.2403.19416 . + +- If you called BRAKER with the `--busco_lineage` option, in addition, cite: + + - Simão, F. A., Waterhouse, R. M., Ioannidis, P., Kriventseva, E. V., & Zdobnov, E. M. (2015). BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs. Bioinformatics, 31(19), 3210-3212. + + - Li, H. (2023). Protein-to-genome alignment with miniprot. Bioinformatics, 39(1), btad014. + + - Huang, N., & Li, H. (2023). compleasm: a faster and more accurate reimplementation of BUSCO. Bioinformatics, 39(10), btad595. - If any kind of AUGUSTUS training was performed by BRAKER, check carefully whether you configured BRAKER to use NCBI BLAST or DIAMOND. One of them was used to filter out redundant training gene structures. @@ -1329,7 +1361,9 @@ Since BRAKER is a pipeline that calls several Bioinformatics tools, publication - Ter-Hovhannisyan, V., Lomsadze, A., Chernoff, Y.O. and Borodovsky, M. (2008). Gene prediction in novel fungal genomes using an ab initio algorithm with unsupervised training. Genome research, pages gr--081612, 2008. -- If BRAKER was run with proteins, please cite all tools that are used by the ProtHint pipeline to generate hints: + - Hoff, K.J., Lomsadze, A., Borodovsky, M. and Stanke, M. (2019). Whole-Genome Annotation with BRAKER. Methods Mol Biol. 1962:65-95, doi: 10.1007/978-1-4939-9173-0_5. 
+ +- If BRAKER was run with proteins as source of evidence, please cite all tools that are used by the ProtHint pipeline to generate hints: - Bruna, T., Lomsadze, A., & Borodovsky, M. (2020). GeneMark-EP+: eukaryotic gene prediction with self-training in the space of genes and proteins. NAR Genomics and Bioinformatics, 2(2), lqaa026. @@ -1347,10 +1381,6 @@ Since BRAKER is a pipeline that calls several Bioinformatics tools, publication - Barnett, D.W., Garrison, E.K., Quinlan, A.R., Strömberg, M.P. and Marth G.T. (2011). BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics, 27(12):1691-2 -- If BRAKER used RNA-Seq alignments for generating a training gene set, cite GeneMark-ET: - - - Lomsadze, A., Paul D.B., and Mark B. (2014) Integration of Mapped Rna-Seq Reads into Automatic Training of Eukaryotic Gene Finding Algorithm. Nucleic Acids Research 42(15): e119--e119 - - If BRAKER downloaded RNA-Seq libraries from SRA using their IDs, cite SRA, SRA toolkit, and HISAT2: - Leinonen, R., Sugawara, H., Shumway, M., & International Nucleotide Sequence Database Collaboration. (2010). The sequence read archive. Nucleic acids research, 39(suppl_1), D19-D21. @@ -1363,19 +1393,9 @@ Since BRAKER is a pipeline that calls several Bioinformatics tools, publication - Kim, D., Paggi, J. M., Park, C., Bennett, C., & Salzberg, S. L. (2019). Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nature biotechnology, 37(8):907-915. -- If BRAKER was run with RNA-Seq data and proteins, please cite GeneMark-ETP and all tools that it uses: - - - ToDo add reference to ETP paper, when it is available - - - Kovaka, S., Zimin, A. V., Pertea, G. M., Razaghi, R., Salzberg, S. L., & Pertea, M. (2019). Transcriptome assembly from long-read RNA-seq alignments with StringTie2. Genome biology, 20(1):1-13. - - - Pertea, G., & Pertea, M. (2020). GFF utilities: GffRead and GffCompare. F1000Research, 9. - - - Quinlan, A. R. (2014). 
BEDTools: the Swiss‐army tool for genome feature analysis. Current protocols in bioinformatics, 47(1):11-12. - - If BRAKER called MakeHub for creating a track data hub for visualization of BRAKER results with the UCSC Genome Browser, cite: - - Hoff, K.J. (2019) MakeHub: Fully automated generation of UCSC Genome Browser Assembly Hubs. Genomics, Proteomics and Bioinformatics, in press 2020, preprint on bioarXive, doi: . + - Hoff, K. J. (2019). MakeHub: fully automated generation of UCSC genome browser assembly hubs. Genomics, Proteomics and Bioinformatics, 17(5), 546-549. - If BRAKER called GUSHR for generating UTRs, cite: @@ -1385,14 +1405,6 @@ Since BRAKER is a pipeline that calls several Bioinformatics tools, publication - Keilwagen, J., Hartung, F., Paulini, M., Twardziok, S.O., Grau, J. (2018) Combining RNA-seq data and homology-based gene prediction for plants, animals and fungi. BMC Bioinformatics, 19(1):189. -- If BRAKER was called with a BUSCO partition, cite: - - - Simão, F. A., Waterhouse, R. M., Ioannidis, P., Kriventseva, E. V., & Zdobnov, E. M. (2015). BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs. Bioinformatics, 31(19), 3210-3212. - - - Li, H. (2023). Protein-to-genome alignment with miniprot. Bioinformatics, 39(1), btad014. - - - Huang, N., & Li, H. (2023). compleasm: a faster and more accurate reimplementation of BUSCO. Bioinformatics, 39(10), btad595. - License ======= @@ -1470,3 +1482,5 @@ Oxford University Press: 2078--9.[↩](#a6) [R26] Pertea, G., & Pertea, M. (2020). GFF utilities: GffRead and GffCompare. F1000Research, 9.[↩](#a26) [R27] Huang, N., & Li, H. (2023). compleasm: a faster and more accurate reimplementation of BUSCO. Bioinformatics, 39(10), btad595.[↩](#a27) + +[R28] Bruna, T., Gabriel, L. & Hoff, K. J. (2024). Navigating Eukaryotic Genome Annotation Pipelines: A Route Map to BRAKER, Galba, and TSEBRA. 
arXiv, https://doi.org/10.48550/arXiv.2403.19416 .[↩](#a28) \ No newline at end of file diff --git a/scripts/braker.pl b/scripts/braker.pl index 1626b36..a677235 100755 --- a/scripts/braker.pl +++ b/scripts/braker.pl @@ -812,6 +812,7 @@ $pubs{'busco'} = "\nSimao, F. A., Waterhouse, R. M., Ioannidis, P., Kriventseva, E. V., & Zdobnov, E. M. (2015). BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs. Bioinformatics, 31(19), 3210-3212.\n"; $pubs{'miniprot'} = "\nLi, H. (2023). Protein-to-genome alignment with miniprot. Bioinformatics, 30(1):btad014.\n"; $pubs{'compleasm'} = "\nHuang, N., & Li, H. (2023). compleasm: a faster and more accurate reimplementation of BUSCO. Bioinformatics 39(10):btad595.\n"; +$pubs{'braker-c-i'} = "\nBruna, T., Gabriel, L., & Hoff, K. J. (2024). Navigating Eukaryotic Genome Annotation Pipelines: A Route Map to BRAKER, Galba, and TSEBRA. arXiv preprint at https://doi.org/10.48550/arXiv.2403.19416.\n"; $pubs{'makehub'} = "\nHoff, K. J. (2019). MakeHub: fully automated generation of UCSC genome browser assembly hubs. Genomics, Proteomics and Bioinformatics, 17(5), 546-549.\n"; @@ -4527,6 +4528,7 @@ sub make_compleasm_hints { print CITE $pubs{'busco'}; $pubs{'busco'} = ""; print CITE $pubs{'miniprot'}; $pubs{'miniprot'} = ""; print CITE $pubs{'compleasm'}; $pubs{'compleasm'} = ""; + print CITE $pubs{'braker-c-i'}; $pubs{'braker-c-i'} = ""; print LOG "\# " . (localtime) . ": Running compleasm and converting the output to hints\n" if ($v > 2);