forked from galaxyproject/training-material
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request galaxyproject#4876 from rlibouba/add_training_geno…
…me_annotation_helixer Add Helixer training
- Loading branch information
Showing
9 changed files
with
768 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
topics/genome-annotation/tutorials/helixer/data-library.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
--- | ||
destination: | ||
type: library | ||
name: GTN - Material | ||
description: Galaxy Training Network Material | ||
synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org | ||
items: | ||
- name: Genome Annotation | ||
description: "This tutorial uses Helixer to annotate the genome sequence of a small eukaryote: Mucor mucedo (a fungal plant pathogen)." | ||
items: | ||
- name: Genome annotation with Helixer | ||
items: | ||
- name: 'DOI: 10.5281/zenodo.7867921' | ||
description: latest | ||
items: | ||
- url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/genome_masked.fasta | ||
src: url | ||
ext: fasta | ||
info: https://zenodo.org/record/7867921 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
--- | ||
layout: faq-page | ||
--- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
@article {Holst2023.02.06.527280, | ||
author = {Felix Holst and Anthony Bolger and Christopher G{\"u}nther and Janina Ma{\ss} and Sebastian Triesch and Felicitas Kindel and Niklas Kiel and Nima Saadat and Oliver Ebenh{\"o}h and Bj{\"o}rn Usadel and Rainer Schwacke and Marie Bolger and Andreas P.M. Weber and Alisandra K. Denton}, | ||
title = {Helixer{\textendash}de novo Prediction of Primary Eukaryotic Gene Models Combining Deep Learning and a Hidden Markov Model}, | ||
elocation-id = {2023.02.06.527280}, | ||
year = {2023}, | ||
doi = {10.1101/2023.02.06.527280}, | ||
publisher = {Cold Spring Harbor Laboratory}, | ||
abstract = {Gene structural annotation is a critical step in obtaining biological knowledge from genome sequences yet remains a major challenge in genomics projects. Current de novo Hidden Markov Models are limited in their capacity to model biological complexity; while current pipelines are resource-intensive and their results vary in quality with the available extrinsic data. Here, we build on our previous work in applying Deep Learning to gene calling to make a fully applicable, fast and user friendly tool for predicting primary gene models from DNA sequence alone. The quality is state-of-the-art, with predictions scoring closer by most measures to the references than to predictions from other de novo tools. Helixer{\textquoteright}s predictions can be used as is or could be integrated in pipelines to boost quality further. Moreover, there is substantial potential for further improvements and advancements in gene calling with Deep Learning. Helixer is open source and available at https://github.com/weberlab-hhu/Helixer. A web interface is available at https://www.plabipd.de/helixer_main.html. Competing Interest Statement: The authors have declared no competing interest.}, | ||
URL = {https://www.biorxiv.org/content/early/2023/02/09/2023.02.06.527280}, | ||
eprint = {https://www.biorxiv.org/content/early/2023/02/09/2023.02.06.527280.full.pdf}, | ||
journal = {bioRxiv} | ||
} | ||
|
||
@article{10.1093/bioinformatics/btad595, | ||
author = {Huang, Neng and Li, Heng}, | ||
title = "{compleasm: a faster and more accurate reimplementation of BUSCO}", | ||
journal = {Bioinformatics}, | ||
volume = {39}, | ||
number = {10}, | ||
pages = {btad595}, | ||
year = {2023}, | ||
month = {09}, | ||
abstract = "{Evaluating the gene completeness is critical to measuring the quality of a genome assembly. An incomplete assembly can lead to errors in gene predictions, annotation, and other downstream analyses. Benchmarking Universal Single-Copy Orthologs (BUSCO) is a widely used tool for assessing the completeness of genome assembly by testing the presence of a set of single-copy orthologs conserved across a wide range of taxa. However, BUSCO is slow particularly for large genome assemblies. It is cumbersome to apply BUSCO to a large number of assemblies. Here, we present compleasm, an efficient tool for assessing the completeness of genome assemblies. Compleasm utilizes the miniprot protein-to-genome aligner and the conserved orthologous genes from BUSCO. It is 14 times faster than BUSCO for human assemblies and reports a more accurate completeness of 99.6\% than BUSCO’s 95.7\%, which is in close agreement with the annotation completeness of 99.5\% for T2T-CHM13. https://github.com/huangnengCSU/compleasm.}", | ||
issn = {1367-4811}, | ||
doi = {10.1093/bioinformatics/btad595}, | ||
url = {https://doi.org/10.1093/bioinformatics/btad595}, | ||
eprint = {https://academic.oup.com/bioinformatics/article-pdf/39/10/btad595/51913100/btad595.pdf}, | ||
} | ||
|
||
@article{10.1093/bioinformatics/btv351, | ||
author = {Simão, Felipe A. and Waterhouse, Robert M. and Ioannidis, Panagiotis and Kriventseva, Evgenia V. and Zdobnov, Evgeny M.}, | ||
title = "{BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs}", | ||
journal = {Bioinformatics}, | ||
volume = {31}, | ||
number = {19}, | ||
pages = {3210-3212}, | ||
year = {2015}, | ||
month = {06}, | ||
abstract = "{Motivation: Genomics has revolutionized biological research, but quality assessment of the resulting assembled sequences is complicated and remains mostly limited to technical measures like N50. Results: We propose a measure for quantitative assessment of genome assembly and annotation completeness based on evolutionarily informed expectations of gene content. We implemented the assessment procedure in open-source software, with sets of Benchmarking Universal Single-Copy Orthologs, named BUSCO. Availability and implementation: Software implemented in Python and datasets available for download from http://busco.ezlab.org. Contact: [email protected] Supplementary information: Supplementary data are available at Bioinformatics online.}", | ||
issn = {1367-4803}, | ||
doi = {10.1093/bioinformatics/btv351}, | ||
url = {https://doi.org/10.1093/bioinformatics/btv351}, | ||
eprint = {https://academic.oup.com/bioinformatics/article-pdf/31/19/3210/49035194/bioinformatics\_31\_19\_3210.pdf}, | ||
} | ||
|
||
|
Oops, something went wrong.