-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add star genomegenerate component (#58)
* Add star genomegenerate component * Update changelog * Rename component * Update test * Update CHANGELOG.md --------- Co-authored-by: Robrecht Cannoodt <[email protected]>
- Loading branch information
1 parent
b68f1ed
commit 8191140
Showing
5 changed files
with
1,146 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# Component metadata: identity, upstream links, citation, and the
# executable that must be available on the PATH at run time.
name: star_genome_generate
namespace: star
description: |
  Create index for STAR
keywords: [genome, index, align]
links:
  repository: https://github.com/alexdobin/STAR
  documentation: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
references:
  doi: 10.1093/bioinformatics/bts635
license: MIT
requirements:
  commands: [STAR]

# Command-line interface of the component. Argument names match the STAR
# option names one-to-one; `--index` is the output directory argument.
argument_groups:
  - name: "Input"
    arguments:
      # NOTE(review): the description says "separated by spaces", but
      # multiple_sep below is ";" — confirm which wording users should see.
      - name: "--genomeFastaFiles"
        type: file
        description: |
          Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.
        required: true
        multiple: true
        multiple_sep: ";"
      - name: "--sjdbGTFfile"
        type: file
        description: Path to the GTF file with annotations
      - name: --sjdbOverhang
        type: integer
        description: Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)
        example: 100
      - name: --sjdbGTFchrPrefix
        type: string
        description: Prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSEMBL annotations with UCSC genomes)
      - name: --sjdbGTFfeatureExon
        type: string
        description: Feature type in GTF file to be used as exons for building transcripts
        example: exon
      - name: --sjdbGTFtagExonParentTranscript
        type: string
        description: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files)
        example: transcript_id
      - name: --sjdbGTFtagExonParentGene
        type: string
        description: GTF attribute name for parent gene ID (default "gene_id" works for GTF files)
        example: gene_id
      - name: --sjdbGTFtagExonParentGeneName
        type: string
        description: GTF attribute name for parent gene name
        example: gene_name
        multiple: true
        multiple_sep: ";"
      - name: --sjdbGTFtagExonParentGeneType
        type: string
        description: GTF attribute name for parent gene type
        example:
          - gene_type
          - gene_biotype
        multiple: true
        multiple_sep: ";"
      - name: --limitGenomeGenerateRAM
        type: long
        description: Maximum available RAM (bytes) for genome generation
        example: '31000000000'
      - name: --genomeSAindexNbases
        type: integer
        description: Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1).
        example: 14
      - name: --genomeChrBinNbits
        type: integer
        description: Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]).
        example: 18
      - name: --genomeSAsparseD
        type: integer
        min: 0
        example: 1
        description: Suffix array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction.
      - name: --genomeSuffixLengthMax
        type: integer
        description: Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length.
        example: -1
      - name: --genomeTransformType
        type: string
        description: |
          Type of genome transformation
          None ... no transformation
          Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele)
          Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)
        # Quoted so the literal string "None" is never mistaken for a null.
        example: "None"
      - name: --genomeTransformVCF
        type: file
        description: path to VCF file for genome transformation

  - name: "Output"
    arguments:
      - name: "--index"
        type: file
        direction: output
        description: STAR index directory.
        default: STAR_index
        required: true

# Script that implements the component.
resources:
  - type: bash_script
    path: script.sh

# Script used to test the component.
test_resources:
  - type: bash_script
    path: test.sh

# Docker engine: compiles a static STAR binary from the tagged source
# archive on top of ubuntu:22.04, then records the installed version.
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      # setup derived from https://github.com/alexdobin/STAR/blob/master/extras/docker/Dockerfile
      - type: docker
        env:
          - STAR_VERSION 2.7.11b
          # ca-certificates is included so wget can verify the TLS
          # certificate of github.com instead of skipping the check with
          # --no-check-certificate. It is purged again with the other
          # build-only packages at the end of the run step.
          - PACKAGES gcc g++ make wget ca-certificates zlib1g-dev unzip xxd
        run: |
          apt-get update && \
          apt-get install -y --no-install-recommends ${PACKAGES} && \
          cd /tmp && \
          wget https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
          unzip ${STAR_VERSION}.zip && \
          cd STAR-${STAR_VERSION}/source && \
          make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
          cp STAR /usr/local/bin && \
          cd / && \
          rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
          apt-get --purge autoremove -y ${PACKAGES} && \
          apt-get clean
      # Write the STAR version to /var/software_versions.txt as `star: "<version>"`.
      - type: docker
        run: |
          STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt
# Runners this component is built for.
runners:
  - type: executable
  - type: nextflow
Oops, something went wrong.