From 1ed80fb64bf180b30551305da59018942f9d11ca Mon Sep 17 00:00:00 2001 From: Emma Rousseau Date: Sat, 5 Oct 2024 17:21:51 +0200 Subject: [PATCH] modify argument formatting, container setup --- src/snpeff/config.vsh.yaml | 155 ++++++++++++++++++------------------- src/snpeff/script.sh | 4 +- src/snpeff/test.sh | 28 ++++--- 3 files changed, 92 insertions(+), 95 deletions(-) diff --git a/src/snpeff/config.vsh.yaml b/src/snpeff/config.vsh.yaml index 8e0d015b..13492392 100644 --- a/src/snpeff/config.vsh.yaml +++ b/src/snpeff/config.vsh.yaml @@ -14,269 +14,267 @@ license: MIT argument_groups: - name: Inputs arguments: - - name: -input + - name: --input type: file description: Input variants file. example: test.vcf - direction: input required: true - - name: -genome_version + - name: --genome_version type: string description: Reference genome version. example: GRCh37.75 - direction: input required: true - name: Outputs arguments: - - name: -output + - name: --output type: file description: The output file. example: out.vcf direction: output required: true - - name: -summary + - name: --summary type: file description: Summary file directory. example: summary_dir direction: output - required: false - - name: -genes + - name: --genes type: file description: Txt file directory. example: genes_dir direction: output - required: false - name: Options arguments: - - name: -chr + - name: --chr type: string description: | Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output. - - name: -classic + - name: --classic type: boolean_true description: Use old style annotations instead of Sequence Ontology and Hgvs. - - name: -csv_stats + - name: --csv_stats type: file description: Create CSV summary file. - - name: -download + - name: --download type: boolean_true description: Download reference genome if not available. - - name: -input_format + - name: --input_format alternatives: [-i] type: string description: | Input format [ vcf, bed ]. Default: VCF. - - name: -file_list + example: "VCF" + - name: --file_list type: boolean_true description: Input actually contains a list of files to process. - - name: -output_format + - name: --output_format alternatives: [-o] type: string description: | Output format [ vcf, gatk, bed, bedAnn ]. Default: VCF. - - name: -stats - alternatives: [-s, -htmlStats] + example: "VCF" + - name: --stats + alternatives: [-s, --htmlStats] type: boolean_true description: Create HTML summary file. - - name: -no_stats + - name: --no_stats type: boolean_false description: Do not create stats (summary) file. - name: Results filter options arguments: - - name: -fi - alternatives: [-filterInterval] + - name: --fi + alternatives: [--filterInterval] type: file description: | Only analyze changes that intersect with the intervals specified in this file. This option can be used several times. - - name: -no_downstream + - name: --no_downstream type: boolean_false description: Do not show DOWNSTREAM changes - - name: -no_intergenic + - name: --no_intergenic type: boolean_false description: Do not show INTERGENIC changes. - - name: -no_intron + - name: --no_intron type: boolean_false description: Do not show INTRON changes. - - name: -no_upstream + - name: --no_upstream type: boolean_false description: Do not show UPSTREAM changes. - - name: -no_utr + - name: --no_utr type: boolean_false description: Do not show 5_PRIME_UTR or 3_PRIME_UTR changes. - - name: -no + - name: --no type: string description: | Do not show 'EffectType'. This option can be used several times. - name: Annotations options arguments: - - name: -cancer + - name: --cancer type: boolean_true description: Perform 'cancer' comparisons (Somatic vs Germline). - - name: -cancer_samples + - name: --cancer_samples type: file description: Two column TXT file defining 'original \t derived' samples. - - name: -fastaprot + - name: --fastaprot type: file description: | Create an output file containing the resulting protein sequences. - - name: -format_eff + - name: --format_eff type: boolean_true description: | Use 'EFF' field compatible with older versions (instead of 'ANN'). - - name: -gene_id + - name: --gene_id type: boolean_true description: Use gene ID instead of gene name (VCF output). - - name: -hgvs + - name: --hgvs type: boolean_true description: Use HGVS annotations for amino acid sub-field. - - name: -hgvs_old + - name: --hgvs_old type: boolean_true description: Use old HGVS notation. - - name: -hgvs1_letter_aa + - name: --hgvs1_letter_aa type: boolean_true description: Use one letter Amino acid codes in HGVS notation. - - name: -hgvs_tr_id + - name: --hgvs_tr_id type: boolean_true description: Use transcript ID in HGVS notation. - - name: -lof + - name: --lof type: boolean_true description: | Add loss of function (LOF) and Nonsense mediated decay (NMD) tags. - name: -no_hgvs type: boolean_false description: Do not add HGVS annotations. - - name: -no_lof + - name: --no_lof type: boolean_false description: Do not add LOF and NMD annotations. - - name: -no_shift_hgvs + - name: --no_shift_hgvs type: boolean_false description: | Do not shift variants according to HGVS notation (most 3prime end). - - name: -oicr + - name: --oicr type: boolean_true description: Add OICR tag in VCF file. - - name: -sequence_ontology + - name: --sequence_ontology type: boolean_true description: Use Sequence Ontology terms. - name: Generic options arguments: - - name: -config + - name: --config alternatives: [-c] type: file description: Specify config file - - name: -config_option + - name: --config_option type: string description: Override a config file option (name=value). - - name: -debug + - name: --debug alternatives: [-d] type: boolean_true description: Debug mode (very verbose). - - name: -data_dir + - name: --data_dir type: file description: Override data_dir parameter from config file. # - name: -download # type: boolean_true # description: Download a SnpEff database, if not available locally. - - name: -no_download + - name: --no_download type: boolean_false description: Do not download a SnpEff database, if not available locally. - - name: -no_log + - name: --no_log type: boolean_false description: Do not report usage statistics to server. - - name: -quiet + - name: --quiet alternatives: [-q] type: boolean_true description: Quiet mode (do not show any messages or errors) - - name: -verbose + - name: --verbose alternatives: [-v] type: boolean_true description: Verbose mode. - name: Database options arguments: - - name: -canon + - name: --canon type: boolean_true description: Only use canonical transcripts. - - name: -canon_list + - name: --canon_list type: file description: | Only use canonical transcripts, replace some transcripts using the 'gene_id transcript_id' entries in . - - name: -tag + - name: --tag type: string description: | Only use transcript having a tag 'tagName'. This option can be used multiple times. - - name: -no_tag + - name: --no_tag type: boolean_false description: | Filter out transcript having a tag 'tagName'. This option can be used multiple times. - - name: -interaction + - name: --interaction type: boolean_true description: Annotate using interactions (requires interaction database). - - name: -interval + - name: --interval type: file description: | Use a custom intervals in TXT/BED/BigBed/VCF/GFF file (you may use this option many times). - - name: -max_tsl + - name: --max_tsl type: integer description: Only use transcripts having Transcript Support Level lower than . - - name: -motif + - name: --motif type: boolean_true description: Annotate using motifs (requires Motif database). - - name: -nextprot + - name: --nextprot type: boolean_true description: Annotate using NextProt (requires NextProt database). - - name: -no_genome + - name: --no_genome type: boolean_false description: Do not load any genomic database (e.g. annotate using custom files). - - name: -no_expand_iub + - name: --no_expand_iub type: boolean_false description: Disable IUB code expansion in input variants. - - name: -no_interaction + - name: --no_interaction type: boolean_false description: Disable inteaction annotations. - - name: -no_motif + - name: --no_motif type: boolean_false description: Disable motif annotations. - - name: -no_nextprot + - name: --no_nextprot type: boolean_false description: Disable NextProt annotations. - - name: -only_reg + - name: --only_reg type: boolean_true description: Only use regulation tracks. - - name: -only_protein + - name: --only_protein type: boolean_true description: Only use protein coding transcripts. - - name: -only_tr + - name: --only_tr type: file description: | Only use the transcripts in this file. Format: One transcript ID per line. example: file.txt - - name: -reg + - name: --reg type: string description: Regulation track to use (this option can be used add several times). - - name: -ss - alternatives: [-spliceSiteSize] + - name: --ss + alternatives: [--spliceSiteSize] type: integer description: | Set size for splice sites (donor and acceptor) in bases. Default: 2. - - name: -splice_region_exon_size + - name: --splice_region_exon_size type: integer description: | Set size for splice site region within exons. Default: 3 bases. - - name: -splice_region_intron_min + - name: --splice_region_intron_min type: integer description: | Set minimum number of bases for splice site region within intron. Default: 3 bases. - - name: -splice_region_intron_max + - name: --splice_region_intron_max type: integer description: | Set maximum number of bases for splice site region within intron. Default: 8 bases. - - name: -strict + - name: --strict type: boolean_true description: Only use 'validated' transcripts (i.e. sequence has been checked). - - name: -ud - alternatives: [-upDownStreamLen] + - name: --ud + alternatives: [--upDownStreamLen] type: integer description: Set upstream downstream interval length (in bases). resources: @@ -293,10 +291,9 @@ engines: setup: - type: docker run: | - # touch ./var/software_versions.txt && \ - # version=$(snpEff -version) && \ - # version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \ - # echo "$version_trimmed" > ./var/software_versions.txt + version=$(snpEff -version) && \ + version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \ + echo "$version_trimmed" > /var/software_versions.txt runners: - type: executable - type: nextflow \ No newline at end of file diff --git a/src/snpeff/script.sh b/src/snpeff/script.sh index a3770e34..975c8eb9 100644 --- a/src/snpeff/script.sh +++ b/src/snpeff/script.sh @@ -150,4 +150,6 @@ if [ -z "$par_no_stats" ]; then else mv -f snpEff_genes.txt "$directory_path" fi -fi \ No newline at end of file +fi + +exit 0 \ No newline at end of file diff --git a/src/snpeff/test.sh b/src/snpeff/test.sh index 2185c70a..241deb67 100644 --- a/src/snpeff/test.sh +++ b/src/snpeff/test.sh @@ -3,8 +3,6 @@ set -eo pipefail ## VIASH START -meta_executable="$PWD/target/executable/snpeff/snpeff" -meta_resources_dir="$PWD/src/snpeff" ## VIASH END ########################################################################### @@ -16,9 +14,9 @@ pushd test1 > /dev/null # cd test1 (stack) echo "> Run Test 1: required parameters" "$meta_executable" \ - -genome_version GRCh37.75 \ - -input "$meta_resources_dir/test_data/cancer.vcf" \ - -output out.vcf + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/cancer.vcf" \ + --output out.vcf # Check if output files are generated output_files=("out.vcf" "snpEff_genes.txt" "snpEff_summary.html") @@ -50,11 +48,11 @@ pushd test2 > /dev/null echo "> Run Test 2: different input + options" "$meta_executable" \ - -genome_version GRCh37.75 \ - -input "$meta_resources_dir/test_data/test.vcf" \ - -interval "$meta_resources_dir/test_data/my_annotations.bed" \ - -no_stats \ - -output output.vcf + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/test.vcf" \ + --interval "$meta_resources_dir/test_data/my_annotations.bed" \ + --no_stats \ + --output output.vcf # Check if output.vcf exists if [ ! -e "output.vcf" ]; then @@ -89,11 +87,11 @@ mkdir temp echo "> Run Test 3: move output files" "$meta_executable" \ - -genome_version GRCh37.75 \ - -input "$meta_resources_dir/test_data/test.vcf" \ - -output output.vcf \ - -summary temp \ - -genes temp + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/test.vcf" \ + --output output.vcf \ + --summary temp \ + --genes temp # Check if output.vcf exists if [ ! -e "output.vcf" ]; then