diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd21a1e..9216829c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). + - `agat/agat_sp_ensembl_output_style`: takes a normal gff3 annotation format file and convert it to gff3 like ensembl format (PR #130). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). diff --git a/src/agat/agat_sp_ensembl_output_style/config.vsh.yaml b/src/agat/agat_sp_ensembl_output_style/config.vsh.yaml new file mode 100644 index 00000000..d364540a --- /dev/null +++ b/src/agat/agat_sp_ensembl_output_style/config.vsh.yaml @@ -0,0 +1,69 @@ +name: agat_sp_ensembl_output_style +namespace: agat +description: | + This script takes a normal gff3 annotation format file and convert it to + gff3 like ensembl format. +keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_ensembl_output_style.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + - commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-g, --ref] + description: Input GTF/GFF file. + type: file + required: true + direction: input + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o] + description: Output GFF file. 
+ type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --verbose + alternatives: [-v] + description: Verbose option to see the warning messages when parsing the GFF file. + type: boolean_true + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_ensembl_output_style/help.txt b/src/agat/agat_sp_ensembl_output_style/help.txt new file mode 100644 index 00000000..137dc887 --- /dev/null +++ b/src/agat/agat_sp_ensembl_output_style/help.txt @@ -0,0 +1,66 @@ +```sh +agat_sp_ensembl_output_style.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_ensembl_output_style.pl + +Description: + This script takes a normal gff3 annotation format file and convert it to + gff3 like ensembl format. + +Usage: + agat_sp_ensembl_output_style.pl -g infile.gff [ -o outfile ] + agat_sp_ensembl_output_style.pl --help + +Options: + -g, --gff or -ref + Input GTF/GFF file. 
+ + -v Verbose option to see the warning messages when parsing the gff + file. + + -o or --output + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. 
+
+  Do you want to contribute?:
+    You are very welcome, visit this address for the Contributing
+    guidelines:
+    https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md
diff --git a/src/agat/agat_sp_ensembl_output_style/script.sh b/src/agat/agat_sp_ensembl_output_style/script.sh
new file mode 100644
index 00000000..188816a3
--- /dev/null
+++ b/src/agat/agat_sp_ensembl_output_style/script.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -eo pipefail
+
+## VIASH START
+## VIASH END
+
+# unset flags: a value of "false" must become unset so that ${par_verbose:+-v} expands to nothing
+[[ "$par_verbose" == "false" ]] && unset par_verbose
+
+# run agat_sp_ensembl_output_style.pl
+# every argument line except the last ends in a backslash so all options reach one single command
+agat_sp_ensembl_output_style.pl \
+  --gff "$par_gff" \
+  --output "$par_output" \
+  ${par_verbose:+-v} \
+  ${par_config:+--config "${par_config}"}
diff --git a/src/agat/agat_sp_ensembl_output_style/test.sh b/src/agat/agat_sp_ensembl_output_style/test.sh
new file mode 100644
index 00000000..b8ce3c11
--- /dev/null
+++ b/src/agat/agat_sp_ensembl_output_style/test.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -eo pipefail
+
+## VIASH START
+## VIASH END
+
+test_dir="${meta_resources_dir}/test_data"
+
+# create temporary directory and clean up on exit
+TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXX")
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR"
+}
+trap clean_up EXIT
+
+echo "> Run $meta_name with test data"
+"$meta_executable" \
+  --gff "$test_dir/0.gff" \
+  --output "$TMPDIR/output.gff"
+
+echo ">> Checking output"
+[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1
+
+echo ">> Check if output is empty"
+[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1
+
+echo ">> Check if output matches expected output"
+# diff is tested inside the `if`: with `set -e`, a failing diff on its own line aborts the script before any `$?` check runs
+if ! diff "$TMPDIR/output.gff" "$test_dir/agat_sp_ensembl_output_style_1.gff"; then
+  echo "Output file output.gff does not match expected output"
+  exit 1
+fi
+
+echo "> Test successful"
\ No newline at end of file
diff --git a/src/agat/agat_sp_ensembl_output_style/test_data/0.gff b/src/agat/agat_sp_ensembl_output_style/test_data/0.gff
new file mode 100644
index 00000000..fafe86ed
--- /dev/null
+++ b/src/agat/agat_sp_ensembl_output_style/test_data/0.gff
@@ -0,0 +1,36 @@
+##gff-version 3
+scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2
+scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458
+scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717
+scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717
+scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717
+scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717
+scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717
+scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717
+scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717
+scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240
+scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852
+scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146
+scaffold789 maker exon 561401 561519 . + .
ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 +scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 +scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 +scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/src/agat/agat_sp_ensembl_output_style/test_data/agat_sp_ensembl_output_style_1.gff b/src/agat/agat_sp_ensembl_output_style/test_data/agat_sp_ensembl_output_style_1.gff new file mode 100644 index 00000000..063ce045 --- /dev/null +++ b/src/agat/agat_sp_ensembl_output_style/test_data/agat_sp_ensembl_output_style_1.gff @@ -0,0 +1,36 @@ +##gff-version 3 +scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;gene_id=CLUHARG00000005458 +scaffold625 maker transcript 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;transcript_id=CLUHART00000008717 +scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;exon_id=CLUHART00000008717:exon:1404 +scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;exon_id=CLUHART00000008717:exon:1405 +scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;exon_id=CLUHART00000008717:exon:1406 +scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;exon_id=CLUHART00000008717:exon:1407 +scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 +scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 +scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;gene_id=CLUHARG00000003852 +scaffold789 maker transcript 558184 564780 . + . 
ID=CLUHART00000006146;Parent=CLUHARG00000003852;transcript_id=CLUHART00000006146 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;exon_id=CLUHART00000006146:exon:995 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;exon_id=CLUHART00000006146:exon:996 +scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;exon_id=CLUHART00000006146:exon:997 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;exon_id=CLUHART00000006146:exon:998 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker transcript 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;transcript_id=CLUHART00000006147 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;exon_id=CLUHART00000006147:exon:997 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;exon_id=CLUHART00000006147:exon:998 +scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;exon_id=CLUHART00000006147:exon:999 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;exon_id=CLUHART00000006147:exon:1000 +scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147
+scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147
+scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147
diff --git a/src/agat/agat_sp_ensembl_output_style/test_data/script.sh b/src/agat/agat_sp_ensembl_output_style/test_data/script.sh
new file mode 100755
index 00000000..930eb183
--- /dev/null
+++ b/src/agat/agat_sp_ensembl_output_style/test_data/script.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Copies the input and expected-output GFF files for this component's test from the AGAT repository.
+# The destination paths below are relative, so run this script from the repository root.
+
+set -eo pipefail
+
+# clone repo
+if [ ! -d /tmp/agat_source ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source
+fi
+
+# copy test data
+cp -r /tmp/agat_source/t/scripts_output/in/0.gff src/agat/agat_sp_ensembl_output_style/test_data/
+cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_ensembl_output_style_1.gff src/agat/agat_sp_ensembl_output_style/test_data/
\ No newline at end of file