From 2d2bdfffde5340fbed34b858bd631225400ff5a7 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:44:35 +0200 Subject: [PATCH 01/23] add config --- .../agat_convert_genscan2gff/config.vsh.yaml | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/agat/agat_convert_genscan2gff/config.vsh.yaml diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml new file mode 100644 index 00000000..c92e63f5 --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -0,0 +1,97 @@ +name: agat_convert_genscan2gff +namespace: agat +description: | + The script takes a genscan file as input, and will translate it in gff + format. The genscan format is described here: + http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/gens + can.html /!\ vvv Known problem vvv /!\ You must have submited only DNA + sequence, wihtout any header!! Indeed the tool expects only DNA + sequences and does not crash/warn if an header is submited along the + sequence. e.g If you have an header ">seq" s-e-q are seen as the 3 first + nucleotides of the sequence. Then all prediction location are shifted + accordingly. (checked only on the online version + http://argonaute.mit.edu/GENSCAN.html. I don't know if there is the same + problem elsewhere.) /!\ ^^^ Known problem ^^^^ /!\ +keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_genscan2gff.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --genscan + alternatives: [-g] + description: Input genscan bed file that will be converted. + type: file + required: true + direction: input + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile, --gff] + description: Output GFF file. If no output file is specified, the output will be written to STDOUT. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --source + description: | + The source informs about the tool used to produce the data and is stored in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc. [default: data] + type: string + required: false + example: Stringtie + - name: --primary_tag + description: | + The primary_tag corresponds to the data type and is stored in 3rd field of a gff file. Example: gene, mRNA, CDS, etc. [default: gene] + type: string + required: false + example: gene + - name: --inflate_off + description: | + By default we inflate the block fields (blockCount, blockSizes, blockStarts) to create subfeatures of the main feature (primary_tag). Type of subfeature created based on the inflate_type parameter. If you don't want this inflating behaviour you can deactivate it by using the option --inflate_off. + type: boolean_false + - name: --inflate_type + description: | + Feature type (3rd column in gff) created when inflate parameter activated [default: exon]. + type: string + required: false + example: exon + - name: --verbose + description: add verbosity + type: boolean_true + - name: --config + alternatives: [-c] + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file From 25ad1b01fae922a948b78f9b48fafdcb2061f1e8 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:44:49 +0200 Subject: [PATCH 02/23] add help --- src/agat/agat_convert_genscan2gff/help.txt | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/agat/agat_convert_genscan2gff/help.txt diff --git a/src/agat/agat_convert_genscan2gff/help.txt b/src/agat/agat_convert_genscan2gff/help.txt new file mode 100644 index 00000000..e465c778 --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/help.txt @@ -0,0 +1,95 @@ +```sh +agat_convert_genscan2gff.pl --help +``` + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_genscan2gff.pl + +Description: + The script takes a genscan file as input, and will translate it in gff + format. The genscan format is described here: + http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/gens + can.html /!\ vvv Known problem vvv /!\ You must have submited only DNA + sequence, wihtout any header!! Indeed the tool expects only DNA + sequences and does not crash/warn if an header is submited along the + sequence. e.g If you have an header ">seq" s-e-q are seen as the 3 first + nucleotides of the sequence. Then all prediction location are shifted + accordingly. (checked only on the online version + http://argonaute.mit.edu/GENSCAN.html. I don't know if there is the same + pronlem elsewhere.) /!\ ^^^ Known problem ^^^^ /!\ + +Usage: + agat_convert_genscan2gff.pl --genscan infile.bed [ -o outfile ] + agat_convert_genscan2gff.pl -h + +Options: + --genscan or -g + Input genscan bed file that will be convert. + + --source + The source informs about the tool used to produce the data and + is stored in 2nd field of a gff file. Example: + Stringtie,Maker,Augustus,etc. [default: data] + + --primary_tag + The primary_tag corresponf to the data type and is stored in 3rd + field of a gff file. Example: gene,mRNA,CDS,etc. [default: gene] + + --inflate_off + By default we inflate the block fields (blockCount, blockSizes, + blockStarts) to create subfeatures of the main feature + (primary_tag). Type of subfeature created based on the + inflate_type parameter. If you don't want this inflating + behaviour you can deactivate it by using the option + --inflate_off. + + --inflate_type + Feature type (3rd column in gff) created when inflate parameter + activated [default: exon]. + + --verbose + add verbosity + + -o , --output , --out , --outfile or --gff + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md From 00e8fbc8a39ac5ecbf380a4d688aa637a14bc239 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:45:34 +0200 Subject: [PATCH 03/23] add test data and expected output adn the script to obtain them --- .../test_data/agat_convert_genscan2gff_1.gff | 25 ++++ .../test_data/script.sh | 11 ++ .../test_data/test.genscan | 127 ++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 src/agat/agat_convert_genscan2gff/test_data/agat_convert_genscan2gff_1.gff create mode 100755 src/agat/agat_convert_genscan2gff/test_data/script.sh create mode 100644 src/agat/agat_convert_genscan2gff/test_data/test.genscan diff --git a/src/agat/agat_convert_genscan2gff/test_data/agat_convert_genscan2gff_1.gff b/src/agat/agat_convert_genscan2gff/test_data/agat_convert_genscan2gff_1.gff new file mode 100644 index 00000000..695fb46c --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/test_data/agat_convert_genscan2gff_1.gff @@ -0,0 +1,25 @@ +##gff-version 3 +unknown genscan gene 2223 4605 75.25 + . ID=gene_1 +unknown genscan mRNA 2223 4605 75.25 + . ID=mrna_1;Parent=gene_1 +unknown genscan exon 2223 3020 75.25 + . ID=exon_1;Parent=mrna_1 +unknown genscan exon 4249 4605 13.03 + . ID=exon_2;Parent=mrna_1 +unknown genscan CDS 2223 3020 75.25 + 0 ID=cds_1;Parent=mrna_1 +unknown genscan CDS 4249 4605 13.03 + 0 ID=cds_2;Parent=mrna_1 +unknown genscan gene 6829 8789 20.06 - . ID=gene_2 +unknown genscan mRNA 6829 8789 20.06 - . ID=mrna_2;Parent=gene_2 +unknown genscan exon 6829 7297 20.06 - . ID=exon_3;Parent=mrna_2 +unknown genscan exon 7730 7888 12.78 - . ID=exon_4;Parent=mrna_2 +unknown genscan exon 8029 8185 7.45 - . ID=exon_5;Parent=mrna_2 +unknown genscan exon 8278 8546 17.45 - . ID=exon_6;Parent=mrna_2 +unknown genscan exon 8647 8789 18.65 - . ID=exon_7;Parent=mrna_2 +unknown genscan CDS 6829 7297 20.06 - 1 ID=cds_3;Parent=mrna_2 +unknown genscan CDS 7730 7888 12.78 - 1 ID=cds_4;Parent=mrna_2 +unknown genscan CDS 8029 8185 7.45 - 2 ID=cds_5;Parent=mrna_2 +unknown genscan CDS 8278 8546 17.45 - 1 ID=cds_6;Parent=mrna_2 +unknown genscan CDS 8647 8789 18.65 - 0 ID=cds_7;Parent=mrna_2 +unknown genscan gene 10209 11924 16.18 + . ID=gene_3 +unknown genscan mRNA 10209 11924 16.18 + . ID=mrna_3;Parent=gene_3 +unknown genscan exon 10209 11313 16.18 + . ID=exon_8;Parent=mrna_3 +unknown genscan exon 11850 11924 3.27 + . ID=exon_9;Parent=mrna_3 +unknown genscan CDS 10209 11313 16.18 + 0 ID=cds_8;Parent=mrna_3 +unknown genscan CDS 11850 11924 3.27 + 2 ID=cds_9;Parent=mrna_3 diff --git a/src/agat/agat_convert_genscan2gff/test_data/script.sh b/src/agat/agat_convert_genscan2gff/test_data/script.sh new file mode 100755 index 00000000..c1693653 --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/test_data/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/test.genscan src/agat/agat_convert_genscan2gff/test_data/test.genscan +cp -r /tmp/agat_source/t/scripts_output/out/agat_convert_genscan2gff_1.gff src/agat/agat_convert_genscan2gff/test_data/agat_convert_genscan2gff_1.gff + diff --git a/src/agat/agat_convert_genscan2gff/test_data/test.genscan b/src/agat/agat_convert_genscan2gff/test_data/test.genscan new file mode 100644 index 00000000..a88037db --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/test_data/test.genscan @@ -0,0 +1,127 @@ +GENSCAN 1.0 Date run: 7-Mar-120 Time: 14:46:49 + + + +Sequence /tmp/03_07_20-14:46:49.fasta : 12217 bp : 42.83% C+G : Isochore 1 ( 0 - 43 C+G%) + + + +Parameter matrix: HumanIso.smat + + + +Predicted genes/exons: + + + +Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr.. + +----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------ + + + + 1.01 Init + 2223 3020 798 2 0 55 2 924 0.940 75.25 + + 1.02 Term + 4249 4605 357 0 0 26 38 307 0.976 13.03 + + 1.03 PlyA + 4711 4716 6 -0.45 + + + + 2.06 PlyA - 4852 4847 6 -0.45 + + 2.05 Term - 7297 6829 469 0 1 13 42 387 0.281 20.06 + + 2.04 Intr - 7888 7730 159 0 0 85 93 144 0.998 12.78 + + 2.03 Intr - 8185 8029 157 2 1 65 60 144 0.787 7.45 + + 2.02 Intr - 8546 8278 269 1 2 36 65 287 0.946 17.45 + + 2.01 Init - 8789 8647 143 2 2 94 96 176 0.550 18.65 + + 2.00 Prom - 9720 9681 40 -6.55 + + + + 3.00 Prom + 10160 10199 40 -11.84 + + 3.01 Init + 10209 11313 1105 2 1 66 57 269 0.512 16.18 + + 3.02 Intr + 11850 11924 75 1 0 80 86 57 0.507 3.27 + + + +Suboptimal exons with probability > 1.000 + + + +Exnum Type S .Begin ...End .Len Fr Ph B/Ac Do/T CodRg P.... Tscr.. + +----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------ + + + +NO EXONS FOUND AT GIVEN PROBABILITY CUTOFF + + + + + +Predicted peptide sequence(s): + + + + + +>/tmp/03_03_20-07:33:11.fasta|GENSCAN_predicted_peptide_1|384_aa + +MSSKNKVSKQDIDSIVESLMKKQKSYFEPRLAQIQQVGMENVQKLSAIHAELALLTASIS + +TVKSDVDKLKCKVENNFSAIDGHDQAFGELELKMADMEDRSRRCNIRVIGLKERLEGFNA + +IQYLTHSLPKWFPALADVPVEVMSAHRIYSDAKRGDNRTLIFNVLRYTTRQAILRAAKKD + +PLSVDDRKVRFSPDYSNFTVKRCQAFHQAKDAARNKCLDFFLLYPATLKIKEGAQYRSFT + +SPKEAEDYVNSAASNHAATPASPRQHGTILTIYRRIHSLYDGERARKIQLLEQAASVALT + +GDNWTSVRNDNYLGVTAHFIDNVWKLRCFALEVKKKKKHSRHTAEDCAEEFIDVSNRWEI + +NGKLTTLGTDSALIMLAAARLLPF + + + +>/tmp/03_03_20-07:33:11.fasta|GENSCAN_predicted_peptide_2|398_aa + +MASTMPSSSSTEDEENTPECLNKDHYHFHHYTMEYIQDKPTNVARVGGFTDKKSIAKVER + +CLARERQEATEDHEAIPSTSGATSLTKKLRSRSGLPIAGSGLVLPALCIICQKKEKFINR + +AGKRQRDPLSKAETLTVGQLQKAAELKDDQSILLHIKDKDCVALEVQYHKGCYNQYTRFM + +TRPEKPEKEQNEPTFDVGYKILCERIIRQRLLVNQEVLRMGQLRMAFIELVKANEGLDAS + +NYSIKNLERSRRADAGSQRIQIFDPDQRTPTQWKKFLSEGTKKEALAEFLYVAWKNADLT + +IVGKNLCLYIAHTNQCHCVTVKEGVQSVRVVEDLLLFLHAQHAAREHKAVIIKSSDTDVA + +VIAVSVQTDLPCSLYVFTGTGNRTRIIDITKVSSANKI + + + +>/tmp/03_03_20-07:33:11.fasta|GENSCAN_predicted_peptide_3|394_aa + +MQRGRAAGINGIPPEFYVAFWEQLSPFFLHMINFSIEKGGFLRDVNTALISLLMKKDKNP + +TDCSSYRPLSLLNSDVKIFAKLLPLRLEPHMPELVSSDQTGFIKSRTAADNIRRLLHIIA + +AAPGCETPMSVLSLDAMKAFDRLEWSFLWSVLEAMGFISTFIGMVKVLYSNPSARVLTGQ + +TFSSLFPVSRSSRQGCPLSPALFVLSLEPLAQAVRLSNLVLPICICDTQHKLSLFADDVI + +VFLEHPTQSLPHFLSICEEFRKLSGFKMNWSKSALMHLNDNARKSVTPVNIPLVGQLKYL + +GIEVFPSLNQIVKHNYSLAFTNVLKDMDRWISLPMSIQARISIIKMNGLPRIHFVSSMVP + +LPPPSDYWIKISAQGVRCPLAKPFTHSPYSKTKX From 918a17debf5642cdb1c53a659fa351af9e9a19ee Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:45:44 +0200 Subject: [PATCH 04/23] add running script --- src/agat/agat_convert_genscan2gff/script.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/agat/agat_convert_genscan2gff/script.sh diff --git a/src/agat/agat_convert_genscan2gff/script.sh b/src/agat/agat_convert_genscan2gff/script.sh new file mode 100644 index 00000000..e49050fc --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# unset flags +[[ "$par_inflate_off" == "true" ]] && unset par_inflate_off +[[ "$par_verbose" == "false" ]] && unset par_verbose + +# run agat_convert_genscan2gff +agat_convert_genscan2gff.pl \ + --genscan "$par_genscan" \ + --output "$par_output" \ + ${par_source:+--source "${par_source}"} \ + ${par_primary_tag:+--primary_tag "${par_primary_tag}"} \ + ${par_inflate_off:+--inflate_off} \ + ${par_inflate_type:+--inflate_type "${par_inflate_type}"} \ + ${par_verbose:+--verbose} \ + ${par_config:+--config "${par_config}"} \ No newline at end of file From ea05f34b8c93e3d8418d79c633bdbd2d88e421a0 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:45:51 +0200 Subject: [PATCH 05/23] add test script --- src/agat/agat_convert_genscan2gff/test.sh | 27 +++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/agat/agat_convert_genscan2gff/test.sh diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh new file mode 100644 index 00000000..0efb56e1 --- /dev/null +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --genscan "$test_dir/test.genscan" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "output.gff" ] && echo "Output file output.gtf does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "output.gff" ] && echo "Output file output.gtf is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_convert_genscan2gff_1.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file From 488bbd32834554993e755958443739342c69a005 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:52:04 +0200 Subject: [PATCH 06/23] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4575cb9..b1e45091 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ * `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). +* `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). + + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). From 18ef1524fa2bcce00adcf5c3666cd35e926d3be7 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 30 Jul 2024 13:52:22 +0200 Subject: [PATCH 07/23] cleanup --- src/agat/agat_convert_genscan2gff/help.txt | 1 - src/agat/agat_convert_genscan2gff/test.sh | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/help.txt b/src/agat/agat_convert_genscan2gff/help.txt index e465c778..8a9e9f52 100644 --- a/src/agat/agat_convert_genscan2gff/help.txt +++ b/src/agat/agat_convert_genscan2gff/help.txt @@ -7,7 +7,6 @@ agat_convert_genscan2gff.pl --help | National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | ------------------------------------------------------------------------------ - Name: agat_convert_genscan2gff.pl diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 0efb56e1..60f0bdc1 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -12,10 +12,10 @@ echo "> Run $meta_name with test data" --output "$out_dir/output.gff" echo ">> Checking output" -[ ! -f "output.gff" ] && echo "Output file output.gtf does not exist" && exit 1 +[ ! -f "output.gff" ] && echo "Output file output.gff does not exist" && exit 1 echo ">> Check if output is empty" -[ ! -s "output.gff" ] && echo "Output file output.gtf is empty" && exit 1 +[ ! -s "output.gff" ] && echo "Output file output.gff is empty" && exit 1 echo ">> Check if output matches expected output" diff "$out_dir/output.gff" "$test_dir/agat_convert_genscan2gff_1.gff" From 0daefb5f5a4bdd8f7951c1d9d627c628c3886c5c Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 10:07:26 +0200 Subject: [PATCH 08/23] fix tests --- src/agat/agat_convert_genscan2gff/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 60f0bdc1..8dfad3a5 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -12,10 +12,10 @@ echo "> Run $meta_name with test data" --output "$out_dir/output.gff" echo ">> Checking output" -[ ! -f "output.gff" ] && echo "Output file output.gff does not exist" && exit 1 +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 echo ">> Check if output is empty" -[ ! -s "output.gff" ] && echo "Output file output.gff is empty" && exit 1 +[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 echo ">> Check if output matches expected output" diff "$out_dir/output.gff" "$test_dir/agat_convert_genscan2gff_1.gff" From 98cb70639540b91b0432b2d762c778202c54d91e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 08:38:28 +0200 Subject: [PATCH 09/23] format description --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index c92e63f5..5d1b3a10 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -2,16 +2,16 @@ name: agat_convert_genscan2gff namespace: agat description: | The script takes a genscan file as input, and will translate it in gff - format. The genscan format is described here: - http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/gens - can.html /!\ vvv Known problem vvv /!\ You must have submited only DNA - sequence, wihtout any header!! Indeed the tool expects only DNA + format. The genscan format is described [here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html). + + **Known problem** + + You must have submited only DNA sequence, without any header!! Indeed the tool expects only DNA sequences and does not crash/warn if an header is submited along the sequence. e.g If you have an header ">seq" s-e-q are seen as the 3 first nucleotides of the sequence. Then all prediction location are shifted - accordingly. (checked only on the online version - http://argonaute.mit.edu/GENSCAN.html. I don't know if there is the same - problem elsewhere.) /!\ ^^^ Known problem ^^^^ /!\ + accordingly. (checked only on the [online version](http://argonaute.mit.edu/GENSCAN.html). + I don't know if there is the same problem elsewhere.) keywords: [gene annotations, GFF conversion] links: homepage: https://github.com/NBISweden/AGAT From c21ae867dbf9794a6deacf882f090c2dc632f8f6 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 08:40:27 +0200 Subject: [PATCH 10/23] remove unused argument --inflate-off --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index 5d1b3a10..741dd3dd 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -57,10 +57,6 @@ argument_groups: type: string required: false example: gene - - name: --inflate_off - description: | - By default we inflate the block fields (blockCount, blockSizes, blockStarts) to create subfeatures of the main feature (primary_tag). Type of subfeature created based on the inflate_type parameter. If you don't want this inflating behaviour you can deactivate it by using the option --inflate_off. - type: boolean_false - name: --inflate_type description: | Feature type (3rd column in gff) created when inflate parameter activated [default: exon]. From 4e3a98c1a4d98f217fe332d0c0bbb908dbf774ef Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 08:47:46 +0200 Subject: [PATCH 11/23] update --config description --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index 741dd3dd..3c1725d9 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -69,7 +69,7 @@ argument_groups: - name: --config alternatives: [-c] description: | - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). type: file required: false example: custom_agat_config.yaml From 3612a65471dc255a81db978d40c1d87dfb064e60 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 08:48:02 +0200 Subject: [PATCH 12/23] add requirements --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index 3c1725d9..c183082b 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -21,6 +21,8 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + - commands: [agat] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] From d63e2674e821ce8890a5f7880ee1f617f75e7625 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 10:43:31 +0200 Subject: [PATCH 13/23] create temporary directory and clean up on exit --- src/agat/agat_convert_genscan2gff/test.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 8dfad3a5..e05202c4 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -4,21 +4,27 @@ ## VIASH END test_dir="${meta_resources_dir}/test_data" -out_dir="${meta_resources_dir}/out_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp --tmpdir "$meta_temp_dir") +function clean_up { + rm -rf "$TMPDIR" +} +trap clean_up EXIT echo "> Run $meta_name with test data" "$meta_executable" \ --genscan "$test_dir/test.genscan" \ - --output "$out_dir/output.gff" + --output "$TMPDIR/output.gff" echo ">> Checking output" -[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 echo ">> Check if output is empty" -[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 echo ">> Check if output matches expected output" -diff "$out_dir/output.gff" "$test_dir/agat_convert_genscan2gff_1.gff" +diff "$TMPDIR/output.gff" "$test_dir/agat_convert_genscan2gff_1.gff" if [ $? -ne 0 ]; then echo "Output file output.gff does not match expected output" exit 1 From 65884578561c262a285f8dc80d3504b3c69c8a51 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 10:45:28 +0200 Subject: [PATCH 14/23] add GENSCAN in keywords --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index c183082b..512445ef 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -1,8 +1,8 @@ name: agat_convert_genscan2gff namespace: agat description: | - The script takes a genscan file as input, and will translate it in gff - format. The genscan format is described [here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html). + The script takes a GENSCAN file as input, and will translate it in gff + format. The GENSCAN format is described [here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html). **Known problem** @@ -12,7 +12,7 @@ description: | nucleotides of the sequence. Then all prediction location are shifted accordingly. (checked only on the [online version](http://argonaute.mit.edu/GENSCAN.html). I don't know if there is the same problem elsewhere.) -keywords: [gene annotations, GFF conversion] +keywords: [gene annotations, GFF conversion, GENSCAN] links: homepage: https://github.com/NBISweden/AGAT documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_genscan2gff.html From 11ce152a9f785690f086e6ee47c332aed3c5a456 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 11:08:29 +0200 Subject: [PATCH 15/23] add set -e to test --- src/agat/agat_convert_genscan2gff/test.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index e05202c4..39b0376e 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + ## VIASH START ## VIASH END From 3f646ba7c9a0441d66396208f1e0df58ca1c946d Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 11:15:58 +0200 Subject: [PATCH 16/23] fix create temporary directory --- src/agat/agat_convert_genscan2gff/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 39b0376e..798f8251 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -8,7 +8,7 @@ set -e test_dir="${meta_resources_dir}/test_data" # create temporary directory and clean up on exit -TMPDIR=$(mktemp --tmpdir "$meta_temp_dir") +TMPDIR=$(mktemp -d --tmpdir "$meta_temp_dir") function clean_up { rm -rf "$TMPDIR" } From c471b25afbff5465ab4e3c38e5d972fc1dfb622c Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 13:35:01 +0200 Subject: [PATCH 17/23] add set -eo pipefail to test --- src/agat/agat_convert_genscan2gff/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 798f8251..7f818b06 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -e +set -eo pipefail ## VIASH START ## VIASH END From e4a814fd285460338d9a96152ba3cda15f26049d Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 13:37:00 +0200 Subject: [PATCH 18/23] add set -eo pipefail to script --- src/agat/agat_convert_genscan2gff/script.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agat/agat_convert_genscan2gff/script.sh b/src/agat/agat_convert_genscan2gff/script.sh index e49050fc..38afb084 100644 --- a/src/agat/agat_convert_genscan2gff/script.sh +++ b/src/agat/agat_convert_genscan2gff/script.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -eo pipefail + ## VIASH START ## VIASH END From 30fd75ef1addf983c82f3d9efd7bd505b7805290 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 13:38:15 +0200 Subject: [PATCH 19/23] fix create temporary directory --- src/agat/agat_convert_genscan2gff/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index 7f818b06..a4eda31f 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -8,9 +8,9 @@ set -eo pipefail test_dir="${meta_resources_dir}/test_data" # create temporary directory and clean up on exit -TMPDIR=$(mktemp -d --tmpdir "$meta_temp_dir") +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") function clean_up { - rm -rf "$TMPDIR" + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" } trap clean_up EXIT From 22a98f047d27d87112dd9fc095ce0f5222a81c7a Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 14:08:16 +0200 Subject: [PATCH 20/23] update --config description --- src/agat/agat_convert_genscan2gff/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index 512445ef..2adce1da 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -71,7 +71,7 @@ argument_groups: - name: --config alternatives: [-c] description: | - AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). type: file required: false example: custom_agat_config.yaml From 1a1b75036d867c126eee9de3e9d327eda6700d04 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 15:10:42 +0200 Subject: [PATCH 21/23] cleanup changelog --- CHANGELOG.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb1c2729..cc935b38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,18 +21,14 @@ * `agat`: - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - - `/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + - `agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - -* `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - -* `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). - - ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). From 8895d08d6f71c420145ffc1ff3000c3ceeed8b8e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 15:18:23 +0200 Subject: [PATCH 22/23] cleanup changelog --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc935b38..5b63fbb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,10 +20,10 @@ based on a provided sequence IDs or region coordinates file (PR #85). * `agat`: - - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). - - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - - `agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). + - `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + - `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). From 1814c3e69455e0cd266f3fbba13386ddd0432d14 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 16 Sep 2024 10:16:55 +0200 Subject: [PATCH 23/23] Update deprecated variable --- src/agat/agat_convert_genscan2gff/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agat/agat_convert_genscan2gff/test.sh b/src/agat/agat_convert_genscan2gff/test.sh index a4eda31f..b666dacf 100644 --- a/src/agat/agat_convert_genscan2gff/test.sh +++ b/src/agat/agat_convert_genscan2gff/test.sh @@ -8,7 +8,7 @@ set -eo pipefail test_dir="${meta_resources_dir}/test_data" # create temporary directory and clean up on exit -TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXX") function clean_up { [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" } @@ -32,4 +32,4 @@ if [ $? -ne 0 ]; then exit 1 fi -echo "> Test successful" \ No newline at end of file +echo "> Test successful"