From 625241e28b08316539235599f66ae5c8723d82cb Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 13:57:13 +0200 Subject: [PATCH 01/21] add help --- src/agat/agat_sp_merge_annotations/help.txt | 64 +++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 src/agat/agat_sp_merge_annotations/help.txt diff --git a/src/agat/agat_sp_merge_annotations/help.txt b/src/agat/agat_sp_merge_annotations/help.txt new file mode 100644 index 00000000..2a17e7e4 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/help.txt @@ -0,0 +1,64 @@ +```sh +agat_sp_merge_annotations.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_merge_annotations.pl + +Description: + This script merge different gff annotation files in one. It uses the + AGAT parser that takes care of duplicated names and fixes other oddities + met in those files. + +Usage: + agat_sp_merge_annotations.pl --gff infile1 --gff infile2 --out outFile + agat_sp_merge_annotations.pl --help + +Options: + --gff or -f + Input GTF/GFF file(s). You can specify as much file you want + like so: -f file1 -f file2 -f file3 + + --out, --output or -o + Output gff3 file where the gene incriminated will be write. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + --help or -h + Display this helpful text. 
+ +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md From 43dc3eb91c71e4b08f99c1b31fc72247d4c20010 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 14:04:30 +0200 Subject: [PATCH 02/21] add config --- .../agat_sp_merge_annotations/config.vsh.yaml | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 src/agat/agat_sp_merge_annotations/config.vsh.yaml diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml new file mode 100644 index 00000000..2997ec48 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -0,0 +1,71 @@ +name: agat_sp_merge_annotations +namespace: agat +description: | + This script merge different gff annotation files in one. It uses the + AGAT parser that takes care of duplicated names and fixes other oddities + met in those files. 
+keywords: [gene annotations] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-f] + description: | + Input GTF/GFF file(s). You can specify as much file you want like so: -f file1 -f file2 -f file3 + type: file + required: true + direction: input + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out] + description: Output gff3 file where the gene incriminated will be write. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + String - Input AGAT config file. By default, AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the original agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). 
+ type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file From 5375f5938711b058a9e8e028998d4a76c7438bef Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 14:11:35 +0200 Subject: [PATCH 03/21] add test data and expected output + srcipt to fetch them --- .../test_data/agat_sp_merge_annotations_1.gff | 13 +++++++++++++ .../agat_sp_merge_annotations/test_data/file1.gff | 14 ++++++++++++++ .../agat_sp_merge_annotations/test_data/file2.gff | 12 ++++++++++++ .../agat_sp_merge_annotations/test_data/script.sh | 11 +++++++++++ 4 files changed, 50 insertions(+) create mode 100644 src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/file1.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/file2.gff create mode 100755 src/agat/agat_sp_merge_annotations/test_data/script.sh diff --git a/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff new file mode 100644 index 00000000..5f68f1f3 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff @@ -0,0 +1,13 @@ +##gff-version 3 +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;ontology=G0222 +chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;ontology=G0222;merged_ID=IDmodified-mrna-1;merged_Ontology=G0333;merged_Parent=IDmodified-gene-1 +chr10 BestRefSeq exon 123237824 123239535 . - . 
ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123239371 123239535 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123353223 123353331 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3 +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3 +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3 diff --git a/src/agat/agat_sp_merge_annotations/test_data/file1.gff b/src/agat/agat_sp_merge_annotations/test_data/file1.gff new file mode 100644 index 00000000..d822ebfa --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/file1.gff @@ -0,0 +1,14 @@ +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123239371 123239535 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123353223 123353331 . 
- 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3; + + \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/file2.gff b/src/agat/agat_sp_merge_annotations/test_data/file2.gff new file mode 100644 index 00000000..f072e1b3 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/file2.gff @@ -0,0 +1,12 @@ +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;Ontology=G0333; +chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123239371 123239535 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123353223 123353331 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . 
ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3; \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/script.sh b/src/agat/agat_sp_merge_annotations/test_data/script.sh new file mode 100755 index 00000000..225119e0 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file1.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file2.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_merge_annotations_1.gff src/agat/agat_sp_merge_annotations/test_data From 11d6afa99c5efc1f3e1b6aff267c2c1b330685c2 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 15:22:50 +0200 Subject: [PATCH 04/21] add run script and handle multiple inputs --- .../agat_sp_merge_annotations/config.vsh.yaml | 5 +++-- src/agat/agat_sp_merge_annotations/script.sh | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 src/agat/agat_sp_merge_annotations/script.sh diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index 2997ec48..a40d1d27 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -22,11 +22,12 @@ argument_groups: - name: --gff alternatives: [-f] description: | - Input GTF/GFF file(s). You can specify as much file you want like so: -f file1 -f file2 -f file3 + Input GTF/GFF file(s). 
type: file + multiple: true required: true direction: input - example: input.gff + example: input1.gff;input2.gff - name: Outputs arguments: - name: --output diff --git a/src/agat/agat_sp_merge_annotations/script.sh b/src/agat/agat_sp_merge_annotations/script.sh new file mode 100644 index 00000000..2613dde4 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/script.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Convert a list of file names to multiple -gff arguments +input_files="" +IFS=";" read -ra file_names <<< "$par_gff" +for file in "${file_names[@]}"; do + input_files+="-gff $file " +done +unset IFS + +# run agat_sp_merge_annotations +agat_sp_merge_annotations.pl \ + $input_files \ + -o "$par_output" \ + ${par_config:+--config "${par_config}"} From 7761dd36499865314ae5a92f97064212e5eec15e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 15:22:56 +0200 Subject: [PATCH 05/21] add test --- src/agat/agat_sp_merge_annotations/test.sh | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/agat/agat_sp_merge_annotations/test.sh diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh new file mode 100644 index 00000000..73f3ff79 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/file1.gff;$test_dir/file2.gff" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_sp_merge_annotations_1.gff" +if [ $? 
-ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file From 5e3b25ba9fccb06b2182e86fce74cef56940744e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 15:25:50 +0200 Subject: [PATCH 06/21] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4575cb9..e23d2345 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ * `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). +* `agat/agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). From ea35b1d4e88b539b53928254bf7317783c0c1dc4 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 15:28:01 +0200 Subject: [PATCH 07/21] fix typo --- src/agat/agat_sp_merge_annotations/script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/script.sh b/src/agat/agat_sp_merge_annotations/script.sh index 2613dde4..078eca04 100644 --- a/src/agat/agat_sp_merge_annotations/script.sh +++ b/src/agat/agat_sp_merge_annotations/script.sh @@ -7,7 +7,7 @@ input_files="" IFS=";" read -ra file_names <<< "$par_gff" for file in "${file_names[@]}"; do - input_files+="-gff $file " + input_files+="--gff $file " done unset IFS From 19932770df6b6cb9b8345ed2eceb99d5fc033791 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Thu, 8 Aug 2024 09:02:52 +0200 Subject: [PATCH 08/21] add second test --- src/agat/agat_sp_merge_annotations/test.sh | 22 ++++++++++++++++++- .../test_data/agat_sp_merge_annotations_2.gff | 3 +++ .../test_data/fileA.gff | 2 ++ .../test_data/fileB.gff | 2 ++ .../test_data/script.sh | 4 ++++ 5 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff create mode 100644 
src/agat/agat_sp_merge_annotations/test_data/fileA.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/fileB.gff diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh index 73f3ff79..bcf4dbf0 100644 --- a/src/agat/agat_sp_merge_annotations/test.sh +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -6,7 +6,7 @@ test_dir="${meta_resources_dir}/test_data" out_dir="${meta_resources_dir}/out_data" -echo "> Run $meta_name with test data" +echo "> Run $meta_name with test data 1" "$meta_executable" \ --gff "$test_dir/file1.gff;$test_dir/file2.gff" \ --output "$out_dir/output.gff" @@ -24,4 +24,24 @@ if [ $? -ne 0 ]; then exit 1 fi +rm -rf "$out_dir/output.gff" + +echo "> Run $meta_name with test data 2" +"$meta_executable" \ + --gff "$test_dir/fileA.gff;$test_dir/fileB.gff" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_sp_merge_annotations_2.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff new file mode 100644 index 00000000..1c3846b2 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff @@ -0,0 +1,3 @@ +##gff-version 3 +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A +chr1 AUGUSTUS mRNA 1000424 1039237 . + . 
ID=A.t1;Parent=A;merged_ID=B.t1;merged_Parent=B diff --git a/src/agat/agat_sp_merge_annotations/test_data/fileA.gff b/src/agat/agat_sp_merge_annotations/test_data/fileA.gff new file mode 100644 index 00000000..03b2d16d --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/fileA.gff @@ -0,0 +1,2 @@ +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A; +chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=A.t1;Parent=A; diff --git a/src/agat/agat_sp_merge_annotations/test_data/fileB.gff b/src/agat/agat_sp_merge_annotations/test_data/fileB.gff new file mode 100644 index 00000000..e796e5f0 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/fileB.gff @@ -0,0 +1,2 @@ +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=B; +chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=B.t1;Parent=B; diff --git a/src/agat/agat_sp_merge_annotations/test_data/script.sh b/src/agat/agat_sp_merge_annotations/test_data/script.sh index 225119e0..0d3acae7 100755 --- a/src/agat/agat_sp_merge_annotations/test_data/script.sh +++ b/src/agat/agat_sp_merge_annotations/test_data/script.sh @@ -9,3 +9,7 @@ fi cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file1.gff src/agat/agat_sp_merge_annotations/test_data cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file2.gff src/agat/agat_sp_merge_annotations/test_data cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_merge_annotations_1.gff src/agat/agat_sp_merge_annotations/test_data + +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/fileA.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/fileB.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_merge_annotations_2.gff src/agat/agat_sp_merge_annotations/test_data \ No newline at end of file From ca292ad9cced26ac9ac9336fed4aa29d7e367c78 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:18:24 +0200 Subject: [PATCH 
09/21] Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index a40d1d27..dc876820 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -13,6 +13,8 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: [agat] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] From 3eaf1969914c1706e7de61b537de99bd3a38640e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:18:33 +0200 Subject: [PATCH 10/21] Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index dc876820..1d1b7c18 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -1,7 +1,7 @@ name: agat_sp_merge_annotations namespace: agat description: | - This script merge different gff annotation files in one. It uses the + Merge different gff annotation files into one. It uses the AGAT parser that takes care of duplicated names and fixes other oddities met in those files. 
keywords: [gene annotations] From d4aa71c6fe7a3b43273ae4e224c6f25888133ca3 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:18:43 +0200 Subject: [PATCH 11/21] Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index 1d1b7c18..8b864589 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -4,7 +4,7 @@ description: | Merge different gff annotation files into one. It uses the AGAT parser that takes care of duplicated names and fixes other oddities met in those files. -keywords: [gene annotations] +keywords: [gene annotations, merge, gff] links: homepage: https://github.com/NBISweden/AGAT documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html From cfda34814e25b0a73c678b59067f39e970148247 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:18:59 +0200 Subject: [PATCH 12/21] Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index 8b864589..4ed5977f 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -44,13 +44,7 @@ argument_groups: - name: --config alternatives: [-c] description: | - String - Input AGAT config file. 
By default, AGAT takes as input - agat_config.yaml file from the working directory if any, - otherwise it takes the original agat_config.yaml shipped with - AGAT. To get the agat_config.yaml locally type: "agat config - --expose". The --config option gives you the possibility to use - your own AGAT config file (located elsewhere or named - differently). + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). type: file required: false example: custom_agat_config.yaml From 29501fb84e34a0aa2f2039af2cb0ac1293652cfd Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:31:59 +0200 Subject: [PATCH 13/21] update --config description --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index 4ed5977f..1df86aaf 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -44,7 +44,7 @@ argument_groups: - name: --config alternatives: [-c] description: | - AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
type: file required: false example: custom_agat_config.yaml From ead79bb93a5430ddeb168aff95a4f37b863820f5 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 09:45:49 +0200 Subject: [PATCH 14/21] remove unset IFS --- src/agat/agat_sp_merge_annotations/script.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/script.sh b/src/agat/agat_sp_merge_annotations/script.sh index 078eca04..7e3284db 100644 --- a/src/agat/agat_sp_merge_annotations/script.sh +++ b/src/agat/agat_sp_merge_annotations/script.sh @@ -9,7 +9,6 @@ IFS=";" read -ra file_names <<< "$par_gff" for file in "${file_names[@]}"; do input_files+="--gff $file " done -unset IFS # run agat_sp_merge_annotations agat_sp_merge_annotations.pl \ From 188c69e3ce9fb31ef207f31d82efe068862b5b6e Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 10:02:07 +0200 Subject: [PATCH 15/21] add temporary directory and cleanup on exit --- src/agat/agat_sp_merge_annotations/test.sh | 27 ++++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh index bcf4dbf0..00ff1997 100644 --- a/src/agat/agat_sp_merge_annotations/test.sh +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -4,41 +4,48 @@ ## VIASH END test_dir="${meta_resources_dir}/test_data" -out_dir="${meta_resources_dir}/out_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp --tmpdir "$meta_temp_dir") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT echo "> Run $meta_name with test data 1" "$meta_executable" \ --gff "$test_dir/file1.gff;$test_dir/file2.gff" \ - --output "$out_dir/output.gff" + --output "$TMPDIR/output.gff" echo ">> Checking output" -[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 +[ ! 
-f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 echo ">> Check if output is empty" -[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 echo ">> Check if output matches expected output" -diff "$out_dir/output.gff" "$test_dir/agat_sp_merge_annotations_1.gff" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_merge_annotations_1.gff" if [ $? -ne 0 ]; then echo "Output file output.gff does not match expected output" exit 1 fi -rm -rf "$out_dir/output.gff" +echo ">> cleanup" +rm -rf "$TMPDIR/output.gff" echo "> Run $meta_name with test data 2" "$meta_executable" \ --gff "$test_dir/fileA.gff;$test_dir/fileB.gff" \ - --output "$out_dir/output.gff" + --output "$TMPDIR/output.gff" echo ">> Checking output" -[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 echo ">> Check if output is empty" -[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 echo ">> Check if output matches expected output" -diff "$out_dir/output.gff" "$test_dir/agat_sp_merge_annotations_2.gff" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_merge_annotations_2.gff" if [ $? 
-ne 0 ]; then echo "Output file output.gff does not match expected output" exit 1 From face09b0340dc9e1df7bb35a56070880e12759fe Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 10:12:58 +0200 Subject: [PATCH 16/21] update clean up on exit function --- src/agat/agat_sp_merge_annotations/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh index 00ff1997..f8282f54 100644 --- a/src/agat/agat_sp_merge_annotations/test.sh +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -8,7 +8,7 @@ test_dir="${meta_resources_dir}/test_data" # create temporary directory and clean up on exit TMPDIR=$(mktemp --tmpdir "$meta_temp_dir") function clean_up { - [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" + rm -rf "$TMPDIR" } trap clean_up EXIT From b36eb36a8988e8fe1521aa86f15188dd9b6eca33 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 14:01:40 +0200 Subject: [PATCH 17/21] add set -eo pipefail to test and script --- src/agat/agat_sp_merge_annotations/script.sh | 2 ++ src/agat/agat_sp_merge_annotations/test.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/agat/agat_sp_merge_annotations/script.sh b/src/agat/agat_sp_merge_annotations/script.sh index 7e3284db..5703745a 100644 --- a/src/agat/agat_sp_merge_annotations/script.sh +++ b/src/agat/agat_sp_merge_annotations/script.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -eo pipefail + ## VIASH START ## VIASH END diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh index f8282f54..ff13a1f4 100644 --- a/src/agat/agat_sp_merge_annotations/test.sh +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -eo pipefail + ## VIASH START ## VIASH END From cffbf33db59ee0d71dd1fe646a1b2b5f4653179d Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 14:02:14 +0200 Subject: [PATCH 18/21] fix create temporary directory --- 
src/agat/agat_sp_merge_annotations/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh index ff13a1f4..7b882717 100644 --- a/src/agat/agat_sp_merge_annotations/test.sh +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -8,9 +8,9 @@ set -eo pipefail test_dir="${meta_resources_dir}/test_data" # create temporary directory and clean up on exit -TMPDIR=$(mktemp --tmpdir "$meta_temp_dir") +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") function clean_up { - rm -rf "$TMPDIR" + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" } trap clean_up EXIT From 277765f107878a1fbcbe40dfab809bb48caa52d5 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 15:13:31 +0200 Subject: [PATCH 19/21] cleanup changelog --- CHANGELOG.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4553e40d..453ef35e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,17 +21,14 @@ * `agat`: - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - - `/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + - `agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - -* `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - -* `agat/agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). - ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). 
From d7b6a209e0fa27935e817d669a97093f75a595f0 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Mon, 19 Aug 2024 15:19:44 +0200 Subject: [PATCH 20/21] cleanup changelog --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 453ef35e..b7c3aee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,10 +20,10 @@ based on a provided sequence IDs or region coordinates file (PR #85). * `agat`: - - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). - - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - - `agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). + - `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + - `agat/agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). From 90db9f1b0841e66378af7a857dfab3a7c1c107d4 Mon Sep 17 00:00:00 2001 From: Emma Rousseau Date: Sat, 26 Oct 2024 12:29:09 +0200 Subject: [PATCH 21/21] Minor formatting changes --- src/agat/agat_sp_merge_annotations/config.vsh.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index 1df86aaf..bc47921a 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -1,9 +1,8 @@ name: agat_sp_merge_annotations namespace: agat description: | - Merge different gff annotation files into one. 
It uses the - AGAT parser that takes care of duplicated names and fixes other oddities - met in those files. + Merge different gff annotation files into one. It uses the AGAT parser that takes care of + duplicated names and fixes other oddities met in those files. keywords: [gene annotations, merge, gff] links: homepage: https://github.com/NBISweden/AGAT @@ -28,13 +27,12 @@ argument_groups: type: file multiple: true required: true - direction: input example: input1.gff;input2.gff - name: Outputs arguments: - name: --output alternatives: [-o, --out] - description: Output gff3 file where the gene incriminated will be write. + description: Output gff3 file where the gene incriminated will be written. type: file direction: output required: true @@ -44,9 +42,10 @@ argument_groups: - name: --config alternatives: [-c] description: | - AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. + The `--config` option gives you the possibility to use your own AGAT config file (located + elsewhere or named differently). type: file - required: false example: custom_agat_config.yaml resources: - type: bash_script