diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd21a1e..b3eeea96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). + - `agat/agat_sp_complement_annotations`: complement a reference annotation with other annotations (PR #129). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). diff --git a/src/agat/agat_sp_complement_annotations/config.vsh.yaml b/src/agat/agat_sp_complement_annotations/config.vsh.yaml new file mode 100644 index 00000000..aff815f7 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/config.vsh.yaml @@ -0,0 +1,95 @@ +name: agat_sp_complement_annotations +namespace: agat +description: | + The script allows to complement a reference annotation with other annotations. + + * A l1 feature from the addfile.gff that does not overlap a l1 feature from the reference annotation will be added. + * A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature with a CDS from the reference annotation will be added. + * A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added. + * A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature with a CDS from the reference annotation will be added only if the CDSs don't overlap. + * A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added only if none of the l3 features overlap. + + ! It is sufficient that only one isoform is overlapping to prevent the whole gene (l1 feature) from the addfile.gff to be added in the output. 
+keywords: [gene annotations, GFF] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_complement_annotations.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + - commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [author, maintainer] +argument_groups: + - name: Inputs + arguments: + - name: --ref + alternatives: [-r, -i] + description: Input GTF/GFF file used as reference. + type: file + required: true + direction: input + example: reference.gff + - name: --add + alternatives: [-a] + description: | + Annotation file(s) you would like to use to complement the + reference annotation. You can specify as many files as you like. + The order in which you provide these files matters. Once the reference file has been + complemented by file1, this new annotation becomes the new + reference that will be complemented by file2 etc. + So, be aware of what you want if you use several addfiles. + type: file + required: true + direction: input + multiple: true # script.sh receives the values as one ';'-separated string in par_add + example: addfile1.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile] + description: Output gff3 containing the reference annotation with all the non-overlapping newly added genes from addfiles.gff. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --size_min + alternatives: [-s] + description: | + Option to keep the non-overlapping gene only if the CDS size (in + nucleotides) is over the minimum size defined. Default = 0, which + means all of them are kept. + type: integer + required: false + example: 100 + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT.
The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_complement_annotations/help.txt b/src/agat/agat_sp_complement_annotations/help.txt new file mode 100644 index 00000000..5db5e40a --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/help.txt @@ -0,0 +1,91 @@ +```sh +agat_sp_complement_annotations.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_complement_annotations.pl + +Description: + The script allows to complement a reference annotation with other + annotations. A l1 feature from the addfile.gff that does not overlap a + l1 feature from the reference annotation will be added. A l1 feature + from the addfile.gff without a CDS that overlaps a l1 feature with a CDS + from the reference annotation will be added. A l1 feature from the + addfile.gff with a CDS that overlaps a l1 feature without a CDS from the + reference annotation will be added. A l1 feature from the addfile.gff + with a CDS that overlaps a l1 feature with a CDS from the reference + annotation will be added only if the CDSs don't overlap. 
A l1 feature + from the addfile.gff without a CDS that overlaps a l1 feature without a + CDS from the reference annotation will be added only if none of the l3 + features overlap. /!\ It is sufficiant that only one isoform is + overlapping to prevent the whole gene (l1 feature) from the addfile.gff + to be added in the output. + +Usage: + agat_sp_complement_annotations.pl --ref annotation_ref.gff --add addfile1.gff --add addfile2.gff --out outFile + agat_sp_complement_annotations.pl --help + +Options: + --ref, -r or -i + Input GTF/GFF file used as reference. + + --add or -a + Annotation(s) file you would like to use to complement the + reference annotation. You can specify as much file you want like + so: -a addfile1 -a addfile2 -a addfile3 /!\ The order you + provide these files matter. Once the reference file has been + complemented by file1, this new annotation becomes the new + reference that will be complemented by file2 etc. /!\ The result + with -a addfile1 -a addfile2 will differ to the result from -a + addfile2 -a addfile1. So, be aware of what you want if you use + several addfiles. + + --size_min or -s + Option to keep the non-overlping gene only if the CDS size (in + nucleotide) is over the minimum size defined. Default = 0 that + means all of them are kept. + + --out, --output, --outfile or -o + Output gff3 containing the reference annotation with all the + non-overlapping newly added genes from addfiles.gff. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + --help or -h + Display this helpful text. 
+ +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md diff --git a/src/agat/agat_sp_complement_annotations/script.sh b/src/agat/agat_sp_complement_annotations/script.sh new file mode 100644 index 00000000..36a668a8 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# unset boolean flags that are "false" so the ${var:+...} expansions below emit nothing (NOTE(review): config.vsh.yaml declares no --verbose argument - confirm this flag is still needed) +[[ "$par_verbose" == "false" ]] && unset par_verbose + +# Split the ';'-separated --add value into an array of repeated --add arguments; an array keeps paths containing spaces or glob characters intact +input_files=() +IFS=";" read -ra file_names <<< "$par_add" +for file in "${file_names[@]}"; do + input_files+=(--add "$file") +done + +# run agat_sp_complement_annotations.pl; optional arguments are only passed when their par_* variable is set and non-empty +agat_sp_complement_annotations.pl \ + --ref "$par_ref" \ + "${input_files[@]}" \ + -o "$par_output" \ + ${par_size_min:+--size_min "${par_size_min}"} \ + ${par_config:+--config "${par_config}"} \ + ${par_verbose:+--verbose} diff --git a/src/agat/agat_sp_complement_annotations/test.sh b/src/agat/agat_sp_complement_annotations/test.sh new file mode 100644 index 00000000..14e03713 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END +
+test_dir="${meta_resources_dir}/test_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --ref "$test_dir/25_test.gff" \ + --add "$test_dir/9_test.gff" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_complement_annotations_1.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +rm -rf "$TMPDIR/output.gff" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --ref "$test_dir/agat_sp_complement_annotations_ref.gff" \ + --add "$test_dir/agat_sp_complement_annotations_add.gff" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_complement_annotations_2.gff" +if [ $? 
-ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sp_complement_annotations/test_data/25_test.gff b/src/agat/agat_sp_complement_annotations/test_data/25_test.gff new file mode 100644 index 00000000..906b8e81 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/25_test.gff @@ -0,0 +1,32 @@ +# gffread all_merged.stringtie.gtf -E -F -o - +# gffread v0.9.9 +##gff-version 3 +scaffold1 StringTie transcript 2551 2965 1000.00 . . ID=MSTRG.1.1;geneID=MSTRG.1 +scaffold1 StringTie exon 2551 2965 1000.00 . . Parent=MSTRG.1.1;cov=68.607231 +scaffold1 StringTie transcript 8147 13353 1000.00 - . ID=MSTRG.6.1;geneID=MSTRG.6 +scaffold1 StringTie exon 8147 8981 1000.00 - . Parent=MSTRG.6.1;cov=529.868042 +scaffold1 StringTie exon 9082 9171 1000.00 - . Parent=MSTRG.6.1;cov=451.066681 +scaffold1 StringTie exon 9328 9433 1000.00 - . Parent=MSTRG.6.1;cov=548.889893 +scaffold1 StringTie exon 9682 9875 1000.00 - . Parent=MSTRG.6.1;cov=416.032471 +scaffold1 StringTie exon 10018 10228 1000.00 - . Parent=MSTRG.6.1;cov=268.398773 +scaffold1 StringTie exon 10436 10511 1000.00 - . Parent=MSTRG.6.1;cov=263.012329 +scaffold1 StringTie exon 10665 10744 1000.00 - . Parent=MSTRG.6.1;cov=262.177094 +scaffold1 StringTie exon 10901 10996 1000.00 - . Parent=MSTRG.6.1;cov=285.484375 +scaffold1 StringTie exon 11277 11348 1000.00 - . Parent=MSTRG.6.1;cov=272.513885 +scaffold1 StringTie exon 11521 11718 1000.00 - . Parent=MSTRG.6.1;cov=323.955170 +scaffold1 StringTie exon 11802 12004 1000.00 - . Parent=MSTRG.6.1;cov=258.021729 +scaffold1 StringTie exon 12106 13353 1000.00 - . Parent=MSTRG.6.1;cov=192.039612 +scaffold1 StringTie transcript 21499 23178 1000.00 . . ID=MSTRG.7.1;geneID=MSTRG.7 +scaffold1 StringTie exon 21499 23178 1000.00 . . Parent=MSTRG.7.1;cov=207.398804 +scaffold1 StringTie transcript 44218 47964 1000.00 - . 
ID=MSTRG.11.1;geneID=MSTRG.11 +scaffold1 StringTie exon 44218 45365 1000.00 - . Parent=MSTRG.11.1;cov=3001.629883 +scaffold1 StringTie exon 47660 47706 1000.00 - . Parent=MSTRG.11.1;cov=4399.870117 +scaffold1 StringTie exon 47827 47964 1000.00 - . Parent=MSTRG.11.1;cov=2103.559082 +scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.2;geneID=MSTRG.11 +scaffold1 StringTie exon 44218 45365 1000.00 - . Parent=MSTRG.11.2;cov=487.085846 +scaffold1 StringTie exon 47660 47718 1000.00 - . Parent=MSTRG.11.2;cov=557.812744 +scaffold1 StringTie exon 47824 47964 1000.00 - . Parent=MSTRG.11.2;cov=242.265823 +scaffold1 StringTie transcript 44427 47958 1000.00 - . ID=MSTRG.11.3;geneID=MSTRG.11 +scaffold1 StringTie exon 44427 45365 1000.00 - . Parent=MSTRG.11.3;cov=2892.249023 +scaffold1 StringTie exon 47660 47723 1000.00 - . Parent=MSTRG.11.3;cov=2083.479492 +scaffold1 StringTie exon 47827 47958 1000.00 - . Parent=MSTRG.11.3;cov=734.545044 diff --git a/src/agat/agat_sp_complement_annotations/test_data/9_test.gff b/src/agat/agat_sp_complement_annotations/test_data/9_test.gff new file mode 100644 index 00000000..0e82a0ca --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/9_test.gff @@ -0,0 +1,20 @@ +##gff-version 3 +#!gff-spec-version 1.14 +#!source-version NCBI C++ formatter 0.2 +##Type DNA NC_003070.9 +NC_003070.9 RefSeq source 1 30427671 . + . organism=Arabidopsis thaliana;mol_type=genomic DNA;db_xref=taxon:3702;chromosome=1;ecotype=Columbia +NC_003070.9 RefSeq gene 3631 5899 . + . ID=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 3631 3913 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 3996 4276 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 4486 4605 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 4706 5095 . + . 
ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 5174 5326 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq exon 5439 5899 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 3760 3913 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 3996 4276 . + 2 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 4486 4605 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 4706 5095 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 5174 5326 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq CDS 5439 5627 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq start_codon 3760 3762 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; +NC_003070.9 RefSeq stop_codon 5628 5630 . + 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010; diff --git a/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_1.gff b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_1.gff new file mode 100644 index 00000000..486495da --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_1.gff @@ -0,0 +1,56 @@ +##gff-version 3 +# gffread all_merged.stringtie.gtf -E -F -o - +# gffread v0.9.9 +NC_003070.9 RefSeq gene 3631 5899 . + . ID=IDmodified-gene-1;locus_tag=AT1G01010 +NC_003070.9 AGAT mRNA 3631 5899 . + . ID=NC_003070.9:NAC001;Parent=IDmodified-gene-1;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 3631 3913 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 3996 4276 . + . 
ID=IDmodified-exon-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 4486 4605 . + . ID=IDmodified-exon-2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 4706 5095 . + . ID=IDmodified-exon-3;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 5174 5326 . + . ID=IDmodified-exon-4;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 5439 5899 . + . ID=IDmodified-exon-5;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 3760 3913 . + 0 ID=agat-cds-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 3996 4276 . + 2 ID=IDmodified-cds-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 4486 4605 . + 0 ID=IDmodified-cds-2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 4706 5095 . + 0 ID=IDmodified-cds-3;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 5174 5326 . + 0 ID=IDmodified-cds-4;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 5439 5630 . + 0 ID=IDmodified-cds-5;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 AGAT five_prime_UTR 3631 3759 . + . ID=agat-five_prime_utr-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq start_codon 3760 3762 . + 0 ID=agat-start_codon-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq stop_codon 5628 5630 . + 0 ID=agat-stop_codon-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 AGAT three_prime_UTR 5631 5899 . + . ID=agat-three_prime_utr-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +scaffold1 AGAT gene 2551 2965 . . . ID=agat-gene-1;geneID=MSTRG.1 +scaffold1 StringTie transcript 2551 2965 1000.00 . . ID=MSTRG.1.1;Parent=agat-gene-1;geneID=MSTRG.1 +scaffold1 StringTie exon 2551 2965 1000.00 . . ID=agat-exon-1;Parent=MSTRG.1.1;cov=68.607231 +scaffold1 AGAT gene 8147 13353 . - . 
ID=agat-gene-2;geneID=MSTRG.6 +scaffold1 StringTie transcript 8147 13353 1000.00 - . ID=MSTRG.6.1;Parent=agat-gene-2;geneID=MSTRG.6 +scaffold1 StringTie exon 8147 8981 1000.00 - . ID=agat-exon-2;Parent=MSTRG.6.1;cov=529.868042 +scaffold1 StringTie exon 9082 9171 1000.00 - . ID=agat-exon-3;Parent=MSTRG.6.1;cov=451.066681 +scaffold1 StringTie exon 9328 9433 1000.00 - . ID=agat-exon-4;Parent=MSTRG.6.1;cov=548.889893 +scaffold1 StringTie exon 9682 9875 1000.00 - . ID=agat-exon-5;Parent=MSTRG.6.1;cov=416.032471 +scaffold1 StringTie exon 10018 10228 1000.00 - . ID=agat-exon-6;Parent=MSTRG.6.1;cov=268.398773 +scaffold1 StringTie exon 10436 10511 1000.00 - . ID=agat-exon-7;Parent=MSTRG.6.1;cov=263.012329 +scaffold1 StringTie exon 10665 10744 1000.00 - . ID=agat-exon-8;Parent=MSTRG.6.1;cov=262.177094 +scaffold1 StringTie exon 10901 10996 1000.00 - . ID=agat-exon-9;Parent=MSTRG.6.1;cov=285.484375 +scaffold1 StringTie exon 11277 11348 1000.00 - . ID=agat-exon-10;Parent=MSTRG.6.1;cov=272.513885 +scaffold1 StringTie exon 11521 11718 1000.00 - . ID=agat-exon-11;Parent=MSTRG.6.1;cov=323.955170 +scaffold1 StringTie exon 11802 12004 1000.00 - . ID=agat-exon-12;Parent=MSTRG.6.1;cov=258.021729 +scaffold1 StringTie exon 12106 13353 1000.00 - . ID=agat-exon-13;Parent=MSTRG.6.1;cov=192.039612 +scaffold1 AGAT gene 21499 23178 . . . ID=agat-gene-3;geneID=MSTRG.7 +scaffold1 StringTie transcript 21499 23178 1000.00 . . ID=MSTRG.7.1;Parent=agat-gene-3;geneID=MSTRG.7 +scaffold1 StringTie exon 21499 23178 1000.00 . . ID=agat-exon-14;Parent=MSTRG.7.1;cov=207.398804 +scaffold1 AGAT gene 44218 47964 . - . ID=agat-gene-4;geneID=MSTRG.11 +scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.1;Parent=agat-gene-4;geneID=MSTRG.11 +scaffold1 StringTie exon 44218 45365 1000.00 - . ID=agat-exon-15;Parent=MSTRG.11.1;cov=3001.629883 +scaffold1 StringTie exon 47660 47706 1000.00 - . ID=agat-exon-16;Parent=MSTRG.11.1;cov=4399.870117 +scaffold1 StringTie exon 47827 47964 1000.00 - . 
ID=agat-exon-17;Parent=MSTRG.11.1;cov=2103.559082 +scaffold1 AGAT gene 44218 47964 . - . ID=agat-gene-5;geneID=MSTRG.11 +scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.2;Parent=agat-gene-5;geneID=MSTRG.11 +scaffold1 StringTie exon 44218 45365 1000.00 - . ID=agat-exon-18;Parent=MSTRG.11.2;cov=487.085846 +scaffold1 StringTie exon 47660 47718 1000.00 - . ID=agat-exon-19;Parent=MSTRG.11.2;cov=557.812744 +scaffold1 StringTie exon 47824 47964 1000.00 - . ID=agat-exon-20;Parent=MSTRG.11.2;cov=242.265823 +scaffold1 AGAT gene 44427 47958 . - . ID=agat-gene-6;geneID=MSTRG.11 +scaffold1 StringTie transcript 44427 47958 1000.00 - . ID=MSTRG.11.3;Parent=agat-gene-6;geneID=MSTRG.11 +scaffold1 StringTie exon 44427 45365 1000.00 - . ID=agat-exon-21;Parent=MSTRG.11.3;cov=2892.249023 +scaffold1 StringTie exon 47660 47723 1000.00 - . ID=agat-exon-22;Parent=MSTRG.11.3;cov=2083.479492 +scaffold1 StringTie exon 47827 47958 1000.00 - . ID=agat-exon-23;Parent=MSTRG.11.3;cov=734.545044 diff --git a/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_2.gff b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_2.gff new file mode 100644 index 00000000..784a39c9 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_2.gff @@ -0,0 +1,7 @@ +##gff-version 3 +Chr1 Helixer gene 41064 54753 . + . ID=Chr1_000001 +Chr1 Helixer mRNA 41064 54753 . + . ID=Chr1_000001.1;Parent=Chr1_000001 +Chr1 Helixer exon 41064 54753 . + . ID=Chr1_000001.1.exon.1;Parent=Chr1_000001.1 +Chr1 Helixer CDS 41311 54435 . + 0 ID=Chr1_000001.1.CDS.1;Parent=Chr1_000001.1 +Chr1 Helixer five_prime_UTR 41064 41310 . + . ID=Chr1_000001.1.five_prime_UTR.1;Parent=Chr1_000001.1 +Chr1 Helixer three_prime_UTR 54436 54753 . + . 
ID=Chr1_000001.1.three_prime_UTR.1;Parent=Chr1_000001.1 diff --git a/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_add.gff b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_add.gff new file mode 100644 index 00000000..fb03df17 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_add.gff @@ -0,0 +1,6 @@ +Chr1 Liftoff gene 41075 54676 . + . ID=gene-gene1 +Chr1 Liftoff mRNA 41075 54676 . + . ID=rna-transcript1;Parent=gene-gene1 +Chr1 Liftoff exon 41075 54676 . + . ID=exon-transcript1-1;Parent=rna-transcript1 +Chr1 Liftoff CDS 41311 54435 . + 0 ID=cds-codingsequence-1.1;Parent=rna-transcript1 +Chr1 Liftoff five_prime_UTR 41075 41310 . + . ID=nbis-five_prime_utr-19342;Parent=rna-transcript1 +Chr1 Liftoff three_prime_UTR 54436 54676 . + . ID=nbis-three_prime_utr-18368;Parent=rna-transcript1 diff --git a/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_ref.gff b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_ref.gff new file mode 100644 index 00000000..a3791da2 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/agat_sp_complement_annotations_ref.gff @@ -0,0 +1,6 @@ +Chr1 Helixer gene 41064 54753 . + . ID=Chr1_000001 +Chr1 Helixer mRNA 41064 54753 . + . ID=Chr1_000001.1;Parent=Chr1_000001 +Chr1 Helixer exon 41064 54753 . + . ID=Chr1_000001.1.exon.1;Parent=Chr1_000001.1 +Chr1 Helixer five_prime_UTR 41064 41310 . + . ID=Chr1_000001.1.five_prime_UTR.1;Parent=Chr1_000001.1 +Chr1 Helixer CDS 41311 54435 . + 0 ID=Chr1_000001.1.CDS.1;Parent=Chr1_000001.1 +Chr1 Helixer three_prime_UTR 54436 54753 . + . 
ID=Chr1_000001.1.three_prime_UTR.1;Parent=Chr1_000001.1 diff --git a/src/agat/agat_sp_complement_annotations/test_data/script.sh b/src/agat/agat_sp_complement_annotations/test_data/script.sh new file mode 100755 index 00000000..139540f9 --- /dev/null +++ b/src/agat/agat_sp_complement_annotations/test_data/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/gff_syntax/in/25_test.gff src/agat/agat_sp_complement_annotations/test_data +cp -r /tmp/agat_source/t/gff_syntax/in/9_test.gff src/agat/agat_sp_complement_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_complement_annotations/agat_sp_complement_annotations_ref.gff src/agat/agat_sp_complement_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_complement_annotations/agat_sp_complement_annotations_add.gff src/agat/agat_sp_complement_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_complement_annotations_1.gff src/agat/agat_sp_complement_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_complement_annotations_2.gff src/agat/agat_sp_complement_annotations/test_data \ No newline at end of file