From 0a0edcacb5368517d249210022363bd9265f1bf5 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:46:57 +0200 Subject: [PATCH 01/15] Cutadapt: fix non-functional action parameter (#161) * Cutadapt: fix non-functional action parameter * Add PR number --- CHANGELOG.md | 4 ++++ src/cutadapt/script.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2aa5387..d1654375 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). +## BUG FIXES + +* `cutadapt`: fix the non-functional `action` parameter (PR #161). + ## MINOR CHANGES * Upgrade to Viash 0.9.0. diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index 20c92724..d181e2b0 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -108,7 +108,7 @@ input_args=$(echo \ ${par_overlap:+--overlap "${par_overlap}"} \ ${par_match_read_wildcards:+--match-read-wildcards} \ ${par_no_match_adapter_wildcards:+--no-match-adapter-wildcards} \ - ${par_action:+--action "${par_action}"} \ + ${par_action:+--action="${par_action}"} \ ${par_revcomp:+--revcomp} \ ) debug "Arguments to cutadapt:" From add125261c6fa0ed7c9906fc85e7368d2072c4a3 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Mon, 7 Oct 2024 11:06:04 +0200 Subject: [PATCH 02/15] FEAT: avoid using boolean_false (#160) --- CHANGELOG.md | 4 ++++ CONTRIBUTING.md | 6 ++++++ src/agat/agat_convert_bed2gff/config.vsh.yaml | 2 +- src/agat/agat_convert_bed2gff/script.sh | 2 +- src/cutadapt/config.vsh.yaml | 4 ++-- src/cutadapt/script.sh | 4 ++-- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1654375..47c786c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,10 @@ ## MINOR CHANGES +* `agat_convert_bed2gff`: change type of argument `inflate_off` from 
`boolean_false` to `boolean_true` (PR #160). + +* `cutadapt`: change type of argument `no_indels` and `no_match_adapter_wildcards` from `boolean_false` to `boolean_true` (PR #160). + * Upgrade to Viash 0.9.0. # biobox 0.2.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a32b680c..1e4ef18c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -231,6 +231,12 @@ Finally, add all other arguments to the config file. There are a few exceptions: * If the help lists defaults, do not add them as defaults but to the description. Example: `description: . Default: 10.` +Note: + +* Prefer using `boolean_true` over `boolean_false`. This avoids confusion when specifying values for this argument in a Nextflow workflow. + For example, consider the CLI option `--no-indels` for `cutadapt`. If the config for `cutadapt` would specify an argument `no_indels` of type `boolean_false`, + the script of the component must pass a `--no-indels` argument to `cutadapt` when `par_no_indels` is set to `false`. This becomes problematic when setting a value for this argument using `fromState` in a Nextflow workflow: with `fromState: ["no_indels": true]`, the value that gets passed to the script is `true` and the `--no-indels` flag would *not* be added to the options for `cutadapt`. This is inconsistent with what one might expect when interpreting `["no_indels": true]`. + When using `boolean_true`, the reasoning becomes simpler because its value no longer represents the effect of the argument, but whether or not the flag is set. ### Step 10: Add a Docker engine diff --git a/src/agat/agat_convert_bed2gff/config.vsh.yaml b/src/agat/agat_convert_bed2gff/config.vsh.yaml index a0fafc44..4466b5f1 100644 --- a/src/agat/agat_convert_bed2gff/config.vsh.yaml +++ b/src/agat/agat_convert_bed2gff/config.vsh.yaml @@ -49,7 +49,7 @@ argument_groups: - name: --inflate_off description: | By default we inflate the block fields (blockCount, blockSizes, blockStarts) to create subfeatures of the main feature (primary_tag). 
The type of subfeature created is based on the inflate_type parameter. If you do not want this inflating behaviour you can deactivate it by using the --inflate_off option. - type: boolean_false + type: boolean_true - name: --inflate_type description: | Feature type (3rd column in gff) created when inflate parameter activated [default: exon]. diff --git a/src/agat/agat_convert_bed2gff/script.sh b/src/agat/agat_convert_bed2gff/script.sh index fbeb9206..4d4b8209 100644 --- a/src/agat/agat_convert_bed2gff/script.sh +++ b/src/agat/agat_convert_bed2gff/script.sh @@ -4,7 +4,7 @@ ## VIASH END # unset flags -[[ "$par_inflate_off" == "true" ]] && unset par_inflate_off +[[ "$par_inflate_off" == "false" ]] && unset par_inflate_off [[ "$par_verbose" == "false" ]] && unset par_verbose # run agat_convert_sp_bed2gff.pl diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml index 7e36a8e0..e20fb7fb 100644 --- a/src/cutadapt/config.vsh.yaml +++ b/src/cutadapt/config.vsh.yaml @@ -196,7 +196,7 @@ argument_groups: length of matching region. Default: 0.1 (10%). example: 0.1 - name: --no_indels - type: boolean_false + type: boolean_true description: | Allow only mismatches in alignments. @@ -218,7 +218,7 @@ argument_groups: description: | Interpret IUPAC wildcards in reads. - name: --no_match_adapter_wildcards - type: boolean_false + type: boolean_true description: | Do not interpret IUPAC wildcards in adapters. 
- name: --action diff --git a/src/cutadapt/script.sh b/src/cutadapt/script.sh index d181e2b0..1986e162 100644 --- a/src/cutadapt/script.sh +++ b/src/cutadapt/script.sh @@ -96,9 +96,9 @@ debug # Input arguments ########################################################### echo ">> Parsing input arguments" -[[ "$par_no_indels" == "true" ]] && unset par_no_indels +[[ "$par_no_indels" == "false" ]] && unset par_no_indels [[ "$par_match_read_wildcards" == "false" ]] && unset par_match_read_wildcards -[[ "$par_no_match_adapter_wildcards" == "true" ]] && unset par_no_match_adapter_wildcards +[[ "$par_no_match_adapter_wildcards" == "false" ]] && unset par_no_match_adapter_wildcards [[ "$par_revcomp" == "false" ]] && unset par_revcomp input_args=$(echo \ From 86333c1a465db45facd936695f1f33b186ccf0fc Mon Sep 17 00:00:00 2001 From: Suman Muralidharan <104161349+sumanm99@users.noreply.github.com> Date: Tue, 15 Oct 2024 23:46:17 +0530 Subject: [PATCH 03/15] SnpEff (#153) * Help file * config file * config file * runners script * config file * test script * test * test * runners script * snake case * snake case * output parameters * modify argument formatting, container setup * fix buf with mv command * avoid boolean_false and fix bug with output files --------- Co-authored-by: Emma Rousseau --- src/snpeff/config.vsh.yaml | 297 ++++++++++++++++++++++++ src/snpeff/help.txt | 79 +++++++ src/snpeff/script.sh | 148 ++++++++++++ src/snpeff/test.sh | 129 ++++++++++ src/snpeff/test_data/cancer.vcf | 2 + src/snpeff/test_data/my_annotations.bed | 1 + src/snpeff/test_data/script.sh | 15 ++ src/snpeff/test_data/test.vcf | 1 + 8 files changed, 672 insertions(+) create mode 100644 src/snpeff/config.vsh.yaml create mode 100644 src/snpeff/help.txt create mode 100644 src/snpeff/script.sh create mode 100644 src/snpeff/test.sh create mode 100644 src/snpeff/test_data/cancer.vcf create mode 100644 src/snpeff/test_data/my_annotations.bed create mode 100644 src/snpeff/test_data/script.sh create mode 
100644 src/snpeff/test_data/test.vcf diff --git a/src/snpeff/config.vsh.yaml b/src/snpeff/config.vsh.yaml new file mode 100644 index 00000000..5fb8622d --- /dev/null +++ b/src/snpeff/config.vsh.yaml @@ -0,0 +1,297 @@ +name: snpeff +description: | + Genetic variant annotation, and functional effect prediction toolbox. + It annotates and predicts the effects of genetic variants on genes and + proteins (such as amino acid changes). +keywords: [ "annotation", "effect prediction", "snp", "variant", "vcf"] + +links: + repository: https://github.com/pcingola/SnpEff + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/ +references: + doi: 10.3389/fgene.2012.00035 +license: MIT +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: Input variants file. + example: test.vcf + required: true + - name: --genome_version + type: string + description: Reference genome version. + example: GRCh37.75 + required: true + - name: Outputs + arguments: + - name: --output + type: file + description: The output file. + example: out.vcf + direction: output + required: true + - name: --summary + type: file + description: Summary file directory. + example: summary_dir + direction: output + - name: --genes + type: file + description: Txt file directory. + example: genes_dir + direction: output + - name: Options + arguments: + - name: --chr + type: string + description: | + Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output. + - name: --classic + type: boolean_true + description: Use old style annotations instead of Sequence Ontology and Hgvs. + - name: --csv_stats + type: file + description: Create CSV summary file. + - name: --download + type: boolean_true + description: Download reference genome if not available. + - name: --input_format + alternatives: [-i] + type: string + description: | + Input format [ vcf, bed ]. Default: VCF. 
+ example: "VCF" + - name: --file_list + type: boolean_true + description: Input actually contains a list of files to process. + - name: --output_format + alternatives: [-o] + type: string + description: | + Output format [ vcf, gatk, bed, bedAnn ]. Default: VCF. + example: "VCF" + - name: --stats + alternatives: [-s, --htmlStats] + type: boolean_true + description: Create HTML summary file. + - name: --no_stats + type: boolean_true + description: Do not create stats (summary) file. + - name: Results filter options + arguments: + - name: --fi + alternatives: [--filterInterval] + type: file + description: | + Only analyze changes that intersect with the intervals + specified in this file. This option can be used several times. + - name: --no_downstream + type: boolean_true + description: Do not show DOWNSTREAM changes + - name: --no_intergenic + type: boolean_true + description: Do not show INTERGENIC changes. + - name: --no_intron + type: boolean_true + description: Do not show INTRON changes. + - name: --no_upstream + type: boolean_true + description: Do not show UPSTREAM changes. + - name: --no_utr + type: boolean_true + description: Do not show 5_PRIME_UTR or 3_PRIME_UTR changes. + - name: --no + type: string + description: | + Do not show 'EffectType'. This option can be used several times. + - name: Annotations options + arguments: + - name: --cancer + type: boolean_true + description: Perform 'cancer' comparisons (Somatic vs Germline). + - name: --cancer_samples + type: file + description: Two column TXT file defining 'original \t derived' samples. + - name: --fastaprot + type: file + description: | + Create an output file containing the resulting protein sequences. + - name: --format_eff + type: boolean_true + description: | + Use 'EFF' field compatible with older versions (instead of 'ANN'). + - name: --gene_id + type: boolean_true + description: Use gene ID instead of gene name (VCF output). 
+ - name: --hgvs + type: boolean_true + description: Use HGVS annotations for amino acid sub-field. + - name: --hgvs_old + type: boolean_true + description: Use old HGVS notation. + - name: --hgvs1_letter_aa + type: boolean_true + description: Use one letter Amino acid codes in HGVS notation. + - name: --hgvs_tr_id + type: boolean_true + description: Use transcript ID in HGVS notation. + - name: --lof + type: boolean_true + description: | + Add loss of function (LOF) and Nonsense mediated decay (NMD) tags. + - name: -no_hgvs + type: boolean_true + description: Do not add HGVS annotations. + - name: --no_lof + type: boolean_true + description: Do not add LOF and NMD annotations. + - name: --no_shift_hgvs + type: boolean_true + description: | + Do not shift variants according to HGVS notation (most 3prime end). + - name: --oicr + type: boolean_true + description: Add OICR tag in VCF file. + - name: --sequence_ontology + type: boolean_true + description: Use Sequence Ontology terms. + - name: Generic options + arguments: + - name: --config + alternatives: [-c] + type: file + description: Specify config file + - name: --config_option + type: string + description: Override a config file option (name=value). + - name: --debug + alternatives: [-d] + type: boolean_true + description: Debug mode (very verbose). + - name: --data_dir + type: file + description: Override data_dir parameter from config file. + - name: --no_download + type: boolean_true + description: Do not download a SnpEff database, if not available locally. + - name: --no_log + type: boolean_true + description: Do not report usage statistics to server. + - name: --quiet + alternatives: [-q] + type: boolean_true + description: Quiet mode (do not show any messages or errors) + - name: --verbose + alternatives: [-v] + type: boolean_true + description: Verbose mode. + - name: Database options + arguments: + - name: --canon + type: boolean_true + description: Only use canonical transcripts. 
+ - name: --canon_list + type: file + description: | + Only use canonical transcripts, replace some transcripts using the 'gene_id + transcript_id' entries in . + - name: --tag + type: string + description: | + Only use transcript having a tag 'tagName'. This option can be used multiple times. + - name: --no_tag + type: boolean_true + description: | + Filter out transcript having a tag 'tagName'. This option can be used multiple times. + - name: --interaction + type: boolean_true + description: Annotate using interactions (requires interaction database). + - name: --interval + type: file + description: | + Use a custom intervals in TXT/BED/BigBed/VCF/GFF file (you may use this option many times). + - name: --max_tsl + type: integer + description: Only use transcripts having Transcript Support Level lower than . + - name: --motif + type: boolean_true + description: Annotate using motifs (requires Motif database). + - name: --nextprot + type: boolean_true + description: Annotate using NextProt (requires NextProt database). + - name: --no_genome + type: boolean_true + description: Do not load any genomic database (e.g. annotate using custom files). + - name: --no_expand_iub + type: boolean_true + description: Disable IUB code expansion in input variants. + - name: --no_interaction + type: boolean_true + description: Disable inteaction annotations. + - name: --no_motif + type: boolean_true + description: Disable motif annotations. + - name: --no_nextprot + type: boolean_true + description: Disable NextProt annotations. + - name: --only_reg + type: boolean_true + description: Only use regulation tracks. + - name: --only_protein + type: boolean_true + description: Only use protein coding transcripts. + - name: --only_tr + type: file + description: | + Only use the transcripts in this file. Format: One transcript ID per line. + example: file.txt + - name: --reg + type: string + description: Regulation track to use (this option can be used add several times). 
+ - name: --ss + alternatives: [--spliceSiteSize] + type: integer + description: | + Set size for splice sites (donor and acceptor) in bases. Default: 2. + - name: --splice_region_exon_size + type: integer + description: | + Set size for splice site region within exons. Default: 3 bases. + - name: --splice_region_intron_min + type: integer + description: | + Set minimum number of bases for splice site region within intron. Default: 3 bases. + - name: --splice_region_intron_max + type: integer + description: | + Set maximum number of bases for splice site region within intron. Default: 8 bases. + - name: --strict + type: boolean_true + description: Only use 'validated' transcripts (i.e. sequence has been checked). + - name: --ud + alternatives: [--upDownStreamLen] + type: integer + description: Set upstream downstream interval length (in bases). +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/staphb/snpeff:5.2a + setup: + - type: docker + run: | + version=$(snpEff -version) && \ + version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \ + echo "$version_trimmed" > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/snpeff/help.txt b/src/snpeff/help.txt new file mode 100644 index 00000000..d1950220 --- /dev/null +++ b/src/snpeff/help.txt @@ -0,0 +1,79 @@ +Usage: snpEff [eff] [options] genome_version [input_file] + + variants_file : Default is STDIN + +Options: + -chr : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output. + -classic : Use old style annotations instead of Sequence Ontology and Hgvs. + -csvStats : Create CSV summary file. + -download : Download reference genome if not available. Default: true + -i : Input format [ vcf, bed ]. Default: VCF. + -fileList : Input actually contains a list of files to process. 
+ -o : Ouput format [ vcf, gatk, bed, bedAnn ]. Default: VCF. + -s , -stats, -htmlStats : Create HTML summary file. Default is 'snpEff_summary.html' + -noStats : Do not create stats (summary) file + +Results filter options: + -fi , -filterInterval : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times) + -no-downstream : Do not show DOWNSTREAM changes + -no-intergenic : Do not show INTERGENIC changes + -no-intron : Do not show INTRON changes + -no-upstream : Do not show UPSTREAM changes + -no-utr : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes + -no : Do not show 'EffectType'. This option can be used several times. + +Annotations options: + -cancer : Perform 'cancer' comparisons (Somatic vs Germline). Default: false + -cancerSamples : Two column TXT file defining 'oringinal \t derived' samples. + -fastaProt : Create an output file containing the resulting protein sequences. + -formatEff : Use 'EFF' field compatible with older versions (instead of 'ANN'). + -geneId : Use gene ID instead of gene name (VCF output). Default: false + -hgvs : Use HGVS annotations for amino acid sub-field. Default: true + -hgvsOld : Use old HGVS notation. Default: false + -hgvs1LetterAa : Use one letter Amino acid codes in HGVS notation. Default: false + -hgvsTrId : Use transcript ID in HGVS notation. Default: false + -lof : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags. + -noHgvs : Do not add HGVS annotations. + -noLof : Do not add LOF and NMD annotations. + -noShiftHgvs : Do not shift variants according to HGVS notation (most 3prime end). + -oicr : Add OICR tag in VCF file. Default: false + -sequenceOntology : Use Sequence Ontology terms. Default: true + +Generic options: + -c , -config : Specify config file + -configOption name=value : Override a config file option + -d , -debug : Debug mode (very verbose). + -dataDir : Override data_dir parameter from config file. 
+ -download : Download a SnpEff database, if not available locally. Default: true + -nodownload : Do not download a SnpEff database, if not available locally. + -h , -help : Show this help and exit + -noLog : Do not report usage statistics to server + -q , -quiet : Quiet mode (do not show any messages or errors) + -v , -verbose : Verbose mode + -version : Show version number and exit + +Database options: + -canon : Only use canonical transcripts. + -canonList : Only use canonical transcripts, replace some transcripts using the 'gene_id transcript_id' entries in . + -tag : Only use transcript having a tag 'tagName'. This option can be used multiple times. + -notag : Filter out transcript having a tag 'tagName'. This option can be used multiple times. + -interaction : Annotate using interactions (requires interaction database). Default: true + -interval : Use a custom intervals in TXT/BED/BigBed/VCF/GFF file (you may use this option many times) + -maxTSL : Only use transcripts having Transcript Support Level lower than . + -motif : Annotate using motifs (requires Motif database). Default: true + -nextProt : Annotate using NextProt (requires NextProt database). + -noGenome : Do not load any genomic database (e.g. annotate using custom files). + -noExpandIUB : Disable IUB code expansion in input variants + -noInteraction : Disable inteaction annotations + -noMotif : Disable motif annotations. + -noNextProt : Disable NextProt annotations. + -onlyReg : Only use regulation tracks. + -onlyProtein : Only use protein coding transcripts. Default: false + -onlyTr : Only use the transcripts in this file. Format: One transcript ID per line. + -reg : Regulation track to use (this option can be used add several times). + -ss , -spliceSiteSize : Set size for splice sites (donor and acceptor) in bases. Default: 2 + -spliceRegionExonSize : Set size for splice site region within exons. 
Default: 3 bases + -spliceRegionIntronMin : Set minimum number of bases for splice site region within intron. Default: 3 bases + -spliceRegionIntronMax : Set maximum number of bases for splice site region within intron. Default: 8 bases + -strict : Only use 'validated' transcripts (i.e. sequence has been checked). Default: false + -ud , -upDownStreamLen : Set upstream downstream interval length (in bases) \ No newline at end of file diff --git a/src/snpeff/script.sh b/src/snpeff/script.sh new file mode 100644 index 00000000..bf3914bb --- /dev/null +++ b/src/snpeff/script.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# Unset flags if 'false' +unset_if_false=( + par_classic + par_download + par_file_list + par_stats + par_cancer + par_format_eff + par_gene_id + par_hgvs + par_hgvs_old + par_hgvs1_letter_aa + par_hgvs_tr_id + par_lof + par_oicr + par_sequence_ontology + par_debug + par_quiet + par_verbose + par_canon + par_interaction + par_motif + par_nextprot + par_only_reg + par_only_protein + par_strict + par_no_stats + par_no_downstream + par_no_intergenic + par_no_intron + par_no_upstream + par_no_utr + par_no_hgvs + par_no_lof + par_no_shift_hgvs + par_no_download + par_no_log + par_no_tag + par_no_genome + par_no_expand_iub + par_no_interaction + par_no_motif + par_no_nextprot +) +for par in ${unset_if_false[@]}; do + test_val="${!par}" # contains the value of the 'par' + [[ "$test_val" == "false" ]] && unset $par +done + + +# Run SnpEff +snpEff \ + ${par_chr:+-chr "$par_chr"} \ + ${par_classic:+-classic} \ + ${par_csv_stats:+-csvStats "$par_csv_stats"} \ + ${par_download:+-download} \ + ${par_input_format:+-i "$par_input_format"} \ + ${par_file_list:+-fileList} \ + ${par_output_format:+-o "$par_output_format"} \ + ${par_stats:+-stats} \ + ${par_no_stats:+-noStats} \ + ${par_fi:+-fi "$par_fi"} \ + ${par_no_downstream:+-no-downstream} \ + ${par_no_intergenic:+-no-intergenic} \ + ${par_no_intron:+-no-intron} \ + 
${par_no_upstream:+-no-upstream} \ + ${par_no_utr:+-no-utr} \ + ${par_no:+-no "$par_no"} \ + ${par_cancer:+-cancer} \ + ${par_cancer_samples:+-cancerSamples "$par_cancer_samples"} \ + ${par_fastaprot:+-fastaProt "$par_fastaprot"} \ + ${par_format_eff:+-formatEff} \ + ${par_gene_id:+-geneId} \ + ${par_hgvs:+-hgvs} \ + ${par_hgvs_old:+-hgvsOld} \ + ${par_hgvs1_letter_aa:+-hgvs1LetterAa} \ + ${par_hgvs_tr_id:+-hgvsTrId} \ + ${par_lof:+-lof} \ + ${par_no_hgvs:+-noHgvs} \ + ${par_no_lof:+-noLof} \ + ${par_no_shift_hgvs:+-noShiftHgvs} \ + ${par_oicr:+-oicr} \ + ${par_sequence_ontology:+-sequenceOntology} \ + ${par_config:+-config "$par_config"} \ + ${par_config_option:+-configOption "$par_config_option"} \ + ${par_debug:+-debug} \ + ${par_data_dir:+-dataDir "$par_data_dir"} \ + ${par_no_download:+-nodownload} \ + ${par_no_log:+-noLog} \ + ${par_quiet:+-quiet} \ + ${par_verbose:+-verbose} \ + ${par_canon:+-canon} \ + ${par_canon_list:+-canonList "$par_canon_list"} \ + ${par_tag:+-tag "$par_tag"} \ + ${par_no_tag:+-notag} \ + ${par_interaction:+-interaction} \ + ${par_interval:+-interval "$par_interval"} \ + ${par_max_tsl:+-maxTSL "$par_max_tsl"} \ + ${par_motif:+-motif} \ + ${par_nextprot:+-nextProt} \ + ${par_no_genome:+-noGenome} \ + ${par_no_expand_iub:+-noExpandIUB} \ + ${par_no_interaction:+-noInteraction} \ + ${par_no_motif:+-noMotif} \ + ${par_no_nextprot:+-noNextProt} \ + ${par_only_reg:+-onlyReg} \ + ${par_only_protein:+-onlyProtein} \ + ${par_only_tr:+-onlyTr "$par_only_tr"} \ + ${par_reg:+-reg "$par_reg"} \ + ${par_ss:+-ss "$par_ss"} \ + ${par_splice_region_exon_size:+-spliceRegionExonSize "$par_splice_region_exon_size"} \ + ${par_splice_region_intron_min:+-spliceRegionIntronMin "$par_splice_region_intron_min"} \ + ${par_splice_region_intron_max:+-spliceRegionIntronMax "$par_splice_region_intron_max"} \ + ${par_strict:+-strict} \ + ${par_ud:+-ud "$par_ud"} \ + "$par_genome_version" \ + "$par_input" \ + > "$par_output" + +# Path of the output file (par_output) 
+absolute_path=$(realpath "$par_output") +directory_path=$(dirname "$absolute_path") + +# Move the automatically generated outputs to their locations +if [ -z "$par_no_stats" ]; then + if [ ! -z "$par_summary" ]; then + mv -n snpEff_summary.html "$par_summary" + else + mv -n snpEff_summary.html "$directory_path" + fi +fi + +if [ -z "$par_no_stats" ]; then + if [ ! -z "$par_genes" ]; then + mv -n snpEff_genes.txt "$par_genes" + else + mv -n snpEff_genes.txt "$directory_path" + fi +fi + +exit 0 diff --git a/src/snpeff/test.sh b/src/snpeff/test.sh new file mode 100644 index 00000000..d8c72c20 --- /dev/null +++ b/src/snpeff/test.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +########################################################################### + +# Test 1: Run SnpEff with only required parameters + +mkdir test1 +pushd test1 > /dev/null # cd test1 (stack) + +echo "> Run Test 1: required parameters" +"$meta_executable" \ + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/cancer.vcf" \ + --output out.vcf + +# Check if output files are generated +output_files=("out.vcf" "snpEff_genes.txt" "snpEff_summary.html") + +# Check if any of the files do not exist +for file in "${output_files[@]}"; do + if [ ! -e "$file" ]; then + echo "File $file does not exist."; exit 1 + fi +done + +# Check if files are empty +for file in "${output_files[@]}"; do + if [ ! -s "$file" ]; then + echo "File $file is empty."; exit 1 + fi +done + +popd > /dev/null # Remove directory from stack (LIFO) + +echo "Test 1 succeeded." 
+ +########################################################################### + +# Test 2: Run SnpEff with a different input + options + +mkdir test2 +pushd test2 > /dev/null + +echo "> Run Test 2: different input + options" +"$meta_executable" \ + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/test.vcf" \ + --interval "$meta_resources_dir/test_data/my_annotations.bed" \ + --no_stats \ + --output output.vcf + +# Check if output.vcf exists +if [ ! -e "output.vcf" ]; then + echo "File output.vcf does not exist."; exit 1 +fi + +# These files should not exist +files=("snpEff_genes.txt" "snpEff_summary.html") +for file in "${files[@]}"; do + if [ -e "$file" ]; then + echo "Error: File $file exists."; exit 1 + fi +done + +# Check if output.vcf is empty +if [ ! -s "output.vcf" ]; then + echo "File output.vcf is empty."; exit 1 +fi + +popd > /dev/null + +echo "Test 2 succeeded." + +########################################################################### + +# Test 3: Move the output files to other locations + +mkdir test3 +pushd test3 > /dev/null + +mkdir temp + +echo "> Run Test 3: move output files" +"$meta_executable" \ + --genome_version GRCh37.75 \ + --input "$meta_resources_dir/test_data/test.vcf" \ + --output output.vcf \ + --summary temp \ + --genes temp + +# Check if output.vcf exists +if [ ! -e "output.vcf" ]; then + echo "File output.vcf does not exist."; exit 1 +fi + +# Check if the other output files have been moved to temp folder +output_files=("snpEff_genes.txt" "snpEff_summary.html") + +# Check if any of the files do not exist +for file in "${output_files[@]}"; do + if [ ! -e "temp/$file" ]; then + echo "File $file does not exist in 'temp' folder."; exit 1 + fi +done + +# Check if output.vcf is empty +if [ ! -s "output.vcf" ]; then + echo "File output.vcf is empty."; exit 1 +fi + +# Check if the other output files in temp folder are empty +for file in "${output_files[@]}"; do + if [ ! -s "temp/$file" ]; then + echo "File $file is empty."; exit 1
+ fi +done + +popd > /dev/null + +echo "Test 3 succeeded." + +########################################################################### + +echo "All tests successfully completed!" \ No newline at end of file diff --git a/src/snpeff/test_data/cancer.vcf b/src/snpeff/test_data/cancer.vcf new file mode 100644 index 00000000..f37ad8c3 --- /dev/null +++ b/src/snpeff/test_data/cancer.vcf @@ -0,0 +1,2 @@ +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Patient_01_Germline Patient_01_Somatic +1 69091 . A C,G . PASS AC=1 GT 1/0 2/1 diff --git a/src/snpeff/test_data/my_annotations.bed b/src/snpeff/test_data/my_annotations.bed new file mode 100644 index 00000000..a5247f97 --- /dev/null +++ b/src/snpeff/test_data/my_annotations.bed @@ -0,0 +1 @@ +1 10000 20000 MY_ANNOTATION diff --git a/src/snpeff/test_data/script.sh b/src/snpeff/test_data/script.sh new file mode 100644 index 00000000..a47ec136 --- /dev/null +++ b/src/snpeff/test_data/script.sh @@ -0,0 +1,15 @@ +# Test files from SnpEff examples +if [ ! -f snpEff_latest_core.zip ]; then + wget https://snpeff.blob.core.windows.net/versions/snpEff_latest_core.zip +fi + +if [ ! -d snpEff ]; then + unzip snpEff_latest_core.zip +fi + +mv snpEff/examples/test.vcf src/snpeff/test_data/ +mv snpEff/examples/cancer.vcf src/snpeff/test_data/ +mv snpEff/examples/my_annotations.bed src/snpeff/test_data/ + +rm -rf snpEff_latest_core.zip +rm -rf snpEff \ No newline at end of file diff --git a/src/snpeff/test_data/test.vcf b/src/snpeff/test_data/test.vcf new file mode 100644 index 00000000..d552ef18 --- /dev/null +++ b/src/snpeff/test_data/test.vcf @@ -0,0 +1 @@ +1 10469 . C G 365.78 PASS AC=30;AF=0.0732 From 7fb67a98539868b9af788338fb5f46d34ab742f7 Mon Sep 17 00:00:00 2001 From: Emma Rousseau Date: Fri, 18 Oct 2024 11:15:20 +0200 Subject: [PATCH 04/15] Add bbmap_bbsplit (#138) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* initial commit, complete config file, add test data * complete config file, adjusted script and tests, not functional * update changelog, hep.txt, functional test, large test data * smaller test data * remove test resource from config * modify paths in test script * Arguments closer to original tool's * Extra arg to allow use of bbmap args --- CHANGELOG.md | 3 + src/bbmap_bbsplit/config.vsh.yaml | 162 ++++++++++++++++++++++++++++++ src/bbmap_bbsplit/help.txt | 83 +++++++++++++++ src/bbmap_bbsplit/script.sh | 91 +++++++++++++++++ src/bbmap_bbsplit/test.sh | 145 ++++++++++++++++++++++++++ 5 files changed, 484 insertions(+) create mode 100644 src/bbmap_bbsplit/config.vsh.yaml create mode 100644 src/bbmap_bbsplit/help.txt create mode 100755 src/bbmap_bbsplit/script.sh create mode 100644 src/bbmap_bbsplit/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 47c786c6..16e79693 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -184,6 +184,9 @@ * `bedtools`: - `bedtools_getfasta`: extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file (PR #59). + +* `bbmap`: + - `bbmap_bbsplit`: Split sequencing reads by mapping them to multiple references simultaneously (PR #138). diff --git a/src/bbmap_bbsplit/config.vsh.yaml b/src/bbmap_bbsplit/config.vsh.yaml new file mode 100644 index 00000000..61336b35 --- /dev/null +++ b/src/bbmap_bbsplit/config.vsh.yaml @@ -0,0 +1,162 @@ +namespace: "bbmap" +name: "bbmap_bbsplit" +description: Split sequencing reads by mapping them to multiple references simultaneously. 
+links: + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/ + documentation: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/ + repository: https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh + +license: BBTools Copyright (c) 2014 + +argument_groups: +- name: "Input" + arguments: + - name: "--id" + type: string + description: Sample ID + - name: "--paired" + type: boolean_true + description: Paired fastq files or not? + - name: "--input" + type: file + multiple: true + description: Input fastq files, either one or two (paired), separated by ";". + example: reads.fastq + - name: "--ref" + type: file + multiple: true + description: Reference FASTA files, separated by ";". The primary reference should be specified first. + - name: "--only_build_index" + type: boolean_true + description: If set, only builds the index. Otherwise, mapping is performed. + - name: "--build" + type: string + description: | + Designate index to use. Corresponds to the number specified when building the index. + If building the index, this will be the build's id. If multiple references are indexed + in the same directory, each needs a unique build ID. Default: 1. + example: "1" + - name: "--qin" + type: string + description: | + Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if + not specified. + - name: "--interleaved" + type: boolean_true + description: | + True forces paired/interleaved input; false forces single-ended mapping. + If not specified, interleaved status will be autodetected from read names. + - name: "--maxindel" + type: integer + description: | + Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq. + example: 20 + - name: "--minratio" + type: double + description: | + Fraction of max alignment score required to keep a site. Higher is faster. 
+ example: 0.56 + - name: "--minhits" + type: integer + description: | + Minimum number of seed hits required for candidate sites. Higher is faster. + example: 1 + - name: "--ambiguous" + type: string + description: | + Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations). + * best Use the first best site (Default) + * toss Consider unmapped + * random Select one top-scoring site randomly + * all Retain all top-scoring sites. Does not work yet with SAM output + choices: [best, toss, random, all] + example: best + - name: "--ambiguous2" + type: string + description: | + Set behavior only for reads that map ambiguously to multiple different references. + Normal 'ambiguous=' controls behavior on all ambiguous reads; + Ambiguous2 excludes reads that map ambiguously within a single reference. + * best Use the first best site (Default) + * toss Consider unmapped + * all Write a copy to the output for each reference to which it maps + * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps + choices: [best, toss, all, split] + example: best + - name: "--qtrim" + type: string + description: | + Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both). + choices: [l, r, lr] + - name: "--untrim" + type: boolean_true + description: Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings. + + +- name: "Output" + arguments: + - name: "--fastq_1" + type: file + description: | + Output file for read 1. + direction: output + example: read_out1.fastq + - name: "--fastq_2" + type: file + description: | + Output file for read 2. + direction: output + example: read_out2.fastq + - name: "--sam2bam" + alternatives: ["--bs"] + type: file + description: | + Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file. 
+ direction: output + example: script.sh + - name: "--scafstats" + type: file + description: | + Write statistics on how many reads mapped to which scaffold to this file. + direction: output + example: scaffold_stats.txt + - name: "--refstats" + type: file + description: | + Write statistics on how many reads were assigned to which reference to this file. + Unmapped reads whose mate mapped to a reference are considered assigned and will be counted. + direction: output + example: reference_stats.txt + - name: "--nzo" + type: boolean_true + description: Only print lines with nonzero coverage. + - name: "--bbmap_args" + type: string + description: | + Additional arguments from BBMap to pass to BBSplit. + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: docker + run: | + apt-get update && \ + apt-get install -y build-essential openjdk-17-jdk wget tar && \ + wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \ + tar xzf BBMap_39.01.tar.gz && \ + cp -r bbmap/* /usr/local/bin + - type: docker + run: | + bbsplit.sh --version 2>&1 | awk '/BBMap version/{print "BBMAP:", $NF}' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow diff --git a/src/bbmap_bbsplit/help.txt b/src/bbmap_bbsplit/help.txt new file mode 100644 index 00000000..56544a34 --- /dev/null +++ b/src/bbmap_bbsplit/help.txt @@ -0,0 +1,83 @@ +``` +bbsplit.sh +``` + +BBSplit +Written by Brian Bushnell, from Dec. 2010 - present +Last modified June 11, 2018 + +Description: Maps reads to multiple references simultaneously. +Outputs reads to a file for the reference they best match, with multiple options for dealing with ambiguous mappings. 
+ +To index: bbsplit.sh build=<1> ref_x= ref_y= +To map: bbsplit.sh build=<1> in= out_x= out_y= + +To be concise, and do everything in one command: +bbsplit.sh ref=x.fa,y.fa in=reads.fq basename=o%.fq + +that is equivalent to +bbsplit.sh build=1 in=reads.fq ref_x=x.fa ref_y=y.fa out_x=ox.fq out_y=oy.fq + +By default paired reads will yield interleaved output, but you can use the # symbol to produce twin output files. +For example, basename=o%_#.fq will produce ox_1.fq, ox_2.fq, oy_1.fq, and oy_2.fq. + + +Indexing Parameters (required when building the index): +ref= A list of references, or directories containing fasta files. +ref_= Alternate, longer way to specify references. e.g., ref_ecoli=ecoli.fa + These can also be comma-delimited lists of files; e.g., ref_a=a1.fa,a2.fa,a3.fa +build=<1> If multiple references are indexed in the same directory, each needs a unique build ID. +path=<.> Specify the location to write the index, if you don't want it in the current working directory. + +Input Parameters: +build=<1> Designate index to use. Corresponds to the number specified when building the index. +in= Primary reads input; required parameter. +in2= For paired reads in two files. +qin= Set to 33 or 64 to specify input quality value ASCII offset. +interleaved= True forces paired/interleaved input; false forces single-ended mapping. + If not specified, interleaved status will be autodetected from read names. + +Mapping Parameters: +maxindel=<20> Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq. +minratio=<0.56> Fraction of max alignment score required to keep a site. Higher is faster. +minhits=<1> Minimum number of seed hits required for candidate sites. Higher is faster. +ambiguous= Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations). + best (use the first best site) + toss (consider unmapped) + random (select one top-scoring site randomly) + all (retain all top-scoring sites. 
Does not work yet with SAM output) +ambiguous2= Set behavior only for reads that map ambiguously to multiple different references. + Normal 'ambiguous=' controls behavior on all ambiguous reads; + Ambiguous2 excludes reads that map ambiguously within a single reference. + best (use the first best site) + toss (consider unmapped) + all (write a copy to the output for each reference to which it maps) + split (write a copy to the AMBIGUOUS_ output for each reference to which it maps) +qtrim= Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both). +untrim= Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings. + +Output Parameters: +out_= Output reads that map to the reference to . +basename=prefix%suffix Equivalent to multiple out_%=prefix%suffix expressions, in which each % is replaced by the name of a reference file. +bs= Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file. +scafstats= Write statistics on how many reads mapped to which scaffold to this file. +refstats= Write statistics on how many reads were assigned to which reference to this file. + Unmapped reads whose mate mapped to a reference are considered assigned and will be counted. +nzo=t Only print lines with nonzero coverage. + +***** Notes ***** +Almost all BBMap parameters can be used; run bbmap.sh for more details. +Exceptions include the 'nodisk' flag, which BBSplit does not support. +BBSplit is recommended for fastq and fasta output, not for sam/bam output. +When the reference sequences are shorter than read length, use Seal instead of BBSplit. + +Java Parameters: +-Xmx This will set Java's memory usage, overriding autodetection. + -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs. + The max is typically 85% of physical memory. +-eoom This flag will cause the process to exit if an + out-of-memory exception occurs. Requires Java 8u92+. +-da Disable assertions. 
+ +This list is not complete. For more information, please consult /readme.txt +Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. \ No newline at end of file diff --git a/src/bbmap_bbsplit/script.sh b/src/bbmap_bbsplit/script.sh new file mode 100755 index 00000000..ac8542c9 --- /dev/null +++ b/src/bbmap_bbsplit/script.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -eo pipefail + +function clean_up { + rm -rf "$tmpdir" +} +trap clean_up EXIT + +unset_if_false=( par_paired par_only_build_index par_interleaved par_untrim par_nzo) + +for var in "${unset_if_false[@]}"; do + if [ -z "${!var}" ]; then + unset $var + fi +done + +if [ ! -d "$par_build" ]; then + IFS=";" read -ra ref_files <<< "$par_ref" + primary_ref="${ref_files[0]}" + refs=() + for file in "${ref_files[@]:1}" + do + name=$(basename "$file" | sed 's/\.[^.]*$//') + refs+=("ref_$name=$file") + done +fi + +if $par_only_build_index; then + if [ ${#refs[@]} -gt 1 ]; then + bbsplit.sh \ + --ref_primary="$primary_ref" \ + "${refs[@]}" \ + path=$par_build + else + echo "ERROR: Please specify at least two reference fasta files." + fi +else + IFS=";" read -ra input <<< "$par_input" + tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") + index_files='' + if [ -d "$par_build" ]; then + index_files="path=$par_build" + elif [ ${#refs[@]} -gt 0 ]; then + index_files="--ref_primary=$primary_ref ${refs[*]}" + else + echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files." 
+ fi + + extra_args="" + if [ -n "$par_refstats" ]; then extra_args+=" --refstats $par_refstats"; fi + if [ -n "$par_ambiguous" ]; then extra_args+=" --ambiguous $par_ambiguous"; fi + if [ -n "$par_ambiguous2" ]; then extra_args+=" --ambiguous2 $par_ambiguous2"; fi + if [ -n "$par_minratio" ]; then extra_args+=" --minratio $par_minratio"; fi + if [ -n "$par_minhits" ]; then extra_args+=" --minhits $par_minhits"; fi + if [ -n "$par_maxindel" ]; then extra_args+=" --maxindel $par_maxindel"; fi + if [ -n "$par_qin" ]; then extra_args+=" --qin $par_qin"; fi + if [ -n "$par_qtrim" ]; then extra_args+=" --qtrim $par_qtrim"; fi + if [ "$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi + if [ "$par_untrim" = true ]; then extra_args+=" --untrim"; fi + if [ "$par_nzo" = true ]; then extra_args+=" --nzo"; fi + + if [ -n "$par_bbmap_args" ]; then extra_args+=" $par_bbmap_args"; fi + + + if $par_paired; then + bbsplit.sh \ + $index_files \ + in=${input[0]} \ + in2=${input[1]} \ + basename=${tmpdir}/%_#.fastq \ + $extra_args + read1=$(find $tmpdir/ -iname primary_1*) + read2=$(find $tmpdir/ -iname primary_2*) + cp $read1 $par_fastq_1 + cp $read2 $par_fastq_2 + else + bbsplit.sh \ + $index_files \ + in=${input[0]} \ + basename=${tmpdir}/%.fastq \ + $extra_args + read1=$(find $tmpdir/ -iname primary*) + cp $read1 $par_fastq_1 + fi +fi + +exit 0 diff --git a/src/bbmap_bbsplit/test.sh b/src/bbmap_bbsplit/test.sh new file mode 100644 index 00000000..1ad7aac2 --- /dev/null +++ b/src/bbmap_bbsplit/test.sh @@ -0,0 +1,145 @@ +#!/bin/bash + +echo ">>> Test $meta_functionality_name" + +echo "> Prepare test data" + +cat > reads_R1.fastq <<'EOF' +@SEQ_ID1 +ACAGGGTTTCACCATGTTGGCCAGG ++ +IIIIIIIIIIIIIIIIIIIIIIIII +@SEQ_ID2 +TCCCAGGTAACAAACCAACCAACTT ++ +!!!!!!!!!!!!!!!!!!!!!!!!! +EOF + +cat > reads_R2.fastq <<'EOF' +@SEQ_ID1 +TACCATTACCCTACCATCCACCATG ++ +IIIIIIIIIIIIIIIIIIIIIIIII +@SEQ_ID2 +CACTCGGCTGCATGCTTAGTGCACT ++ +!!!!!!!!!!!!!!!!!!!!!!!!! 
+EOF + +cat > genome.fasta <<'EOF' +>I +AGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGATCTCCTGACCTCAGGTGATCCATCCGCCT +TGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCTGGTTTCGAACTCTTGACCTCAGGTGGTCTG +CCCATCTTGACCTTCCAAAGTGCTGGAGCTACAGGCATGAGCCACTGCACCTGGTGCTTTTGGTAAAAGCAACCTGGAAT +CAAATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTT +TAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGAC +EOF + +cat > human.fa <<'EOF' +>human +AGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGATCTCCTGACCTCAGGTGATCCATCCGCCT +TGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCTGGCCTGGTTTCGAACTCTTGACCTCAGGTGGTCTG +CCCATCTTGACCTTCCAAAGTGCTGGAGCTACAGGCATGAGCCACTGCACCTGGTGCTTTTGGTAAAAGCAACCTGGAAT +EOF + +cat > sarscov2.fa <<'EOF' +>sarscov2 +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA +AATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGG +ACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT +EOF + +#################################################################################################### + +echo ">>> Building BBSplit index" +"${meta_executable}" \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ + --only_build_index \ + --build "BBSplit_index" + +echo ">>> Check whether output exists" +[ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1 +[ -z "$(ls -A 'BBSplit_index')" ] && echo "BBSplit index is empty!" && exit 1 + +#################################################################################################### + + +echo ">>> Testing with single-end reads and primary/non-primary FASTA files" +"${meta_executable}" \ + --input "reads_R1.fastq" \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ + --fastq_1 "filtered_reads_R1.fastq" + +echo ">>> Check whether output exists" +[ ! -f "filtered_reads_R1.fastq" ] && echo "Filtered reads file does not exist!" && exit 1 +[ ! 
-s "filtered_reads_R1.fastq" ] && echo "Filtered reads file is empty!" && exit 1 + +echo ">>> Check whether output is correct" +grep -q "ACAGGGTTTCACCATGTTGGCCAGG" filtered_reads_R1.fastq || { echo "Filtered reads file does not contain expected sequence!"; exit 1; } + +rm filtered_reads_R1.fastq + +#################################################################################################### + +echo ">>> Testing with paired-end reads and primary/non-primary FASTA files" +"${meta_executable}" \ + --paired \ + --input "reads_R1.fastq;reads_R2.fastq" \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ + --fastq_1 "filtered_reads_R1.fastq" \ + --fastq_2 "filtered_reads_R2.fastq" + +echo ">>> Check whether output exists" +[ ! -f "filtered_reads_R1.fastq" ] && echo "Filtered read 1 file does not exist!" && exit 1 +[ ! -s "filtered_reads_R1.fastq" ] && echo "Filtered read 1 file is empty!" && exit 1 +[ ! -f "filtered_reads_R2.fastq" ] && echo "Filtered read 2 file does not exist!" && exit 1 +[ ! -s "filtered_reads_R2.fastq" ] && echo "Filtered read 2 file is empty!" && exit 1 + +echo ">>> Check whether output is correct" +grep -q "ACAGGGTTTCACCATGTTGGCCAGG" filtered_reads_R1.fastq || { echo "Filtered read 1 file does not contain expected sequence!"; exit 1; } +grep -q "TACCATTACCCTACCATCCACCATG" filtered_reads_R2.fastq || { echo "Filtered read 2 file does not contain expected sequence!"; exit 1; } + +rm filtered_reads_R1.fastq filtered_reads_R2.fastq + +#################################################################################################### + +echo ">>> Testing with single-end reads and BBSplit index" +"${meta_executable}" \ + --input "reads_R1.fastq" \ + --build "BBSplit_index" \ + --fastq_1 "filtered_reads_R1.fastq" + +echo ">>> Check whether output exists" +[ ! -f "filtered_reads_R1.fastq" ] && echo "Filtered reads file does not exist!" && exit 1 +[ ! -s "filtered_reads_R1.fastq" ] && echo "Filtered reads file is empty!" 
&& exit 1 + +echo ">>> Check whether output is correct" +grep -q "ACAGGGTTTCACCATGTTGGCCAGG" filtered_reads_R1.fastq || { echo "Filtered reads file does not contain expected sequence!"; exit 1; } + +rm filtered_reads_R1.fastq + +#################################################################################################### + +echo ">>> Testing with paired-end reads and BBSplit index" +"${meta_executable}" \ + --paired \ + --input "reads_R1.fastq;reads_R2.fastq" \ + --build "BBSplit_index" \ + --fastq_1 "filtered_reads_R1.fastq" \ + --fastq_2 "filtered_reads_R2.fastq" + +echo ">>> Check whether output exists" +[ ! -f "filtered_reads_R1.fastq" ] && echo "Filtered read 1 file does not exist!" && exit 1 +[ ! -s "filtered_reads_R1.fastq" ] && echo "Filtered read 1 file is empty!" && exit 1 +[ ! -f "filtered_reads_R2.fastq" ] && echo "Filtered read 2 file does not exist!" && exit 1 +[ ! -s "filtered_reads_R2.fastq" ] && echo "Filtered read 2 file is empty!" && exit 1 + + +echo ">>> Check whether output is correct" +grep -q "ACAGGGTTTCACCATGTTGGCCAGG" filtered_reads_R1.fastq || { echo "Filtered read 1 file does not contain expected sequence!"; exit 1; } +grep -q "TACCATTACCCTACCATCCACCATG" filtered_reads_R2.fastq || { echo "Filtered read 2 file does not contain expected sequence!"; exit 1; } + +rm filtered_reads_R1.fastq filtered_reads_R2.fastq + +echo "All tests succeeded!" 
+exit 0 \ No newline at end of file From 6e6b13939c9d719f1cd7ff5a91a6562e0a6e2e29 Mon Sep 17 00:00:00 2001 From: Suman Muralidharan <104161349+sumanm99@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:23:03 +0530 Subject: [PATCH 05/15] nanoplot (#95) * nanoplot * test_data * reinitiate * gitignore * namespace * Testing NanoPlot in CLI * NanoPlot complete * Updated docker engine * Docker * Delete taget directory * Deleted * Input file * fastq with more reads * Delete config.vsh.yaml * Pull request changes * Delete var directory * Config arguments complete * Update help.txt * Update config file * Test files * runners script * gitignore default * Move output * Delete output directory * Runners script complete * Test script * default output * test data * params passed correctly * outdir * test script * input files * all test files * test data < 100 KB * test script update * Update CHANGELOG.md * Update CHANGELOG.md * Test cases in directories * rm .gz .pickle .feather files * reduce test input size * Multiple separator ";" and check there is only one input file --------- Co-authored-by: jakubmajercik Co-authored-by: Emma Rousseau --- CHANGELOG.md | 4 +- src/nanoplot/config.vsh.yaml | 230 +++++++++++ src/nanoplot/help.txt | 96 +++++ src/nanoplot/script.sh | 129 ++++++ src/nanoplot/test.sh | 549 +++++++++++++++++++++++++ src/nanoplot/test_data/script.sh | 102 +++++ src/nanoplot/test_data/summary.txt | 51 +++ src/nanoplot/test_data/test.bam | Bin 0 -> 2752 bytes src/nanoplot/test_data/test.bam.bai | Bin 0 -> 96 bytes src/nanoplot/test_data/test.fasta | 35 ++ src/nanoplot/test_data/test1.fastq | 49 +++ src/nanoplot/test_data/test2.fastq | 34 ++ src/nanoplot/test_data/test_rich.fastq | 40 ++ 13 files changed, 1317 insertions(+), 2 deletions(-) create mode 100644 src/nanoplot/config.vsh.yaml create mode 100644 src/nanoplot/help.txt create mode 100644 src/nanoplot/script.sh create mode 100644 src/nanoplot/test.sh create mode 100644 src/nanoplot/test_data/script.sh create 
mode 100644 src/nanoplot/test_data/summary.txt create mode 100644 src/nanoplot/test_data/test.bam create mode 100644 src/nanoplot/test_data/test.bam.bai create mode 100644 src/nanoplot/test_data/test.fasta create mode 100644 src/nanoplot/test_data/test1.fastq create mode 100644 src/nanoplot/test_data/test2.fastq create mode 100644 src/nanoplot/test_data/test_rich.fastq diff --git a/CHANGELOG.md b/CHANGELOG.md index 16e79693..9e59f784 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93). +* `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). + ## BREAKING CHANGES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). @@ -189,8 +191,6 @@ - `bbmap_bbsplit`: Split sequencing reads by mapping them to multiple references simultaneously (PR #138). - - ## MINOR CHANGES * Uniformize component metadata (PR #23). diff --git a/src/nanoplot/config.vsh.yaml b/src/nanoplot/config.vsh.yaml new file mode 100644 index 00000000..1c22775f --- /dev/null +++ b/src/nanoplot/config.vsh.yaml @@ -0,0 +1,230 @@ +name: nanoplot +description: | + Run NanoPlot on nanopore-sequenced reads. + NanoPlot is a plotting tool for long read sequencing data and alignments. +keywords: ["fastq", "sequencing summary", "nanopore"] +links: + repository: https://github.com/wdecoster/NanoPlot + homepage: http://nanoplot.bioinf.be/ + documentation: https://github.com/wdecoster/NanoPlot +references: + doi: 10.1093/bioinformatics/btad311 +license: MIT +argument_groups: + - name: Inputs + arguments: + - name: --fastq + type: file + description: Input fastq file(s), separated by ";". + example: read.fq + direction: input + multiple: true + - name: --fasta + type: file + description: Input fasta file(s), separated by ";". 
+ example: read.fa + direction: input + multiple: true + - name: --fastq_rich + type: file + description: | + Input fastq file(s) generated by albacore or + MinKNOW with additional information concerning channel and time, separated by ";". + example: read.fq + direction: input + multiple: true + - name: --fastq_minimal + type: file + description: | + Input fastq file(s) generated by albacore or MinKNOW with + additional information concerning channel and time. Minimal data is extracted + swiftly without elaborate checks. Separated by ";". + example: read.fq + direction: input + multiple: true + - name: --summary + type: file + description: | + Input summary file(s) generated by albacore or guppy, separated by ";". + example: read.txt + direction: input + multiple: true + - name: --bam + type: file + description: Input sorted bam file(s), separated by ";". + example: read.bam + direction: input + multiple: true + - name: --ubam + type: file + description: Input unmapped bam file(s), separated by ";". + example: read.ubam + direction: input + multiple: true + - name: --cram + type: file + description: Input sorted cram file(s), separated by ";". + example: read.cram + direction: input + multiple: true + - name: --pickle + type: file + description: Input pickle file stored earlier, separated by ";". + example: read.pkl + direction: input + multiple: true + - name: --feather + alternatives: [--arrow] + type: file + description: Input feather file(s), separated by ";". + example: read.arrow + direction: input + multiple: true + - name: Outputs + arguments: + - name: --outdir + alternatives: [-o] + type: file + direction: output + description: Specify directory in which output has to be created. + required: true + - name: Options + arguments: + - name: --verbose + type: boolean_true + description: Write log messages also to terminal + - name: --store + type: boolean_true + description: Store the extracted data in a pickle file for future plotting. 
+ - name: --raw + type: boolean_true + description: Store the extracted data in tab separated file. + - name: --huge + type: boolean_true + description: Input data is one very large file. + - name: --no_static + type: boolean_true + description: Do not make static (png) plots. + - name: --prefix + alternatives: [-p] + type: string + description: Specify an optional prefix to be used for the output files. + - name: --tsv_stats + type: boolean_true + description: Output the stats file as a properly formatted TSV. + - name: --only_report + type: boolean_true + description: Output only the report. + - name: --info_in_report + type: boolean_true + description: Add NanoPlot run info in the report. + - name: Filtering or transforming input + arguments: + - name: --maxlength + type: integer + description: Drop reads longer than length specified. + - name: --minlength + type: integer + description: Drop reads shorter than length specified. + - name: --drop_outliers + type: boolean_true + description: Drop outlier reads with extreme long length. + - name: --downsample + type: integer + description: Reduce dataset to N reads by random sampling. + - name: --loglength + type: boolean_true + description: Logarithmic scaling of lengths in plots. + - name: --percentqual + type: boolean_true + description: Use qualities as theoretical percent identities. + - name: --alength + type: boolean_true + description: Use aligned read lengths rather than sequenced length (bam mode). + - name: --minqual + type: integer + description: Drop reads with an average quality lower than specified. + - name: --runtime_until + type: integer + description: Only take the N first hours of a run. + - name: --readtype + type: string + description: | + Which read type to extract information about from summary. + Options are 1D, 2D, 1D2 + - name: --barcoded + type: boolean_true + description: Use if you want to split the summary file by barcode. 
+ - name: --no_supplementary + type: boolean_true + description: Use if you want to remove supplementary alignments. + - name: Customizing plots + arguments: + - name: --color + alternatives: [-c] + type: string + description: Specify a color for the plots, must be a valid matplotlib color. + - name: --colormap + alternatives: [-cm] + type: string + description: Specify a valid matplotlib colormap for the heatmap. + - name: --format + alternatives: [-f] + type: string + default: png + description: | + Specify the output format of the plots. + {eps,jpeg,jpg,pdf,pgf,png,ps,raw,rgba,svg,svgz,tif,tiff} + - name: --plots + type: string + description: | + Specify which bivariate plots have to be made. + [{kde,hex,dot} ...] + - name: --legacy + type: string + description: | + Specify which bivariate plots have to be made (legacy mode). + [{kde,dot,hex} ...] + - name: --listcolors + type: boolean_true + description: List the colors which are available for plotting and exit. + - name: --listcolormaps + type: boolean_true + description: List the colormaps which are available for plotting and exit. + - name: --no_N50 + type: boolean_true + description: Hide the N50 mark in the read length histogram. + - name: --N50 + type: boolean_true + description: Show the N50 mark in the read length histogram. + - name: --title + type: string + description: Add a title to all plots, requires quoting if using spaces. + - name: --font_scale + type: double + description: Scale the font of the plots by a factor. + - name: --dpi + type: integer + description: Set the dpi for saving images. + - name: --hide_stats + type: boolean_true + description: Not adding Pearson R stats in some bivariate plots. 
+resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/nanoplot:1.43.0--pyhdfd78af_1 + setup: + - type: docker + run: | + version=$(NanoPlot --version) && \ + echo "$version" > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/nanoplot/help.txt b/src/nanoplot/help.txt new file mode 100644 index 00000000..79869392 --- /dev/null +++ b/src/nanoplot/help.txt @@ -0,0 +1,96 @@ +usage: NanoPlot [-h] [-v] [-t THREADS] [--verbose] [--store] [--raw] [--huge] + [-o OUTDIR] [--no_static] [-p PREFIX] [--tsv_stats] + [--only-report] [--info_in_report] [--maxlength N] + [--minlength N] [--drop_outliers] [--downsample N] + [--loglength] [--percentqual] [--alength] [--minqual N] + [--runtime_until N] [--readtype {1D,2D,1D2}] [--barcoded] + [--no_supplementary] [-c COLOR] [-cm COLORMAP] + [-f [{png,jpg,jpeg,webp,svg,pdf,eps,json} ...]] + [--plots [{kde,hex,dot} ...]] [--legacy [{kde,dot,hex} ...]] + [--listcolors] [--listcolormaps] [--no-N50] [--N50] + [--title TITLE] [--font_scale FONT_SCALE] [--dpi DPI] + [--hide_stats] + (--fastq file [file ...] | --fasta file [file ...] | --fastq_rich file [file ...] | --fastq_minimal file [file ...] | --summary file [file ...] | --bam file [file ...] | --ubam file [file ...] | --cram file [file ...] | --pickle pickle | --feather file [file ...]) + +CREATES VARIOUS PLOTS FOR LONG READ SEQUENCING DATA. + +General options: + -h, --help show the help and exit + -v, --version Print version and exit. + -t, --threads THREADS + Set the allowed number of threads to be used by the script + --verbose Write log messages also to terminal. + --store Store the extracted data in a pickle file for future plotting. + --raw Store the extracted data in tab separated file. + --huge Input data is one very large file. 
+ -o, --outdir OUTDIR Specify directory in which output has to be created. + --no_static Do not make static (png) plots. + -p, --prefix PREFIX Specify an optional prefix to be used for the output files. + --tsv_stats Output the stats file as a properly formatted TSV. + --only-report Output only the report + --info_in_report Add NanoPlot run info in the report. + +Options for filtering or transforming input prior to plotting: + --maxlength N Hide reads longer than length specified. + --minlength N Hide reads shorter than length specified. + --drop_outliers Drop outlier reads with extreme long length. + --downsample N Reduce dataset to N reads by random sampling. + --loglength Additionally show logarithmic scaling of lengths in plots. + --percentqual Use qualities as theoretical percent identities. + --alength Use aligned read lengths rather than sequenced length (bam mode) + --minqual N Drop reads with an average quality lower than specified. + --runtime_until N Only take the N first hours of a run + --readtype {1D,2D,1D2} + Which read type to extract information about from summary. Options are 1D, 2D, + 1D2 + --barcoded Use if you want to split the summary file by barcode + --no_supplementary Use if you want to remove supplementary alignments + +Options for customizing the plots created: + -c, --color COLOR Specify a valid matplotlib color for the plots + -cm, --colormap COLORMAP + Specify a valid matplotlib colormap for the heatmap + -f, --format [{png,jpg,jpeg,webp,svg,pdf,eps,json} ...] + Specify the output format of the plots, which are in addition to the html files + --plots [{kde,hex,dot} ...] + Specify which bivariate plots have to be made. + --legacy [{kde,dot,hex} ...] + Specify which bivariate plots have to be made (legacy mode). + --listcolors List the colors which are available for plotting and exit. + --listcolormaps List the colors which are available for plotting and exit. 
+ --no-N50 Hide the N50 mark in the read length histogram + --N50 Show the N50 mark in the read length histogram + --title TITLE Add a title to all plots, requires quoting if using spaces + --font_scale FONT_SCALE + Scale the font of the plots by a factor + --dpi DPI Set the dpi for saving images + --hide_stats Not adding Pearson R stats in some bivariate plots + +Input data sources, one of these is required.: + --fastq file [file ...] + Data is in one or more default fastq file(s). + --fasta file [file ...] + Data is in one or more fasta file(s). + --fastq_rich file [file ...] + Data is in one or more fastq file(s) generated by albacore, MinKNOW or guppy + with additional information concerning channel and time. + --fastq_minimal file [file ...] + Data is in one or more fastq file(s) generated by albacore, MinKNOW or guppy + with additional information concerning channel and time. Is extracted swiftly + without elaborate checks. + --summary file [file ...] + Data is in one or more summary file(s) generated by albacore or guppy. + --bam file [file ...] + Data is in one or more sorted bam file(s). + --ubam file [file ...] + Data is in one or more unmapped bam file(s). + --cram file [file ...] + Data is in one or more sorted cram file(s). + --pickle pickle Data is a pickle file stored earlier. + --feather, --arrow file [file ...] + Data is in one or more feather file(s). 
+ +EXAMPLES: + NanoPlot --summary sequencing_summary.txt --loglength -o summary-plots-log-transformed + NanoPlot -t 2 --fastq reads1.fastq.gz reads2.fastq.gz --maxlength 40000 --plots hex dot + NanoPlot --color yellow --bam alignment1.bam alignment2.bam alignment3.bam --downsample 10000 \ No newline at end of file diff --git a/src/nanoplot/script.sh b/src/nanoplot/script.sh new file mode 100644 index 00000000..fc198e89 --- /dev/null +++ b/src/nanoplot/script.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# Unset boolean flags whose value is "false" so the ${par_x:+--x} expansions in the NanoPlot call omit them +unset_if_false=( + par_verbose + par_store + par_raw + par_huge + par_no_static + par_tsv_stats + par_only_report + par_info_in_report + par_drop_outliers + par_loglength + par_percentqual + par_alength + par_barcoded + par_no_supplementary + par_listcolors + par_listcolormaps + par_no_N50 + par_N50 + par_hide_stats +) + +for var in "${unset_if_false[@]}"; do + test_val="${!var}" + [[ "$test_val" == "false" ]] && unset "$var" +done + +par_fastq="${par_fastq//;/ }" +par_fasta="${par_fasta//;/ }" +par_fastq_rich="${par_fastq_rich//;/ }" +par_fastq_minimal="${par_fastq_minimal//;/ }" +par_summary="${par_summary//;/ }" +par_bam="${par_bam//;/ }" +par_ubam="${par_ubam//;/ }" +par_cram="${par_cram//;/ }" +par_pickle="${par_pickle//;/ }" +par_feather="${par_feather//;/ }" + + +inputs=( + "$par_fastq" + "$par_fasta" + "$par_fastq_rich" + "$par_fastq_minimal" + "$par_summary" + "$par_bam" + "$par_ubam" + "$par_cram" + "$par_pickle" + "$par_feather" +) + +one_input=false +for var in "${inputs[@]}"; do + if [ -n "$var" ]; then # this input type was provided + if [ "$one_input" = "false" ]; then + one_input=true + else # a second input type was provided: reject, exactly one is allowed + echo "Error: Multiple input file types specified." + exit 1 + fi + fi +done + +if [ "$one_input" = "false" ]; then + echo "Error: No input file type specified."
+ exit 1 +fi + + + +# Run NanoPlot; the ';'-separated input lists were turned into space-separated lists above and are expanded unquoted so each file becomes its own argument +NanoPlot \ + ${par_fastq:+--fastq $par_fastq} \ + ${par_fasta:+--fasta $par_fasta} \ + ${par_fastq_rich:+--fastq_rich $par_fastq_rich} \ + ${par_fastq_minimal:+--fastq_minimal $par_fastq_minimal} \ + ${par_summary:+--summary $par_summary} \ + ${par_bam:+--bam $par_bam} \ + ${par_ubam:+--ubam $par_ubam} \ + ${par_cram:+--cram $par_cram} \ + ${par_pickle:+--pickle $par_pickle} \ + ${par_feather:+--feather $par_feather} \ + ${par_verbose:+--verbose} \ + ${par_store:+--store} \ + ${par_raw:+--raw} \ + ${par_huge:+--huge} \ + ${par_no_static:+--no_static} \ + ${par_prefix:+--prefix "$par_prefix"} \ + ${par_tsv_stats:+--tsv_stats} \ + ${par_only_report:+--only-report} \ + ${par_info_in_report:+--info_in_report} \ + ${par_maxlength:+--maxlength "$par_maxlength"} \ + ${par_minlength:+--minlength "$par_minlength"} \ + ${par_drop_outliers:+--drop_outliers} \ + ${par_downsample:+--downsample "$par_downsample"} \ + ${par_loglength:+--loglength} \ + ${par_percentqual:+--percentqual} \ + ${par_alength:+--alength} \ + ${par_minqual:+--minqual "$par_minqual"} \ + ${par_runtime_until:+--runtime_until "$par_runtime_until"} \ + ${par_readtype:+--readtype "$par_readtype"} \ + ${par_barcoded:+--barcoded} \ + ${par_no_supplementary:+--no_supplementary} \ + ${par_color:+--color "$par_color"} \ + ${par_colormap:+--colormap "$par_colormap"} \ + ${par_format:+--format "$par_format"} \ + ${par_plots:+--plots "$par_plots"} \ + ${par_legacy:+--legacy "$par_legacy"} \ + ${par_listcolors:+--listcolors} \ + ${par_listcolormaps:+--listcolormaps} \ + ${par_no_N50:+--no-N50} \ + ${par_N50:+--N50} \ + ${par_title:+--title "$par_title"} \ + ${par_font_scale:+--font_scale "$par_font_scale"} \ + ${par_dpi:+--dpi "$par_dpi"} \ + ${par_hide_stats:+--hide_stats} \ + ${meta_cpus:+--threads "$meta_cpus"} \ + --outdir "$par_outdir" + +exit 0 diff --git a/src/nanoplot/test.sh b/src/nanoplot/test.sh new file mode 100644 index 00000000..cac10c17 --- /dev/null +++
b/src/nanoplot/test.sh @@ -0,0 +1,549 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# Fetch the test inputs that are only downloaded at runtime (.gz, .pickle and .feather) from the nanotest repository archive +wget https://github.com/wdecoster/nanotest/archive/refs/heads/master.zip +unzip master.zip + +########################################################################### + +# Test 1: Run NanoPlot with only input parameter (Fastq) + +mkdir test1 +pushd test1 > /dev/null # enter test1, remembering the previous directory + +echo "> Run Test 1: one input (Fastq)" +"$meta_executable" \ + --fastq "$meta_resources_dir/test_data/test1.fastq" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check that no HTML, PNG or TXT output file is zero bytes +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null # return to the previous directory + +echo "Test 1 succeeded." + +########################################################################### + +# Test 2: Run NanoPlot with multiple inputs (Fastq), passed as one ';'-separated value + +mkdir test2 +pushd test2 > /dev/null + +echo "> Run Test 2: multiple inputs (Fastq)" +"$meta_executable" \ + --fastq "$meta_resources_dir/test_data/test1.fastq;$meta_resources_dir/test_data/test2.fastq" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then + echo "Output files are not found!"
+ exit 1 +fi + +# Check that no HTML, PNG or TXT output file is zero bytes +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 2 succeeded." + +########################################################################### + +# Test 3: Run NanoPlot with multiple options-1 + +mkdir test3 +pushd test3 > /dev/null + +echo "> Run Test 3: multiple options-1" +"$meta_executable" \ + --fastq "$meta_resources_dir/test_data/test1.fastq" \ + --maxlength 40000 \ + --format jpg \ + --prefix biobox_ \ + --store \ + --color "yellow" \ + --info_in_report \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then + echo "Output files are not found!" + exit 1 +fi + +# Check that --store produced at least one .pickle file +if ! ls output/*.pickle > /dev/null 2>&1; then + echo "Extracted data is not found!" + exit 1 +fi + +# Check that no HTML, PNG, TXT or pickle output file is zero bytes +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi +if find output -name "*.pickle" -type f -size 0 | grep -q .; then + echo "Extracted data is empty." + exit 1 +fi + +# Check that at least one output file name starts with the "biobox" prefix +if ! ls output/biobox* > /dev/null 2>&1; then + echo "The prefix is not added to the output files." + exit 1 +fi + +popd > /dev/null + +echo "Test 3 succeeded."
+ +########################################################################### + +# Test 4: Run NanoPlot with multiple options-2 + +mkdir test4 +pushd test4 > /dev/null + +echo "> Run Test 4: multiple options-2" +"$meta_executable" \ + --fastq "$meta_resources_dir/test_data/test1.fastq" \ + --maxlength 40000 \ + --only_report \ + --raw \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -ne 4 ]; then # exactly 4 entries are expected in the output directory + echo "Output files are not found!" + exit 1 +fi + +# Check that --raw produced at least one .tsv.gz file +if ! ls output/*.tsv.gz > /dev/null 2>&1; then + echo "Extracted data is not found!" + exit 1 +fi + +# Check that the report, the TXT summary and the raw data are not zero bytes +if find output -name "NanoPlot-report.html" -type f -size 0 | grep -q .; then + echo "NanoPlot report is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi +if find output -name "*.tsv.gz" -type f -size 0 | grep -q .; then + echo "Extracted data is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 4 succeeded." + +########################################################################### + +# Test 5: Run NanoPlot with different input (Fasta) + +mkdir test5 +pushd test5 > /dev/null + +echo "> Run Test 5: Input Fasta" +"$meta_executable" \ + --fasta "$meta_resources_dir/test_data/test.fasta" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty."
+ exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 5 succeeded." + +########################################################################### + +# Test 6: Run NanoPlot with different input (Fastq_rich) + +mkdir test6 +pushd test6 > /dev/null + +echo "> Run Test 6: Input Fastq_rich" +"$meta_executable" \ + --fastq_rich "$meta_resources_dir/test_data/test_rich.fastq" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 6 succeeded." + +########################################################################### + +# Test 7: Run NanoPlot with different input (Fastq_minimal), using a file from the nanotest archive downloaded at the top of this script + +mkdir test7 +pushd test7 > /dev/null + +echo "> Run Test 7: Input Fastq_minimal" +"$meta_executable" \ + --fastq_minimal "../nanotest-master/reads.fastq.gz" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!"
+ exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 7 succeeded." + +########################################################################### + +# Test 8: Run NanoPlot with different input (Summary) + +mkdir test8 +pushd test8 > /dev/null + +echo "> Run Test 8: Input Summary" +"$meta_executable" \ + --summary "$meta_resources_dir/test_data/summary.txt" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 8 succeeded." + +########################################################################### + +# Test 9: Run NanoPlot with different input (BAM) + +mkdir test9 +pushd test9 > /dev/null + +echo "> Run Test 9: Input BAM" +"$meta_executable" \ + --bam "$meta_resources_dir/test_data/test.bam" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!"
+ exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 9 succeeded." + +########################################################################### + +# Test 10: Run NanoPlot with different input (pickle), using a file from the nanotest archive + +mkdir test10 +pushd test10 > /dev/null + +echo "> Run Test 10: Input pickle" +"$meta_executable" \ + --pickle "../nanotest-master/alignment.pickle" \ + --outdir output + +# Check if output directory exists +if [[ ! -d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 10 succeeded." + +########################################################################### + +# Test 11: Run NanoPlot with different input (feather), using a file from the nanotest archive + +mkdir test11 +pushd test11 > /dev/null + +echo "> Run Test 11: Input feather" +"$meta_executable" \ + --arrow "../nanotest-master/summary1.feather" \ + --outdir output + +# Check if output directory exists +if [[ !
-d output ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "output" | wc -l)" -lt 1 ]; then # fails only when the directory is empty + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find output -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find output -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find output -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 11 succeeded." + +########################################################################### + +# Test 12: Run NanoPlot with different output directory + +mkdir test12 +pushd test12 > /dev/null + +echo "> Run Test 12: different output directory" +"$meta_executable" \ + --fastq "$meta_resources_dir/test_data/test1.fastq" \ + --outdir out + +# Check if output directory exists +if [[ ! -d out ]]; then + echo "Output directory not found!" + exit 1 +fi + +# Check if output files are generated +if [ "$(ls -1 "out" | wc -l)" -lt 1 ]; then + echo "Output files are not found!" + exit 1 +fi + +# Check if files are empty +if find out -name "*.html" -type f -size 0 | grep -q .; then + echo "At least one HTML file is empty." + exit 1 +fi +if find out -name "*.png" -type f -size 0 | grep -q .; then + echo "At least one plot is empty." + exit 1 +fi +if find out -name "*.txt" -type f -size 0 | grep -q .; then + echo "NanoPlot summary file is empty." + exit 1 +fi + +popd > /dev/null + +echo "Test 12 succeeded." + +########################################################################### + +echo "All tests successfully completed!"
\ No newline at end of file diff --git a/src/nanoplot/test_data/script.sh b/src/nanoplot/test_data/script.sh new file mode 100644 index 00000000..9bb6ffd6 --- /dev/null +++ b/src/nanoplot/test_data/script.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +## Fastq file ## +# Define the number of reads +NUM_READS=10 +OUTPUT_FILE="./src/nanoplot/test_data/test1.fastq" + +# Function to generate a random DNA sequence of given length +generate_sequence() { + local length=$1 # length = first argument passed to the function + cat /dev/urandom | tr -dc 'ACGT' | fold -w $length | head -n 1 +} + +# Function to generate random quality scores of given length +generate_quality() { + local length=$1 + local average_quality=$2 + local quality="" + for ((i=0; i $OUTPUT_FILE #Create the fastq file -- NOTE(review): the text before $OUTPUT_FILE looks truncated (the rest of the generate_quality loop and the start of this redirect are missing); restore it from the original commit +for i in $(seq 1 $NUM_READS); do + # Randomly determine the read length (between 20 and 100 bases) + read_length=$(shuf -i 20-100 -n 1) + # Randomly determine the average quality (between 0 and 40) + average_quality=$(shuf -i 0-40 -n 1) + sequence=$(generate_sequence $read_length) + quality=$(generate_quality $read_length $average_quality) + echo "@read_$i" >> $OUTPUT_FILE + echo $sequence >> $OUTPUT_FILE + echo "+" >> $OUTPUT_FILE + echo $quality >> $OUTPUT_FILE + echo >> $OUTPUT_FILE # Add a blank line between reads +done + +NUM_READS=7 +OUTPUT_FILE="./src/nanoplot/test_data/test2.fastq" +echo -n "" > $OUTPUT_FILE #Create another fastq file +for i in $(seq 1 $NUM_READS); do + # Randomly determine the read length (between 20 and 100 bases) + read_length=$(shuf -i 20-100 -n 1) + # Randomly determine the average quality (between 0 and 40) + average_quality=$(shuf -i 0-40 -n 1) + sequence=$(generate_sequence $read_length) + quality=$(generate_quality $read_length $average_quality) + echo "@read_$i" >> $OUTPUT_FILE + echo $sequence >> $OUTPUT_FILE + echo "+" >> $OUTPUT_FILE + echo $quality >> $OUTPUT_FILE + echo >> $OUTPUT_FILE # Add a blank line between reads +done +
+######################################################################################### + +## Fasta file ## +wget -O src/nanoplot/test_data/test.fasta https://raw.githubusercontent.com/merenlab/reads-for-assembly/master/examples/files/fasta_01.fa +# reduced the size of each sequence to ~300 bp. + +######################################################################################### + +## Fastq_rich file ## +wget -O src/nanoplot/test_data/test_rich.fastq.gz https://github.com/epi2me-labs/fastcat/raw/master/test/data/bc0.fastq.gz + +# Decompress to a plain .fastq, then drop the downloaded archive +gunzip -c src/nanoplot/test_data/test_rich.fastq.gz > src/nanoplot/test_data/test_rich.fastq + +rm src/nanoplot/test_data/test_rich.fastq.gz + +######################################################################################### + +## Summary file ## +if [ ! -d nanotest ]; then + git clone --depth 1 --single-branch --branch master https://github.com/wdecoster/nanotest/ +fi + +mv nanotest/sequencing_summary.txt src/nanoplot/test_data/test_summary.txt +# keep only the first 51 lines (the header row plus 50 records) +head -n 51 src/nanoplot/test_data/test_summary.txt > src/nanoplot/test_data/summary.txt + +rm -rf nanotest + +######################################################################################### + +## BAM file ## +if [ !
-d /tmp/snakemake-wrappers ]; then + git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers +fi + +cp /tmp/snakemake-wrappers/bio/biobambam2/bamsormadup/test/mapped/a.bam src/nanoplot/test_data/test.bam + +# samtools view -h test.bam | head -n 44 > test_sm.sam +# samtools view -bS test_sm.sam > test_sm.bam +# samtools index test_sm.bam +# rm test.bam +# mv test_sm.bam test.bam +# mv test_sm.bam.bai test.bam.bai +# rm test_sm.sam \ No newline at end of file diff --git a/src/nanoplot/test_data/summary.txt b/src/nanoplot/test_data/summary.txt new file mode 100644 index 00000000..b566d6ec --- /dev/null +++ b/src/nanoplot/test_data/summary.txt @@ -0,0 +1,51 @@ +filename read_id run_id channel start_time duration num_events passes_filtering template_start num_events_template template_duration num_called_template sequence_length_template mean_qscore_template strand_score_template +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch124_read148_strand.fast5 170fb1c5-979b-4df7-864f-c5c14689a14c b5e83402e47ea9927694cb6e80d61180dfc8a49a 124 3733.02575 22.56375 12875 True 0.031 12875 22.53275 12875 8242 10.049 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch320_read27_strand.fast5 6d0956c2-c161-48f4-b2fa-142ca872406f b5e83402e47ea9927694cb6e80d61180dfc8a49a 320 1826.8425 123.37625 34771 True 62.52675 34771 60.8495 34771 16881 11.164 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch496_read2_strand.fast5 e9a32f7d-4aa6-4b85-9f76-6764769ad99c b5e83402e47ea9927694cb6e80d61180dfc8a49a 496 7.1315 121.414 52102 True 30.235 52102 91.179 52102 19346 9.822 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch485_read15_strand.fast5 b01da059-de21-4ed3-9eb8-6126ea59cb00 b5e83402e47ea9927694cb6e80d61180dfc8a49a 485 2586.54825 107.53375 36399 True 43.834 36399
63.69975 36399 19861 10.17 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch362_read219_strand.fast5 4d253e4f-2090-4adb-aa3e-16dc5e4d5e55 b5e83402e47ea9927694cb6e80d61180dfc8a49a 362 2720.77225 14.9615 2577 True 10.45175 2577 4.50975 2577 1672 12.663 -0.0004 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch163_read69_strand.fast5 4629b40a-aea4-4c92-9458-0e66ef4ecc17 b5e83402e47ea9927694cb6e80d61180dfc8a49a 163 673.69725 185.45225 95287 True 18.699 95287 166.75325 95287 59133 9.573 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch502_read25_strand.fast5 a8785b36-b442-4de7-9e43-5ddae6e39fdb b5e83402e47ea9927694cb6e80d61180dfc8a49a 502 884.39875 187.91175 83750 True 41.3485 83750 146.56325 83750 55323 11.985 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch355_read19_strand.fast5 436405ef-1e7d-43a5-99b4-929e31897043 b5e83402e47ea9927694cb6e80d61180dfc8a49a 355 571.15325 94.5895 11586 True 74.31375 11586 20.27575 11586 7636 11.865 -0.0003 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch240_read291_strand.fast5 f31d3457-2065-4acf-a9d5-966a4818564c b5e83402e47ea9927694cb6e80d61180dfc8a49a 240 3511.1415 57.23625 19778 True 22.62325 19778 34.613 19778 6176 8.535 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch124_read242_strand.fast5 d67b506a-b026-450d-803e-1e12bd1facaa b5e83402e47ea9927694cb6e80d61180dfc8a49a 124 6315.02775 53.26525 8709 True 38.023 8709 15.24225 8709 5765 12.3 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch217_read62_strand.fast5 68a01ec4-bf8f-4aa4-8763-39cd9a15b8aa b5e83402e47ea9927694cb6e80d61180dfc8a49a 217 3506.43875 16.38525 2944 True 11.23225 2944 5.153 2944 2011 9.229 -0.0007 
+nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch321_read18_strand.fast5 63fcec17-46fd-4cdc-a381-7b09d6f652e9 b5e83402e47ea9927694cb6e80d61180dfc8a49a 321 820.995 47.1295 25668 True 2.21 25668 44.9195 25668 17575 12.18 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch235_read49_strand.fast5 45eb23a8-63d1-4870-9a31-c349836cc728 b5e83402e47ea9927694cb6e80d61180dfc8a49a 235 3662.59625 250.6945 122186 True 36.86825 122186 213.82625 122186 20295 8.707 -0.0003 +nanopore2_20170302_FNFAF09967_MN17024_sequencing_run_170301_MG1655_PC_RAD002_87615_ch150_read334_strand.fast5 1b05de41-d66d-4947-8533-c27bdafeee69 b5e83402e47ea9927694cb6e80d61180dfc8a49a 150 4017.1535 183.56 97579 True 12.79625 97579 170.76375 97579 61111 9.709 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch5_read33_strand.fast5 b5b5833b-9341-4886-9ffd-7dd7f876c009 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 5 142.765 25.96625 9812 True 8.79475 9812 17.1715 9812 225 7.694 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch438_read26_strand.fast5 76a5b578-7c92-458b-9981-437f48b82455 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 438 160.71825 55.85775 31896 True 0.03975 31896 55.818 31896 21845 10.004 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch450_read2842_strand.fast5 26cfa987-1a6d-4137-b4b7-19f84f990bfc 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 450 362.60825 76.74075 43851 True 0.0 43851 76.74075 43851 29248 10.348 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch151_read88_strand.fast5 6e2f5cdb-c978-4403-9611-4faaa35722f8 a3f8b1fb56e77905d115a86ef283e1f838d7476d 151 184.193 8.241 4709 True 0.0 4709 8.241 4709 2638 10.235 -0.0004 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch402_read37_strand.fast5 32762878-4ef4-4f27-bfcd-5fe902fb6497 
9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 402 250.694 77.26225 25086 True 33.3605 25086 43.90175 25086 16574 11.969 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch206_read39_strand.fast5 d52c84b1-7a31-4639-b41e-cf5847681395 a3f8b1fb56e77905d115a86ef283e1f838d7476d 206 164.9445 36.5865 20906 True 0.0 20906 36.5865 20906 10700 7.348 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch174_read239_strand.fast5 c61d655a-fa49-4376-a266-d1710fffdc60 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 174 140.031 20.596 11726 True 0.07425 11726 20.52175 11726 5285 7.139 -0.0003 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch240_read28_strand.fast5 e32e01c1-79ad-4436-96a6-afb4414bccab a3f8b1fb56e77905d115a86ef283e1f838d7476d 240 96.7155 34.78475 3500 True 28.65875 3500 6.126 3500 2284 11.446 -0.0003 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch461_read3_strand.fast5 d7c4f400-faf1-4574-933c-14cfe563ecdb a3f8b1fb56e77905d115a86ef283e1f838d7476d 461 22.223 40.1695 1803 True 37.0135 1803 3.156 1803 1216 11.478 -0.0006 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch142_read28_strand.fast5 0a779938-c2f0-4fe9-937b-19b8172322b3 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 142 152.8475 63.728 36416 True 0.0 36416 63.728 36416 22419 10.38 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch220_read62_strand.fast5 53d223e3-8341-4fb2-82a9-534b29d917f0 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 220 250.694 22.03525 10606 True 3.47325 10606 18.562 10606 7053 12.447 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch17_read37_strand.fast5 6cd9b908-7d7c-4df2-887b-557631f4ecc4 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 17 320.315 7.64125 4343 True 0.04025 4343 7.601 4343 1726 10.341 -0.0005 
+nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch119_read68_strand.fast5 c1050d07-d676-4f09-bb50-5af9a0d36719 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 119 274.408 2.05275 1157 True 0.02775 1157 2.025 1157 804 11.135 -0.0024 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch260_read26_strand.fast5 e681ea0c-485a-4170-bb87-13e86878f0d5 a3f8b1fb56e77905d115a86ef283e1f838d7476d 260 280.141 2.97125 1281 True 0.728 1281 2.24325 1281 750 7.439 -0.0013 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch427_read24_strand.fast5 e4208eb0-c817-4512-a0d6-3472748d09a3 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 427 125.59 12.975 7397 True 0.02925 7397 12.94575 7397 4747 12.276 -0.0001 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch507_read3_strand.fast5 cd6e4550-22d9-49e5-8d4a-dc2d54eb78b9 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 507 22.127 64.9935 23188 True 24.41425 23188 40.57925 23188 5082 10.188 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch144_read32_strand.fast5 1ba73b61-7f74-46ce-acbe-643b8946ee07 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 144 147.0335 4.7515 2698 True 0.0285 2698 4.723 2698 1895 10.679 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch222_read21_strand.fast5 a045f9b2-93dd-467f-a7d9-ceb6d72a4f67 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 222 130.9055 1.071 612 True 0.0 612 1.071 612 392 7.268 -0.0036 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch363_read164_strand.fast5 49e5d9e0-b87d-4bb2-867b-fbc6a321bcf8 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 363 431.1165 8.23225 4674 True 0.05125 4674 8.181 4674 3212 11.092 -0.0001 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch170_read40_strand.fast5 7d15ba0b-67c8-4307-961e-5ddeb79b1056 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 170 232.9605 17.50725 9980 True 
0.0415 9980 17.46575 9980 5658 10.647 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch410_read30_strand.fast5 a7fc1f72-648d-471e-87f9-e2186b246627 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 410 141.0205 5.52325 3140 True 0.02725 3140 5.496 3140 1913 11.971 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch349_read69_strand.fast5 17df9262-7bf6-4711-bc7d-a0569f473cd3 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 349 307.5495 20.40675 11647 True 0.02425 11647 20.3825 11647 7829 12.098 -0.0004 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch10_read65_strand.fast5 1bc8d128-eed3-41c2-baea-3ca8cd9f0dc9 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 10 250.694 35.269 9451 True 18.72825 9451 16.54075 9451 6468 10.704 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch67_read26_strand.fast5 09437fae-3ba4-40cd-b02a-40b67a067ffe 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 67 127.99425 10.7565 6059 True 0.15325 6059 10.60325 6059 4117 9.926 -0.0004 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch234_read31_strand.fast5 30a2e325-06d5-4c30-843c-153da097c13b 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 234 129.3055 9.26275 5270 True 0.04 5270 9.22275 5270 3704 11.268 -0.0005 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch237_read27_strand.fast5 740be0f7-60f5-4fc5-96d9-225eda8ff83e 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 237 250.6935 35.98925 15850 True 8.251 15850 27.73825 15850 10192 11.631 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch464_read31_strand.fast5 b298c02b-4e8e-4636-b7d2-4920b7e8c292 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 464 157.44275 12.122 6913 True 0.02375 6913 12.09825 6913 4148 10.846 -0.0003 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch192_read3_strand.fast5 7dd06578-5b15-4485-988f-b039a2d86ead 
a3f8b1fb56e77905d115a86ef283e1f838d7476d 192 22.223 40.16925 21038 True 3.35275 21038 36.8165 21038 8534 8.957 -0.0003 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch507_read7_strand.fast5 94b3ba2e-2cc3-4a7c-a319-9b1bf976aeff 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 507 98.27225 5.3885 3073 True 0.01025 3073 5.37825 3073 1819 10.48 -0.0006 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch170_read42_strand.fast5 3eec21b1-872f-480b-8d11-daa41209338b 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 170 250.694 77.2625 44150 True 0.0 44150 77.2625 44150 24787 11.046 -0.0002 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch212_read68_strand.fast5 778f7330-179c-42f3-bdfe-f7c5ccddea01 a3f8b1fb56e77905d115a86ef283e1f838d7476d 212 164.93525 36.59575 20911 True 0.0 20911 36.59575 20911 14734 11.492 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch406_read32_strand.fast5 1592d38b-2bec-4892-8021-1a51507c6327 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 406 250.69425 77.26175 35190 True 15.6785 35190 61.58325 35190 19989 8.682 -0.0002 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch226_read66_strand.fast5 5f428477-799c-443a-986f-2ebd5b84ab18 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 226 351.44525 10.95275 6253 True 0.00925 6253 10.9435 6253 3877 11.287 -0.0004 +nanopore2_20170303_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_26713_ch275_read39_strand.fast5 890ec449-f329-40c8-9e57-f4eb2c358b4c 9ff0fede59c6669aa7f0d860aa73a4f0959d4b99 275 250.69425 8.092 4624 True 0.0 4624 8.092 4624 3122 12.351 -0.0005 +nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch466_read71_strand.fast5 db1765d2-0daa-4154-9a6d-6aed0cb13803 a3f8b1fb56e77905d115a86ef283e1f838d7476d 466 217.31975 17.3305 7267 True 4.6125 7267 12.718 7267 4838 11.926 -0.0003 
+nanopore2_20170302_FNFAF09967_MN17024_mux_scan_170301_MG1655_PC_RAD002_10881_ch212_read32_strand.fast5 56ab6b26-7b8f-4447-93b8-331d2dea9a99 a3f8b1fb56e77905d115a86ef283e1f838d7476d 212 94.6505 1.855 1048 True 0.02075 1048 1.83425 1048 759 12.249 -0.0014 diff --git a/src/nanoplot/test_data/test.bam b/src/nanoplot/test_data/test.bam new file mode 100644 index 0000000000000000000000000000000000000000..041bceb9ab119e2a6c7e51b4c9811ff5b09adadb GIT binary patch literal 2752 zcmV;x3P1H9iwFb&00000{{{d;LjnNQ0Cmve3W6{Y0O0j2COyPnprb{U?3a~LftfMr zZ_J!2I7e>ei}wJjVS&((lRuWQStRR&XiGlRd z6GkYjbzxfPS4X``VLofEeng(RtjRPZRvHVjP(ugkQ2N&YxtMp})hK8tPV){lZsKz@ zdLe|~Nn!yQCjkHeABzYC000000RIL6LPG)o&IzqrO>-Mr74@)S#X#cNv87gbzkZ*0 zMcI-aNmVRRoT`Myu$fs^X2ezbNeoy#LX>!zPo+*a)Tcya*lD4A|hFGE?H78QgXyVz^6#Wu@ra| zKfU0iQ^~Nk?-I03C@<9MJYNX#rMxj24V}oYYH9 z9fmO8xv50&nmj!m-Ewc+5mYJo%8D+hVkjj<{f0qX9dVLmd=T|yAmXQZ0i>zWa=`|L zgS^0p`_TP;Cv@}mUg$O(0J9kvI$(lh76RfJ{48O^EnY)3^R}{m5bDB%lxH+vSJWF* zONg`Z{7ihp@8(QqK=%>Qt(P}*!uw)(<1N%41A(z94bN{iZUe9FQkVohiO_XYjkj9AzEpg_+}$}}tVVoa z?vYqVNGWICBi@kdn%spkF=viBaoJ268CAKHKH-pSQ<5pL1RrKHSl$KTp~FrmL&dke zSLB&cICtK1RpFvD9B216lD$;z?K+KcU!RNdCxnW0mSBt~qkI3+ys#sK+X+f7{ zRF}!%42BRfs#~THm+XWD-|d-q8gP4|MAH-Qf%lIc;a%?t?|7R$OVurWGq0$17?JeB zB!dq_(Ojw49aMgY%J@O0S||?6Gn?^SjQ7~iye3(A`;(FTu3tkNkL2*xZUC2is_Hjp zy^#UwSPC_pEYa9)w0p~xJYn=+Kfyi-}8hVM^cU@!JqeBT5_ zW#{D){B!mH{qy!i%Cn|V6`HIWPzVtKr-*FQ7l;=!8=#WkGhH=hFSUvar4!o+xR`pC z;T_8e2I4bk$&=>0y4YN=xBmIB9RYp|JG{CJaQGkfGLm~O{8%+8AtKoXs}T@~tSL@d zk{Wvu0_gjY2~ zc*u!H_Ss}jq4Na`IBMu2nfnrHY_W}s@ma2^6~gBHOdi8W*Ow25_m}$%cV&0ugIrM> z7xb7aBq`>bSkEEhfV$bhh~|ob0XJ`~Dnad@6m*M}X7(=%Y)R@Ws9L!$SaG z9YFE&{*`J?Zk3>UTJb5W#Q-(OPPt^T5`!7mBk}_6bxx~A8-{w3^PZ)LoIQeqSugjl zZdF%T4Ko(z)Y2+VwCSR$M8$N?u8?K}-*!xuxvK%JBTdP@RT6s1!SLqui-S5Z3JBh* zJtbO@S~PIt=#(+LNT`Ao)m@GuPFqk#Co_kOW9SWc1JfnVGA-3AhI&?7=ZC{v9ALe% zTLcW5l^=ztDl;>cCkZmoWGI;dSoeta&_v5+A%{>;q!z*Jx2$WSJP3yEIxa& zr#_qStFH$!cz5ff+NwYbga^8@%@<3qcF9JT1ttt#?Vm+eqBxS_)QDPVRitlugAw2A 
zwN`(>-UHutjlSExyA#?bvkI^R)K+z}G9l|Th(3?}$ixb6K%*(LsXchew_bCqgYkG; z+wjpquceA_v01DAeziyVEiVsh4Jxc$<%^lokdO(vP1Us0Chr|Q*A1N?tQ`Z@*7mH{ zwc+yYxmvV6pXhUw&^}I7qmZO3PXvK}=~oEZLB+^E9HSZIZW4g1I+xiuogl zP|?5-V^fG2)*R$~#=AnXJp#Pt{_rX_uq_{I?ZA3fOWCY|lBv)dmr=%2Sk9d%txsoL z{5~kBXycxq+owD|7~W!j2(xdORm8lH^D~*2(zr)+b|&4l%NsxejkwAiK%vWmLyt(z zGJ|}{0|sZO`F>{qdN+7icXZkD!{NQzSYfMw!4|wiRwt#Y!nVcabq{36o<9<4~Y^DsBlTv3ZVz*F}R1EQ~g+kR=y&ZLH&kGg~KQ zClP)#;Q%zVKr%qN5w*J% zn0PwgW{Stt$l>r-mxuh1iBbV-kxK)H+CM9`L#^Vo9kfz%=BbdIwqQ*+t^H5r8(SRH zlr%vLk+9vt#nEDQy=QY*)k{%PO$};I`amMAbZ=`&tdMf&t&A^kigYU|?VZVoxCk1`wNp;VPJ9U|7NhVt7r0$fJvEKvlsgRJ{;U0AS|^4*&oF literal 0 HcmV?d00001 diff --git a/src/nanoplot/test_data/test.fasta b/src/nanoplot/test_data/test.fasta new file mode 100644 index 00000000..78c66827 --- /dev/null +++ b/src/nanoplot/test_data/test.fasta @@ -0,0 +1,35 @@ +>640612206 slice:0-298 +TTTCTATTTGCCATTCATACCACCTAGTCTCGTTTAAACAGGTCGCGTG +TATAGACCTTGTCCGCCACGTCCGCGAGCTCGTCGCTCCAGCGGTTGGC +GACGATCACGTCGCAGCCGGCCTTGAAGGCCTCCAGGTCGTGCGTGACC +TCGGAGCCGAAAAACTCCGGCGCGTCCAGCGTGGGCTCGTAGACCACCA +CGGGCACGCCCTTGGCTTTCACGCGCTTCATGACGCCCTGGATGGAGCT +CGCGCGGAAGTTGTCGGAGTTGGACTTCATCGTCAGGCGGTACACGCCC +>640612206 slice:15000-15298 +GCTTTTACCTGCGGTTTTAATATCACCAAAATGCCTGTGGTTGAGATCA +TTCAATTCGTCGTAGTAAACCGAAGTACTTTTGTTTGGCTACAAACAGT +ATCGGTATAGGCGATTATGAATATCGCTATAATTTGGATGGTAAAACGA +TTTTCTAGGACAACCGTTCGCCGATGGTAAACGGATGTTGTTTATACAG +CCTGTGTACAACAGATATACTTACATCCTGTGCGTAAAGCCCATGGCCA +GCAGGCCATGATTCTATCGAACTGGACCGTACTATGAGATTGATACACA +>640612206 slice:30000-30298 +GAACCAACAGCGACAGCAGCGTCAACAACGACAGCAGCACCAGGCAAAC +GGCAATGCGCCCAAGCAGCCCCCCACGCACGCTCGAGGCGATCGCGGCC +CCGCGCGCAAGTCCGCCGGCAACAATAAGTCGGGCAAAAAGACGACGCT +CTTTGTCGTCCTGGGTCTAATCGTCATTGTCTATATCGTTGGCGTCGTA +GCATTTTCGCAGGTAGCCTACCCCAACACCATCATCGCCGGCGTCGACG +TCTCGTTCTCTAACGCTTCGTCTGCCGCCACCAAGGTCAACTCGGCTTG +>640612206 slice:45000-45298 +TCCTCGTAGTAGAACGAGAACGCCTCGTCACGCGCGACGGCGATGATGG +GCCGCGCTCCCGCGATCGGCTCAAACCGGTAAGGTTCCTCGCAGATATC 
+GGGTGCCGTCGCCGCTATTTCGAGCAAGCGGTCGACGTCGACGCTCTTT +TCCACCAGCTCGGCCATCTTATCGATGCGCGCGGAGAGCTGCTCCACCT +CGTCGGCGGTCACAAGCCCCAGATGCCGGCTTTCGAGCGAGAACGCCTC +GTCGGCGGGGATATTCCCCAAAACCGCGACGCCCGTGTGCTTCTCGATC +>640612206 slice:60000-60298 +TCGGCACGCTTAAGGTCCATGAGCTCGTCAATCAGGCGGGCCGTGTCGA +CGCCCTCACCCGAAAGCGCGCGCATCATATTGAGCAGGCAGGAGCGCTC +GGGGCGCAGCGGCTTGTCGTGATATTTGATGAGCAGGCACACGTCGCGC +ACCAGGTCGTGCGAGAGCGCCAGGCGATCCATAATGACGCGCGCTTTCT +TGGCGCCGAGCTCGGGATGACCGTAGAAGTGTCCGCTGCCGGCGTGATC +GACCGTGAAACACTCGGGCTTGGACACATCGTGCAAAAACGCCGCCCAC diff --git a/src/nanoplot/test_data/test1.fastq b/src/nanoplot/test_data/test1.fastq new file mode 100644 index 00000000..f262027d --- /dev/null +++ b/src/nanoplot/test_data/test1.fastq @@ -0,0 +1,49 @@ +@read_1 +TCCTAAGTTCGTTGGTTCAAGCCTCGCTTGCCAACGGCGCATGTCAGACCCGATGGAGTAGTGCACCGGA ++ +MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM + +@read_2 +CCAGGACCAACAGAGTCTCTCAATACCGAGGCTGCGGAGGTAAAATACATCTACTCGAAGAAGAAAAAGCCGTACTACGTTTGTT ++ +00000000))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + +@read_3 +AAAAGCGGATCGGGTTGGTGGTTCCTCGAAGAGATTTGAATGGCACAATTCTCACAGCGGCTGACCCCGATATAGCCAAGTCAAATCATACGGTT ++ +/////////////////////////////////////////////////////////////////////////////////////////////// + +@read_4 +GTTCGGAGATCAGAAAGAGAAACCCAACAAAGAGATGGCTCTA ++ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +@read_5 +GCTCCACCCAACATTGAACGACCCCCAACTTAATATGCTTGGG ++ +4444444444444444444444444444444444444444444 + +@read_6 +AGCTATCACGTTAAATATATCAAACCCCTCGGTGAAAAGCAAGGCTCCGGTTAGCACGCCACGCTTAAGTAATTAGCTACCTAGTT ++ +22222222222222222222222222222222222222222222222222222222222222222222222222222222222222 + +@read_7 +GGCACTCCATCACCGTACTTAACCTGTAAGTTACCTCGCCGAGCAAA ++ +99999999999999999999999999999999999999999999999 + +@read_8 +CAGACTACTGGCAGACATCGGAAATGCCTTGCCTCGGTTTCGCTGTAGCGGT ++ +GGGGGGGGGGKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK + +@read_9 +AACGTTAAAGCAGGGACGCGTGTTCCCTCCGA ++ 
+DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + +@read_10 +ACTGGTATGTCGTGGTACCCTTGA ++ +111111111111111111111111 \ No newline at end of file diff --git a/src/nanoplot/test_data/test2.fastq b/src/nanoplot/test_data/test2.fastq new file mode 100644 index 00000000..b9283728 --- /dev/null +++ b/src/nanoplot/test_data/test2.fastq @@ -0,0 +1,34 @@ +@read_1 +TCAGGATCCGACCGTTTTGG ++ +55555555555555555555 + +@read_2 +CGTCAGGTCTTAATGTCGTGGTTGTGATTGTTAATAATATACTCTATGTTC ++ +777777777777777777777777777777777777777777777777777 + +@read_3 +GCTATCTTCCGAAAGAGGCTATTTCAGGTCCTTCGTGGCTCGCCACTTAT ++ +22222222222222222222222222222222222222222222222222 + +@read_4 +ACGGGATCGCCGGTCCATACTGGTTCGGGAACCTCTCTAACTTAACCATGAGAGGTTCGAGTCC ++ +MMMMMMMMMMMMMMMMMMMMKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK + +@read_5 +ATTTCTAAGTCTGTGGCTTATGGACTGGCTCCATGCTCGGGCTGGTATACCGTT ++ +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +@read_6 +CAAAGCCGACCCAAATATTTTCCTAGCCTCTCACCCCGTAGTCGCTCGACCGTCACTGTTCCCTTATCATATTACACTCTG ++ +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + +@read_7 +AATAAAGCCCGTTCCACACTTTAGCAATGTCAAGACTGTATCATCGACAGCGGTAGTTATGTAGCCAGCACATTTCATTACCCCCTCGC ++ +77777777777777777777777777777777777777777777777777777777777777777777777777777777777777777 \ No newline at end of file diff --git a/src/nanoplot/test_data/test_rich.fastq b/src/nanoplot/test_data/test_rich.fastq new file mode 100644 index 00000000..d47af6ae --- /dev/null +++ b/src/nanoplot/test_data/test_rich.fastq @@ -0,0 +1,40 @@ +@32e13a1c-4171-4706-b6ce-a32c0f65fa16 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=9 ch=282 start_time=2021-04-20T17:00:40Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified 
+GATCTGGGTGTTTTAACTTGATCCCGCTAATGGCTTCTAACTTCGTTTCGCATTTATCGTGAAACGCTTTCGCGTTTTCGTGCGCCGCTTCACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACAATTTATGATGAACCGACGACGACTACCAGTGCCTTTGTAAGCACAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACACGTTAATAGTTAATAGCGTACTTCTTTTG ++ +$#$#%&).6/*.-,,'##$.)*46$$$,$$;77;?B=6::<<>::9<228;<>DA;A<7>@=6.550.47===>0095731+0;667?==>C@A79??6;.7/*++-1')69<=>>>??AD@=@8:?=@?GDC>A:50# +@b87f011e-b802-4993-8f56-fd240b2e784f runid=5a21d8a6996146deceeaea3784244c52741cae93 read=19 ch=213 start_time=2021-04-20T17:00:41Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified +TTGTACTTCGTTCGGTGCAGATGGTGTTTAACCTCAATCAAAGACGACAGGTGTTTTCGCATTTATCGTGAAACGCTTTCGCCCAGCATTTTCGTCCCGCCACTTCACTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAACTTCACAACTGCTCCTGCCATTTGTCTGGAAACACTTTCTGTGAAGGTGTCTTTGTTTCAAGTAAACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACACACAACATTTGTGTCTGGTAACTGTGATGTTGCTTAGCGGAATTGTCAACAACACAGTTTATGATCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACGTACCAGATGTTGGTTGGGAA ++ +%&$&#&'('*,-.'))%%$%#%%'2157//+2/037764-+*(*)''&((496;@<4,'(**.1+++(*))6:6).-///%&*&''(&(+++('($&$'((($$%%%&%.,.004+31211.++,..534;;8<6;)53430(,9<54/8958./0/-'&'**/84/42*'(*,*+3343.'$#/06350>678;>>9>C59/0&&''&&(%%#(17'$-20//557-&),+-1;::6878840,1())78<>D;8<:4'8:;=>/<;;=0'143//../(+)%2435(0*'$$(($$$'%))*-/0+-21-*'''90<-'+-//.$,('.)))%.$%'+2+++,==>=<:=<74-&')/740.-.485776<87-.699::0//4'&)7=;:7623-%&0*%'%## +@6f64aedb-bb8e-4777-b494-43e661841e06 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=13 ch=67 start_time=2021-04-20T17:00:41Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified 
+ATAGCCGCCGTTCATTGCATCTTAACGCGTTCAGTTATATTTGTTGGAATTGTTTAACCCTTATCCAGGGTTTAACCAGCAACTTTGTTTTCGCATTTATCGTGAAAACGCTTTCGCGTTTTCAATTGCGCCGCTTCAACATTACAAATACCATTTGCTATGCAAATGGCTTATAGATTTAATGGTATTGGAGTTACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAA ++ +&%$'(($'%,12'(&($$$%&'*&$$')/*..+36(#&#$%$(&'''&((+5870.(&'&%)%57-&((('0*%%#$&%(((&%264;ACC=:ADCD@@B:+-(%&$$$$'''$$&('$(%&&%%&0+6586*057;455&&)1235908>@BABF?D:DBAFGH>;;:>@@;9('$%%)((%%),,,.7.0==<76@<@=A=<;1F=C9A64=>ADEDC9?7<967435>=:<=@EFHIJOKH>=G?D>DAE>?C@C;>:@>>EIG>CD>?H><;HIJ:BDC<>?GDEPIIH=@?7*6AB>DB>??-37>A=AA@A97-. +@c372fb2c-dd45-4feb-81b2-c167c3d1ce93 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=18 ch=337 start_time=2021-04-20T17:00:41Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified +ATACTTCGTTCAGTTATCGAAGGTGGGTGTGGCTTGCTGGTGTGTCCTGACGGTAGGTTCACCATTTATCAGTGAGCATTTCACAGAGTTTTGCACAATTGCGCCCTTCCCCATGGTAGATGGGTAAAGTGGGAGGCATCCTGCAAACCTGCTCTGAAGTGGCAGAACTCCTCTCCCATTCTCTGGACCTGCCATGTGGCCACATCCAGCTTCAGGGAGTTTGGGAGGGCCCAGAAGAAAGAAGGGAAACATTGTGTGGGCACACACCAACCCACCTGTCTCAACTCCCCTCAGCTGGTAACAGGAAGAGAATCCTT ++ 
+'0%''(&.00,+/0-#&$&&$&&-(,,)(&%&##$#$'%'*(($(*,&*,*(*''+02*&$$%('+&'(&'&('%%$$#'(*$$#&#'#&%$$$$%%%'/'&&&&(,45751(+$&%&&&''*+)675+:35-''&+013*%*2/1,+48:8<:78344(%%64A@71$$&%&),'('%%&%$#$%%))$$##$$$''%%#&##$#$&(('$%%%%%$&%&)%&%,%%#%%&(#&$##($$$$,.+-,*++(%.$$-+5(796:B@7**,%&$$,-*.5,,**%%%&$%%&%,+#&%'))(**))0+255596564:<<>92:<57%*''''$%%%$%'*$$%%%%$%&%.&+)&%#$$%%%&#%((($$%#-,06871)..0,.')1'&&),/04*0%&&%#&87@HF;;B?=?A=9('%&''%)(#%+18-17*976;F<=?ACDAAC=6(;<>@=DBB:;;55780/56675571-73/2*/334653($$(%$%%(&#$)'.--,*+9489>7<3532%%%%&$'$,&/*,&%.,'%./(2-+).,222,'110('*(+(%.6;:88,%&%(($',)/5-234-')&%'.,)$*-22%+++./3;555,'&(+50/%)-23*'$(%++//341-BDF7;:99.((92+%,+)%-+-.&)*&-%&%&&&##'(#$)+29:;3'9>>=>3).001)%$%'%%&-,'&$$#$%$%/(%$$$%-7(0*,$(+*,0162233))*$+$))&&$&###%#&$)10566655-&%%(&''*--''6>AAAAC;:344)@A@B<@;?9)6('',$-)*()0-,000(&%.-)()&%)#$$)$###%%(%).*%)'##(##(%%,)%9=AH==>>?>;?@54G@@9?A<57?A>=@<=<96321-(,.11,*7:9:;A=9B4==?@1+)&&(''++)*/0,,77(3.)++2+ADD9EFI@>.*21/&&&&()4883>>989;.*+/-+,..3,3,,*0,''.2.5/256&*7778*('-**'-/655..,9;=64&%&('**('( +@aa81ca34-9310-42fd-9893-33112e283acc runid=5a21d8a6996146deceeaea3784244c52741cae93 read=19 ch=244 start_time=2021-04-20T17:00:41Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified 
+TACATGTACTTCGTTCAGGCTAGGTGTTTTTAACCGTAACCTATCGTGTTTCCCCTAGTTTTCGCATTTATCGTGCATTGCTTTCGCGTTTTTCGTGCGCCGCTTCATCTGGCATTAATGCTTCCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTGTCGATCTCCAAGAACTTGGAAAGTATGACAGTATATAAATGACATGTACATTTGGCTAGGTTTTTATAGCTGGCTTGATTGCCATATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTAACTTTGAAGCAAGGTGAAATCAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATGCCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCATAGCGCTTCCAAAATCATAACCCTCAAAGAGATGGCAACTAGCACTCTCCAGATTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAA ++ +#$$###'(334306/&$&$%+-34>:?CA=;92).&))(48>BD>9A;AAEB;=05014?D:<-4469:5:5*%$$$#'+--1002A;@HLI=999:A/:<3'';ABC@BA::444.')&%$&$,*8@E70::47@AA;=>9)$/33135>>:0>CDDCG=@>H>3<)5/%'116@AB@9;@GHGHFE>DDFAG?B?ANH<87-*%&<54<:@?FF?6BAA8EGA@B?B@AC:<;?68?@D:?A58?>=@87<..37<88>>@2???BA@9:AB???8?GCDCFGBDBFEEDBGE;./66;>:9513/&),,,/&&$##$''1264(%+(326)1<-77AA.C=CEFF=@6=G??DFACBEFHH>,B@>-('14554./(*/(&&%59=<==)44-:;A=2=@==>;@=948;<5<;>E>>>?A?98=;?@=?B@HH222(&39EHEFGIG@=>--@@HF>=A51%.6;@BC>@22;($:.("$$$#&#'$%(35)6$547??DDD8J@@BF?EF@FF@CAA54&& +@c746fb2f-78f6-4a0a-9c75-39465c855c8d runid=5a21d8a6996146deceeaea3784244c52741cae93 read=35 ch=379 start_time=2021-04-20T17:00:42Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified +GTCATGGCGCTGGTTCAGCTCGATCTTGTACTTCGTTCCAGTTCAGTGGGTGTTTAACGAGTGGAAAAGGCTGGAGACCGTTTTCGCATTTATCGTTTCGCGTTTTTCGTGCGCCGCTTCATTGTTTGATGAAGCCAGCATCTCGTGTCACTTTGTTGAAAATGAATCTTCAATAAATGACCTCTTGCTTA ++ +%,)$$%'+**)()-**&$&(-))*)$$$&&&&&*02751.,$(%#$&$&%+'+,)#&&)(*/)-0/.,--8.-+(.2489>@@80%%*-.-//)+%%969@@ADGD>86;')*78587:?=ED@FGGECC>9.562.9:79.'&%**$*0357;49<5363''$$6;9;>18;:;:$8:980:<=<+00/$ +@99a108d2-8e72-42bf-bebf-ad8373cfe450 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=38 ch=177 
start_time=2021-04-20T17:00:42Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified +TGTGGCCTTTTAATTCAGTTACTGATTTGGTGTTTAACCTCGCCACACTCATAGAGGTCACACGGTGTCGCATTTATGAAACGCTTTCGCGCGTTTTTCGTGCGCCACTTCACTGAAAAATGCATTAGGTAAAAGACTGTGGCTAGCATTACACAGTTACTTCACTTCAGACTATTACCGACATACTCAACTCAATTGGTGCAGACATAAGTGTTGAACATATTTACCTTCTTCATCTACAATAAAATTGATGATGAACCTGAAAATTTATGTCCAAATTCCACTAATCGACGGTTCATCAGGTTGACCCAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACAGACTTGTAGCACTCATTCGTTTCGGGAAGAGACAGGTACGTTAATAGTTAACTTAATATGCTTCTTTT ++ +($.((('&'&$$(()#$'*'##%#%$%++,/.*+)435256573%14=90,)'$%-),-%)%&$''%(&$&')/.++(*,)((&&)).''%564=A?<777/..00(8898:5.14314.))'&')%)7:>?6/);7,/&&%%*($')-3)%'&%&4;:=??::6<;99894&$%'&'&%#%%&%*0565@?90-01%(+&&%$$$%'&**5358$$3.-6((@B<<@BGBEDBAKDDC?DE@B=6)**,$/)&%''$-'((,('&$&$%%445;47//8-($$$')('()(&/79.66)%0(('&&&,,12/:4224<=??C@>9;%=ACFCB=<>3/,-55++'$'/4;A87:A@?;(1+7846??>;><:@A@;?A.,,7-*+-..-%%%(+00:979<75*-DAB(,45.(?;<;;9>:4,+%&2.-,$$&&&%#%$**3**0-* +@5d01447f-f17b-4acb-b87e-d60d8aeeccc8 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=21 ch=417 start_time=2021-04-20T17:00:41Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified 
+ATGATGGCCTTCTAGATTTCAGGCATTTGGTGTTTAACCCGACGTAAGTGGTTTTCGCATTTATCGTGGCTTTCGCGTTTTTCGTTGCCGCTTCATTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCGGATTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAATAAGACAAAAACACCCAAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCGCCAATTAAAGATTTTGGTGGTTTAATTTTTCACAAATATTGTAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGGGCCATTTGTGCACAAAGTTTAGCGGCCTTACTGTTTTGCCACCTTGCTCACAGATGAAATGACCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCTATAGAGTTTAATGGTATTGAGTTACA ++ +(*+*+''%&),$&&%%%+(($)(&$#&%$&3*/2-/.($(%%&(()&,*)-2>?<6096688'<-1,++1/28277;@?996*,+)%%%&148456;A9=?=>==E?>=C@>:4326=IJGBFILJBAB54831($%)+%'$148;86744.21312BH???>GCFGK@C?BC<*(2$(.045?@6CB8?=<;@A:*=>>>90146>>:@A?AA:GHGFF>,./0.'&%(%%)4ABEFRQOHFGBGCG=8,=@CEEFDEAC38/5#%1.11/241-/,-/0-174+)39=DB>791;=@>B@?>;;?B:===;?45<942246*>ABCDBA<><66?>AGHG:C@BBA?==::1.-/.21016.1&%('$&*.'<78..==3-?A@:%?7:ADCF/EE?>BB=21:8?3=?,,.),2>@AA;8:=6220143=:32>?DJIGE=>D;?8,++,.)**2::358=@?>==6882424;<<;+/0,).166($-&+--/67?@==GEFHEFA8962-%#%(%%%$'&<:77C=<><>?@*=<>:;% +@b0279f8e-e988-44c5-895f-201b68217623 runid=5a21d8a6996146deceeaea3784244c52741cae93 read=32 ch=435 start_time=2021-04-20T17:00:43Z flow_cell_id=FAP67897 protocol_group_id=2021-04-20_UKBC sample_id=RNAsst10002_spike_BA barcode=unclassified barcode_alias=unclassified +AAATCATGGCCACTTCGTTCAGTTACGGAAAGGTAAGATTGTTTAACCGTCGATACTGGTTCTCATGGACCGCATTTATCGTGAAGCGCTTTCGCGCGTTTTCGTCGCCCGCTTCATGAAAATTAAAACCACCAAAATCTTTAATTGAATTTTGGTGTTTTGTAAATTTGTTTGACTTGTGCAAAAACTTCTTGGGTGTTTTTGTCTTGTTCAACAGCTATTCCAGTTAAAG ++ +('&.-'&&(((&**+'-./-,-/0&%&&**-,,*.03..77<>CAB??;@6542,+**&%)$(($%%&%$$#%&')-094)'%'($%$&.12..($44871.+()#%*-(*,2648A?GFA?-CCBC9:@11?@B@=69AA:+++,,###%(*14:6<<<4.4=;99:A=>=/33365%+#%9;BC<8GH>BCC3=96>>GLIBAA812+:&<><;<8-'.::;;0' From d6c9475ccf825f2df5666cdd0baf4048e98b8812 Mon Sep 
17 00:00:00 2001 From: Leila011 Date: Sat, 26 Oct 2024 15:07:08 +0200 Subject: [PATCH 06/15] Add agat sp statistics (#107) * add help * add config * add running script * add test data and expected output + script to fetch them * add tests * update changelog * cleanup * config: replace `-d` by a longer name `--plot` * add set -eo pipefail to script and test files * create temporary directory and clean up on exit * improve config: add requirements, add keywords, format description,.. * cleanup changelog * PR fixes, extended unit tests * Smaller test data, small changes to version format and config format --------- Co-authored-by: Robrecht Cannoodt Co-authored-by: jakubmajercik Co-authored-by: Emma Rousseau --- CHANGELOG.md | 7 ++ src/agat/agat_sp_statistics/config.vsh.yaml | 93 +++++++++++++++++++ src/agat/agat_sp_statistics/help.txt | 60 ++++++++++++ src/agat/agat_sp_statistics/script.sh | 26 ++++++ src/agat/agat_sp_statistics/test.sh | 65 +++++++++++++ src/agat/agat_sp_statistics/test_data/1.gff | 78 ++++++++++++++++ .../agat_sp_statistics/test_data/script.sh | 14 +++ .../test_data/stats_out.txt | 93 +++++++++++++++++++ 8 files changed, 436 insertions(+) create mode 100644 src/agat/agat_sp_statistics/config.vsh.yaml create mode 100644 src/agat/agat_sp_statistics/help.txt create mode 100644 src/agat/agat_sp_statistics/script.sh create mode 100644 src/agat/agat_sp_statistics/test.sh create mode 100644 src/agat/agat_sp_statistics/test_data/1.gff create mode 100755 src/agat/agat_sp_statistics/test_data/script.sh create mode 100644 src/agat/agat_sp_statistics/test_data/stats_out.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e59f784..dbc4d95d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ * `agat`: - `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). + - `agat_sp_statistics`: provides exhaustive statistics of a gtf/gff file (PR #107).
+ * `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96). @@ -49,12 +51,16 @@ based on a provided sequence IDs or region coordinates file (PR #85). * `agat`: + - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). + * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). @@ -91,6 +97,7 @@ * `trimgalore`: Quality and adapter trimming for fastq files (PR #117). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/agat/agat_sp_statistics/config.vsh.yaml b/src/agat/agat_sp_statistics/config.vsh.yaml new file mode 100644 index 00000000..6890bb84 --- /dev/null +++ b/src/agat/agat_sp_statistics/config.vsh.yaml @@ -0,0 +1,93 @@ +name: agat_sp_statistics +namespace: agat +description: | + The script provides exhaustive statistics of a gtf/gff file. + + If you have isoforms in your file, even if correct, some values calculated + might sound incoherent: e.g. total length mRNA can be greater than the + genome size. Because all isoforms length is added...
It is why by + default we always compute the statistics twice when there are isoforms, + once with the isoforms, once without (In that case we keep the longest + isoform per locus). +keywords: [gene annotations, statistics, gff] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_statistics.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + - commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-i] + description: Input GTF/GFF file. + type: file + required: true + example: input.gff + - name: --gs_fasta + description: | + Genome size directly from a fasta file to compute more statistics. + type: file + example: genome.fasta + - name: Outputs + arguments: + - name: --output + alternatives: [-o] + description: | + The file where the results will be written. + type: file + direction: output + required: true + example: output.txt + - name: Options + arguments: + - name: --plot + alternatives: [-p, -d] + description: | + When this option is used, an histogram of distribution of the features will be printed in pdf files. + type: boolean_true + - name: --gs_size + description: | + Genome size in nucleotides to compute more statistics. + type: integer + example: 1000000 + - name: --verbose + alternatives: [-v] + description: | + Verbose option. To modify verbosity. Default is 1. 0 is quiet, 2 and 3 are increasing verbosity. + type: integer + example: 1 + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` + option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
+ type: file + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/.*v\.//; s/\s.*//' | sed 's/^/AGAT: /' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_statistics/help.txt b/src/agat/agat_sp_statistics/help.txt new file mode 100644 index 00000000..fa6ef24d --- /dev/null +++ b/src/agat/agat_sp_statistics/help.txt @@ -0,0 +1,60 @@ +```sh +agat_sp_statistics.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_statistics.pl + +Description: + The script provides exhaustive statistics of a gft/gff file. /!\ If you + have isoforms in your file, even if correct, some values calculated + might sounds incoherent: e.g. total length mRNA can be superior than the + genome size. Because all isoforms length is added... It is why by + default we always compute the statistics twice when there are isoforms, + once with the isoforms, once without (In that case we keep the longest + isoform per locus). + +Usage: + agat_sp_statistics.pl --gff file.gff [ -o outfile ] + agat_sp_statistics.pl --help + +Options: + --gff or -i + Input GTF/GFF file. + + --gs, -f or -g + This option inform about the genome size in oder to compute more + statistics. You can give the size in Nucleotide or directly the + fasta file. 
+ + -d or -p + When this option is used, an histogram of distribution of the + features will be printed in pdf files. (d means distribution, p + means plot). + + -v or --verbose + Verbose option. To modify verbosity. Default is 1. 0 is quiet, 2 + and 3 are increasing verbosity. + + --output or -o + File where will be written the result. If no output file is + specified, the output will be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. \ No newline at end of file diff --git a/src/agat/agat_sp_statistics/script.sh b/src/agat/agat_sp_statistics/script.sh new file mode 100644 index 00000000..9865c4b2 --- /dev/null +++ b/src/agat/agat_sp_statistics/script.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# unset flags passed as "false" so that ${par_plot:+-d} below expands to nothing +[[ "$par_plot" == "false" ]] && unset par_plot + +if [[ -n "$par_gs_size" && -n "$par_gs_fasta" ]]; then + echo "[error] Please provide only one of the following options to set genome size: --gs_size or --gs_fasta" + exit 1 +fi + +# run agat_sp_statistics +agat_sp_statistics.pl \ + -i "$par_gff" \ + -o "$par_output" \ + ${par_plot:+-d} \ + ${par_gs_size:+--gs "${par_gs_size}"} \ + ${par_gs_fasta:+--gs "${par_gs_fasta}"} \ + ${par_verbose:+--verbose "${par_verbose}"} \ + ${par_config:+--config "${par_config}"} + + diff --git a/src/agat/agat_sp_statistics/test.sh b/src/agat/agat_sp_statistics/test.sh new file mode 100644 index 00000000..35f42ee0 --- /dev/null +++ b/src/agat/agat_sp_statistics/test.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +set -eo pipefail + +test_dir="${meta_resources_dir}/test_data" + +# create
temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +cd "$TMPDIR" + +mkdir test1 +pushd test1 + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/1.gff" \ + --output "output.txt" + +echo ">> Checking output" +[ ! -f "output.txt" ] && echo "Output file output.txt does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "output.txt" ] && echo "Output file output.txt is empty" && exit 1 + +echo ">> Check if output matches expected output" +# test diff inside the condition: under `set -e` a bare failing diff would abort before the message is printed +if ! diff "output.txt" "$test_dir/stats_out.txt"; then + echo "Output file output.txt does not match expected output" + exit 1 +fi + +echo "> Test successful" + + +popd +mkdir test2 +pushd test2 + +cat <<EOF > genome.fasta +>sample_sequence +ATGCGTACGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC +EOF + +echo "> Run $meta_name with both gs_size and gs_fasta" +error_message=$("$meta_executable" \ + --gff "$test_dir/1.gff" \ + --output "output.txt" \ + --gs_size "1000000" \ + --gs_fasta "genome.fasta" 2>&1 || true) + +expected_error="[error] Please provide only one of the following options to set genome size: --gs_size or --gs_fasta" +if [[ "$error_message" != *"$expected_error"* ]]; then + echo "Output error message: $error_message does not match expected error message: $expected_error" + exit 1 +fi + +echo "> Error test successful" + +echo "---- All tests succeeded!
----" +exit 0 \ No newline at end of file diff --git a/src/agat/agat_sp_statistics/test_data/1.gff b/src/agat/agat_sp_statistics/test_data/1.gff new file mode 100644 index 00000000..775d14fd --- /dev/null +++ b/src/agat/agat_sp_statistics/test_data/1.gff @@ -0,0 +1,78 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +### +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +### +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp five_prime_UTR 2983 3268 . + . Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 3354 3616 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 4357 4455 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 5457 5560 . + . 
Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 7136 7944 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8028 8150 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8232 8320 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8408 8608 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 9210 9615 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp CDS 9210 9615 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10102 10187 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp CDS 10102 10187 . 
+ 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10274 10430 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp three_prime_UTR 10298 10430 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 10504 10815 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10504 10815 . + . Parent=transcript:Os01t0100100-01 +### +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . Parent=transcript:Os01t0100200-01 +1 irgsp exon 11218 12060 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp exon 12152 12435 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . Parent=transcript:Os01t0100200-01 +### +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. 
(Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp exon 12146 12284 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +### +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . Parent=transcript:Os01t0100400-01 +1 irgsp exon 12721 13813 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 13906 14271 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14359 14437 . + . 
Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14969 15171 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 15266 15685 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . Parent=transcript:Os01t0100400-01 +### +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 12808 13782 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 13880 13978 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp five_prime_UTR 13880 13978 . - . 
Parent=transcript:Os01t0100466-00 \ No newline at end of file diff --git a/src/agat/agat_sp_statistics/test_data/script.sh b/src/agat/agat_sp_statistics/test_data/script.sh new file mode 100755 index 00000000..5b1133ac --- /dev/null +++ b/src/agat/agat_sp_statistics/test_data/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/1.gff src/agat/agat_sp_statistics/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_statistics_1.txt src/agat/agat_sp_statistics/test_data + +# keep only the first 78 lines of 1.gff +head -n 78 src/agat/agat_sp_statistics/test_data/1.gff > src/agat/agat_sp_statistics/test_data/1.gff.tmp +mv src/agat/agat_sp_statistics/test_data/1.gff.tmp src/agat/agat_sp_statistics/test_data/1.gff \ No newline at end of file diff --git a/src/agat/agat_sp_statistics/test_data/stats_out.txt b/src/agat/agat_sp_statistics/test_data/stats_out.txt new file mode 100644 index 00000000..b160ea52 --- /dev/null +++ b/src/agat/agat_sp_statistics/test_data/stats_out.txt @@ -0,0 +1,93 @@ +-------------------------------------------------------------------------------- + +---------------------------------- chromosome ---------------------------------- +Number of chromosome 1 +Number chromosome overlapping 0 +Total chromosome length (bp) 43270923 +mean chromosome length (bp) 43270923 +Longest chromosome (bp) 43270923 +Shortest chromosome (bp) 43270923 + +-------------------------------- repeat_region --------------------------------- +Number of repeat_region 1 +Number repeat_region overlapping 0 +Total repeat_region length (bp) 101 +mean repeat_region length (bp) 101 +Longest repeat_region (bp) 101 +Shortest repeat_region (bp) 101 + +------------------------------------- mrna ------------------------------------- +Number of gene 5 +Number of mrna 5 
+Number of mrnas with utr both sides 4 +Number of mrnas with at least one utr 4 +Number of cds 5 +Number of exon 23 +Number of five_prime_utr 4 +Number of three_prime_utr 4 +Number of exon in cds 20 +Number of exon in five_prime_utr 6 +Number of exon in three_prime_utr 5 +Number of intron in cds 15 +Number of intron in exon 18 +Number of intron in five_prime_utr 2 +Number of intron in three_prime_utr 1 +Number gene overlapping 2 +mean mrnas per gene 1.0 +mean cdss per mrna 1.0 +mean exons per mrna 4.6 +mean five_prime_utrs per mrna 0.8 +mean three_prime_utrs per mrna 0.8 +mean exons per cds 4.0 +mean exons per five_prime_utr 1.5 +mean exons per three_prime_utr 1.2 +mean introns in cdss per mrna 3.0 +mean introns in exons per mrna 3.6 +mean introns in five_prime_utrs per mrna 0.4 +mean introns in three_prime_utrs per mrna 0.2 +Total gene length (bp) 14100 +Total mrna length (bp) 14100 +Total cds length (bp) 5364 +Total exon length (bp) 8107 +Total five_prime_utr length (bp) 1793 +Total three_prime_utr length (bp) 950 +Total intron length per cds (bp) 5738 +Total intron length per exon (bp) 5993 +Total intron length per five_prime_utr (bp) 182 +Total intron length per three_prime_utr (bp) 73 +mean gene length (bp) 2820 +mean mrna length (bp) 2820 +mean cds length (bp) 1072 +mean exon length (bp) 352 +mean five_prime_utr length (bp) 448 +mean three_prime_utr length (bp) 237 +mean cds piece length (bp) 268 +mean five_prime_utr piece length (bp) 298 +mean three_prime_utr piece length (bp) 190 +mean intron in cds length (bp) 382 +mean intron in exon length (bp) 332 +mean intron in five_prime_utr length (bp) 91 +mean intron in three_prime_utr length (bp) 73 +Longest gene (bp) 7833 +Longest mrna (bp) 7833 +Longest cds (bp) 2109 +Longest exon (bp) 1093 +Longest five_prime_utr (bp) 779 +Longest three_prime_utr (bp) 445 +Longest cds piece (bp) 1040 +Longest five_prime_utr piece (bp) 680 +Longest three_prime_utr piece (bp) 326 +Longest intron into cds part (bp) 1575 +Longest 
intron into exon part (bp) 1575 +Longest intron into five_prime_utr part (bp) 97 +Longest intron into three_prime_utr part (bp)73 +Shortest gene (bp) 913 +Shortest mrna (bp) 913 +Shortest cds piece (bp) 24 +Shortest five_prime_utr piece (bp) 53 +Shortest three_prime_utr piece (bp) 61 +Shortest intron into cds part (bp) 81 +Shortest intron into exon part (bp) 73 +Shortest intron into five_prime_utr part (bp)85 +Shortest intron into three_prime_utr part (bp)73 + From 52f44f5049606ac655154cf54ed53fa76b49896f Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 26 Oct 2024 15:07:43 +0200 Subject: [PATCH 07/15] Add agat sp add introns (#104) * add help * add config * add run script * add test data and expected output + script to fetch them * add tests * update changelog * Update src/agat/agat_sp_add_introns/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_add_introns/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_add_introns/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * # create temporary directory and clean up on exit * add set -e to test * fix create temporary directory * fix create temporary directory * add set -eo pipefail to test * add set -eo pipefail to script * remove file added by mistake * update --config description * cleanup changelog * cleanup changelog * minor changes to config * reduce test data size --------- Co-authored-by: Robrecht Cannoodt Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Emma Rousseau --- CHANGELOG.md | 5 +- src/agat/agat_sp_add_introns/config.vsh.yaml | 64 +++++++++ src/agat/agat_sp_add_introns/help.txt | 62 +++++++++ src/agat/agat_sp_add_introns/script.sh | 11 ++ src/agat/agat_sp_add_introns/test.sh | 34 +++++ .../test_data/1_truncated.gff | 106 +++++++++++++++ 
.../agat_sp_add_introns/test_data/script.sh | 12 ++ .../test_data/test_output.gff | 125 ++++++++++++++++++ 8 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 src/agat/agat_sp_add_introns/config.vsh.yaml create mode 100644 src/agat/agat_sp_add_introns/help.txt create mode 100644 src/agat/agat_sp_add_introns/script.sh create mode 100644 src/agat/agat_sp_add_introns/test.sh create mode 100644 src/agat/agat_sp_add_introns/test_data/1_truncated.gff create mode 100755 src/agat/agat_sp_add_introns/test_data/script.sh create mode 100644 src/agat/agat_sp_add_introns/test_data/test_output.gff diff --git a/CHANGELOG.md b/CHANGELOG.md index dbc4d95d..a8cfc83a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ * `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). +* `agat`: + - `agat/agat_sp_add_introns`: add intron features to gtf/gff file without intron features (PR #104). + + ## BREAKING CHANGES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). @@ -94,7 +98,6 @@ - `kallisto_index`: Create a kallisto index (PR #149). - `kallisto_quant`: Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads (PR #152). - * `trimgalore`: Quality and adapter trimming for fastq files (PR #117). diff --git a/src/agat/agat_sp_add_introns/config.vsh.yaml b/src/agat/agat_sp_add_introns/config.vsh.yaml new file mode 100644 index 00000000..06ec8474 --- /dev/null +++ b/src/agat/agat_sp_add_introns/config.vsh.yaml @@ -0,0 +1,64 @@ +name: agat_sp_add_introns +namespace: agat +description: | + Add intronic elements to a gtf/gff file without intron features. 
+keywords: [gene annotations, GTF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_add_introns.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-f, --ref, --reffile] + description: Input GTF/GFF file. + type: file + required: true + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile, --gtf] + description: Output GFF3 file. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option + gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
+ type: file + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_add_introns/help.txt b/src/agat/agat_sp_add_introns/help.txt new file mode 100644 index 00000000..48dc1ace --- /dev/null +++ b/src/agat/agat_sp_add_introns/help.txt @@ -0,0 +1,62 @@ +```sh +agat_sp_add_introns.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_add_introns.pl + +Description: + The script aims to add intron features to gtf/gff file without intron + features. + +Usage: + agat_sp_add_introns.pl --gff infile --out outFile + agat_sp_add_introns.pl --help + +Options: + --gff, -f, --ref or -reffile + Input GTF/GFF file. + + --out, --output or -o + Output GFF3 file. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + --help or -h + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. 
Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_sp_add_introns/script.sh b/src/agat/agat_sp_add_introns/script.sh new file mode 100644 index 00000000..95cacee4 --- /dev/null +++ b/src/agat/agat_sp_add_introns/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +agat_sp_add_introns.pl \ + -f "$par_gff" \ + -o "$par_output" \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_sp_add_introns/test.sh b/src/agat/agat_sp_add_introns/test.sh new file mode 100644 index 00000000..d7144d91 --- /dev/null +++ b/src/agat/agat_sp_add_introns/test.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/1_truncated.gff" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! 
-s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/test_output.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sp_add_introns/test_data/1_truncated.gff b/src/agat/agat_sp_add_introns/test_data/1_truncated.gff new file mode 100644 index 00000000..a86a94d9 --- /dev/null +++ b/src/agat/agat_sp_add_introns/test_data/1_truncated.gff @@ -0,0 +1,106 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +### +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +### +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp five_prime_UTR 2983 3268 . + . Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 3354 3616 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 4357 4455 . + . 
Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 5457 5560 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 7136 7944 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8028 8150 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8232 8320 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8408 8608 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 9210 9615 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp CDS 9210 9615 . 
+ 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10102 10187 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10274 10430 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp three_prime_UTR 10298 10430 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 10504 10815 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10504 10815 . + . Parent=transcript:Os01t0100100-01 +### +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . Parent=transcript:Os01t0100200-01 +1 irgsp exon 11218 12060 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp exon 12152 12435 . + . 
Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . Parent=transcript:Os01t0100200-01 +### +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp exon 12146 12284 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +### +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . Parent=transcript:Os01t0100400-01 +1 irgsp exon 12721 13813 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 13906 14271 . + . 
Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14359 14437 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14969 15171 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 15266 15685 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . Parent=transcript:Os01t0100400-01 +### +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 12808 13782 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . 
Parent=transcript:Os01t0100466-00 +1 irgsp exon 13880 13978 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp five_prime_UTR 13880 13978 . - . Parent=transcript:Os01t0100466-00 +### +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 16399 16976 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17383 17474 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp CDS 17383 17474 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17558 18258 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18501 18571 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp CDS 18501 18571 . + 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18968 19057 . + . 
Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19142 19321 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19531 19629 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp three_prime_UTR 19594 19629 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 19734 20144 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp three_prime_UTR 19734 20144 . + . Parent=transcript:Os01t0100500-01 +### +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . Parent=transcript:Os01t0100600-01 +1 irgsp exon 22841 23281 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp CDS 23232 23281 . + 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23572 23847 . + . 
Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 diff --git a/src/agat/agat_sp_add_introns/test_data/script.sh b/src/agat/agat_sp_add_introns/test_data/script.sh new file mode 100755 index 00000000..e5880652 --- /dev/null +++ b/src/agat/agat_sp_add_introns/test_data/script.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/1.gff src/agat/agat_sp_add_introns/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_add_introns_1.gff src/agat/agat_sp_add_introns/test_data + +head -n 106 "src/agat/agat_sp_add_introns/test_data/1.gff" > "src/agat/agat_sp_add_introns/test_data/1_truncated.gff" \ No newline at end of file diff --git a/src/agat/agat_sp_add_introns/test_data/test_output.gff b/src/agat/agat_sp_add_introns/test_data/test_output.gff new file mode 100644 index 00000000..607907f6 --- /dev/null +++ b/src/agat/agat_sp_add_introns/test_data/test_output.gff @@ -0,0 +1,125 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . 
ID=Os01t0100100-01.exon1;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp exon 3354 3616 . + . ID=Os01t0100100-01.exon2;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp exon 4357 4455 . + . ID=Os01t0100100-01.exon3;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp exon 5457 5560 . + . ID=Os01t0100100-01.exon4;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp exon 7136 7944 . + . ID=Os01t0100100-01.exon5;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp exon 8028 8150 . + . ID=Os01t0100100-01.exon6;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp exon 8232 8320 . + . ID=Os01t0100100-01.exon7;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp exon 8408 8608 . + . ID=Os01t0100100-01.exon8;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp exon 9210 9615 . + . ID=Os01t0100100-01.exon9;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp exon 10102 10187 . + . 
ID=Os01t0100100-01.exon10;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp exon 10274 10430 . + . ID=Os01t0100100-01.exon11;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp exon 10504 10815 . + . ID=Os01t0100100-01.exon12;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 9210 9615 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp five_prime_UTR 2983 3268 . + . ID=agat-five_prime_utr-1;Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . ID=agat-five_prime_utr-2;Parent=transcript:Os01t0100100-01 +1 irgsp intron 3269 3353 . + . 
ID=intron_added-1;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 3617 4356 . + . ID=intron_added-2;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 4456 5456 . + . ID=intron_added-3;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 5561 7135 . + . ID=intron_added-4;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 7945 8027 . + . ID=intron_added-5;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 8151 8231 . + . ID=intron_added-6;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 8321 8407 . + . ID=intron_added-7;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 8609 9209 . + . ID=intron_added-8;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 9616 10101 . + . ID=intron_added-9;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 10188 10273 . + . 
ID=intron_added-10;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp intron 10431 10503 . + . ID=intron_added-11;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10298 10430 . + . ID=agat-three_prime_utr-1;Parent=transcript:Os01t0100100-01 +1 irgsp three_prime_UTR 10504 10815 . + . ID=agat-three_prime_utr-2;Parent=transcript:Os01t0100100-01 +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp exon 11218 12060 . + . ID=Os01t0100200-01.exon1;Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp exon 12152 12435 . + . ID=Os01t0100200-01.exon2;Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . ID=agat-five_prime_utr-3;Parent=transcript:Os01t0100200-01 +1 irgsp intron 12061 12151 . + . ID=intron_added-12;Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . ID=agat-three_prime_utr-3;Parent=transcript:Os01t0100200-01 +1 irgsp gene 11372 12284 . 
- . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . ID=Os01t0100300-00.exon2;Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp exon 12146 12284 . - . ID=Os01t0100300-00.exon1;Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp intron 12043 12145 . - . ID=intron_added-13;Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp exon 12721 13813 . + . ID=Os01t0100400-01.exon1;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp exon 13906 14271 . + . ID=Os01t0100400-01.exon2;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp exon 14359 14437 . + . 
ID=Os01t0100400-01.exon3;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp exon 14969 15171 . + . ID=Os01t0100400-01.exon4;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp exon 15266 15685 . + . ID=Os01t0100400-01.exon5;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . ID=agat-five_prime_utr-4;Parent=transcript:Os01t0100400-01 +1 irgsp intron 13814 13905 . + . ID=intron_added-14;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp intron 14272 14358 . + . ID=intron_added-15;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp intron 14438 14968 . + . ID=intron_added-16;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp intron 15172 15265 . + . 
ID=intron_added-17;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . ID=agat-three_prime_utr-4;Parent=transcript:Os01t0100400-01 +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp exon 12808 13782 . - . ID=Os01t0100466-00.exon2;Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp exon 13880 13978 . - . ID=Os01t0100466-00.exon1;Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . ID=agat-five_prime_utr-5;Parent=transcript:Os01t0100466-00 +1 irgsp five_prime_UTR 13880 13978 . - . ID=agat-five_prime_utr-6;Parent=transcript:Os01t0100466-00 +1 irgsp intron 13783 13879 . - . ID=intron_added-18;Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp three_prime_UTR 12808 12868 . - . ID=agat-three_prime_utr-5;Parent=transcript:Os01t0100466-00 +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . 
ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp exon 16399 16976 . + . ID=Os01t0100500-01.exon1;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp exon 17383 17474 . + . ID=Os01t0100500-01.exon2;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp exon 17558 18258 . + . ID=Os01t0100500-01.exon3;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp exon 18501 18571 . + . ID=Os01t0100500-01.exon4;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp exon 18968 19057 . + . ID=Os01t0100500-01.exon5;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp exon 19142 19321 . + . ID=Os01t0100500-01.exon6;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp exon 19531 19629 . + . ID=Os01t0100500-01.exon7;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp exon 19734 20144 . + . ID=Os01t0100500-01.exon8;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 17383 17474 . 
+ 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 18501 18571 . + 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . ID=agat-five_prime_utr-7;Parent=transcript:Os01t0100500-01 +1 irgsp intron 16977 17382 . + . ID=intron_added-19;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 17475 17557 . + . ID=intron_added-20;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 18259 18500 . + . ID=intron_added-21;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 18572 18967 . + . ID=intron_added-22;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 19058 19141 . + . ID=intron_added-23;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 19322 19530 . + . 
ID=intron_added-24;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp intron 19630 19733 . + . ID=intron_added-25;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp three_prime_UTR 19594 19629 . + . ID=agat-three_prime_utr-6;Parent=transcript:Os01t0100500-01 +1 irgsp three_prime_UTR 19734 20144 . + . ID=agat-three_prime_utr-7;Parent=transcript:Os01t0100500-01 +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp exon 22841 23281 . + . ID=Os01t0100600-01.exon1;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp exon 23572 26892 . + . ID=Os01t0100600-01.exon2;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 irgsp CDS 23232 23281 . + 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . ID=agat-five_prime_utr-8;Parent=transcript:Os01t0100600-01 +1 irgsp intron 23282 23571 . + . ID=intron_added-26;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 AGAT three_prime_UTR 23572 26892 . + . 
ID=agat-three_prime_utr-8;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 From ebbc0d45eed4983d1184595420b5940026c2fcc9 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 26 Oct 2024 20:27:23 +0200 Subject: [PATCH 08/15] Add agat sp filter feature from kill list (#105) * add help * add config * add run script * add test data and expected output + script to fetch them * update config: kill_list as Inputs * all fetch kill_list.txt * add tests * update changelog * run script: fixe `verbose` usage * Update src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * update --config description * add requirements * format the description of --type * update --config description * update formatting --type description * add mutliple to --type * create temporary directory and clean up on exit * convert par_type to comma separated list * add set -e * fix create temporary directory * add set -eo pipefail to script and test files * fix create temporary directory * fix typo * cleanup changelog * cleanup changelog * Minor chanegs to config * reduce test data size --------- Co-authored-by: Robrecht Cannoodt Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> 
Co-authored-by: Emma Rousseau --- CHANGELOG.md | 9 +- .../config.vsh.yaml | 105 +++++++++++++++ .../help.txt | 85 ++++++++++++ .../script.sh | 22 ++++ .../test.sh | 36 +++++ .../test_data/1_truncated.gff | 123 ++++++++++++++++++ .../test_data/kill_list.txt | 3 + .../test_data/script.sh | 13 ++ .../test_data/test_output.gff | 113 ++++++++++++++++ 9 files changed, 503 insertions(+), 6 deletions(-) create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/help.txt create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/script.sh create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/test.sh create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/test_data/1_truncated.gff create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/test_data/kill_list.txt create mode 100755 src/agat/agat_sp_filter_feature_from_kill_list/test_data/script.sh create mode 100644 src/agat/agat_sp_filter_feature_from_kill_list/test_data/test_output.gff diff --git a/CHANGELOG.md b/CHANGELOG.md index a8cfc83a..76a1e2ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,9 @@ * `agat`: - `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). - - `agat_sp_statistics`: provides exhaustive statistics of a gft/gff file (PR #107). - + - `agat/agat_sp_add_introns`: add intron features to gtf/gff file without intron features (PR #104). + - `agat/agat_sp_filter_feature_from_kill_list`: remove features in a GFF file based on a kill list (PR #105). + - `agat/agat_sp_statistics`: provides exhaustive statistics of a gtf/gff file (PR #107). * `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96). @@ -13,10 +14,6 @@ * `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). -* `agat`: - - `agat/agat_sp_add_introns`: add intron features to gtf/gff file without intron features (PR #104).
- - ## BREAKING CHANGES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml b/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml new file mode 100644 index 00000000..0608ad4d --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml @@ -0,0 +1,105 @@ +name: agat_sp_filter_feature_from_kill_list +namespace: agat +description: | + Remove features based on a kill list. The default behaviour is to look at the feature's ID. + If the feature has an ID (case insensitive) listed among the kill list it will be removed. + Removing a level1 or level2 feature will automatically remove all linked subfeatures, and + removing all children of a feature will automatically remove this feature too. +keywords: [gene annotations, filtering, gff] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_filter_feature_from_kill_list.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + - commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-f, --ref, --reffile] + description: Input GFF3 file that will be read. + type: file + required: true + - name: --kill_list + alternatives: [--kl] + description: Text file containing the kill list. One value per line. + type: file + required: true + example: kill_list.txt + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out] + description: | + Path to the output GFF file that contains filtered features.
+ type: file + direction: output + required: true + - name: Arguments + arguments: + - name: --type + alternatives: [-p, -l] + description: | + Primary tag option, case insensitive, list. Allows specifying the feature types that + will be handled. + + You can specify a specific feature by giving its primary tag name (column 3) as: + + * cds + * Gene + * mRNA + + You can specify directly all the features of a particular + level: + + * level2=mRNA,ncRNA,tRNA,etc + * level3=CDS,exon,UTR,etc. + + By default all features are taken into account. Filling the option with the value "all" will + have the same behaviour. + type: string + multiple: true + - name: --attribute + alternatives: [-a] + description: | + Attribute tag to specify the attribute to analyse. Case sensitive. Default: ID + type: string + example: ID + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. + The `--config` option gives you the possibility to use your own AGAT config file (located + elsewhere or named differently). + type: file + example: custom_agat_config.yaml + - name: --verbose + alternatives: [-v] + description: Verbose option for debugging purpose.
+ type: boolean_true +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/help.txt b/src/agat/agat_sp_filter_feature_from_kill_list/help.txt new file mode 100644 index 00000000..b0087916 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/help.txt @@ -0,0 +1,85 @@ +```sh +agat_sp_filter_feature_from_kill_list.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_filter_feature_from_kill_list.pl + +Description: + The script aims to remove features based on a kill list. The default + behaviour is to look at the features's ID. If the feature has an ID + (case insensitive) listed among the kill list it will be removed. /!\ + Removing a level1 or level2 feature will automatically remove all linked + subfeatures, and removing all children of a feature will automatically + remove this feature too. + +Usage: + agat_sp_filter_feature_from_kill_list.pl --gff infile.gff --kill_list file.txt [ --output outfile ] + agat_sp_filter_feature_from_kill_list.pl --help + +Options: + -f, --reffile, --gff or -ref + Input GFF3 file that will be read + + -p, --type or -l + primary tag option, case insensitive, list. Allow to specied the + feature types that will be handled. 
You can specified a specific + feature by given its primary tag name (column 3) as: cds, Gene, + MrNa You can specify directly all the feature of a particular + level: level2=mRNA,ncRNA,tRNA,etc level3=CDS,exon,UTR,etc By + default all feature are taking into account. fill the option by + the value "all" will have the same behaviour. + + --kl or --kill_list + Kill list. One value per line. + + -a or --attribute + Attribute tag to specify the attribute to analyse. Case + sensitive. Default: ID + + -o or --output + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -v Verbose option for debugging purpose. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. 
+ + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/script.sh b/src/agat/agat_sp_filter_feature_from_kill_list/script.sh new file mode 100644 index 00000000..6779b857 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/script.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# unset flags that are "false" so the ${var:+...} expansions below omit them +[[ "$par_verbose" == "false" ]] && unset par_verbose + +# convert par_type to comma separated list (replace every ';' with ','; no word splitting or globbing) +par_type="${par_type//;/,}" + +# run agat_sp_filter_feature_from_kill_list; optional arguments are only passed when set and non-empty +agat_sp_filter_feature_from_kill_list.pl \ + --gff "$par_gff" \ + --kill_list "$par_kill_list" \ + --output "$par_output" \ + ${par_type:+--type "${par_type}"} \ + ${par_attribute:+--attribute "${par_attribute}"} \ + ${par_config:+--config "${par_config}"} \ + ${par_verbose:+-v} diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/test.sh b/src/agat/agat_sp_filter_feature_from_kill_list/test.sh new file mode 100644 index 00000000..d9d775d5 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/test.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/1_truncated.gff" \ + --kill_list "$test_dir/kill_list.txt" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! 
-s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +if ! diff "$TMPDIR/output.gff" "$test_dir/test_output.gff"; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/test_data/1_truncated.gff b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/1_truncated.gff new file mode 100644 index 00000000..e0fb6bce --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/1_truncated.gff @@ -0,0 +1,123 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +### +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +### +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp five_prime_UTR 2983 3268 . + . Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 3354 3616 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 4357 4455 . + . 
Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 5457 5560 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 7136 7944 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8028 8150 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8232 8320 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8408 8608 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 9210 9615 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp CDS 9210 9615 . 
+ 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10102 10187 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10274 10430 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp three_prime_UTR 10298 10430 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 10504 10815 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10504 10815 . + . Parent=transcript:Os01t0100100-01 +### +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . Parent=transcript:Os01t0100200-01 +1 irgsp exon 11218 12060 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp exon 12152 12435 . + . 
Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . Parent=transcript:Os01t0100200-01 +### +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp exon 12146 12284 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +### +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . Parent=transcript:Os01t0100400-01 +1 irgsp exon 12721 13813 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 13906 14271 . + . 
Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14359 14437 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14969 15171 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 15266 15685 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . Parent=transcript:Os01t0100400-01 +### +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 12808 13782 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . 
Parent=transcript:Os01t0100466-00 +1 irgsp exon 13880 13978 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp five_prime_UTR 13880 13978 . - . Parent=transcript:Os01t0100466-00 +### +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 16399 16976 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17383 17474 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp CDS 17383 17474 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17558 18258 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18501 18571 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp CDS 18501 18571 . + 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18968 19057 . + . 
Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19142 19321 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19531 19629 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp three_prime_UTR 19594 19629 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 19734 20144 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp three_prime_UTR 19734 20144 . + . Parent=transcript:Os01t0100500-01 +### +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . Parent=transcript:Os01t0100600-01 +1 irgsp exon 22841 23281 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp CDS 23232 23281 . + 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23572 23847 . + . 
Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 irgsp CDS 23572 23847 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23962 24033 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon3;rank=3 +1 irgsp CDS 23962 24033 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 24492 24577 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100600-01.exon4;rank=4 +1 irgsp CDS 24492 24577 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25445 25519 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100600-01.exon5;rank=5 +1 irgsp CDS 25445 25519 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 25883 26391 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25883 26892 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0100600-01.exon6;rank=6 +1 irgsp three_prime_UTR 26392 26892 . + . Parent=transcript:Os01t0100600-01 +### +1 irgsp gene 25861 26424 . - . ID=gene:Os01g0100650;biotype=protein_coding;description=Hypothetical gene. (Os01t0100650-00);gene_id=Os01g0100650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 25861 26424 . - . ID=transcript:Os01t0100650-00;Parent=gene:Os01g0100650;biotype=protein_coding;transcript_id=Os01t0100650-00 +1 irgsp three_prime_UTR 25861 26039 . - . Parent=transcript:Os01t0100650-00 +1 irgsp exon 25861 26424 . - . 
Parent=transcript:Os01t0100650-00;Name=Os01t0100650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100650-00.exon1;rank=1 +1 irgsp CDS 26040 26423 . - 0 ID=CDS:Os01t0100650-00;Parent=transcript:Os01t0100650-00;protein_id=Os01t0100650-00 +1 irgsp five_prime_UTR 26424 26424 . - . Parent=transcript:Os01t0100650-00 diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/test_data/kill_list.txt b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/kill_list.txt new file mode 100644 index 00000000..a9d72f89 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/kill_list.txt @@ -0,0 +1,3 @@ +gene:Os01g0100700 +CDS:Os01t0100650-00 +transcript:Os01t0102700-01 diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/test_data/script.sh b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/script.sh new file mode 100755 index 00000000..6f9d1584 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/script.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/1.gff src/agat/agat_sp_filter_feature_from_kill_list/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_filter_feature_from_kill_list_1.gff src/agat/agat_sp_filter_feature_from_kill_list/test_data +cp -r /tmp/agat_source/t/scripts_output/in/kill_list.txt src/agat/agat_sp_filter_feature_from_kill_list/test_data + +head -n 123 src/agat/agat_sp_filter_feature_from_kill_list/test_data/1.gff > src/agat/agat_sp_filter_feature_from_kill_list/test_data/1_truncated.gff \ No newline at end of file diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/test_data/test_output.gff b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/test_output.gff new file mode 100644 index 00000000..47838fe7 --- /dev/null +++ b/src/agat/agat_sp_filter_feature_from_kill_list/test_data/test_output.gff @@ -0,0 +1,113 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . ID=Os01t0100100-01.exon1;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp exon 3354 3616 . + . 
ID=Os01t0100100-01.exon2;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp exon 4357 4455 . + . ID=Os01t0100100-01.exon3;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp exon 5457 5560 . + . ID=Os01t0100100-01.exon4;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp exon 7136 7944 . + . ID=Os01t0100100-01.exon5;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp exon 8028 8150 . + . ID=Os01t0100100-01.exon6;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp exon 8232 8320 . + . ID=Os01t0100100-01.exon7;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp exon 8408 8608 . + . ID=Os01t0100100-01.exon8;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp exon 9210 9615 . + . ID=Os01t0100100-01.exon9;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp exon 10102 10187 . + . ID=Os01t0100100-01.exon10;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp exon 10274 10430 . + . 
ID=Os01t0100100-01.exon11;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp exon 10504 10815 . + . ID=Os01t0100100-01.exon12;Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 9210 9615 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp five_prime_UTR 2983 3268 . + . ID=agat-five_prime_utr-1;Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . ID=agat-five_prime_utr-2;Parent=transcript:Os01t0100100-01 +1 irgsp three_prime_UTR 10298 10430 . + . ID=agat-three_prime_utr-1;Parent=transcript:Os01t0100100-01 +1 irgsp three_prime_UTR 10504 10815 . + . ID=agat-three_prime_utr-2;Parent=transcript:Os01t0100100-01 +1 irgsp gene 11218 12435 . + . 
ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp exon 11218 12060 . + . ID=Os01t0100200-01.exon1;Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp exon 12152 12435 . + . ID=Os01t0100200-01.exon2;Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . ID=agat-five_prime_utr-3;Parent=transcript:Os01t0100200-01 +1 irgsp three_prime_UTR 12318 12435 . + . ID=agat-three_prime_utr-3;Parent=transcript:Os01t0100200-01 +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . ID=Os01t0100300-00.exon2;Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp exon 12146 12284 . - . ID=Os01t0100300-00.exon1;Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 11372 12042 . 
- 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp exon 12721 13813 . + . ID=Os01t0100400-01.exon1;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp exon 13906 14271 . + . ID=Os01t0100400-01.exon2;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp exon 14359 14437 . + . ID=Os01t0100400-01.exon3;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp exon 14969 15171 . + . ID=Os01t0100400-01.exon4;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp exon 15266 15685 . + . ID=Os01t0100400-01.exon5;Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 14969 15171 . 
+ 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . ID=agat-five_prime_utr-4;Parent=transcript:Os01t0100400-01 +1 irgsp three_prime_UTR 15360 15685 . + . ID=agat-three_prime_utr-4;Parent=transcript:Os01t0100400-01 +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp exon 12808 13782 . - . ID=Os01t0100466-00.exon2;Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp exon 13880 13978 . - . ID=Os01t0100466-00.exon1;Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . ID=agat-five_prime_utr-5;Parent=transcript:Os01t0100466-00 +1 irgsp five_prime_UTR 13880 13978 . - . ID=agat-five_prime_utr-6;Parent=transcript:Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . ID=agat-three_prime_utr-5;Parent=transcript:Os01t0100466-00 +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp exon 16399 16976 . + . 
ID=Os01t0100500-01.exon1;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp exon 17383 17474 . + . ID=Os01t0100500-01.exon2;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp exon 17558 18258 . + . ID=Os01t0100500-01.exon3;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp exon 18501 18571 . + . ID=Os01t0100500-01.exon4;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp exon 18968 19057 . + . ID=Os01t0100500-01.exon5;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp exon 19142 19321 . + . ID=Os01t0100500-01.exon6;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp exon 19531 19629 . + . ID=Os01t0100500-01.exon7;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp exon 19734 20144 . + . ID=Os01t0100500-01.exon8;Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 17383 17474 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 18501 18571 . 
+ 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . ID=agat-five_prime_utr-7;Parent=transcript:Os01t0100500-01 +1 irgsp three_prime_UTR 19594 19629 . + . ID=agat-three_prime_utr-6;Parent=transcript:Os01t0100500-01 +1 irgsp three_prime_UTR 19734 20144 . + . ID=agat-three_prime_utr-7;Parent=transcript:Os01t0100500-01 +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp exon 22841 23281 . + . ID=Os01t0100600-01.exon1;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp exon 23572 23847 . + . ID=Os01t0100600-01.exon2;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 irgsp exon 23962 24033 . + . ID=Os01t0100600-01.exon3;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon3;rank=3 +1 irgsp exon 24492 24577 . + . ID=Os01t0100600-01.exon4;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100600-01.exon4;rank=4 +1 irgsp exon 25445 25519 . + . 
ID=Os01t0100600-01.exon5;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100600-01.exon5;rank=5 +1 irgsp exon 25883 26892 . + . ID=Os01t0100600-01.exon6;Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0100600-01.exon6;rank=6 +1 irgsp CDS 23232 23281 . + 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 23572 23847 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 23962 24033 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 24492 24577 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 25445 25519 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 25883 26391 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . ID=agat-five_prime_utr-8;Parent=transcript:Os01t0100600-01 +1 irgsp three_prime_UTR 26392 26892 . + . ID=agat-three_prime_utr-8;Parent=transcript:Os01t0100600-01 +1 irgsp gene 25861 26424 . - . ID=gene:Os01g0100650;biotype=protein_coding;description=Hypothetical gene. (Os01t0100650-00);gene_id=Os01g0100650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 25861 26424 . - . ID=transcript:Os01t0100650-00;Parent=gene:Os01g0100650;biotype=protein_coding;transcript_id=Os01t0100650-00 +1 irgsp exon 25861 26424 . - . ID=Os01t0100650-00.exon1;Parent=transcript:Os01t0100650-00;Name=Os01t0100650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100650-00.exon1;rank=1 +1 irgsp five_prime_UTR 26424 26424 . - . ID=agat-five_prime_utr-9;Parent=transcript:Os01t0100650-00 +1 irgsp three_prime_UTR 25861 26039 . - . 
ID=agat-three_prime_utr-9;Parent=transcript:Os01t0100650-00 From 11118fb144e22622c65d075d22febb67c40f9a94 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 26 Oct 2024 20:28:05 +0200 Subject: [PATCH 09/15] Add agat sp merge annotations (#106) * add help * add config * add test data and expected output + srcipt to fetch them * add run script and handle multiple inputs * add test * update changelog * fix typo * add second test * Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * Update src/agat/agat_sp_merge_annotations/config.vsh.yaml Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> * update --config description * remove unset IFS * add temporary directory and cleanup on exit * update clean up on exit function * add set -eo pipefail to test and script * fix create temporary directory * cleanup changelog * cleanup changelog * Minor formatting changes --------- Co-authored-by: Robrecht Cannoodt Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Emma Rousseau --- CHANGELOG.md | 1 + .../agat_sp_merge_annotations/config.vsh.yaml | 67 +++++++++++++++++++ src/agat/agat_sp_merge_annotations/help.txt | 64 ++++++++++++++++++ src/agat/agat_sp_merge_annotations/script.sh | 19 ++++++ src/agat/agat_sp_merge_annotations/test.sh | 56 ++++++++++++++++ .../test_data/agat_sp_merge_annotations_1.gff | 13 ++++ .../test_data/agat_sp_merge_annotations_2.gff | 3 + .../test_data/file1.gff | 14 ++++ .../test_data/file2.gff | 12 ++++ .../test_data/fileA.gff | 2 + .../test_data/fileB.gff | 2 + .../test_data/script.sh | 15 +++++ 12 files changed, 
268 insertions(+) create mode 100644 src/agat/agat_sp_merge_annotations/config.vsh.yaml create mode 100644 src/agat/agat_sp_merge_annotations/help.txt create mode 100644 src/agat/agat_sp_merge_annotations/script.sh create mode 100644 src/agat/agat_sp_merge_annotations/test.sh create mode 100644 src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/file1.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/file2.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/fileA.gff create mode 100644 src/agat/agat_sp_merge_annotations/test_data/fileB.gff create mode 100755 src/agat/agat_sp_merge_annotations/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 76a1e2ec..420a3d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100). - `agat/agat_sp_add_introns`: add intron features to gtf/gff file without intron features (PR #104). - `agat/agat_sp_filter_feature_from_kill_list`: remove features in a GFF file based on a kill list (PR #105). + - `agat/agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). - `agat/agat_sp_statistics`: provides exhaustive statistics of a gft/gff file (PR #107). * `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96). diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml new file mode 100644 index 00000000..bc47921a --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -0,0 +1,67 @@ +name: agat_sp_merge_annotations +namespace: agat +description: | + Merge different gff annotation files into one. 
It uses the AGAT parser that takes care of + duplicated names and fixes other oddities met in those files. +keywords: [gene annotations, merge, gff] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-f] + description: | + Input GTF/GFF file(s). + type: file + multiple: true + required: true + example: input1.gff;input2.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out] + description: Output gff3 file where the gene incriminated will be written. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. + The `--config` option gives you the possibility to use your own AGAT config file (located + elsewhere or named differently).
+ type: file + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/help.txt b/src/agat/agat_sp_merge_annotations/help.txt new file mode 100644 index 00000000..2a17e7e4 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/help.txt @@ -0,0 +1,64 @@ +```sh +agat_sp_merge_annotations.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sp_merge_annotations.pl + +Description: + This script merge different gff annotation files in one. It uses the + AGAT parser that takes care of duplicated names and fixes other oddities + met in those files. + +Usage: + agat_sp_merge_annotations.pl --gff infile1 --gff infile2 --out outFile + agat_sp_merge_annotations.pl --help + +Options: + --gff or -f + Input GTF/GFF file(s). You can specify as much file you want + like so: -f file1 -f file2 -f file3 + + --out, --output or -o + Output gff3 file where the gene incriminated will be write. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". 
The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + --help or -h + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md diff --git a/src/agat/agat_sp_merge_annotations/script.sh b/src/agat/agat_sp_merge_annotations/script.sh new file mode 100644 index 00000000..5703745a --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# Convert a list of file names to multiple -gff arguments +input_files="" +IFS=";" read -ra file_names <<< "$par_gff" +for file in "${file_names[@]}"; do + input_files+="--gff $file " +done + +# run agat_sp_merge_annotations +agat_sp_merge_annotations.pl \ + $input_files \ + -o "$par_output" \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_sp_merge_annotations/test.sh b/src/agat/agat_sp_merge_annotations/test.sh new file mode 100644 index 00000000..7b882717 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +# create temporary 
directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +echo "> Run $meta_name with test data 1" +"$meta_executable" \ + --gff "$test_dir/file1.gff;$test_dir/file2.gff" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_merge_annotations_1.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo ">> cleanup" +rm -rf "$TMPDIR/output.gff" + +echo "> Run $meta_name with test data 2" +"$meta_executable" \ + --gff "$test_dir/fileA.gff;$test_dir/fileB.gff" \ + --output "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/agat_sp_merge_annotations_2.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff new file mode 100644 index 00000000..5f68f1f3 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_1.gff @@ -0,0 +1,13 @@ +##gff-version 3 +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;ontology=G0222 +chr10 BestRefSeq mRNA 123237824 123357992 . - . 
ID=rna-NM_022970.3;Parent=gene-FGFR2;ontology=G0222;merged_ID=IDmodified-mrna-1;merged_Ontology=G0333;merged_Parent=IDmodified-gene-1 +chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3 +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123239371 123239535 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq CDS 123353223 123353331 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3 +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3 +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3 +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3 diff --git a/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff new file mode 100644 index 00000000..1c3846b2 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/agat_sp_merge_annotations_2.gff @@ -0,0 +1,3 @@ +##gff-version 3 +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A +chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=A.t1;Parent=A;merged_ID=B.t1;merged_Parent=B diff --git a/src/agat/agat_sp_merge_annotations/test_data/file1.gff b/src/agat/agat_sp_merge_annotations/test_data/file1.gff new file mode 100644 index 00000000..d822ebfa --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/file1.gff @@ -0,0 +1,14 @@ +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq mRNA 123237824 123357992 . - . 
ID=rna-NM_022970.3;Parent=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123239371 123239535 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123353223 123353331 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3; + + \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/file2.gff b/src/agat/agat_sp_merge_annotations/test_data/file2.gff new file mode 100644 index 00000000..f072e1b3 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/file2.gff @@ -0,0 +1,12 @@ +chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;Ontology=G0222; +chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;Ontology=G0333; +chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3; +chr10 BestRefSeq exon 123357476 123357992 . - . ID=exon-NM_022970.3-1;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123239371 123239535 . 
- 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123243212 123243317 . - 1 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq CDS 123353223 123353331 . - 0 ID=cds-NP_075259.4;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123353332 123353481 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq five_prime_UTR 123357476 123357992 . - . ID=agat-five_prime_utr-54403;Parent=rna-NM_022970.3; +chr10 BestRefSeq three_prime_UTR 123237824 123239370 . - . ID=agat-three_prime_utr-54427;Parent=rna-NM_022970.3; \ No newline at end of file diff --git a/src/agat/agat_sp_merge_annotations/test_data/fileA.gff b/src/agat/agat_sp_merge_annotations/test_data/fileA.gff new file mode 100644 index 00000000..03b2d16d --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/fileA.gff @@ -0,0 +1,2 @@ +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A; +chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=A.t1;Parent=A; diff --git a/src/agat/agat_sp_merge_annotations/test_data/fileB.gff b/src/agat/agat_sp_merge_annotations/test_data/fileB.gff new file mode 100644 index 00000000..e796e5f0 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/fileB.gff @@ -0,0 +1,2 @@ +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=B; +chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=B.t1;Parent=B; diff --git a/src/agat/agat_sp_merge_annotations/test_data/script.sh b/src/agat/agat_sp_merge_annotations/test_data/script.sh new file mode 100755 index 00000000..0d3acae7 --- /dev/null +++ b/src/agat/agat_sp_merge_annotations/test_data/script.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file1.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/file2.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_merge_annotations_1.gff src/agat/agat_sp_merge_annotations/test_data + +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/fileA.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/in/agat_sp_merge_annotations/fileB.gff src/agat/agat_sp_merge_annotations/test_data +cp -r /tmp/agat_source/t/scripts_output/out/agat_sp_merge_annotations_2.gff src/agat/agat_sp_merge_annotations/test_data \ No newline at end of file From f96bd72421969e920cb24717df56bb5127d9bf52 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Sat, 26 Oct 2024 20:29:00 +0200 Subject: [PATCH 10/15] Bedtools bamtobed (#109) * adding back my work * adding more tests - fixing bug - more tests * Final test added * Update CHANGELOG.md * minor change - license name - help file * small changes on config * small changes * adding more links * Update script.sh * Adding $TMPDIR to test.sh --------- Co-authored-by: Emma Rousseau Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 4 + .../bedtools_bamtobed/config.vsh.yaml | 118 +++++++++++ src/bedtools/bedtools_bamtobed/help.txt | 43 ++++ src/bedtools/bedtools_bamtobed/script.sh | 39 ++++ src/bedtools/bedtools_bamtobed/test.sh | 183 ++++++++++++++++++ .../bedtools_bamtobed/test_data/example.bam | Bin 0 -> 334 bytes .../bedtools_bamtobed/test_data/example.sam | 3 + 7 files changed, 390 insertions(+) create mode 100644 src/bedtools/bedtools_bamtobed/config.vsh.yaml create mode 100644 
src/bedtools/bedtools_bamtobed/help.txt create mode 100644 src/bedtools/bedtools_bamtobed/script.sh create mode 100644 src/bedtools/bedtools_bamtobed/test.sh create mode 100644 src/bedtools/bedtools_bamtobed/test_data/example.bam create mode 100644 src/bedtools/bedtools_bamtobed/test_data/example.sam diff --git a/CHANGELOG.md b/CHANGELOG.md index 420a3d39..dcc783c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,10 +11,14 @@ * `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96). +* `bedtools`: + - `bedtools/bedtools_bamtobed`: Converts BAM alignments to BED6 or BEDPE format (PR #109). + * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93). * `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). + ## BREAKING CHANGES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). diff --git a/src/bedtools/bedtools_bamtobed/config.vsh.yaml b/src/bedtools/bedtools_bamtobed/config.vsh.yaml new file mode 100644 index 00000000..22ef8b44 --- /dev/null +++ b/src/bedtools/bedtools_bamtobed/config.vsh.yaml @@ -0,0 +1,118 @@ +name: bedtools_bamtobed +namespace: bedtools +description: Converts BAM alignments to BED6 or BEDPE format. +keywords: [Converts, BAM, BED, BED6, BEDPE] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/bamtobed.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input BAM file. 
+ required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + required: true + type: file + direction: output + description: Output BED file. + + - name: Options + arguments: + - name: --bedpe + type: boolean_true + description: | + Write BEDPE format. Requires BAM to be grouped or sorted by query. + + - name: --mate1 + type: boolean_true + description: | + When writing BEDPE (-bedpe) format, always report mate one as the first BEDPE "block". + + - name: --bed12 + type: boolean_true + description: | + Write "blocked" BED format (aka "BED12"). Forces -split. + See http://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1 + + - name: --split + type: boolean_true + description: | + Report "split" BAM alignments as separate BED entries. + Splits only on N CIGAR operations. + + - name: --splitD + type: boolean_true + description: | + Split alignments based on N and D CIGAR operators. + Forces -split. + + - name: --edit_distance + alternatives: -ed + type: boolean_true + description: | + Use BAM edit distance (NM tag) for BED score. + - Default for BED is to use mapping quality. + - Default for BEDPE is to use the minimum of + the two mapping qualities for the pair. + - When -ed is used with -bedpe, the total edit + distance from the two mates is reported. + + - name: --tag + type: string + description: | + Use other NUMERIC BAM alignment tag for BED score. + Default for BED is to use mapping quality. Disallowed with BEDPE output. + example: "SM" + + - name: --color + type: string + description: | + An R,G,B string for the color used with BED12 format. + Default is (255,0,0). + example: "250,250,250" + + - name: --cigar + type: boolean_true + description: | + Add the CIGAR string to the BED entry as a 7th column. 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_bamtobed/help.txt b/src/bedtools/bedtools_bamtobed/help.txt new file mode 100644 index 00000000..0cfc23a2 --- /dev/null +++ b/src/bedtools/bedtools_bamtobed/help.txt @@ -0,0 +1,43 @@ +```bash +bedtools bamtobed +``` + +Tool: bedtools bamtobed (aka bamToBed) +Version: v2.30.0 +Summary: Converts BAM alignments to BED6 or BEDPE format. + +Usage: bedtools bamtobed [OPTIONS] -i + +Options: + -bedpe Write BEDPE format. + - Requires BAM to be grouped or sorted by query. + + -mate1 When writing BEDPE (-bedpe) format, + always report mate one as the first BEDPE "block". + + -bed12 Write "blocked" BED format (aka "BED12"). Forces -split. + + http://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1 + + -split Report "split" BAM alignments as separate BED entries. + Splits only on N CIGAR operations. + + -splitD Split alignments based on N and D CIGAR operators. + Forces -split. + + -ed Use BAM edit distance (NM tag) for BED score. + - Default for BED is to use mapping quality. + - Default for BEDPE is to use the minimum of + the two mapping qualities for the pair. + - When -ed is used with -bedpe, the total edit + distance from the two mates is reported. + + -tag Use other NUMERIC BAM alignment tag for BED score. + - Default for BED is to use mapping quality. + Disallowed with BEDPE output. + + -color An R,G,B string for the color used with BED12 format. + Default is (255,0,0). + + -cigar Add the CIGAR string to the BED entry as a 7th column. 
+ diff --git a/src/bedtools/bedtools_bamtobed/script.sh b/src/bedtools/bedtools_bamtobed/script.sh new file mode 100644 index 00000000..10c4cef4 --- /dev/null +++ b/src/bedtools/bedtools_bamtobed/script.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -eo pipefail + +# Unset parameters +unset_if_false=( + par_bedpe + par_mate1 + par_bed12 + par_split + par_splitD + par_edit_distance + par_tag + par_color + par_cigar +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +# Execute bedtools bamtobed with the provided arguments +bedtools bamtobed \ + ${par_bedpe:+-bedpe} \ + ${par_mate1:+-mate1} \ + ${par_bed12:+-bed12} \ + ${par_split:+-split} \ + ${par_splitD:+-splitD} \ + ${par_edit_distance:+-ed} \ + ${par_tag:+-tag "$par_tag"} \ + ${par_cigar:+-cigar} \ + ${par_color:+-color "$par_color"} \ + -i "$par_input" \ + > "$par_output" + diff --git a/src/bedtools/bedtools_bamtobed/test.sh b/src/bedtools/bedtools_bamtobed/test.sh new file mode 100644 index 00000000..3ea8b59d --- /dev/null +++ b/src/bedtools/bedtools_bamtobed/test.sh @@ -0,0 +1,183 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +# directory of the bam file +test_data="$meta_resources_dir/test_data" + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +echo "Creating Test Data..."
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Generate expected files for comparison +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\n" > "$TMPDIR/expected.bed" +printf "chr2:172936693-172938111\t128\t228\tchr2:172936693-172938111\t428\t528\tmy_read\t60\t+\t-\n" > "$TMPDIR/expected.bedpe" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "$TMPDIR/expected.bed12" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t0\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t0\t-\n" > "$TMPDIR/expected_ed.bed" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t128\t228\t250,250,250\t1\t100\t0\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t428\t528\t250,250,250\t1\t100\t0\n" > "$TMPDIR/expected_color.bed12" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t100M\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t100M\n" > "$TMPDIR/expected_cigar.bed" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t85\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t85\t-\n" > "$TMPDIR/expected_tag.bed" + + +# Test 1: +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools bamtobed on BAM file" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output.bed" \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected.bed" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with -bedpe" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output.bedpe" \ + --bedpe + +# checks +assert_file_exists "output.bedpe" 
+assert_file_not_empty "output.bedpe" +assert_identical_content "output.bedpe" "../expected.bedpe" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with -bed12" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output.bed12" \ + --bed12 + +# checks +assert_file_exists "output.bed12" +assert_file_not_empty "output.bed12" +assert_identical_content "output.bed12" "../expected.bed12" +echo "- test3 succeeded -" + +popd > /dev/null + +# Test 4: +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with -ed" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output_ed.bed" \ + --edit_distance + +# checks +assert_file_exists "output_ed.bed" +assert_file_not_empty "output_ed.bed" +assert_identical_content "output_ed.bed" "../expected_ed.bed" +echo "- test4 succeeded -" + +popd > /dev/null + +# Test 5: +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with -color" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output_color.bed12" \ + --bed12 \ + --color "250,250,250" \ + +# checks +assert_file_exists "output_color.bed12" +assert_file_not_empty "output_color.bed12" +assert_identical_content "output_color.bed12" "../expected_color.bed12" +echo "- test5 succeeded -" + +popd > /dev/null + +# Test 6: +mkdir "$TMPDIR/test6" && pushd "$TMPDIR/test6" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with -cigar" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output_cigar.bed" \ + --cigar + +# checks +assert_file_exists "output_cigar.bed" +assert_file_not_empty "output_cigar.bed" +assert_identical_content "output_cigar.bed" "../expected_cigar.bed" +echo "- test6 succeeded -" + +popd > /dev/null + +# Test 7: +mkdir "$TMPDIR/test7" && pushd "$TMPDIR/test7" > /dev/null + 
+echo "> Run bedtools bamtobed on BAM file with -tag" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output_tag.bed" \ + --tag "XT" + +# checks +assert_file_exists "output_tag.bed" +assert_file_not_empty "output_tag.bed" +assert_identical_content "output_tag.bed" "../expected_tag.bed" +echo "- test7 succeeded -" + +popd > /dev/null + +# Test 8: +mkdir "$TMPDIR/test8" && pushd "$TMPDIR/test8" > /dev/null + +echo "> Run bedtools bamtobed on BAM file with other options" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --output "output.bed" \ + --bedpe \ + --mate1 \ + --split \ + --splitD \ + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../expected.bedpe" +echo "- test8 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! ----" +exit 0 diff --git a/src/bedtools/bedtools_bamtobed/test_data/example.bam b/src/bedtools/bedtools_bamtobed/test_data/example.bam new file mode 100644 index 0000000000000000000000000000000000000000..ffc075ab83a83a98ed1edbf88b26cc27ad8946c6 GIT binary patch literal 334 zcmb2|=3rp}f&Xj_PR>jWAq>SuUsA6mBqS7Y@IB%Aw%O~PhS4S?6Z1_bX2zRMuCZ>` z;o;@Ato^fw$CpQUheTtRYNNz-r#8JXHa3Ry>s4lk0?m>~GxQF_-U<7&m>dP#pU;|5 z)~CHK)-&PMX8(zQnRkjz7tzu&Q_9lpm^;_nXXDZb**~)OH9hZA+GbYw!F2!1eU^u& z=6?J8db>^n+vnS58VqGOpQde!^LhT^FPno$sK1R;RVb&i_o5|>-LG;Kg??MHCx&;~ ziZww?R#r16X1LX_ZFYQZ=WBLl9Y-y@V*W$>-;Wo3eOwoN_@m-GsXhDI Date: Sat, 26 Oct 2024 20:39:23 +0200 Subject: [PATCH 11/15] Rseq bamstat (#155) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* initial commit * add test, test data, version, help * Update CHANGELOG.md * adjust argument names, reduce test data size --------- Co-authored-by: Robrecht Cannoodt Co-authored-by: Kai Waldrant --- CHANGELOG.md | 9 +-- src/rseqc/rseqc_bamstat/config.vsh.yaml | 59 ++++++++++++++++++ src/rseqc/rseqc_bamstat/help.txt | 18 ++++++ src/rseqc/rseqc_bamstat/script.sh | 9 +++ src/rseqc/rseqc_bamstat/test.sh | 49 +++++++++++++++ .../rseqc_bamstat/test_data/ref_output.txt | 22 +++++++ .../test_data/ref_output_mapq.txt | 22 +++++++ src/rseqc/rseqc_bamstat/test_data/sample.bam | Bin 0 -> 9240 bytes 8 files changed, 184 insertions(+), 4 deletions(-) create mode 100644 src/rseqc/rseqc_bamstat/config.vsh.yaml create mode 100644 src/rseqc/rseqc_bamstat/help.txt create mode 100644 src/rseqc/rseqc_bamstat/script.sh create mode 100644 src/rseqc/rseqc_bamstat/test.sh create mode 100644 src/rseqc/rseqc_bamstat/test_data/ref_output.txt create mode 100644 src/rseqc/rseqc_bamstat/test_data/ref_output_mapq.txt create mode 100644 src/rseqc/rseqc_bamstat/test_data/sample.bam diff --git a/CHANGELOG.md b/CHANGELOG.md index dcc783c9..5f720035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,16 +16,17 @@ * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93). -* `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). +* `rseqc`: + - `rseqc/bam_stat`: Generate statistics from a bam file (PR #155). +* `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). -## BREAKING CHANGES +## BUG FIXES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). -## BUG FIXES +* `cutadapt`: Fix the the non-functional `action` parameter (PR #161). -* `cutadapt`: fix the the non-functional `action` parameter (PR #161). 
## MINOR CHANGES diff --git a/src/rseqc/rseqc_bamstat/config.vsh.yaml b/src/rseqc/rseqc_bamstat/config.vsh.yaml new file mode 100644 index 00000000..6d607e2f --- /dev/null +++ b/src/rseqc/rseqc_bamstat/config.vsh.yaml @@ -0,0 +1,59 @@ +name: rseqc_bamstat +namespace: rseqc +keywords: [ rnaseq, genomics ] +description: Generate statistics from a bam file. +links: + homepage: https://rseqc.sourceforge.net/ + documentation: https://rseqc.sourceforge.net/#bam-stat-py + issue_tracker: https://github.com/MonashBioinformaticsPlatform/RSeQC/issues + repository: https://github.com/MonashBioinformaticsPlatform/RSeQC +references: + doi: 10.1093/bioinformatics/bts356 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] + +argument_groups: +- name: "Input" + arguments: + - name: "--input_file" + alternatives: -i + type: file + required: true + description: Input alignment file in BAM or SAM format. + - name: "--mapq" + alternatives: -q + type: integer + example: 30 + description: | + Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: '30'. + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + description: Output file (txt) with mapping quality statistics. 
+ +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data + +engines: +- type: docker + image: python:3.10 + setup: + - type: python + packages: [ RSeQC ] + - type: docker + run: | + echo "RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)" > /var/software_versions.txt +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_bamstat/help.txt b/src/rseqc/rseqc_bamstat/help.txt new file mode 100644 index 00000000..b4e9c1d9 --- /dev/null +++ b/src/rseqc/rseqc_bamstat/help.txt @@ -0,0 +1,18 @@ +``` +bam_stat.py -h +``` + +Usage: bam_stat.py [options] + +Summarizing mapping statistics of a BAM or SAM file. + + + +Options: + --version show program's version number and exit + -h, --help show this help message and exit + -i INPUT_FILE, --input-file=INPUT_FILE + Alignment file in BAM or SAM format. + -q MAP_QUAL, --mapq=MAP_QUAL + Minimum mapping quality (phred scaled) to determine + "uniquely mapped" reads. default=30 \ No newline at end of file diff --git a/src/rseqc/rseqc_bamstat/script.sh b/src/rseqc/rseqc_bamstat/script.sh new file mode 100644 index 00000000..32927bb6 --- /dev/null +++ b/src/rseqc/rseqc_bamstat/script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +set -eo pipefail + +bam_stat.py \ + --input-file "${par_input_file}" \ + ${par_mapq:+--mapq "${par_mapq}"} \ +> $par_output diff --git a/src/rseqc/rseqc_bamstat/test.sh b/src/rseqc/rseqc_bamstat/test.sh new file mode 100644 index 00000000..f9180da8 --- /dev/null +++ b/src/rseqc/rseqc_bamstat/test.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# define input and output for script + +input_bam="sample.bam" +output_summary="mapping_quality.txt" + +# run executable and tests +echo "> Running $meta_functionality_name." + +"$meta_executable" \ + --input_file "$meta_resources_dir/test_data/$input_bam" \ + --output "$output_summary" + +exit_code=$? 
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Checking whether output is present" +[ ! -f "$output_summary" ] && echo "$output_summary file missing" && exit 1 +[ ! -s "$output_summary" ] && echo "$output_summary file is empty" && exit 1 + +echo ">> Checking whether output is correct" +diff "$meta_resources_dir/test_data/ref_output.txt" "$meta_resources_dir/$output_summary" || { echo "Output is not correct"; exit 1; } + +############################################################################# + +echo ">>> Test 2: Test with non-default mapping quality threshold" + +output_summary="mapping_quality_mapq_50.txt" + +# run executable and tests +echo "> Running $meta_functionality_name." + +"$meta_executable" \ + --input_file "$meta_resources_dir/test_data/$input_bam" \ + --output "$output_summary" \ + --mapq 50 + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Checking whether output is present" +[ ! -f "$output_summary" ] && echo "$output_summary file missing" && exit 1 +[ ! 
-s "$output_summary" ] && echo "$output_summary file is empty" && exit 1 + +echo ">> Checking whether output is correct" +diff "$meta_resources_dir/test_data/ref_output_mapq.txt" "$meta_resources_dir/$output_summary" || { echo "Output is not correct"; exit 1; } + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_bamstat/test_data/ref_output.txt b/src/rseqc/rseqc_bamstat/test_data/ref_output.txt new file mode 100644 index 00000000..6b939096 --- /dev/null +++ b/src/rseqc/rseqc_bamstat/test_data/ref_output.txt @@ -0,0 +1,22 @@ + +#================================================== +#All numbers are READ count +#================================================== + +Total records: 90 + +QC failed: 0 +Optical/PCR duplicate: 0 +Non primary hits 0 +Unmapped reads: 1 +mapq < mapq_cut (non-unique): 0 + +mapq >= mapq_cut (unique): 89 +Read-1: 45 +Read-2: 44 +Reads map to '+': 44 +Reads map to '-': 45 +Non-splice reads: 89 +Splice reads: 0 +Reads mapped in proper pairs: 88 +Proper-paired reads map to different chrom:0 diff --git a/src/rseqc/rseqc_bamstat/test_data/ref_output_mapq.txt b/src/rseqc/rseqc_bamstat/test_data/ref_output_mapq.txt new file mode 100644 index 00000000..be8af62f --- /dev/null +++ b/src/rseqc/rseqc_bamstat/test_data/ref_output_mapq.txt @@ -0,0 +1,22 @@ + +#================================================== +#All numbers are READ count +#================================================== + +Total records: 90 + +QC failed: 0 +Optical/PCR duplicate: 0 +Non primary hits 0 +Unmapped reads: 1 +mapq < mapq_cut (non-unique): 6 + +mapq >= mapq_cut (unique): 83 +Read-1: 42 +Read-2: 41 +Reads map to '+': 44 +Reads map to '-': 39 +Non-splice reads: 83 +Splice reads: 0 +Reads mapped in proper pairs: 83 +Proper-paired reads map to different chrom:0 diff --git a/src/rseqc/rseqc_bamstat/test_data/sample.bam b/src/rseqc/rseqc_bamstat/test_data/sample.bam new file mode 100644 index 
0000000000000000000000000000000000000000..ed1e24333ba1df0efa75401d7928b790038b762a GIT binary patch literal 9240 zcmV+zBROL8JfAYa&X{eo2(a(4x#KL{xo6|RWh#{l`wJHF zG8Rb+UCXZ?!F*E?$@6n6fpc!H+qhDcO4CJy-SVtVlLK9pDosel z^VV|IVoooa6FAt@UQ4X!YKCDo!o4C#m$XQ}ed3qd%|$c%r+O@P;z%eEf)+$#Wrc{9c#{alYgOlCnfm%W=X~d! zs@iTVGOxdj-^rtIU|Z4C(Z?hEx*yk`!=JBv?6Je0qtnxa)9ZVuM@J{o+wO}ZDY@i{ zNHkM@r&wASRVrEE2`+k7w4%|h)x9peP8ChXi#{!iUbb3yW!rUiStqSbd7GxC&htF3 z;>y4B;a!~3IId*$CC@8+UmVjLb8AdD+xK*dhUq)HKgMktE@zqfNpyFPvAl{G=1+P@ ze^*9d@g76;ag5*%?tS0x*>AjW?<{)k^*^~A{m2{7w(6TV%dNlqj=%OVe*V9`_HVuG zEPCSIXD@xvd(NVtesBGzcfSwUo(piZSl*1@9F6!czWd$pE@Ao6yQ8QCD5BLY5oyAT zqUsCQr&^UtYnFC>Q&&nHT+67#+DzWaZ`&G(*hzGqLq!BMBbBZ`=r?h^xbp2OI8r_|Zq zJvceuG2BVgx=~$Q@)RVg%DR_HS7^m_+SFAlRKpZ6HLp{y`=m~osu`0>A)8cnoh&Pr zh(c8=6_Vxjialon>P)^Hm_`4s9M3aiLY@yJD{k{w^34BrS(aNYX`J5Y^t-A6<+np9 zBXH-{#2v!MVYvvn^Xjc1c3}R>0Or%tCmfg`zcY$%ZcV{FI6gf+iQWq^xk%EU>srXR zWObw4Le#yGDyg+D(j*aWQiz1{wCGx;S=rUB=S_z@ld5jpLKdPDysLOpZ0F`xEgXP` zWck9rH;gi-X3f=s?(H@+w}VF+Yz@D2e2MX0j)B;H+TWg=5p$DaZRKDzhMg_wQf|4N z(QS`aZJ&Iaic>kOwkmBQbO15)Y7vW|y;pQI(JhWimjNTfJeEp3VreIobJ`2aDrLI9ffTyGsjUq^S-S9fm zAgU~?nsvQOlDYw3bz^2%#b)w`AOw?hR@uUFGPnLZvs|Pv3r|%?uzUtXUipcCZ8q_< z-=5Cpx;sU~?#01E}U=vtXWw?#_mZNX9h?dnjZhy15ZDDu^1mpHZ{$~_>?>^e`}M_xPKxv ze6V=MU1Q|+!Q%JtvH~Oo^W@<0V9x}L8F=Lsyks4Csct%f57xX%@JDyUA!Qjjc=tT8 zMS{$IYeR<>bYM_y@9?2B_M|h9g5rn$u@oANz#((bO-!+1n!r(+`;2ajP0*n`2+n^L z!g&Yae8>iipLo>4`9J{YLtEc=(7hpm^DV*HTV{rSbb7eEAAJ`7ZDy=>k~A7IK}*%j zn)h|Ddf6A9s(Mx=Y6R@|y<*2F%=#^J=l1FIo!h4;bKbE5GMw|T&g}83b3Xr{{i2ZY zhLdB?Z&@?vJHt8ur@%zKC}^V5l>T@Y$E?}pba&T4O?5J93ZW|U*QBy)Qe7~YR+UD_ z!0;E&j~Zo7QG8)Ng;6~2-@PzSV?pn2cqZ|^L|b@)vkzc0@4D}2J!|g`Ci8*UI3KY6 zk}*1c#7!o_*hglG+u`xa-fr~Sd!wikoTVDt1_8KEWuj7Dl}gc^Y9+ctLNjXEcq$Q^ zHPeh0qHVb1I^}iOrV^s3kQXL%ew*&8mIJ&09I*S@c{t*0!V%v(-5QZ%yEYvCd^95Z z)3a2~qW3R>UMVHWortPqY0|+RB0m#Ia#fS^M)f>Nm{y2G9Tzp1LKY=cZJ}!|Yskd9 zgI{HQo8k{;993@9&)kI8+=i3PIKV7+j?SgO14wa*f^n3YduJoj;u3gT;t_Z|A$SkY 
zz}pJIdvNP7JMjMI1Iyv(+DoJ8HPi63zjJ!9yJNynogyn~iARd3eNrT%QytF-hKqCAw3{eUq|Jbc#YjLQtl<>nT*#sj|6>OGp+r0gHoTzz}8wt_1vX?*>0K zYJeeN=f$YDg8Os(lRZ8V@r?9gT#60xpappBS?zA~w7H9Dw%wwmfyl5JW zIaVYr7C!H^UU7!zDsjXn;^@D6CdNQBdIO81UyI%mj73w&xW9LBa&#C$)*zcp;MfQZ zRhK;yI@B0CP_D+?sxAsClt>usdtE5rS+{NJXPvtFM>{=3zTfH=%Ot}fnyX2R4O@)F zq&rE6=N5D;m~3&%pl0Sx^YjAROsgW2ym>UbbEic{C&;xorR1G7nMcZCU!sRY>KYVT zFeJHXSewN}Vw896*H=x*KJFlx&%ou(1nIoJmln;B;|L9p4GsQ}QN zt$=fOWWmS`2wo@JpF)UQikSkJ7lBT4$M&Pv3h5J>F*xVLjzX5aq`rCf1(4+X`7ip? z)m=gG{DK$I_iitHu*bKTs-FADkUyse%m#8<%C7Bdpb8~~TBeo^VYAho2?h&nMT-I! zY%)*#j)DL5;JPJ`ycQrc)M z#~4>#t87s&cg1px#>JpqgVW{<9?EAo)lUAxq;}%<){n7eZ}km~Jv7x@2Sq8l0567zZ86xVAPQ4I7$ErhBt1T z)p@NDpZ^*AnDU`?nNx_h?=pLXuQb3!Z{EFix#F3lJ;f5bvjE~f*oT*c076Lxwu|N6D%GU**@6z$H~d3<`Zb8O3`=y(-fiZ2>6zbL8# zVyk7mD|lTt3W<3)J#D z!nQ%0&{IDxL7Ko>$r2<7EkukKxPn}@mR_Md1GK^*cFl%($PB6p1k2xD`hlDqjyyLv zqxrIxRp&oQm$`X;5k=2Tg~{O#`anlEvtntAS~9d;(m?eKtrT=yuhUYBE|G{jvPIz} zB^?OUW>iICxOIf%!3<%<>%&DKD;LOJUrO`=JM;=>N^IJlHwhvoiCx zLCN@SiakCZ%O_-9LCnnc;RbJ+Pd%so*8t3KUM`UPr1<#(%#RH+lgN57vu{gGl9`o4 z%bO)A3n!dI4Z(Knjyj&5R1|R-}>drg{J*esH)3XA#_!`|6JElsYWGxyjDASK9fF2bY%`eF9_uXJ+Te`^P&r zj(6-r!0{PysEkqVKIzInP4FirMdshWsBYIQUOl`a5-lzB0GQ9LYwsFH#)+#}*p(O1^QFHv+l~S{oce)2$bpq)Q?TJ1J z)OyhYOX}fscv(~t(KtZ>%0_D){248g3G}wFKnK)XkIvPDmnY^kn~3?+2{FInaQn3Z z%cr9^24ioW6Z2^Qu9_ z^!KJ5xP2(iCX;6)xmV0~r8mv5;t#wh*aJ#Gvj(MmLzKKv{o-F7xit9HS47bZGcFzO z?(d%%pNd)=HCvz~(RV?Ll+>z9Q{+de1QTj0D|mwPlBmlRr8`;l9eSqB={5p{QAM^5DK5O8G!9L%rw?^Cu%R9ihe-MrM-Z5jG#j?fKr|!ik`WY^0 z-ifwHf=ObZdSCS7G%mnBR7mSaa#|vbZZlVKQRtHL)GiTU*mdK6wUYH!rbI`JIOE(2 zCkv6yul-W>CG}~0S*D3>7QQ-uI^>7Ge&E!6@x{<_3B>ye#P9sy=T@ynzkd+(ul(NS z^~6tMY!gzCD%VKa z3v@O37rf+H4qS7^TP&mW{T;v_TrMnW{q4S*d zit%}JIGKG`g+IBXLrMa%V zWtL>ybt86m9tby4JJUQQaK4#zSMweI#{=w@c&=||dsF}W-?vNPq2tjKv8l99PvAbJX&-*UvMACC=Dp9iSR9u5J9GaKsuif`!H_K(=RWM1ggy4r$&mA&txfq+2Mg$MooN{Mt%2QwdFeP)1l zEY%UzFPlmv2WzxtVSO*jq!JYn+9Pxx1$#uHu9LcjVv-7lJ*6P$Q~ZMdWNDeeMbz*S zsk}v6iB{AvvSBe-W&|SSQ)Xo$Z6L6u%)Qvjja14)O5+2roEMwuWfwmZtL<%}O_1E0 
z7x~=-ajpG1tak#|2ME^x`SxE~wGjM)t>1GH{!w81KkCLRnEv0MvGwHe^yn!1HuBv$ ze!AP=g6n|`1{5ktDX88Z> zFZmhX9i%BA_OXKe*u2C`y!nG^tZ=@~E+QN4i1kn_94#7j4AHqkoTU;Q8a29>1zI{F z1w$g&aRivIZ)mM}S{Jp@x&Wh^TQ*Ih#bS8gM3SK;VAsK#ydt*$+1$j5<;-s895dh8 zN^V;z9#$4E2XrUA{WrmDVMZ3Nzj)6jW5qK8IlLXdE+El|XLe`_^DNw2Np^_en{X7# zjfLfh-3yyrc-)e^$}KpzgO)c=(k(m=X7PExiYd1+wU7MJJ6A3*f6OuE-->0&{nl3@ zIh#ho)BWRvy}js7cOyN!BQk5|H4@jZk?2L2FbuQs0Y(A!i{uQxyF(7!B7s1)Rr5B% z-AvWEp-~M(`kN*#I?}j8otImeb==E0+j3)E`CW`5QS6G<*wq-an3kZDpRy$fvs210 z%(Tn;Ghd)uUM-Q=H_!ff>bPGPp!!X(Exsw1mG50Z_1{e$%<1mw(ebWPzUYH0po88N z+WWNRkR#G5hIFFW$Q%#>p=njkk&cNr1;7FgE~LD)L;`7ss;Q%(V8Y2ome1vRHD6XT z&8C7sgRFHiemu~__RYP`LXUfX$d!G1lknv$`&9(oT=){eub[Ji7oy)t%b zQ6OJ~FKlUL1((H*05UhF#Z+76PL$=h!&FBjddCKN6?POp{Bpm^ zb$H}YM1RLk@UMJ-6#dEUk)IqL9Ka*tkx%D=vQJm0DaJVEJsQdheK&;)9H=4j8(|e1 zlPiG=kz1Grensbkik}5ciL-pv|qOD5>Apw<1?eY?D z70d#@hPE^SIw=*J=c%UE`W5i1i+nMz_s!e}dbj=Atwu3fNjxl3G0QT?9l`^!@v!M< za{R>TgY3K6$rQ`c)bs3jH^sgOS1uF$&j9nFLo3iRyolJC|j~UUz^*Y*ub(_Fxyc_bYQT&ul=kNS>sonXTDuV*^?4uwBHCU%qjZhwD@qS?*aL1iqo85E}c^ zyL@;+o|^^G)M0T#%*wlW255?{S2#4kJaF^r=>5?+_Wn6H_jZntkE5sH2457ImrD}z zOI{A_c)pF0FnZ5C5I%qIivF|bCkVMf7$Wq+Ip1y*LgpW9#MFW^NqXng9=8G`b0u28 zlq$}r{ljpBW$;>rmpthfcG$Cs5PH=bgu+Pr{OcV;-K$6c_A!Uh*D&_Vtr>@oj`j{u zqaBkeg+^5?u4lh;O@H8(gRcLluO7|O>-x`N z>|f8#@n{cCd?W1Fp(7H(NSvm0Y*kk%D3qqCsq#hZ@o5b@LN>aoTA(l{+FSW$bNH*i z{NIxW?HwB-7R!G<5%ZsK()DcRZrxuG@btmr>lpj|5}wC9`>uVZW!j7M;*0s2egwE^%9Con|YQ35JH{d=xg^c>*K=V_ZLhts< z?$u`oG{+>K82cl0S2(isMPn%XRN>?BrniP#gg3OQ#}5dzB0tY(x8>o&9T=}w zZgV3$@1C&pja#FWyf(|?$(erRg*l$aahekEi-J(&u3o1NZ5x($6LXM|8;&jfAqROr z^qQQ$F%`COWsfCnyvD~)9zcHnr74~lXa}Y%&Hd{ELA|+?3wZ4kK?ifctdC4MV^kvN zv~yU~T|%~i8-){Nx;9vwGyg1CArMVT*dKf75hsiEi*D`7Y`K%??$n}uvRE)5=gGvvgQTaEYw z%x(?zK*YEjf)?e4057u(d}4Q$k^gN{6N_aweAM1%ma?0y*>83qmRVzT5!=!-f%%&& z+f~2w$zOeR#lFx725Nrj`e^H(ngsA`=G3%c+Rb9?K}#)Y%7F&vHWakb(Q0VLp4lLQ z{(qX7Q|>G}u9;`35gd7y<$)_TQ?{686Z2eOl5x&rX$k-Q6O**%H#bG1cx5Ci9$D5& z4`b|~%#X9&-`hXjw~>gYZNY>DC_F8}Oq6i?5{e0B<)Wc-jc9s0k^|ikv3HLON1vyA>j*Ur4;RaVra5y)XIy7qJF~5 
zqyV}p94b#Gl)#Hj&F2WMxC}!5#JocGx$}=F{M=;r2UgmIv;W6)^L!Drzk*M{_{*CU z%|2T*`#ZwfdzHMiyR4Gmjj?|=d-h0)Pj>8f0`v%VN@pjaOC}QZU3m+u;z}|E&!lY% zgi6sB3BFGxLa0uA!~=-Dx~NebgS;zSWGyUV#@D;#!iZLTn3u2F+fu?}VTWRS1Y2z7 zu{$fx+(0Y2+&cN#AM)wWcCx2r(NZ6?-!pSv8Dl%9zF-c*%3aMr^QU`$5Kum{P9@(x zQOUlPcoKm5qyzIYjQy)Qm?UV@W>LzJ?X8c80;q z%(=q-b9n{Hi?i}iZu0T>tPGlu?=FiF3ZzfIFvD{Unz}=vM5Vti6D=CFL%A$!6b=AU zS3(stS+Nw|VWmV})o9i=vdY4LV7YYH`gO3RFp2>x3n^AW_wVHq$K% zfvISUqGO$4lLgmaWX`1P+)`I6){z!tX(kkAc`y>&nzXG_TS1%I2;%E!7m~NvR;CNT zw6|=jgfW{DE5I{pIm#u7A)HF7@M?6Xq=bWGsjPac8MG*%CPkg9Qh|ghz0%=jT69!7 zAYqGsAM#z6q}!F{mizxD*ew>Oupj%VoG-FL&1bV6E+eeN@Z_|q3ng~Tk0ieWo{#vm z^#?qkh*UI=sX0IQcTY}EqZ{|q`URGlg~-tTbmAMu5>!{Y>@{trM1_E~9a3CaaFNus znN*~zZNXD$A_)yR9Nf8Ts_UGa1vv{BcjVRVK;GcP=PsnmOz?gG9I|`Y__lkekhw() zA!HwzQ}FgeHU%V~OlI9H`2R7))kR|QDnfdeJp{otZq@zp{?aDM z-TmV|pk_)?morQtDbWLI+qz%{FQ6B@PSPGeg?Mp}_Pd%WSz4@RU)gBk@gUA^#nm-G zJ&w*PP;}kexYO+AlNyv)(*Hex(<|w3?JZYG_YZask(7~=zCB3NxgG13^a_*9SG-#d zZCLgT?#*Vm1;@O(7nu1AS5wm8S`kkO-nY+<*+OaJ$!&pAw0+ua`-9v}o7cb^hZ9|X z7aR$*;dS0R<_)9X(tqJWhs0(1|u zVR%d4a@3rApFoEKZD6FPtwJiO%SdQA4@@M{mU!q ut*y&d@Ba*Ga5RY+ZU6uuiwFb&00000{{{d;LjnLB00RI30000000006cpcXO literal 0 HcmV?d00001 From c3d87f54a1554a4dfbc2747d52b21d3b141b3e9f Mon Sep 17 00:00:00 2001 From: Emma Rousseau Date: Sat, 26 Oct 2024 20:40:41 +0200 Subject: [PATCH 12/15] Rseqc inferexperiment (#158) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* full component with two tests * adjust arg names, container base image, test data size --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 +- .../rseqc_inferexperiment/config.vsh.yaml | 76 ++++++++++++++++++ src/rseqc/rseqc_inferexperiment/help.txt | 21 +++++ src/rseqc/rseqc_inferexperiment/script.sh | 10 +++ src/rseqc/rseqc_inferexperiment/test.sh | 72 +++++++++++++++++ .../test_data/sample.bam | Bin 0 -> 5595 bytes .../test_data/test.bed12 | 4 + .../test_data/test.paired_end.sorted.bam | Bin 0 -> 19725 bytes 8 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 src/rseqc/rseqc_inferexperiment/config.vsh.yaml create mode 100644 src/rseqc/rseqc_inferexperiment/help.txt create mode 100644 src/rseqc/rseqc_inferexperiment/script.sh create mode 100644 src/rseqc/rseqc_inferexperiment/test.sh create mode 100644 src/rseqc/rseqc_inferexperiment/test_data/sample.bam create mode 100644 src/rseqc/rseqc_inferexperiment/test_data/test.bed12 create mode 100644 src/rseqc/rseqc_inferexperiment/test_data/test.paired_end.sorted.bam diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f720035..3fc134fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93). * `rseqc`: + - `rseqc/rseqc_inferexperiment`: Infer strandedness from sequencing reads (PR #158). - `rseqc/bam_stat`: Generate statistics from a bam file (PR #155). * `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). @@ -27,7 +28,6 @@ * `cutadapt`: Fix the the non-functional `action` parameter (PR #161). - ## MINOR CHANGES * `agat_convert_bed2gff`: change type of argument `inflate_off` from `boolean_false` to `boolean_true` (PR #160). 
diff --git a/src/rseqc/rseqc_inferexperiment/config.vsh.yaml b/src/rseqc/rseqc_inferexperiment/config.vsh.yaml new file mode 100644 index 00000000..184f2c10 --- /dev/null +++ b/src/rseqc/rseqc_inferexperiment/config.vsh.yaml @@ -0,0 +1,76 @@ +name: "rseqc_inferexperiment" +namespace: "rseqc" +description: | + Infer strandedness from sequencing reads +links: + homepage: https://rseqc.sourceforge.net/ + documentation: https://rseqc.sourceforge.net/#infer-experiment-py + issue_tracker: https://github.com/MonashBioinformaticsPlatform/RSeQC/issues + repository: https://github.com/MonashBioinformaticsPlatform/RSeQC +references: + doi: 10.1093/bioinformatics/bts356 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] + +argument_groups: +- name: "Input" + arguments: + - name: "--input_file" + alternatives: ["-i"] + type: file + required: true + description: Input alignment file in BAM or SAM format + - name: "--refgene" + alternatives: ["-r"] + type: file + required: true + description: Reference gene model in bed format + +- name: "Output" + arguments: + - name: "--output" + type: file + direction: output + required: true + description: Output file (txt) of strandedness report. + example: $id.strandedness.txt + +- name: "Options" + arguments: + - name: "--sample_size" + alternatives: ["-s"] + type: integer + description: | + Number of reads sampled from SAM/BAM file. Default: 200000 + example: 200000 + - name: "--mapq" + alternatives: ["-q"] + type: integer + description: | + Minimum mapping quality (phred scaled) to determine uniquely mapped reads. 
Default: 30 + example: 30 + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: +- type: docker + image: python:3.10 + setup: + - type: python + packages: [ RSeQC ] + - type: docker + run: | + echo "RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d' ' -f2)" > /var/software_versions.txt + +runners: +- type: executable +- type: nextflow diff --git a/src/rseqc/rseqc_inferexperiment/help.txt b/src/rseqc/rseqc_inferexperiment/help.txt new file mode 100644 index 00000000..f19aa318 --- /dev/null +++ b/src/rseqc/rseqc_inferexperiment/help.txt @@ -0,0 +1,21 @@ +``` +infer_experiment.py --help +``` + +Usage: infer_experiment.py [options] + + +Options: + --version show program's version number and exit + -h, --help show this help message and exit + -i INPUT_FILE, --input-file=INPUT_FILE + Input alignment file in SAM or BAM format + -r REFGENE_BED, --refgene=REFGENE_BED + Reference gene model in bed fomat. + -s SAMPLE_SIZE, --sample-size=SAMPLE_SIZE + Number of reads sampled from SAM/BAM file. + default=200000 + -q MAP_QUAL, --mapq=MAP_QUAL + Minimum mapping quality (phred scaled) for an + alignment to be considered as "uniquely mapped". 
+ default=30 \ No newline at end of file diff --git a/src/rseqc/rseqc_inferexperiment/script.sh b/src/rseqc/rseqc_inferexperiment/script.sh new file mode 100644 index 00000000..c425b6f3 --- /dev/null +++ b/src/rseqc/rseqc_inferexperiment/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -eo pipefail + +infer_experiment.py \ + -i $par_input_file \ + -r $par_refgene \ + ${par_sample_size:+-s "${par_sample_size}"} \ + ${par_mapq:+-q "${par_mapq}"} \ +> $par_output diff --git a/src/rseqc/rseqc_inferexperiment/test.sh b/src/rseqc/rseqc_inferexperiment/test.sh new file mode 100644 index 00000000..ff2e870c --- /dev/null +++ b/src/rseqc/rseqc_inferexperiment/test.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# define input and output for script +input_bam="$meta_resources_dir/test_data/sample.bam" +input_bed="$meta_resources_dir/test_data/test.bed12" +output="strandedness.txt" + +echo ">>> Prepare test output data" + +cat > "$meta_resources_dir/test_data/strandedness.txt" < "$meta_resources_dir/test_data/strandedness2.txt" <>> Test 1: Test with default parameters" + +"$meta_executable" \ + --input_file "$input_bam" \ + --refgene "$input_bed" \ + --output "$output" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Checking whether output can be found and has content" + +[ ! -f "$output" ] && echo "$output is missing" && exit 1 +[ ! -s "$output" ] && echo "$output is empty" && exit 1 + + +echo ">> Checking whether output is correct" +diff "$output" "$meta_resources_dir/test_data/strandedness.txt" || { echo "Output is not correct"; exit 1; } + +rm "$output" + +################################################################################ + +echo ">>> Test 2: Test with non-default sample size and map quality" + +"$meta_executable" \ + --input_file "$input_bam" \ + --refgene "$input_bed" \ + --output "$output" \ + --sample_size 150000 \ + --mapq 90 + +exit_code=$? 
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Checking whether output can be found and has content" + +[ ! -f "$output" ] && echo "$output is missing" && exit 1 +[ ! -s "$output" ] && echo "$output is empty" && exit 1 + +echo ">> Checking whether output is correct" +diff "$output" "$meta_resources_dir/test_data/strandedness2.txt" || { echo "Output is not correct"; exit 1; } + + +echo "All tests passed" + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inferexperiment/test_data/sample.bam b/src/rseqc/rseqc_inferexperiment/test_data/sample.bam new file mode 100644 index 0000000000000000000000000000000000000000..9b8d417c8cc1651725269c13a1c915b08b2e4d38 GIT binary patch literal 5595 zcmV<16(s5(iwFb&00000{{{d;LjnM80fmy^PQox0#*25wm)Hxe<5ULHs|`qyWSi3o zw@dd2T*6jd8#LbgV7{4Qz@|9#?rYEK@9TGR#ROL8JfAYa&X{eo2(a(4x#KL{xo6|RWh#{l`wJHF zG8Rb+UCXZ?!F*E?$@6n6fpc!H+qhDcO4CJy-SVtVlLK9pDosel z^VV|IVoooa6FAt@UQ4X!YKCDo!o4C#m$XQ}ed3qd%|$c%r*HxdBm&7DZ#=Cd+dEI8Pof!w+ z#1ig(@BVGm@@DP?C)H@kgK8v|NUTC=YcQ=?Xd!#t;wB9uwoRaZNs0X+SP=ZsekfS6 zMub8lhF6J*p+uB)~wY+wnCIP&-1iOt9azYS(?!} zu4MF*=arwArgY+_rZm}~(C%0tGx}K>z2Y20bUQ|H zg1Jxb9Dn7>bH~X;Kl95w$y22W6MN?NonT8u#T3HKedsZ`UYR)w)bR$gH>9R6R6{a#mX`a)OM$U|=GkG^K zi~emK&og2|o)04%ro}6H7JpopD32fyWXx8?%$-Y&T--?6bodB2g^ts3lTEn!KO-J`FfS&* zlC1dpmA9r~dTzc3$7Z#yK|X+Iv{sEFNM+r~npqH47gf!>-Y`}-;Hz!i>Z;T&-Vuag z3eGB922SSQUuT|+^sZKmPi3-R~Irx35PI{r#})RcL$*V}CvK zZ+m-3ySt7<9p^2ptg4w1O4ia8bx~T2RWoTCCK${W1BGl`X8M95C`wx@&1@+dJf4z0 zD_PqC8SmZ(pEQ23tJC>o4^&9U)TmZuqDvWtJkLDm#N4IK+grE+7lwFIEwePT6vKe> zt5?UN2b(PbT5wu#V)!Qx#vt|D&?7T>+m3y?9Ghr4^b=UlLu zfmcq!V^;!C*rrpsv6c_`Nv~8ZvdQk`C#!&_Xap`AHjLo;*9{^{UbOZ z8I3(MGxYtVy`A&PH{jo9##%GhSi}UYO{;6!*S+a=Uv#GGdBMyIu2wSXe;NF!Q`Qv4m)=u2#S?z-rE?lfI``q3 z#P=F);RTLAjm3QQ)=x#&-ZEOur{5cVz}7of(dn767>2QDW{KP0!Qr``NP3mX?2q1kfFd7;`y8fk^ByH;q3o_Q~ zcptF))I1!?D}*DtI9jZb;yXSZeKuJk`iWVp=F$6epjT=Qax+zRT(A!A5c!!xl4}|v z8`De1xHX7Fom92dN*5(JZDDJxYskd9gJ0!xi{cMu998bq&)kL9+=r9QIlwFpjxMCX 
z14!u@1?MQUaONY?@+$E3#3S&w$KaivfwvfecXsh}0leS5eLei#c}J4GcN%`qZy)XM zY`gGN3uGlN@yJNgvx2G4bcj23h13ou1PP(UHF+1#Y-f=Bva)AZpisaFN@2U6LRBq{ z&s9P~vh)d9Iw}SXVRpoo5r4wj;D=TkU_shlNG}F#i?Hkc#<}<(!yA^0&<CM=oNLLrQI3~n0P z3|yCNM9GZR2>Bdkml_#PgESnXQ&){v%mrxt{vaQb&RMsAp=2yu(o!jv%Y(wsRv_kM zf1YMhlVhgO>9RCS2VtHDH5EnEs+i+N!g3k&PVW_GXl@cmd?JqiAu=%qn#uhk#H5*|!*dV17fil5H-HcA1CZurJlaA$1LkEEv*KHM|y9arphhN;rPiYHJO0SxyemDw=J(3r=l0 zrqeP~zXt!nQn%vFCkQ?}a}@wuvlno|jyxEd10m`}|0#s1rIZ7ts_YthN^yuCE@!UJk>@qKsi7tf zGLWt*xIhq)l9iAks2LY+hinB^Qhr+8@@u#%Z; z5X4y5(RBDJI4h1$gO$ot+G(r6m{47-d{HhB#d43v>7cvTN&pCk7p|MtMXiut{vQ9B@}YE@Q;79vnV%3V4KUG*H!ZHNI2`rX zt9+fDgz8S&S+)<#CxzA*+Shy zyCPi#N%;&+gnmi}M!k$HfjOiCRkg7?cr8aD=M2qF?7VZr&WPr3+__#Ry+I_&&WxQ0 zM~B-7zD$aaSJ4T)SjhaMs0xU!mdmb?b)i8)q^SnADy7l)GH4k=!GOD(afO;yk4iEL z+`?KCW7og5vO2P{*Npl73OHXmC1E>E^tC;pDNjUGY($fgo7YJR;g@FIT%ifo3fPq5 zNJd3tZQXHQk_p$n64olz@;bt{L7LE0KaC+xki6s!$w3PdV->C-SFLqa=)nNJFi1nQ zAssSG8-m^@SOH*O8w~ao~zR#?<5U3?X%drNk zUs+?I+j=WXtvaR=b##lui69*a)8%z?qA8^9QhN5-m z^In&4ta>3%+PriUn0x}}@nQvBq~;$bD}e8w#`FMYPJ0DT3l;J$isQ;S1D?_=y+ zGftjAcXV)+JO~&I7-yjnrVRsbsJtSQz*HytVM!ndQp^I zHNC)%N{hmDTbJWxdOaeEPvsold^#9HH|4ka&>QfK%tsS|$$!nT+}++g=Vj(~gObbZ6nkPimQTpIftXq7!wudtpL$OJ+W^e(TwNfKN%8jw zFh4iQOd{*<%)UJ_NoH0CEpN_H7G_dH4Z(Knjyj&5HWYC<_t;;<26IGuS!tFuKcV=Z>#4I6XJX&aV|aWJ#)rgh#aM&~w&xbs>)( zHNQJ0ZiQNgPHu3WZnz3Vt~DH)7MYcAXsVBZ#0QV};4DXT@>t#Rozj5icTTeNt&Mhm z;q3ZWqvtU8A2T~YIDfG1<9NqU1)Q7#hsqe$?pas%g5f(QMecWBR@duDHxHi>iPpAx z0L)7p+npcyw}WvWioD zmDNKzK?Upkz`Tm{#p&`!8x4-XI9c5}@508b`-?eKu6A{C17|jQ64~Y_G`y0BnFypGj6@pP4?p{hghibLW$ne(5O!p5&{D48L4487)BvWiSSHEk8t*sHXhmKhc0UsLs$!`n=>V#^vO9& zZ%$E4M<~5HL227_V2sl5PC0P>P?{|!&sOB#u-c8@G`|*q5Iw;;p!CTrpmfU^rRYeIQPUhGvM|9*9xrt%})F8tQ587T>v@uk&O*h*nQ-p5=g?QS%$I4{d`5m9NdC*bJeSWJc;jH7FY3h#H^TA;FzyeM6}~sj z80WEUarOCIa7RDG1E>{IWH4oTw*+(U)5ZnUH=vgkHT0~duZsT6*T_|oqi zkGqw;uW}_i+QgaWK{$Dce109bs;{X}-^+4MWVi7(@G~Gk^z}zhEf!xK9oIm-jX-?! 
zKVRRp7yY(D%>U%O*Y^`YkFl-Al#qw}+ee{C--`xJ>`*$kEm5sPJ+l#LG#iktu^i?D_5z(z-I;=;11(fdokTHCGq2JR*JRKzkZzjjUdLs*wF1>>9>Mv8 zlf-;B5%W=iKL76ZvK&GD##>WPdN^I8IYERO(-Z9^9A2-{t+2=_o4%!Oi^kT-LE#V? zxF|ckrCH2&=sdIDa5+y7C#%n@aZxg#34$xYoL1`?pYn*@{J}{b`a35b`dH)q+j2c? z|NWmxl3$#1^8k(PeP4fXBs^eAt!OJu(IM+BdbmQ4h7aw7zyv(i9z^?O)1IxA-e`EJi6)9#3W;p8GyV=xzAd=wIU}qsF;)l@;UI9U|l+ zHKU~y!hbrYWdQG zszHdV8Jq=5b6xk8+mh|~jri3?Ai_fZO7k&+^OH$;ZN4LZJiy+F=ark;(bRwMr`GFp zH>xB#J7wqL!S>NPpB@z)nWg4!58Bz<@DA}?p?^^q#?TH6rD*R(V_B(~|LYds%f4fs zV!foc5|Y%GQqwL<#Bnf~A(2imR3Y4sHiu<#W*#3xwE^{fG~`Rhc?>%)VNYXpa^$>u zaZ-LX2J`O4#Tl4qMy<t@=zlO&Ixxdu?RDr}`iuO*s9R-r;i zjdcXCV(VHeb>nSgAocd2Tpfncg$Mnc zjS}fH9n5@y_0j)0<6ix!}N;&(C?;)C*fR!l zUf@6UCl@7yi>Tovgno*&678s8X2WK#%n3xur_9Sj`U3(_$~=ml+)1TuOljN@%6X}a zUVif#u%k(?;;y7yld}{NIu3|3Vn6VEVs3 zW9#AG(f)q&2J+oGezxn;Rd46`J(Y)@!0u0|hW}1yr+z5+$-BRl#=UfohGFBFSNLu+ zmiVDr@#pwytnk0u$5=srY+m9e-h6KwD}rzHo5)sn#Cxa}i53kyhUnZN z&QggDjT&3)0xcbof+LaZBmzv=H?&t=)J3hVEx@SeDW9g$W-&Z(BFWGa@cUq0UXl8L zJ~s(sIrD$axypP~FS&iCc-UFE9?*mEj&DZC(v2)bfAQw4j1`wg z<~6vrlI##)C*i2rg{9|*|6ADH!xOgLRbj)qAN0Hll5QDsaGTHbO-y+TOMB*nk8j*w p{{Oo8K`a)JK=Dru*J>A=#?%j0$#VU>-}@qS=ED7V zKXGaH#F+=a@9x)KeEi_C2QN)-I{yHE;gN?QefW`!Z=9Za;6>f>OJ8*N^xS0UJ0IWM zoSfOeue~w;&Wk@Vo;^N>ht2MK@Zu8}?|SIs?1_ta%`QGZJ9zBP)4N`{Z(g%LnQU!t zZEbFyo4m$cxIdk}dGea?zx3#1k34*E?x9C74j=KK-u2+4-D6%eIfoa|eg6+0({b8% z%mbQTeB_D89(%(K2GkutFgLi#xe1Od-ygsE;Y)8Bf9HkQjoSxbe8Z#XW)~kBKK0yW z_}un**>h))J^sY_J0I&F`^Jk8KYr;!dirCJKXK{7bFaJj$kN-dyZFdWD@9SO9g!Ai$zg9|HUufTHoH=+uS=h-P_*YEna)3 zD4df{GR-0n-Ucp=s)Xa-YN>rtjjkhaqK~DuLDiL1UMS^V6QeB~YfWibKa^Mt*SNn7>_IJx4H@k*mV^;(=ptF%Www$S65f7wQ*Igt>KfP_jq-6 zbzH5E@FyI>?^=V;F?5GN1Fo#XZyL`g`~Q44Jt$uM!dFa+7u|obVlK?el?N|9^owu% z!{yHQFJdS|j9ge>n=0taRiTc-oQJ=3bwohGz}Dath6rtKJRYx&*Ty3l-0HX* zjd6_aKZ0yrAsp#TBSdC#z}K1!`|o`4(nHstx#@Xnyf<{?-GAr3iFEumMZs~n2QBG* z@tUIe<~-?4CY!rE>qt9Hm^#>|lp^w`Dvfu{su*|_wW)*-H4n0k(g+znGmHnrxns)J zB3SFnDln}=6@+$t42{Ov4w0y#f0i1Ie7yYncnv6FoNsk_4I6wP$O@0w*2aV-T!-T^ z{Hv4pQBLVfOIt~% 
z&LW!Q3dt$4R5c#aF;Wz=8nJ5vJ@f4vb|0^f*H+i6wbiw81%x?X8;{pkkrau==>d2J zJ0aB~bnpP+;I$Fl8WSzUYgq-#{Pd6&D}eK(&F^PcytM~swUTM`i!Gc#+0rJ^>&iSU z_G$A(Nb#s`Q)&}xS85$ZP>~6)y{oycgJnW{Sp%tz*MLTd8{rt+DiBMxHl~5Y$cd!j z9I0u%nnV#&6WnNt5jv+sAUF7*PL`4^KBeuC?Ig6x=4kW(?a?8Dd7*f9(b4Na|HeF+ zM4O+xz9^!LMrmf6COD_-0FX*kOT%Iv8>cG6tq+Wu+65MC99;#n83}F;1$YuxYokPy z$n8j7X@H4bljtW!G1rhS#|u>7eJ^A*5OhX@Qhux{hZ(Lgx|Jy*mZEfvs zZXyj?&Kp)mZI}>R8X1&PWsHVJCPU2xlMyZ`*)%M8#bUHFmfEpcO6Hl-(zB8^HZVrw zEe&J-#de3o*PcjWwNDyJ{1GREJ_MRX%0nTBw}4skdDw3aH7DtdYP7bRsdQ}(E zj)pRr-{{d$1`_~&-hd%HFawj0#YQp1V&GhR78)aM$z%*JdLEhd%1I@-i;64JM8gDQ zLI>ICC>twStfaPDmeqI-1k4yL05B#-tE16s64^*NtD__VTU|hGMFdH7nM7e#jS`v} zf8tocbzF@{bQc?<)}4;Jx0LDf*N!tZZkjVRva)#Ab)DwT%HmJ1BLk!l=I-X!<`k91 z0K9S@Ja!a#BGy)yOd46Ul1Y0$Aw?9}O80S(7Xh+#N~(hlx(aL-?2%O`Y7;^UhPqR^TG`L%VGil@9a(IRC3YoY$BOvuBX9c-cJ(&RcqLK4axe3A%fG zaK5s44UYS8ZtrbPHi{2jUlapoHH_8KggUy=I3vCB!8@;Ps61CJbbwvGwwmZ^HaU3Z zP`Lg2;ll0L=f=EF3bH@upBwPw<&OFAKl_T1NQ`qZo=p#4dBm8n?T`6?@3BZW3hJU` zrN0>HF=CrB^tQE$(K=Qu9V(F5U}YJwx|m>1DAA6oR>z|xN71qdEj}Viffi5tyFrnS zS4VJ6>KTmh1+rZ+7Y^R~;H8J&aOUr2s=cur%)wjlPBP%^&7F37dm0S8xxnN8WMgY* zcRDFPbYoG}T5=H$_l;9VI2J^#$`D{oCTOc1V>;HX6wU`0dE{K_rj|iQA&qT>3$;`3 z#Ke4(PABO@Y(Ka_KW3AIzc(QMnLI)r$(6m84kRs1JQD%>2C{CR_k;EA1FpqqQWhh&-HqofQtaU^*(sF4ZvIJfp^==eF?nB zZdp`6w|+}e0BQEX+gRV*oUEhzF@l-c038J>JX1{DU=?Skszne+No5pwAYC|%HVErk z=~)y)HVmK?(Rxr-Mg*FxqzMU#Ku``DkX9`A2RlbUBJV_<4V+b|kyn)N0SQOCE460sr?g4YFgH|lA^9}uLMM%_DS4jBm}wDZvzL*g8u zxy(4?bH>ppG9|7S1uyR9Me*~+rQS7YeKe<=yW3lZG*X+uoN@q0Fr|-7geq7Ot%*!j zMk%L*W}JH;Rgjj%HjZROLLL9;WY2)?C%c6x2`LE1iqm4G7Bn%TC)nXQM}&^3I?y?V zsNuL_oY{cJVO2mRFYlP{&|v}7X%Ovo>7;cy^AKEmrhRnA)=?TJqm#PkMnuhH%~X`m z2i>?BgRKJxffqIOjuxCxNE0wQ!|6Sl$>!bY&l33UY(U?5 zc2R=8>gt!JjOHaZ@(ZKC9wJH{H_gqk~TSkR(kLPukm z2qjZSN#`6_+Lcw~MbCh~8tsTDii z&h72Jovq@j8;Syi6u1JF=7GuD)rQqE2*zx!xZo^G$x0r1*;v)s$Yc{a`TY8S`62D_ zJ}3D$wAtj~wF4;jKRJ)YnI2tzat?^+l3CFQ;$8D*#gzha(lL#e_Cnvv@mORAI}V6G zgMBhfD`2LfN&}`FxWW^Iq5tiqgAdL$qGp1NpdDk@LU$cOSaIcDl$uByZL35v$-4%V 
zN}A}Svz*ZA54m@I;&$kl#zQC<*=y)K`5SY-lWcFjlrKuF&+wvn#=O0?xxKZyMbmw* z!J>Dnwvq+weB@F4s2NjbV1`TZ&#_W!rGsvwb`H!lucZ%0NzGwF!b-uSkL>(aEiPv-5dgeMSZi-s{xkU(@& zI;Dk^r7OAgs&pn;BcfDg44gqvrG4}<2*IprxQna_(J2sOG`2iKaLKLp)-vdFzt7_3 zY;hdk^W-fN=K#%R)Y(V#&KAw@zqJ$o8O>`%QA`HZxwE&szC&)($eFT&GZk6sm8z78 zw&Bt$X_N~_aS?+pnRYRlVCo>P(~LRBwQ|yPX~4i$F@iC6GUB|vmP7;{=lOkm1#Z%d zYZr!4w<4PISTw~_Gy%DxpA-=Oy#Z}@XlfRuQCf3rgQ#OPmb(&I*mx~s)QuGuu&t$I z9@f*CZJ6XGXV%FkIv%xjEHuivY@sIxB*TDKMbTDEGlQ#1h&8)Z(N`NiL>Y$m9At>#96s6OaIl+Z%f`|g_Kp_|h zFA9$~DlTgUo)RBx#*EbgUHD5!b(*&10mnQxIjzyJfKya*C=iyoMEQHif!Uu}-MO;A z(gBxA^B)x*z<12+^aSP@Vg*hLRTV7gaUGEP!5L6S)>tc89ip_H#*CvL^|&(WZkk?V z!+FSI)U>4a`knMEtu|kPv06FQ@j9zka>T{IXkk1kek#HEt9bs)($;b?75@lQ&Ew{FDObN{ybKY6bR3kjsF+^#C7pmmC_QD0GolwD&^JK}G zh(xZufy)4AfH)mYsvG9F=sqCnl6+rXhuLKR!$U0ZI70W_(en3iju!d*8MhV1|2@ER zbA4+{X699clFP%eJY~z%vDWCplm2n~iFQI?TBdKdVi&(Oo9uu7bc#Grih&5TS(MF8 zAlA(Re-mMXnOTKk8E3UqOiC@B;=)*1@w6_0#^I_AT3Km$WVLLZicW{1MG#Ku#z|L- z5UkRY@w2!Xw1&PAT&_zNJ1*^@9{`MYV(;WKMT(*xiR)W2Knr!bWy7;Ot-4qT>YhKA z@%e|3n-ur?{9McDK!;geWcg!#ytqjVU>OA=-(bbHuyd)Y& zX(cy_PP|O=bd8pVsbwD@K4pMQ?-XBDYq!E;aj#E`ry`lqV)^;w`26frK0klkVyMwi z-&Pd=cEIO58$0Wy$1Oh*aB={gtt0sEnJv9wPBEAi;oqKzcI7P--JmUD__^B&)3(hf z`=466-1%L9_VRm{@cA=Z!UW(lpJ&1ZfW?3?2}~eN0c#sB)>7~$XjmFz!fRzDXTp{) z@Nvvehy2~K@|lF&&$n2{hC z^HE=hp>MXuH}wE_)^`oPIPabxUN6vK|J`$=o016YkM4gMQp#yZmjgJvzZ7Ypch&T6Nn6O3yXSd?0%cbfk0b_jFhS!_!%SzGqC{zmRgzmtfKhFfR?r<8ymYQQ>72E*kZYi(peLj36&5$ zTBSLlCb*JJmQI^e_^6%oHb@a77l9es#8Q?L_$6BJ# z(pPBtj!cFWztcq>;)D{-GYC{n9uH(;Sh#}an6>fRXgtD2tQ06ao9zG5&^W&bR~@|xm8@ehg)-)jaGM_4vRQ9N~HQE)%NB{HEK&zJ#YAJ)F0Nw}_^QCu5S zI|(7O!g?7>7g>l>P>A@5;>L5d63Hrb(LoUBD%n$s5!YybokP_T-~m-)fGj`=fv<@^ z6Y})Rx|f4Tm)7XL zsHL-3n-CiywaSQCi^y1rLDrF5C%KJYDjp-NT*-oAOe@B@vqABojL-&F5=DQS*^TQ& zZM;nIV;f-FyW4lmb-$QS`~3jUuODa3ugw|r*#Ld&+ZNq&0P&aR-Es*|oM_G=!b0#s zcG7C+osKSgp=#ei*kXt#DifSxnh9w`G|-DhtCV9taM>G2Zr!NRYHzU_4-A6CTK>-X z_^wM@rp>P(w?f}HK+D$0`8#Dtn;Fa>`CCQtcjjqxXLoyRoBZ#!jM|m36dgrSHdv;7 
zh@5lfAr8VB%Xz@~8>#AOZ3G=-q~$C|$4t;ZSXL{`2k}eDXh)BAKzhU8<6(IA+T5-5 z57sf#+6H}Q`I?_Ro6OFh(qo)ghXA=unwir9;eR?$nh8zdbY>&3%fLCeE(EQD2{kO7 zc^189s&r*E!g}!90n$816Sc8CxJFeqkJd2b!8K>?lNgdsaU(Rk3`80zjkIUWXAHXY zB`w`Ms>`7b?B?-OJda+?&aD2!-`>qNvnsz%7sYM!)VaH}zBi@mk>XBi#~U95ghX;H zqSLlyMuh+o7Ft8>MIBkG8D+O{FTG_}GcRFTsd1`-sj1B-;`neR=6n{3y6^?0^Ns?O zDbUhf?%sVbUm}+=O)>G6QD86LrlqzUR*st=^})Pj<-!2WYkNzVpZfNq_`l}uqTQ|S zt?5p2_67*uDT>!#dl{fQtzzYbcTzbSb+8;3)>()ty(JI)gssCTfrjZEEmNoWK&KeH zlLA=4lLE&WD}#nUhW%jd`6^mS=!XGxXOsQkT52L4vVu8oVf|DKYv-y1sK0gIL`tw0 zD+KEs!AvTh#2lf+NYFFQtaY*B(z~FT48dt9v=cT+E=uM#GpcmLJqflFMAVOHxCjy& zh@;gtv=c$*jgpm=L>>qVFqJxI6I{b-9cB1YNFN4^qg9%SvZY)ww-SZ-X&Jpy)P}HL zYc9-g0a*X|;(L|_g5R?8*#zNNdaVCF=~|_W;>`nU-QC*T-Y&j$T~Q42i`Gl6njwC` z*BA+mDOBzMXlDbrVSrf5Qltz<0ED2x>y*5Kga$oklY^HIoIm?N_TZ(5{vY#ud4$*Z zrYS#`bp^;V8sI$L{Q11DNV1KB$T~jaUNlh#qoS5IW6@f0u|?KI<5aC==>+GRS;=g) zzJ^$FVU*D^Dr;Sh!VSZ@AgU0Eq+JrAI9QxltkQopH%Z3w8YOeKl8wSYaTx&;JCgj$ zlrUZ}ys5PZRW_0+?PIBUJ6czKsz)4|51-#7q8}UZArZ{yt}TiZ_^^No&ZBYE>^z3T zVRS1B5Q;&ikUNkoG*lcskr9oMtUqv-U{v8UN_q0ZOAo#MyI;RFy!^$9Qhr4*3hq}w zrzp=u&Vh}AcHW&8)VqS3f7Nqw-pKXiJ>qX1=y5|(dZ_Epry~*D8&IB#rGO}R3 zw2iYiK**sK(ghxZ_c4aZWz8XELb{UaMnn~KqmP zcCMXAy!ttdx&frXhvs!d;&_1bXoRejHo7`C(efIi(HslLs3zE`m8@jsN^u`q;1Scg z(OO7$D8xl$7JF_OwucZBrkF-cgKmM%Kgn5@IJ~`_C=c&~@T+ zf8maf*FoY#``tT8`uFkN+1T4yFN&uDuSbY31~)peJ^&)imo}aHj-og-c>4qn=sww4>0r!0=opkV zAqLO5WVHl`khamna(NlG4kZt+j*!wI8}D2wgBB5@^(z;pQQ&`};etItOCl-#IYp!3 ztc1o)Gh)ikK#`LWqT`(OGgp2h9kfTiTixf}K@!&g@;Kdh+tM(>|MNSF;?_iI8BDka z$2~A7>yypxy$xCs*HYHHb~ZFY))q9OY+@rNn4Mb0;K9Ct^dY0Ib-@MYg$8qtJFLo~ zYG!x{F3^g2k~m<9=J^VF5)cE*<4ER2GEFlm*~my|=|QP3ON$V-R6t@CBe!98Noy9c zghml=XyxL3U^dy0r;BWxLo+4A_R)O(AQ`rYW^)zMOhsyNgy>Hj!R#4>U?_DcMR5SO zI8&xs06BQ*ZbEB>G(A*&saLv{zQ1skCzzcKRI}-R?6LVw58S)GEZdo0c~(O6qb+T| zxA?)Ly9UQ|XinF6c6N%Vo>mlB1ZE<}6SiY|F5J#2&I3p|%_awr4k&K_%ZEnv?|*p? 
zp`-}=2>tybyR{FsB|$A-H0w{ zDguO_a|A+tP5R{*CWP#roqYSrgwUt&EQ;F)6%N+7x2Ics#X8QE`b=eltz9DoM3lk; zVDM0pq>9cl8>It>hK@oiQOa0K+tfZv=3!9*%2FlC-;O=Ec|_WSwj~Cdw=c7NOQ_~+ zsEhIUTF8Z@?KVp60_SUZd*%r!fCGdlH7R7%*WXJ94_Jrn1N1>yc z=3tx(C|hM!h%wa4faeS1@gWA?#kzDcYA=~?XbMk*{_Ab|_hb>67#5cU7CX--`~T!P zyPhx2*8QhFJhQU+{X2`|w-)f+S>H&@SJ4^IY8AZ-q0!noskL?5JMOePxmHQ{676*CnWvsmO(`uB=AawuNxGB{DMu6j zQ506Y#Em92rb>b3hrbmnanRqe2DO^t6#S9Vqws-6S3;_C-Tc*iq*d zW2n6d&dJ)r@)e1>Eb9_#bkc9x_cM2`{b8F_4p6MpT2r!b(8Jc7l_Y{C z;g_Cqcsl~y@c_>gEC-6E?Ed>b0?q6WBH+0T1lqFuX=NmnoPLgsLUUfMNU#%MXR zE-7oQ`A4+623470C57g;tziNjnS*YTm#HubayJUWOrAu!^k7sr)SaG&EzN{FI7w`* zX7c=n(q43<@-Pzru)m1Be^Li#t0Ql}9X%d)}EOs<#f;=?yhA)s)PqAN%*dP2qbzE3i-lXv*qbRv}+UtX#OKzRC^=1Hl>U`&M)Or0<>U`A_>ipnc3$De* zsbf@H)-eT0qMpdQCW$aq@7erGObADL>9O}YXx+uMj?zlb-1?Fwi<r}wjf~+)LGFN)Y2!}#N99D^~a;4zTamZ|9l3Qy6DlH0S%!OMmovJW4cJ-7eixzTTuY11G5&kWh~3P#_5r&qltcEZuG z9WnZA`lHWQ^7>@aN`7Qg6z?0n`|0*%cb(D+tYJ|=?F8nzX3^EMDSec|aiJq?Y9&3? z_+yR>=A@~k@Y)8YxltzgXzNfm3@U$cl=W$HfkrD8^Gfd-!y>@>K$l>Gfu1Zha#O7| z>;REmDNY_{cqqJ6Z6~Ud1))Be?}b7f!sC6kjjbMvL~e( zOS&^ZXKUAJ=G$zt|Nfy&LeKsoGD){e6JQ%U7XaSx8E|ES5|!qapr=jXEL2fwr#aPA z=zD8O2%|3{4U}$l2rP{KsCz_H) zT?fykQ${kViEqmgYwjv8q=k&vy0Y9U#dIB^us+xy*42?a1sR&GIZx&WIS>%NJgDqL zNWpb^pyQ<&EVkwM4*SO3Qwh%#D9w3iO|HN7NRL1tE;?cQiXnlL;C#moMR7Smo&|3x zO=dBbnhFpDUNEaC!H7#KBtqLj>{Q)7acK%y$m3659{3{pYQTYz#NM3#{P>}M)%}lW zPBqg-bL_vIE)+(XY)p42yJX&4?!0r#z^)6bj8@oaoia+54$`2R2+YbTd32Ep6B%cv zW6m@#SRNZ^qtLZdmRrr|0Ii7#p?Lj(QYO&_K{m6yBVV0gr)Ji^S@=}vQ3pMvs zWGR}sCoym5t+9+JU^nDcz<7|%-^Y_R(lNy~li*5ioQ9f`t_~#^uJYhBXq>E>G9r|r zWKr<|#ml0$;5h)pR@aWzkVtzLd2y8YOP~}BbnK@>r4VzSN@LLq$PnfP>v&Su%|rQqc)(ZyK+c7EROk@t687?#N}Diz|})Mf%^yfxtY_k zMkdI&$ROetqDpzNJXqBz$&_`fY*?+NY@`y*>(~Ta%PK^xEf1v)CB)$?uOJ&rm?wMg zpO)no2RU9Kv6Iqr_vMqeY)LQj`pLZa7dI9~r{5@w|8cdGeMbOBTD9i@eCI;Z4;EYHu&IlUBy}{XS^(-NCazS41q@mo9nsY6mz9h zJlfjH^E5*yia-k)7v3?kXsxH8J2B82l83Zb_4h$Ns`WV_Gy40Ei)OlMX*AQ>&CYPl zjQ;%1qJTX<=8%M%G-QZ7X-aJy8zG^TF{Oa~DjzBl7hY%+ 
zl#&%BkXKC%m9;Fb(k*Au+~zSM`dZRDDUmy*=$!Vroh#zzMXjqRy{(q2;b=D5|G?>H z%vq-cc6r8}bvj_YGo(zy6LfkAhBZ+(8f)Zxrpwy!Qr1yfRR>vf=536OH#XKya3ZL> zw6KeVtsA8pD4VNI@K$mf1=!WeGS7LAxQ$^RJPncH%KJJlwuBDmSrRJkw@%b>H^dQ= z<_C_`={GIa>3_1-iOfuzU)?H-_YX*OYh!nBo4h%yl8rJY?4Aiuom$3##ciWCN=4XY z$LZj?#9c8Us8PD;85Eo6!Ag*rg0WH==VCkO&aSZbAK(RHAi(qkj!o4Xmo)0v&2ZX- z{)H5;Ow-bA9YK(CFEnK~*?;#iRs>nSmn+u~w7b1kL4X}ic)A5-!1!guEyD;;3~#t% z7rFFVYkIC7Su)&|{BuUkTA>ouh(zRjBZ>2}Z zc`o#v*>wNiN2#DQOJm&MvfT;JjOItSi{j6R)VVdC>=Bw$x~g)nf=bK{6lsOS8Y<{8 zE*!XWgRE7kn5+W?i!}juB#8p|vh$t=%XH&{jMaRd=96jm%WZ2zSi9r?)kK?re7d3= zd4=YEEt*}ur(&llzBEL0dotPBp%og?Jy4NETN6rI!#)uZ5*DoGflFs#J2nZaKM-Rl zP4rEWQ8~p*UaQ90M#M6}0@mTH@r!M-Uy21C_8Xk%zWG+qM(79mP_QyA;w;oET2>^Y z59S@M-~=+vIw%WHAlKW5j83G95z3bdosZH*h1=q2DIM1Vln`wy0>h*IP1aJ3Jf#HT zc(b4+ZYIMJLz(apx1>w+!_2x@(6Wv6$H(dPzp>PI2SUu&W@dFD*t0`Ci8#@AKP70Y zsbTvQWvo&GBI}KIoQX1WN!gp^>85gFsI`+vD3cAf1*{I$+`XQQcbD{Eg61C`$Lim* zRB+z7x40Pl{=K5u9ypP9);G6ysUVDTtWui8^juk?YaY4dEJQemfKXOemO&0YGuk^S zT%@hltg#Um$ymdn_>Ped%Hrmk(2||4GJ`_vN=4Eo$Yn=$lvGgLH7jO3#whT1{fq(> zDKfXcKua0qEuh_vFsxy8Y(bd*yz-9LB?rjeI@}m!uP8n;n5ZW-QK_p&f*&bpscI9Y z(cF0p@!wo=7DAAD`><#sqAA)SnT5)DA#xLJ10GLVJFcL*8;+WMIfN_)v}2tXC}W4! 
zg!8IQl7G~j))86)O?etEnq+A8T6=xr68%8SHf?sb2%^$`UeAzxv>%hpM0c=5v>%U43;9tdw@u2kVCha`Ebd z)rmbcDJcy%PsfSRNQuOk8N0Mt5e@{kI*pmCsE}xAJ+)0F{3C9F=~;6vamdRJymjzBQrj49f)g#~L9V zY}NxA0!BkDaScVPc_kp!5b77ht|(9m(?y6&;ZiB5EDNOsmqrQO|16=>D{sqSr=x^r zhj`wOk(PJxHkB3o&C}&UXG8oGEt(yb0>aw^Doto&@{onSB@Kkz!A_m9)47EhWAI06 zqhQi4oUB!3w&og2h1N<5p|rFP_DOBDbEV?kGxy}epfm6F<1O%9j z5+M*7llEssw`HCzrmxK=`%fKbIrhu_+|jZ+0C(#;5ocy~0C?aI%3xw5)|Fz&ba`9(l4>e-1AI%vD3lR;bU1ZO#$?mu-D ztM?bo?ray#KGTwB=SP3jeo?H?&uXUYu+J0~2)-IHf)X>1&DoG?W6w-uu<2|XH`Rp0 zKD1>YI(L#QJaIj}0D2Y_Sj5nuHWmhJ3XQ7~l%}8;hasi3*)38!)>2E4f{D$yteh^P zpe^my12psc+I~^|(va7;H}@v%$m=HtQymFrRC$xW)&Y^`c=!(|aD8(dPkVNVE)&1r=*;Om&h%Y5mr9C0;(N$J}X4q21X~^`YN{+{m-Z{=Yk&v+`T}Xrko& z$>L2#N3Jg(O3p-@P{ZFEdkQpphvt)zO-=#aNIRI3nv+E~$my|i&|5PTlH?@S;?S%rO z@uTzZh%5$iGn6A~N5CqA4IWk?SoAE^N`}ZH$^66IZ^U&FMPVQYI1COruyS37> zXht*kWZ=yMLQ8D>Jk!+xURLYJq!i z8z1mwQxs5GY!0~Ht;uA9@c~3a)^ZtYQ$n6!1kIo-rDE;46R^>lU`jCOJcIOdD>!WX z&7Et!GAhKuj?B)#2tw9FY9ckH7k!C0V?@)f`W2IwZ5^ zxU(sW|9Pl6wkEqNUH^CxOTl2l@CXonQ$<@26RMW8S+AX@FSYjZ^t1Qd$fra|<|}|G zP6&UkpF4AkoakoJN_?~_ihnaib7y;FopPeD3_xvQiI(#aM40n5&i}}*)g8SnUF*xj zVZ+O5&6>(hO95DOs-M9pU{LF#Tm_CZi?dCfz+I?Z%wCr#XF$Yh_AnS>}6^rRZ4FK_kS+$*ZZ2DD*TBw$`3U~@#VpboKChk)_01t zn38=gBbX(Oz-lg;;7qF8L*+1b7L%9A9%xAz-6X3cN{u3)Nvb?SvredMGair7^iTHv zDwaE>a;_t=I{+|Bt%8l1&_|EU%UH=BrVg~)j8jycd902qcq;J2XNLSBfgdhC{qxUV zq61|svq9fwqqePM(GUps1W?@FibNCS4$SI~-oH9#|Izz?O3K;H2* z*lc!=1cw~VLW~l&;C7zZPnZRsvOqwTsnw92v!>AJThx}X4`IRLf4R`<+yGWMY!iRQ@|$kJN| z44>pCOvaAq{MU%!F95me=P8FYY+d&zTxJL=KW&MG(nh)Br|x(2!N zc{^k*iHp-KB36Q-JEn*S^ef;o%*9aaj_^`)iG zfc)7ZzZ0N1e{jJ|=>a394O6w}K3WzWQzklTgN7ZEg<`@r%IR237JQIGa18}jMZ~pa z;iF;XjibdLs)k0xOH-#T;0O%j@)m)9Yl*b~<8oSJ^d)c}?HJAUIeEb%eg5hTisG3= z`rKJx-=V<61yEwit$+ebio+#sn6yka+{4Cm(F!Ll^E@<>%hEykJA9U2gj|m9WbLH= zQd>a2wGQZF5}D~er8k-4OJu74-mRC)RJq~aA{!uZb&&TxO6%gbJvz#yDBX! 
z_75M&?l&)GcVO)-Ju`KJ2>8NKdiL1;F&<@CX%NLCN|oe;7!v6FKed-Y$X{RS2Eb+!l~Ld5w;3%LS&`0L0hglq-nwSSf!K9 zDHEf=;@UcW$6nXEw={PtQG2K3@@vQ!@*AB^W;d=}Uii(wdf}qw)y#*qrWc zQ*N7TqJ(|IvF2x4tc;*Yi*RVm5aeg+0tUo)(3iM<}6kq*U=D7B2CC*y)4`W#DW?og(-um6r{&!IbMTG zcWI#$Rd*f{h0ws*!Iz7~3;x}X-;p-a)r&+5`^D1!rnsRanV0mqv@n_*8JzHIR_9Go zLUJi#>LUty>Q-p}d<$i_sB+)EMe$t&oxVBQ+MSTltYZ+-DDSXBR0xc@hHF#TT-)GX z2vTqe`wTjIrR%70OO7(cMtSL7Y-EI>2rZev936B@d#K0xb3d{I>V2XkhIZe$=2)RQ zTI%ooXG`Gxx-E)78w$|Z%{|Hya)=8O#Bj%D_&@~D^KCZ?H4+kORp_es3-8Kynkxos>;s6*<}BBE^phb z-(P~}{r48d%?rf2v%XFKkIR8g8rX;rl8dlvK;yXPOtc2)5+sM05^J}2@_ZZ%F=Pmr zJSQQSrT{r+f-ciPJSG<@z7=5gubeI^A*1=(C1_rMZ&7@3fi`#dc8NAm$<7rl5e3*( zTFIt~kqgU8r&!HO+C(f3C}B0>6~yG zUdcz@mDEVMEH<>(hMc}^W#i1!jUs>!Gn(1h1bW?HKoe;bH;TACIy62wNUHG)E2XL6 z>L*c)Vsapjm`D{O6T7L(cpeYT63o-|1Rwmv0+HFv-u6zdJs;i}w5-$Lb6hxWzXa~M z4V1X&-o-$Pm)*Ni%xAi}0l9C*ld$tmFJ5+MKd?6zFgdT-x^=W1asb2 zT9(Xpc4;!#{XIb2owaOHd}RU9R=Y3d=W58$N0OxtJQyuQw|GPQpyt^ngk#X=c}ZB3 z9W%%O8Kv~|B)~840MQXcrvJ*2JTY$JyZ-Fu_bfqEwkd}%Yni?yPY?uynp2tSKXeT- z{V(586j#LN8BgZCmMPPNG*gy}G=px(*$sFAOaFLTTRo<4|9-D!To>5>wgKDsh3RcA zmA?JHOr@PL{rC43#a|AD>DF|T_chV5n!zJ(Y62VZ)KRh!EsLx%3Zk41>?9MI<;q4a zRb-C)5TWL5Wj8$^37;vmyfI;MG~Y81KTPsRXenHXWzi*=wo6lB%n zm#UoNZap~)9}BnRuSKs1t>05#39eFu)%4%?(Y(%Fm|X{Ge&shlbI+0l&g)t<|NVW7 zJP!z8KcLbcno?;SJ%=q{WD}wZ&d30XoQ}aJ5nRL04$n&IZ*U1s^c25F zShYzDxN1-DgkDrftC=H>PC4awmk11{UMJ<|;baTGK*)r}j!4BfNtxJj7K>hV7n^ z5XmiDVkDVU=ka{SAKW1r&0yRfZP{)+sEH1l8FiWnFdI@OU^^(!nQ=t#oR6_&utR1O zOIbUog7=}}P?Sb9(?G6Fscq??XaEwfgUz){9$*U|rkrG^(T$9;Nko%J)#?+%H;q0y zhkAIVUiovO9dh{utuHa3b@E)pysqJ2)-4ldP4p`-Td3JL-QHT?-Jv}RrDU;$2~jM0 zj10E4HP-6VX-vzOTG!EO2GdbxqKd&uXBw#bl$a5t_lNo!*vb7cu=!1;|bL1ATir`*hrl_3FYR5^b%AM$i%>` zg0Q99tEOf=7-nJ}q;t|a7P&G(f=Qrj>s+1KFv-bwlb**E`l#GosM#VaC+H2*?Mt>^!s9*s0r*!7_x3|SbfG&ULbUO`ax_s3u z7MFwH`--CY%n-?)&FLOV%@THQ(;W0MRFz}WS|y|6kn|ND#48938Y+OAfep)iD5F=L z#ZU_YnGr@o`HMOXUB}~-I4Kr;X4W6~2$mr0i(NFxim6=xd?$GoR&??!0}%mA1^~3$%H(4$3P; zK!;go<`p8K*V~6^5^drN(WwCAJQ@=xnjt>#a0dw@3Czdov%9GI4 
ztNn2m0nJc;?6?)8zLBX{Upk;(jrP_v@)7SUHYbKROKwVKRknd9|P1_kyhzoYr!D%f|zZw4NS#ljkd8$C;jq< zANRMXb{IhI_0^?!lT>>8YFyqtUDdIQ zHFKdBF~SZV;HR=!0i+CD^I6qMp>1GFaK<<Ik_EuN3#M${*MJ!wG;k+rRp^GVR3 zs#Muz?(2*08XV8zy1TQrwOu@QLs8U0Lpn$m zLqLzT<4qYk>7cC+w zqtQkL#iDUdG`_2DJU5i%Fuq2R35P**=l z8=Mpd)`Fq7EQHjQF(|H$bVjSF#CfVglZMj{wxhp{_V_5cyPGFz$MnwH@=s6aYR@qq z#X%<-GoC+iP!zWe0uJ}KHug3sq0QEw34?naO9u(T71Iu)x}X*>^HC`o3=fXSz-rb6 zCY3NjbFV!ZCb6nC4~iRCnk#n?FZ(jF{*sp7#To|Lli)}@N5L0xk8i5eG@HzdV^fCu z5uE4R70T~DSgdsQO9urm(3Cc-wU}Lcz+dXAR^Fl@anYw6ANYYgS2KNzQ7m zbg5lsU~xUTriwL>5Sb98GIbqGmWt_TebCLj=hr3joP2d{G+M**6RC(kR8vcf$buh_M(rBq+;&6Pr}EtI+Q0dEOA6P0 z)vk0zIE;7)V*TdFGQuiVp2I?dd7s1 zjpDB6-Ye0}{h zIw%q)5?N8bHF9iFy_B^SVwe#{!ozrk1YJ5vF-E}Q6nC)Nmy#bChQ!qymk~R`Z(mcQY^!KmL z`Ml>$kG%YsdmP@zy;;SxXVZh%_L2DH;q`|;nHN@PJ*vo}9Y!s4tn?AQxJrvs!W691-b8C_%Y&|&JGJIq?0dww53!0{8J5qwb2wPGd%*2S zB$3>{GKXZMsXmgA3~B1G1If;!qaVr7lT6hGl;-+g2}dsXRrSkWzwIa_`^$j0xBl6$ ze?QUWHN`>kn%=dS4ry|0XMMU}y!LwXf-9_oTPrS%6gEZ+@hQwY8NJqxhAkwOWjYES zY;g2Pju-#-r6W3ZLyyM`hqCDx=ZD`n8h&wZ^cyhxpPU>0KO54~m5u(m0bG8|Zy!dl zju`zl{n1yMiyrPw&;LAg(ZfaYLxXoenNIh1){DQ2PkDbEeBBkYK! 
zSZqukD^*)7R1>%tHU?cwE`04&BSW3YAX9Q5m;p$6HKPq>OT7|bz>id+8%u4&V=*kb zS2Yj6z6OQR*2Z8*08VciQpwj3&1BjC`t5z3k{2Ja$=BR~N)k?Ax^kSLTpVt=h9Q_V z=o$_UD+3T1jUDGc_w&FeaMFxZ%<;=^!XGHcA`E zxv!lLLFg!z=1PbV;_`sYJ_d_zby16D%A;@LI@nz7K?`-#13-2(lZW{atQwPp2|{6E zDQz~{|BcH_l?3Fn8J^Jugw2rdIy9vb5TY&{A%ldOwhD|z=|Q>GJV>T=rFpcbj?Qye zGGncExLb*EIyxp;8EYSP1A*1m2%TxL3{$RQOgaS+cOlma zwcI-2+;J^^KilNTqS+JmvpZKt3C+Dni{jSXi8h}|Xnx|+qWIGRZB94$wx`qLJaTd1 z7Iv79QOKyg0YD*k4GKcBV9Js)FJZe+UREmdCN{y8x->CGrPJIQcke~)fUB|A7*%^r zGaVcsI1*BkXt+w(=?Hs+n4{YF6rN2tXvG{SO{3Z5;No!R{HqUMdg!SiU4PyZv-DU0 zum{4e&n7avzv#ev`hd(5Fdx0HC@KjtRgG`T63V}n5TLJX3(?ZN(pqW7WuwmGpNK{7 zu=XShs`N6`EG$(p?Y(G<+iztOek*Iax#3-FE*!k|!AlRl<$w9MCBr*->nGAszTUp~ zdkfpU25oM5J9}G`4b1flm5+@r%i2SkFcoUnFjGb*ed$fZSyS1v6*kG=k%p0C*J0ZY zEE5hT>(Y!PjWmbF)6ij9)8Vk5c<|Cg|M>s7<9Umg+rbk*+aFf(-MlD%xM+?a7WjUJ zDy3+cErs-!qK@b*k^Y1nmfml&WN|7G9aaL4#`UN+X2k%2W)@_9pPgF==3@ zUE!6Gjcec0p(=p{cuSR3Xg9Z>fTD#3>YOP#$u<=JXm${#lLVw)P|>T9(oF#~+{Sx0 z+5h6{@{`}5ko%6u@;m?eVyky;;hk^oZfxxql84H(CaSXLM93vTs~jW8A9+sj<)J*z zQ)bG;pe-L0iLXL}P5XAUa~=O4EF1jt)){=n-viEI7E4_T*qOw7B^~EV3I!BtN+v_? zG_RPcAv?%OZ<}C^wop6@cC)pmGr`ITuiiBb|44AlVtPInbi^!`?AW1e=xWUAp%IsE)C7QypBjD)mVYbQ*@P2>fwm-OM$z<$? zcCq4>k1aaa|C`5(;(ZI;va`9lMdcw+%nT7WE7HR7(3DVM%4lD@U~AZ>mP3XxL~$)+ zm!1vTe)KPNt)-Z3BjtRlj*#*h)x9P|3+;pyc$fO6UieCe8>TPwt@jP6b8Q?KNSbUw zWuGueq^Auv8%ZDV2^-lU)m1&JV=dJ|?dPUeMrbI~1DoVQD7&d@sa$BlooO0V>%g2U zaUUR{s#MxJZA}NYODphd;^G$2XWAPKt2z^Cz=??>DMpJbk7hZP8-bOu>g03_dA||E z4@J+lc^irK``&lYl4O3q^35I@-nvH~mrF%QuCG{t5=rv`@Mz9s=NeWacGaQSou^%x zXul=cwglxXE+$YNaOCb3vz9C)!YRe^qfW&=bA}+Kk{=0#IeNM#*-8KP#?Rif%udR> z>@OZ))MYo%isBas+_|x_JKdrRMGBHf0{2lvon~cXfXoqHGR|VnAhgodB50!(_oCK} zdsz#vloGX&P3@dxRg9I6zQ2}mHCleuv5!m^Kx*_?& Date: Sat, 26 Oct 2024 20:42:22 +0200 Subject: [PATCH 13/15] Rseqc innerdistance (#159) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* full component with two tests * fix default values * adjust argument names and container image --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 1 + .../rseqc_inner_distance/config.vsh.yaml | 116 ++++++++++++++++++ src/rseqc/rseqc_inner_distance/help.txt | 43 +++++++ src/rseqc/rseqc_inner_distance/script.sh | 25 ++++ src/rseqc/rseqc_inner_distance/test.sh | 77 ++++++++++++ .../rseqc_inner_distance/test_data/test.bed12 | 4 + .../test_data/test.paired_end.sorted.bam | Bin 0 -> 10205 bytes .../test_data/test1.inner_distance.txt | 49 ++++++++ .../test_data/test1.inner_distance_freq.txt | 100 +++++++++++++++ .../test_data/test2.inner_distance.txt | 4 + .../test_data/test2.inner_distance_freq.txt | 100 +++++++++++++++ 11 files changed, 519 insertions(+) create mode 100644 src/rseqc/rseqc_inner_distance/config.vsh.yaml create mode 100644 src/rseqc/rseqc_inner_distance/help.txt create mode 100644 src/rseqc/rseqc_inner_distance/script.sh create mode 100644 src/rseqc/rseqc_inner_distance/test.sh create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test.bed12 create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fc134fd..0e32edb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93). * `rseqc`: + - `rseqc/rseqc_inner_distance`: Calculate inner distance between read pairs (PR #159). - `rseqc/rseqc_inferexperiment`: Infer strandedness from sequencing reads (PR #158). - `rseqc/bam_stat`: Generate statistics from a bam file (PR #155). 
diff --git a/src/rseqc/rseqc_inner_distance/config.vsh.yaml b/src/rseqc/rseqc_inner_distance/config.vsh.yaml new file mode 100644 index 00000000..e050bb24 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/config.vsh.yaml @@ -0,0 +1,116 @@ +name: "rseqc_inner_distance" +namespace: "rseqc" +description: | + Calculate inner distance between read pairs. +links: + homepage: https://rseqc.sourceforge.net/ + documentation: https://rseqc.sourceforge.net/#inner-distance-py + issue_tracker: https://github.com/MonashBioinformaticsPlatform/RSeQC/issues + repository: https://github.com/MonashBioinformaticsPlatform/RSeQC +references: + doi: 10.1093/bioinformatics/bts356 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] + +argument_groups: +- name: "Input" + arguments: + - name: "--input_file" + alternatives: ["-i"] + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--refgene" + alternatives: ["-r"] + type: file + required: true + description: Reference gene model in bed format + + - name: "--sample_size" + alternatives: ["-k"] + type: integer + example: 1000000 + description: Number of reads sampled from SAM/BAM file, default = 1000000. + + - name: "--mapq" + alternatives: ["-q"] + type: integer + example: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. + + - name: "--lower_bound" + alternatives: ["-l"] + type: integer + example: -250 + description: Lower bound of inner distance (bp). This option is used for plotting histogram, default=-250. + + - name: "--upper_bound" + alternatives: ["-u"] + type: integer + example: 250 + description: Upper bound of inner distance (bp). This option is used for plotting histogram, default=250. + + - name: "--step" + alternatives: ["-s"] + type: integer + example: 5 + description: Step size (bp) of histogram. This option is used for plotting histogram, default=5.
+ +- name: "Output" + arguments: + - name: "--output_prefix" + alternatives: ["-o"] + type: string + required: true + description: Prefix of output files. + + - name: "--output_stats" + type: file + direction: output + description: output file (txt) with summary statistics of inner distances of paired reads + + - name: "--output_dist" + type: file + direction: output + description: output file (txt) with inner distances of all paired reads + + - name: "--output_freq" + type: file + direction: output + description: output file (txt) with frequencies of inner distances of all paired reads + + - name: "--output_plot" + type: file + direction: output + description: output file (pdf) with histogram plot of inner distances of all paired reads + + - name: "--output_plot_r" + type: file + direction: output + description: output file (R) with script of histogram plot of inner distances of all paired reads + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: +- type: docker + image: python:3.10 + setup: + - type: apt + packages: [r-base] + - type: python + packages: [ RSeQC ] + - type: docker + run: | + echo "RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' ' -f2)" > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/help.txt b/src/rseqc/rseqc_inner_distance/help.txt new file mode 100644 index 00000000..18f97bb6 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/help.txt @@ -0,0 +1,43 @@ +``` +inner_distance.py --help +``` + +Usage: inner_distance.py [options] + +Calculate the inner distance (insert size) of RNA-seq fragments.
+ + RNA fragment + _________________||_________________ +| | +| | +||||||||||------------------|||||||||| + read_1 insert_size read_2 + +fragment size = read_1 + insert_size + read_2 + + + +Options: + --version show program's version number and exit + -h, --help show this help message and exit + -i INPUT_FILE, --input-file=INPUT_FILE + Alignment file in BAM or SAM format. + -o OUTPUT_PREFIX, --out-prefix=OUTPUT_PREFIX + Prefix of output files(s) + -r REF_GENE, --refgene=REF_GENE + Reference gene model in BED format. + -k SAMPLESIZE, --sample-size=SAMPLESIZE + Number of read-pairs used to estimate inner distance. + default=1000000 + -l LOWER_BOUND_SIZE, --lower-bound=LOWER_BOUND_SIZE + Lower bound of inner distance (bp). This option is + used for ploting histograme. default=-250 + -u UPPER_BOUND_SIZE, --upper-bound=UPPER_BOUND_SIZE + Upper bound of inner distance (bp). This option is + used for plotting histogram. default=250 + -s STEP_SIZE, --step=STEP_SIZE + Step size (bp) of histograme. This option is used for + plotting histogram. default=5 + -q MAP_QUAL, --mapq=MAP_QUAL + Minimum mapping quality (phred scaled) for an + alignment to be called "uniquely mapped". 
default=30 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/script.sh b/src/rseqc/rseqc_inner_distance/script.sh new file mode 100644 index 00000000..fe00c590 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -exo pipefail + + +inner_distance.py \ + -i $par_input_file \ + -r $par_refgene \ + -o $par_output_prefix \ + ${par_sample_size:+-k "${par_sample_size}"} \ + ${par_lower_bound:+-l "${par_lower_bound}"} \ + ${par_upper_bound:+-u "${par_upper_bound}"} \ + ${par_step:+-s "${par_step}"} \ + ${par_mapq:+-q "${par_mapq}"} \ +> stdout.txt + +if [[ -n $par_output_stats ]]; then head -n 2 stdout.txt > $par_output_stats; fi + + +[[ -n "$par_output_dist" && -f "$par_output_prefix.inner_distance.txt" ]] && mv $par_output_prefix.inner_distance.txt $par_output_dist +[[ -n "$par_output_plot" && -f "$par_output_prefix.inner_distance_plot.pdf" ]] && mv $par_output_prefix.inner_distance_plot.pdf $par_output_plot +[[ -n "$par_output_plot_r" && -f "$par_output_prefix.inner_distance_plot.r" ]] && mv $par_output_prefix.inner_distance_plot.r $par_output_plot_r +[[ -n "$par_output_freq" && -f "$par_output_prefix.inner_distance_freq.txt" ]] && mv $par_output_prefix.inner_distance_freq.txt $par_output_freq + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/test.sh b/src/rseqc/rseqc_inner_distance/test.sh new file mode 100644 index 00000000..927a69a9 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test.sh @@ -0,0 +1,77 @@ +#!/bin/bash + + +# define input and output for script +input_bam="$meta_resources_dir/test_data/test.paired_end.sorted.bam" +input_bed="$meta_resources_dir/test_data/test.bed12" + +output_stats="inner_distance_stats.txt" +output_dist="inner_distance.txt" +output_plot="inner_distance_plot.pdf" +output_plot_r="inner_distance_plot.r" +output_freq="inner_distance_freq.txt" + +# Run executable +echo "> Running $meta_functionality_name" + +"$meta_executable" \ + 
--input_file $input_bam \ + --refgene $input_bed \ + --output_prefix "test" \ + --output_stats $output_stats \ + --output_dist $output_dist \ + --output_plot $output_plot \ + --output_plot_r $output_plot_r \ + --output_freq $output_freq + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check whether output is present and not empty" + +[[ -f "$output_stats" ]] || { echo "$output_stats was not created"; exit 1; } +[[ -s "$output_stats" ]] || { echo "$output_stats is empty"; exit 1; } +[[ -f "$output_dist" ]] || { echo "$output_dist was not created"; exit 1; } +[[ -s "$output_dist" ]] || { echo "$output_dist is empty"; exit 1; } +[[ -f "$output_plot" ]] || { echo "$output_plot was not created"; exit 1; } +[[ -s "$output_plot" ]] || { echo "$output_plot is empty"; exit 1; } +[[ -f "$output_plot_r" ]] || { echo "$output_plot_r was not created"; exit 1; } +[[ -s "$output_plot_r" ]] || { echo "$output_plot_r is empty"; exit 1; } +[[ -f "$output_freq" ]] || { echo "$output_freq was not created"; exit 1; } +[[ -s "$output_freq" ]] || { echo "$output_freq is empty"; exit 1; } + +echo ">> Check whether output is correct" +diff "$output_freq" "$meta_resources_dir/test_data/test1.inner_distance_freq.txt" || { echo "Output is not correct"; exit 1; } +diff "$output_dist" "$meta_resources_dir/test_data/test1.inner_distance.txt" || { echo "Output is not correct"; exit 1; } + +# clean up +rm "$output_stats" "$output_dist" "$output_plot" "$output_plot_r" "$output_freq" +################################################################################ + +echo "> Running $meta_functionality_name with non-default parameters and default output file names" +"$meta_executable" \ + --input_file $input_bam \ + --refgene $input_bed \ + --output_prefix "test" \ + --sample_size 4 \ + --mapq 10 + +exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check whether output is present and not empty" + +[[ -f "test.inner_distance.txt" ]] || { echo "test.inner_distance.txt was not created"; exit 1; } +[[ -s "test.inner_distance.txt" ]] || { echo "test.inner_distance.txt is empty"; exit 1; } +[[ -f "test.inner_distance_plot.pdf" ]] || { echo "test.inner_distance_plot.pdf was not created"; exit 1; } +[[ -s "test.inner_distance_plot.pdf" ]] || { echo "test.inner_distance_plot.pdf is empty"; exit 1; } +[[ -f "test.inner_distance_plot.r" ]] || { echo "test.inner_distance_plot.r was not created"; exit 1; } +[[ -s "test.inner_distance_plot.r" ]] || { echo "test.inner_distance_plot.r is empty"; exit 1; } +[[ -f "test.inner_distance_freq.txt" ]] || { echo "test.inner_distance_freq.txt was not created"; exit 1; } +[[ -s "test.inner_distance_freq.txt" ]] || { echo "test.inner_distance_freq.txt is empty"; exit 1; } + +echo ">> Check whether output is correct" +diff "test.inner_distance_freq.txt" "$meta_resources_dir/test_data/test2.inner_distance_freq.txt" || { echo "Output is not correct"; exit 1; } +diff "test.inner_distance.txt" "$meta_resources_dir/test_data/test2.inner_distance.txt" || { echo "Output is not correct"; exit 1; } + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/test_data/test.bed12 b/src/rseqc/rseqc_inner_distance/test_data/test.bed12 new file mode 100644 index 00000000..33a46951 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test.bed12 @@ -0,0 +1,4 @@ +MT192765.1 1242 1264 nCoV-2019_5_LEFT 1 + 1242 1264 0 2 10,12, 0,10, +MT192765.1 1573 1595 nCoV-2019_6_LEFT 2 + 1573 1595 0 2 7,15, 0,7, +MT192765.1 1623 1651 nCoV-2019_5_RIGHT 1 - 1623 1651 0 2 14,14, 0,14, +MT192765.1 1942 1964 nCoV-2019_6_RIGHT 2 - 1942 1964 0 2 11,11 0,11, diff --git a/src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam b/src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam new file
mode 100644 index 0000000000000000000000000000000000000000..8b215e12d1a932f1619cf7ded7e172141f45d479 GIT binary patch literal 10205 zcmV<3CnDG%iwFb&00000{{{d;LjnMD0fmy^PQox0hpTtRm)Hxe<5ULHD+VM;vd!s) z+ok&hE@3OK4I1x#Sl_~Iz@|9l?zE@j$SUWjh*u-d8%zMC+9d0b3kPX^@EAz)ObM}( zWuT_^euV=9Rjy-S+oj2yru(5*gZU;Wl4qw>0;k-%ZsST(C`}g)cFWTuiT89-s3ayK z&sy7Ii=3X56WHr%w89S7C{uiWCPTDz#3XP$C+s z&xA^#5*26?p+YG#h&GCa0JXTSQ0t}~%fLyPigm5zveTlGwPB6vOwqPl)Fl^PDnzFmW7?u=+p4INMy0$-)57F=9+z<$ zj=X;qXEcs88GXz1(p?wF^uS&k)5Y#FouXlSM%TxgWP8Ja_}gFtl=xiw;0cI48;D`;qu&$*o!P{OAn(J>})}&a>wPIMv0kU znTujcaDK7*Y7oV{M{t%~ft%0vaDK4oCh>J^DvD!no(m~vB={>UU8_P$ooZo{l*vxj ztOd1`RN|FHl{_PkIBBHE_K1lr`0pl8Gk27t=a~b@tD-!B+>%p#$?;^oD5Q>u*0l1=Df!87MZROa<}*Jgt59hbtQz0Hxqa4UYr!q2mj(Jvz z*;AR5H~#{h=iAn9q+m6u=~ae|RR41DHDD0HzTz1TYbhu|zi& zZwr8^mFXBT?S$eYWy%O9(grv~?ZJ%jMjGCk2V@N9$-&{lo;4OT@QNvT$vW^-Q@0W?tazQ^M|;I1&4jf|lG?x~7^NFk@viE0 zr#jK(|ef+b7OeZl9RUdB+*Z@tl8SCXbh$^Vxs)bs-T1C&!#$ zzh=%ikLUacBM}KsK^+aE^d~br=ENqatJ@mhRI8F&NL`Y?CNHZZuM6VR=)(Fj5d4ML zqt>$~E52}=!g@UZ?_OA?v7pD!Jd^XiL|bTqtMA5S-g?I`2G-s_n#{Xj?sdTS3kK`- zem|K6WAC2@ZimMwd%Mx6Z;zr{a+VtSHZZ_dsuG==ve265RB71?1)tGC##05;Y?xt8 z$fo9+o0L~=lPWkpLYupISYld@*f%M<#H2_RBm`w@+Kx zVCPn*!}ZSkTQlWvo;6Q#7Q3i2cH@Ba8rzza@qrnh_l)p-VCzS>2JGg+3Ss=Ysb2DU zl5b?Vfv_qG*%+nw>YzGdR$8S|yR>b%W5IIV}=8Nvgdr6mOM{JW>Yvk{uLMTO-MW zA;o3Qs?zEJ!QHUf&Ny=cf{V^<27u=5 z95^pW4vfr#5d1{)|0;=$YkjpfR!E82$Gsz% zf=nh_rX^fyQ^BX!kV8l*UJGlA6jsxvS8l97m~TEc{e|;Pm_6ql=w~jbz;5+$KHS6k zRP>f;7<{{uPa=A~IJ2cJ*# zeJ07HSE6otqFdF$n}^~D*F=H*FoWhtykBsMs0Q^1sT8?q9sDm1T_qV6BwCIX0F2?4 z8zyzZSBTGkjk`^GpSsM+#JWeByC7s5Afnf9-MUzEIQUy%W6R_ujK4MYw>+Li&K`{l z{u3wCagMBTs(7Iam{n20%Wq*xInsNYC3K|#;vLwGXrXH0yCPf!N!bicn0^WdMzxG} zVD_OvS+2|uUgq%RoT0gjop(>z8PNP2cQ2<&Z%U(RcgD`+)03TJmnKEUOSCDz8MygE zlmgCH!+0xrB@`%#FjXU0B^Byk8YLrmFyO9Am_*L1LnfIVTwx43V>dpvG91}hYlirK z0M2Dj$+7J(bh$mC$v2{zu0)fNo99Uh;V;a%IiLxz6|hOk5R9g^F;&Y{K_XmrGBrjb zm)8=uHNu3B>S+nW1kMVUAUJ5?#2AS)h*c{UJajLB&M}C6u_5jwgK`4F^0N!ykTcCu z;O1sDU$ipo{Oxp^n-46a=;^6ra=3#!(2|-3!A`X 
z6ELrC4ZsCz{=;Yh`2ML)_h9DKD{x+@5O0wkm)Z(HB?Azy!o^N;6CmZxN2D~p#Yif% zFyme<<1ln_c+Y*Sf;C@)vD`W{@rrY;hwb32;N^6_y0jT9_6x->5{Sl0iLos)b5z(vE?(2=H{5ih{|yOYuUfQlZ=Jvp5;w z2utEZIZHPe4tmo~@onDM1{@>v(FMTrGjlBOUt@dj@8$dV7R&N|?>$lUzh_t;>>Tbn zH}i%;$@mSjJs}*+CuCef%*@x}dTp6cHK+S)0Or>(X2?TO{9F&_2YWY@$a*l7Z%0gW zGfRzggX8*Q^UGp~C`op3;bE;>)SOjSUdcm6&2^`QR;Y!qeUqGTuaxsE_bj&>eHdf^b0+7< z`^P)Zj<@Vw!0{Py$c$0$K52_CP4JU~BKx;5${TgWtA{sOqNO$ufcf;wa_5iy_t!nN zLeB5)iTUAs200JJB!JTyF&&shOc-w}8`nH#jh0jzO43e>isNTnD8m}wnhtPP30!yh zo~VPsTNf>`q#8blmPHm3g%cQ{Y>?JopHYj<$lneb=n=KnqjP!V6^QxtCStyDLd>su z+iY zoR3VWH)=Gv`jN@(_M8eE&+cE(nR3zDg$B-SawoFYcc}kP8fNOKP>gWH`z@=WNz9fpC zpKQv;~sMrEO5_K1ZNJ*v3G(~)bOfaFAGQkt1mt<9>NZl#XwWytHV2Mi+ zw-+4wJII;l-l=znIPMFY-f}x2MwPcRL`$*Hr`ead@?9Qk%AwYr^_Sb4C#mSnXFRM3 z=e9kEzOqHWh<)=J>2)Ca$Mf`DJ}cmjjeWjVZ4I~)mN$WM{~#Lhy=lfchh>AaPu`AK zbTeE~ypv6b0F#`3s(sPHXxR+gnpYo*eC*`Wk(g)?i|b)Uj!ZY9;cSb&5i>2FYr}kS=1-;ao6dl2W0n zTp?r^sEn#s3x*1GDl4iarsFX4f`Yi@2K58+stfJ+aawLEK{dLL;QX^q&U`j;=7R(J zMJU`{6gVP3@6GRfNJE9$j!s`^O6$TMy-8IyRvC$rDWaZ)m$IRtB9j>~0?&v^uH{@JDq{oRuaeaLbC$6`5ZfA3$8qMx2} z^B9HfBbR@#IW%BFrD&6C(IV;;9aJGh!H4=FZ~}E9+keAFZBUve8v__oLV_wqEu_Os zYSFS;|5ARw$*-wcj6$Y7Jea+C?&=u6w&A`9{TkjGInM11W{g91h>#1^43bU=|JIb6 z9!-+;1PS=M(0~uICW1^P*=s7CbtrL2l!%ZHL53Xwrzs)L3VN+Sm-BPC$SAsJ%FdJH zozp!R9tnoXQn983?M$Uv3;Qimzo>-P)L|hd^o8to`!boj zaod=;GWj^+7WZddf-3e6ER;#1$F^6d^!-7U)uPpB)Zo62eKS{P9YnZKnR5$i0f8fB9-N)rx=PuY(s+SS=f&1~x#mY=wYzLA z6Xb5qll=aM_|pCy)|&zAT?Fel-}LLN8o}?{`XdkF{~L+^Px`SEqW^bhY&|(VJvxfM zgLrq2pJ_W()tfnfkLSJ<*e;>!|64yhc0;)f-rcD<^wL=x`o=Nm;oFNb#h;obf3Cg{ zGyFf*7s3p09fc|H3$}vv*gVBcy!rOjR(Rd!n#cw@Vja8{juH(jhN#@Y&QgjEg&I>S zfszhL!4Sx`90sQCYU&kFD^W>f1Q=C6?!pvm7Q_7}lJq43*9U9kirD>gv59wzX0_O_K%{X`G?T%i|YS+{+icbYonGRg7bzxQ^A>=NPh>T2M((xs-!l zDdie7U0Z(^5>(6G5_xs)>Yq#%_lriTem(dWUzf|C?=3*}-%SF(*#@vilJQ3usP z2em1b_o?NO!_sMnaH2Da9AE+A)9Q*N9Ft87fF%lC2zjYR0%3-(sG^`D!pV8s=kmOq zx0OtzsnO3-)Vdg6KJtg%sorSP+T>7#9@hMjD*MDH$Ct0HR}pY?$Cm(pQQ0Rp>9RW}x-8g+ 
zH@tM&Hjozh^wc(Zi6=PMBZPDT_*z5QTZU+?MI}I2m1w|XDR>DdSiquy-pttL+}2z0 zerV=Gq(vT%Ebo`=SuE``TJNJxSm};94iY|i&;$EP4J_6G83=n;U(|Zr5PLj7e*Ylr z@9=nj{(h(Z$9Nv^pYHEO(f5(8!_h_k4`~8anqe}fDH4izX>Fe1$Q39-%Su{sx1-Ru zk{xtqx)d~A?ugGA&j)AC{T6WS&GS_9GiO#yu0HcequGaYLU6ALr4P&m)*(Xf6*C=b zadk>*JeG8V;u6Epu2Q+yfi; zi+(2CPYgQ9J(}%Iv20BP&whWC?Yn1Xo8W%}n0L<#;xzW5*^if$(L!t+4SfX0}p;f?&hEHn1{4J2aQxt+O0L6I{=3<>~6Et_G z)TFVu%of9r&}?oXntqcSJ42gK!_@3-?P42e$a0VEBKG_Wg`0fbr@F}U!19sd>njRl zVPATb_cthTQvgjfPEN>KdiUlLnsVzU9?dWI+hYtGHRo#W%<=t-!->jLv) zK|+4P(<3>aFC!$3-aa>k&z;%PfAzTuLf#OL5qi&@Z#M`b`^Q>mYC(}CJqlru@4(1> ziZ--T#rd>;I9^~mIu_$ofpiNu99TpMy>tyiV@vwnD?LK(%Le`SL66W^Fm~V8j6+99 zdxxjdj*XPYLRD$RpbZk(2(zgGs5{z`q%`nCTLbuz(2ypDPgQV~4BMve3{N^LDww=C ztgFeeGBgT61Q&C<^u}9}I3F8?Jodtw^?Akqdp@oTy49u2`Rx1F)CcbC{rZ3UvOye! zU;inL{j0e+9_^usZyoz}=*UDea!ym)wyG5x9!f1L%6w5jKJ}o(WTTpDBobq?c|5;p z41d`p{GM!7-f_=eOl>{Qy2y>r33$h0fdt9Ijahx!%nG4Y-bWA)~rD(ERu& z)4RR0diALu%^`><#{R@y6%OpYZVY9YYP=lU^zl9y5d>|j@kfMN5ufL?%kpt!Pw=(# z@?zuw?hWs?MY}EgM#D5%VY@w;Wr9 zO%C$BuQhr9#%9>Yojs1Q@fsaFxdHjvr>1zGqa2v76!))=2pYtlRKP2i2-=JLMRjDt z8KV?ArM?sqN9C(VL+ z@1Do;u$hFv4O2@$v=@1Letc8u|F%i#KLmk4{lH+?Lvi)J82j8@d7hjcpB%dCYMXY3 zbzO?^H=!Ctswyr}W}(tbgHX9e%uG$EaEUrFk(wxI88*CZozPwg2(6JvK+f4m%x?Eb zS69irW%fPLJrU!wH(C@I0=&#M_{4rIqwr-z6NhEiztml3TiI>Y92UFxZPplF#4fc= zVE)d^a@FsB>^B}-u`cwko|^A{V374dO#=Akb80#;-D0uzprsBp?Lh-`Ycg7>Xw}rQ zXBSAI{+}jxmpg}!FXmZljJCYW@{uaFQ+AkT6Y*T1lJUx7sfB<3ib>e=JDV&~ywVbh z2bcY%`!V)U=G$5B@9iJ%J4?jUMlh)W3Qr3#6DgdofX9TivZyItBkPW~9!Ik5AIP3*jNB|9Ho!Ol-TxRhGl z-96kr-gl+j3-PakW`NE#Re_utijZ2c7MLRQ%o|yqU5J|xp3TBDBJ04*ETl3<{zJ(8 z4{tE|W_B}@RVzv!ShF&8sCInn!DYXbFur5X&H>Lg_`tbGL{7Woaa2NTc>A=gQ3~w< zjG3ZxDD46Ql^`U(r`sXMbS23?kL#o~8*Sh?UH(2#C$7E|SvjiWja{qx)$!yeoUuaZ zfi)Xy4SuKDS?1@XyHWJkS-`$`cyhSoHn43;S5`>pssaW+5wuYeqbhMLRFN{J7_DrA zbJPNIT8~Fdb11Jt8y!_i2kqO!C;?e@ku6oGj{MmUq&u^s=iKd#Ipp5GxeM=``LpyK ze&<~q{m#25fPUA5_t_rM$D*Gdjs5Ig;7?BX4)>y*_7dTV5DL0bm0HuvCx#-ML9SfX zWc3qXBm(HBP^dgr@C2?iG@m21;xq{L+IfcTGiPs4_?gM<51eZ=p8Y?bi|2LB{t`O< 
z{6`Qcntirr_BV}ZAH3w9-DNNNZ5aFL?A{|JKG|{02~Z<6DeawrDw#}BcjXPFifhGS zJd>stFqN_u37#hkCe)-I>;as-N>s>=!M!UQL@g{~R@eLB!aA*PGp~@dcd3NM!VSf4 z3%1y~$9}IgdjWNF`9AqLZ1U;%c5=I9QLB$#@0t0kjFlamU$7ft+PD?w>?gXuTT&$=3sE)&Qw2kXmYSLz7>t-YK8d z{Nm~5tE<9*@?&$IB-iL9*&EGAc9)Y21n*DIq|!rajpp^BCo7Ezr9^mwu_W(vUwucb zo8hQ4bME2(g&d>c#Y_3eHtG1=RvOJmcbAGXV4&lnusdZg$4&2S>yYzIb^r5(QUU+ zA@fZNW60h;r{Il+Yzjy|nasLl^mz<%c^+H5ijaY2_X4tat%L0531p!|^h1<&Qwb)xfWO$biq`OH*o!lKzsor#OU+vD$OZ|Id*|F` zTz&B~;OLbCS=X(NE6rX!$w38A`oE3f44(8i_LeiG`v*IR2+GKlzA;GBz8&j5=@lZE zuXwZ?+Hu-%bZs`fY_!drKfugixtu5cjTQEU;C<6vm>rbXp4<=^Wz(hgcG$?xmU%6# z@pz)kk49Uh(|=Sh=P&+#PbpRLagl5Y3NB>iR*>bRw@Qq(o^Uo-+~ z1o%COhM_Ha!;y3Dq-k_p^AeXzTvfC%;ygtW3eh%{XZ83f{nLxw@+7^T*#zNg^RZ;4 z)*r!>-rBlc_Wn)#%c`@(*uR;{`oZqr$^L2h^qcEK5%tarnfw%a)%@H$?gT^@YpVd?qNi*2p-SGES^0zE@J6CwQLGjaE5?%0h8S&4F7 zf>Z=j_*Ben8zetbJnl5o`lW%hjXX25TAUnoZnS1d={E|=3m9`%Ak0HNt@sreA%_qy zvpc=JB4lXxDtq3>Wx2Z8HbFES0ePNdECE5#Kf0-#>CTniOxp*8;}`_}VT}F3oR_pG zjdR42XDDRbw1yUxCP)$z{mM?4Ey^CCo6^vs(2yL(<+4l)p=FnLDUuID@RAmgmyN;I z;HAqm$LE`@f0nPiY^+k_ksgji%`^6O>8wxn_`5Yp@Bq(GUMyk`HvQoq&ky!CoiIM0 zGt=QoHeI9Y(HN9H>>l}WD2uvc1Nzu}$IJ9*mpc-VV zfNj@u=4O%EQjVveoA|=T!SS?{)tBwg*d{3USGbPZ3$KT;pN6=w)coWon||j?oBplC z<(T_VG4`?f=Y{Q`oF2JsjwpE}s)9Z}lae|$q}Xn53tWYOG*=12#*T6OsTkDM$d&05 z+H4+LmXpRz6A-pi%Jeb!Xpyq}%6yl0o|SVK%J0^=3QothGs5*=t-6>fb9$wZjuxZV z6yW)hd9TQQvqjEqBI>~e5q2)%84AdR@oVQF8Rqb`?G4urBdfv78miN+ptSqj#eTI> zwg~%ClX;34zE0P#x3}Lym>maB4%_W`470(MpQ#;2OAdP9)GN-H0?m)CGeLK(>~a5v zqvaI?e~7VfFMXQb-tMVGlXF#;N|m$|b3>c7(!-i|&_R(?Q7RYGE_9h7hNNaO2)nF= zN$qFZbscKW2|P~CO*!4C`Nr*~x9Z5|u$~zR5Srh-*rpqyLi5q1LDoZsCOU?{IY;wo z7e1$}(2(t+9Z4k0nNXMXIT6$nrr~~S##Kch$Hr;tk8-T+8bBvt%L@<^3AQFRNK7JP zzsR!KMJh_kd;(W*@%#$evxE6zVWS}rRPUGG`?Y(Ps* z+o60(=unGLM%a(Vk)St|nz;{Pu>zj^&o{|@kGxwFWak?crITjo7}{1Q`!IGN9GKLNBIi literal 0 HcmV?d00001 diff --git a/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt new file mode 100644 index 00000000..e5f09f8f --- 
/dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt @@ -0,0 +1,49 @@ +ERR5069949.29668 -4 sameTranscript=No,dist=genomic +ERR5069949.114870 -45 sameTranscript=No,dist=genomic +ERR5069949.147998 94 sameTranscript=No,dist=genomic +ERR5069949.155944 -105 sameTranscript=No,dist=genomic +ERR5069949.184542 49 sameTranscript=No,dist=genomic +ERR5069949.169513 -92 sameTranscript=No,dist=genomic +ERR5069949.257821 -139 sameTranscript=No,dist=genomic +ERR5069949.309410 13 sameTranscript=No,dist=genomic +ERR5069949.376959 -66 sameTranscript=No,dist=genomic +ERR5069949.366975 -106 sameTranscript=No,dist=genomic +ERR5069949.465452 -19 sameTranscript=No,dist=genomic +ERR5069949.479807 5 sameTranscript=No,dist=genomic +ERR5069949.501486 -82 sameTranscript=No,dist=genomic +ERR5069949.532979 -96 sameTranscript=No,dist=genomic +ERR5069949.540529 -61 sameTranscript=No,dist=genomic +ERR5069949.573706 -63 sameTranscript=No,dist=genomic +ERR5069949.576388 -77 sameTranscript=No,dist=genomic +ERR5069949.611123 -125 sameTranscript=No,dist=genomic +ERR5069949.651338 -33 sameTranscript=No,dist=genomic +ERR5069949.686090 -29 sameTranscript=No,dist=genomic +ERR5069949.786562 42 sameTranscript=No,dist=genomic +ERR5069949.870926 -22 sameTranscript=No,dist=genomic +ERR5069949.856527 -69 sameTranscript=No,dist=genomic +ERR5069949.885966 -32 sameTranscript=No,dist=genomic +ERR5069949.937422 18 sameTranscript=No,dist=genomic +ERR5069949.919671 -116 sameTranscript=No,dist=genomic +ERR5069949.973930 -79 sameTranscript=No,dist=genomic +ERR5069949.986441 -22 sameTranscript=No,dist=genomic +ERR5069949.1014693 -150 sameTranscript=No,dist=genomic +ERR5069949.1020777 -122 sameTranscript=No,dist=genomic +ERR5069949.1066259 -4 sameTranscript=No,dist=genomic +ERR5069949.1062611 -124 sameTranscript=No,dist=genomic +ERR5069949.1067032 -103 sameTranscript=No,dist=genomic +ERR5069949.1088785 -101 sameTranscript=No,dist=genomic +ERR5069949.1132353 -142 sameTranscript=No,dist=genomic 
+ERR5069949.1151736 -55 sameTranscript=No,dist=genomic +ERR5069949.1258508 62 sameTranscript=No,dist=genomic +ERR5069949.1189252 -98 sameTranscript=No,dist=genomic +ERR5069949.1261808 -88 sameTranscript=No,dist=genomic +ERR5069949.1246538 -122 sameTranscript=No,dist=genomic +ERR5069949.1328186 -64 sameTranscript=No,dist=genomic +ERR5069949.1331889 -132 sameTranscript=No,dist=genomic +ERR5069949.1372331 -29 sameTranscript=No,dist=genomic +ERR5069949.1340552 -140 sameTranscript=No,dist=genomic +ERR5069949.1412839 -117 sameTranscript=No,dist=genomic +ERR5069949.1476386 -98 sameTranscript=No,dist=genomic +ERR5069949.1538968 -133 sameTranscript=No,dist=genomic +ERR5069949.1552198 -67 sameTranscript=No,dist=genomic +ERR5069949.1561137 -59 sameTranscript=No,dist=genomic diff --git a/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt new file mode 100644 index 00000000..908326ff --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt @@ -0,0 +1,100 @@ +-250 -245 0 +-245 -240 0 +-240 -235 0 +-235 -230 0 +-230 -225 0 +-225 -220 0 +-220 -215 0 +-215 -210 0 +-210 -205 0 +-205 -200 0 +-200 -195 0 +-195 -190 0 +-190 -185 0 +-185 -180 0 +-180 -175 0 +-175 -170 0 +-170 -165 0 +-165 -160 0 +-160 -155 0 +-155 -150 1 +-150 -145 0 +-145 -140 2 +-140 -135 1 +-135 -130 2 +-130 -125 1 +-125 -120 3 +-120 -115 2 +-115 -110 0 +-110 -105 2 +-105 -100 2 +-100 -95 3 +-95 -90 1 +-90 -85 1 +-85 -80 1 +-80 -75 2 +-75 -70 0 +-70 -65 3 +-65 -60 3 +-60 -55 2 +-55 -50 0 +-50 -45 1 +-45 -40 0 +-40 -35 0 +-35 -30 2 +-30 -25 2 +-25 -20 2 +-20 -15 1 +-15 -10 0 +-10 -5 0 +-5 0 2 +0 5 1 +5 10 0 +10 15 1 +15 20 1 +20 25 0 +25 30 0 +30 35 0 +35 40 0 +40 45 1 +45 50 1 +50 55 0 +55 60 0 +60 65 1 +65 70 0 +70 75 0 +75 80 0 +80 85 0 +85 90 0 +90 95 1 +95 100 0 +100 105 0 +105 110 0 +110 115 0 +115 120 0 +120 125 0 +125 130 0 +130 135 0 +135 140 0 +140 145 0 +145 150 0 +150 155 0 +155 
160 0 +160 165 0 +165 170 0 +170 175 0 +175 180 0 +180 185 0 +185 190 0 +190 195 0 +195 200 0 +200 205 0 +205 210 0 +210 215 0 +215 220 0 +220 225 0 +225 230 0 +230 235 0 +235 240 0 +240 245 0 +245 250 0 diff --git a/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt new file mode 100644 index 00000000..a1930c9e --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt @@ -0,0 +1,4 @@ +ERR5069949.29668 -4 sameTranscript=No,dist=genomic +ERR5069949.114870 -45 sameTranscript=No,dist=genomic +ERR5069949.147998 94 sameTranscript=No,dist=genomic +ERR5069949.155944 -105 sameTranscript=No,dist=genomic diff --git a/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt new file mode 100644 index 00000000..021311a2 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt @@ -0,0 +1,100 @@ +-250 -245 0 +-245 -240 0 +-240 -235 0 +-235 -230 0 +-230 -225 0 +-225 -220 0 +-220 -215 0 +-215 -210 0 +-210 -205 0 +-205 -200 0 +-200 -195 0 +-195 -190 0 +-190 -185 0 +-185 -180 0 +-180 -175 0 +-175 -170 0 +-170 -165 0 +-165 -160 0 +-160 -155 0 +-155 -150 0 +-150 -145 0 +-145 -140 0 +-140 -135 0 +-135 -130 0 +-130 -125 0 +-125 -120 0 +-120 -115 0 +-115 -110 0 +-110 -105 1 +-105 -100 0 +-100 -95 0 +-95 -90 0 +-90 -85 0 +-85 -80 0 +-80 -75 0 +-75 -70 0 +-70 -65 0 +-65 -60 0 +-60 -55 0 +-55 -50 0 +-50 -45 1 +-45 -40 0 +-40 -35 0 +-35 -30 0 +-30 -25 0 +-25 -20 0 +-20 -15 0 +-15 -10 0 +-10 -5 0 +-5 0 1 +0 5 0 +5 10 0 +10 15 0 +15 20 0 +20 25 0 +25 30 0 +30 35 0 +35 40 0 +40 45 0 +45 50 0 +50 55 0 +55 60 0 +60 65 0 +65 70 0 +70 75 0 +75 80 0 +80 85 0 +85 90 0 +90 95 1 +95 100 0 +100 105 0 +105 110 0 +110 115 0 +115 120 0 +120 125 0 +125 130 0 +130 135 0 +135 140 0 +140 145 0 +145 150 0 +150 155 0 +155 160 0 +160 165 0 +165 170 0 +170 175 0 +175 180 0 +180 185 0 
+185 190 0 +190 195 0 +195 200 0 +200 205 0 +205 210 0 +210 215 0 +215 220 0 +220 225 0 +225 230 0 +230 235 0 +235 240 0 +240 245 0 +245 250 0 From cc67547928466ba5e4bd36173b249ebb539f9509 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 2 Nov 2024 10:28:08 +0100 Subject: [PATCH 14/15] Add agat sq stat basic (#110) * add help * add config * add run script * add test data and expected output + script to fetch them * add test * update changelog * handle input --gff has multiple=true * cleanup config * add direction for input arguments * update config: add requirements, add keywords, update --config description * remove unset IFS * add set -eo pipefail to script and test files * create temporary directory and clean up on exit * cleanup changelog * Update CHANGELOG.md --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 +- src/agat/agat_sq_stat_basic/config.vsh.yaml | 92 ++ src/agat/agat_sq_stat_basic/help.txt | 79 ++ src/agat/agat_sq_stat_basic/script.sh | 31 + src/agat/agat_sq_stat_basic/test.sh | 36 + src/agat/agat_sq_stat_basic/test_data/1.gff | 942 ++++++++++++++++++ .../test_data/agat_sq_stat_basic_1.gff | 12 + .../agat_sq_stat_basic/test_data/script.sh | 10 + 8 files changed, 1203 insertions(+), 1 deletion(-) create mode 100644 src/agat/agat_sq_stat_basic/config.vsh.yaml create mode 100644 src/agat/agat_sq_stat_basic/help.txt create mode 100644 src/agat/agat_sq_stat_basic/script.sh create mode 100644 src/agat/agat_sq_stat_basic/test.sh create mode 100644 src/agat/agat_sq_stat_basic/test_data/1.gff create mode 100644 src/agat/agat_sq_stat_basic/test_data/agat_sq_stat_basic_1.gff create mode 100755 src/agat/agat_sq_stat_basic/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e32edb1..c8d86fa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - `agat/agat_sp_filter_feature_from_kill_list`: remove features in a GFF file based on a kill list (PR #105). 
- `agat/agat_sp_merge_annotations`: merge different gff annotation files in one (PR #106). - `agat/agat_sp_statistics`: provides exhaustive statistics of a gft/gff file (PR #107). + - `agat/agat_sq_stat_basic`: provide basic statistics of a gtf/gff file (PR #110). * `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96). @@ -68,7 +69,6 @@ - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). - * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). diff --git a/src/agat/agat_sq_stat_basic/config.vsh.yaml b/src/agat/agat_sq_stat_basic/config.vsh.yaml new file mode 100644 index 00000000..64958991 --- /dev/null +++ b/src/agat/agat_sq_stat_basic/config.vsh.yaml @@ -0,0 +1,92 @@ +name: agat_sq_stat_basic +namespace: agat +description: | + The script aims to provide basic statistics of a gtf/gff file. +keywords: [gene annotations, gff, statistics] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_sq_stat_basic.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +requirements: + - commands: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-i, --file, --input] + description: | + Input GTF/GFF file. 
+ type: file + required: true + multiple: true + direction: input + example: input.gff + - name: --genome_size + alternatives: [-g] + description: | + That input is designed to know the genome size in order to calculate the percentage of the genome represented by each kind of feature type. You can provide an INTEGER. Or you can also pass a fasta file using the argument --genome_size_fasta. If both are provided, only the value of --genome_size will be considered. + type: integer + required: false + direction: input + example: 10000 + - name: --genome_size_fasta + description: | + That input is designed to know the genome size in order to calculate the percentage of the genome represented by each kind of feature type. You can provide the genome in fasta format. Or you can also pass the size directly as an integer using the argument --genome_size. If you provide the fasta, the genome size will be calculated on the fly. If both are provided, only the value of --genome_size will be considered. + type: file + required: false + direction: input + example: genome.fasta + - name: Outputs + arguments: + - name: --output + alternatives: [-o] + description: | + Output file. The result is in tabulate format. + type: file + direction: output + required: true + example: output.txt + - name: Arguments + arguments: + - name: --inflate + description: | + Inflate the statistics taking into account feature with + multi-parents. Indeed to avoid redundant information, some gff + factorize identical features. e.g: one exon used in two + different isoform will be defined only once, and will have + multiple parent. By default the script count such feature only + once. Using the inflate option allows to count the feature and + its size as many time there are parents. + type: boolean_true + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. 
The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_sq_stat_basic/help.txt b/src/agat/agat_sq_stat_basic/help.txt new file mode 100644 index 00000000..65096991 --- /dev/null +++ b/src/agat/agat_sq_stat_basic/help.txt @@ -0,0 +1,79 @@ +```sh +agat_sq_stat_basic.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_sq_stat_basic.pl + +Description: + The script aims to provide basic statistics of a gtf/gff file. + +Usage: + agat_sq_stat_basic.pl -i [-g -o ] + agat_sq_stat_basic.pl --help + +Options: + -i, --gff, --file or --input + STRING: Input GTF/GFF file. Several files can be processed at + once: -i file1 -i file2 + + -g, --genome + That input is design to know the genome size in order to + calculate the percentage of the genome represented by each kind + of feature type. You can provide an INTEGER or the genome in + fasta format. If you provide the fasta, the genome size will be + calculated on the fly. + + --inflate + Inflate the statistics taking into account feature with + multi-parents. Indeed to avoid redundant information, some gff + factorize identical features. 
e.g: one exon used in two + different isoform will be defined only once, and will have + multiple parent. By default the script count such feature only + once. Using the inflate option allows to count the feature and + its size as many time there are parents. + + -o or --output + STRING: Output file. If no output file is specified, the output + will be written to STDOUT. The result is in tabulate format. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + --help or -h + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. 
+ + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_sq_stat_basic/script.sh b/src/agat/agat_sq_stat_basic/script.sh new file mode 100644 index 00000000..0f4ab2a6 --- /dev/null +++ b/src/agat/agat_sq_stat_basic/script.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +# unset flags +[[ "$par_inflate" == "false" ]] && unset par_inflate + +# Convert a list of file names to multiple -gff arguments +input_files="" +IFS=";" read -ra file_names <<< "$par_gff" +for file in "${file_names[@]}"; do + input_files+="--gff $file " +done + +# take care of --genome (can originally be either a fasta file or an integer) +if [[ -n "$par_genome_size" ]]; then + genome_arg=$par_genome_size +elif [[ -n "$par_genome_size_fasta" ]]; then + genome_arg=$par_genome_size_fasta +fi + +# run agat_convert_sp_bed2gff.pl +agat_sq_stat_basic.pl \ + $input_files \ + ${genome_arg:+--genome "${genome_arg}"} \ + --output "${par_output}" \ + ${par_inflate:+--inflate} \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_sq_stat_basic/test.sh b/src/agat/agat_sq_stat_basic/test.sh new file mode 100644 index 00000000..12bd28cd --- /dev/null +++ b/src/agat/agat_sq_stat_basic/test.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/1.gff" \ + --output "$TMPDIR/output.txt" + +echo ">> Checking output" +[ ! 
-f "$TMPDIR/output.txt" ] && echo "Output file output.txt does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.txt" ] && echo "Output file output.txt is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.txt" "$test_dir/agat_sq_stat_basic_1.gff" +if [ $? -ne 0 ]; then + echo "Output file output.txt does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_sq_stat_basic/test_data/1.gff b/src/agat/agat_sq_stat_basic/test_data/1.gff new file mode 100644 index 00000000..40a06c78 --- /dev/null +++ b/src/agat/agat_sq_stat_basic/test_data/1.gff @@ -0,0 +1,942 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +### +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +### +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp five_prime_UTR 2983 3268 . + . Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 3354 3616 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp CDS 3449 3616 . 
+ 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 4357 4455 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 5457 5560 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 7136 7944 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8028 8150 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8232 8320 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8408 8608 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 9210 9615 . + . 
Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp CDS 9210 9615 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10102 10187 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10274 10430 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp three_prime_UTR 10298 10430 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 10504 10815 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10504 10815 . + . Parent=transcript:Os01t0100100-01 +### +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . Parent=transcript:Os01t0100200-01 +1 irgsp exon 11218 12060 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . 
+ 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp exon 12152 12435 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . Parent=transcript:Os01t0100200-01 +### +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp exon 12146 12284 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +### +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . Parent=transcript:Os01t0100400-01 +1 irgsp exon 12721 13813 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp CDS 12774 13813 . 
+ 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 13906 14271 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14359 14437 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14969 15171 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 15266 15685 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . Parent=transcript:Os01t0100400-01 +### +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 12808 13782 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp CDS 12869 13102 . 
- 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 13880 13978 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp five_prime_UTR 13880 13978 . - . Parent=transcript:Os01t0100466-00 +### +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 16399 16976 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17383 17474 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp CDS 17383 17474 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17558 18258 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18501 18571 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp CDS 18501 18571 . 
+ 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18968 19057 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19142 19321 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19531 19629 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp three_prime_UTR 19594 19629 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 19734 20144 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp three_prime_UTR 19734 20144 . + . Parent=transcript:Os01t0100500-01 +### +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . Parent=transcript:Os01t0100600-01 +1 irgsp exon 22841 23281 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp CDS 23232 23281 . 
+ 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23572 23847 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 irgsp CDS 23572 23847 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23962 24033 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon3;rank=3 +1 irgsp CDS 23962 24033 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 24492 24577 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100600-01.exon4;rank=4 +1 irgsp CDS 24492 24577 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25445 25519 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100600-01.exon5;rank=5 +1 irgsp CDS 25445 25519 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 25883 26391 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25883 26892 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0100600-01.exon6;rank=6 +1 irgsp three_prime_UTR 26392 26892 . + . Parent=transcript:Os01t0100600-01 +### +1 irgsp gene 25861 26424 . - . ID=gene:Os01g0100650;biotype=protein_coding;description=Hypothetical gene. (Os01t0100650-00);gene_id=Os01g0100650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 25861 26424 . - . 
ID=transcript:Os01t0100650-00;Parent=gene:Os01g0100650;biotype=protein_coding;transcript_id=Os01t0100650-00 +1 irgsp three_prime_UTR 25861 26039 . - . Parent=transcript:Os01t0100650-00 +1 irgsp exon 25861 26424 . - . Parent=transcript:Os01t0100650-00;Name=Os01t0100650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100650-00.exon1;rank=1 +1 irgsp CDS 26040 26423 . - 0 ID=CDS:Os01t0100650-00;Parent=transcript:Os01t0100650-00;protein_id=Os01t0100650-00 +1 irgsp five_prime_UTR 26424 26424 . - . Parent=transcript:Os01t0100650-00 +### +1 irgsp gene 27143 28644 . + . ID=gene:Os01g0100700;biotype=protein_coding;description=Similar to 40S ribosomal protein S5-1. (Os01t0100700-01);gene_id=Os01g0100700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 27143 28644 . + . ID=transcript:Os01t0100700-01;Parent=gene:Os01g0100700;biotype=protein_coding;transcript_id=Os01t0100700-01 +1 irgsp five_prime_UTR 27143 27220 . + . Parent=transcript:Os01t0100700-01 +1 irgsp exon 27143 27292 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100700-01.exon1;rank=1 +1 irgsp CDS 27221 27292 . + 0 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 27370 27641 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100700-01.exon2;rank=2 +1 irgsp CDS 27370 27641 . + 0 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 28090 28293 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100700-01.exon3;rank=3 +1 irgsp CDS 28090 28293 . + 1 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp CDS 28365 28419 . 
+ 1 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 28365 28644 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100700-01.exon4;rank=4 +1 irgsp three_prime_UTR 28420 28644 . + . Parent=transcript:Os01t0100700-01 +### +1 irgsp gene 29818 34453 . + . ID=gene:Os01g0100800;biotype=protein_coding;description=Protein of unknown function DUF1664 family protein. (Os01t0100800-01);gene_id=Os01g0100800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 29818 34453 . + . ID=transcript:Os01t0100800-01;Parent=gene:Os01g0100800;biotype=protein_coding;transcript_id=Os01t0100800-01 +1 irgsp five_prime_UTR 29818 29939 . + . Parent=transcript:Os01t0100800-01 +1 irgsp exon 29818 29976 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0100800-01.exon1;rank=1 +1 irgsp CDS 29940 29976 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30146 30228 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100800-01.exon2;rank=2 +1 irgsp CDS 30146 30228 . + 2 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30735 30806 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon3;rank=3 +1 irgsp CDS 30735 30806 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30885 30963 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100800-01.exon4;rank=4 +1 irgsp CDS 30885 30963 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 31258 31325 . + . 
Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100800-01.exon5;rank=5 +1 irgsp CDS 31258 31325 . + 2 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 31505 31606 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon6;rank=6 +1 irgsp CDS 31505 31606 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32377 32466 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon7;rank=7 +1 irgsp CDS 32377 32466 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32542 32616 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon8;rank=8 +1 irgsp CDS 32542 32616 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32712 32744 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon9;rank=9 +1 irgsp CDS 32712 32744 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32828 32905 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon10;rank=10 +1 irgsp CDS 32828 32905 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33274 33330 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon11;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon11;rank=11 +1 irgsp CDS 33274 33330 . 
+ 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33400 33471 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon12;rank=12 +1 irgsp CDS 33400 33471 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33543 33617 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon13;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon13;rank=13 +1 irgsp CDS 33543 33617 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp CDS 33975 34124 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33975 34453 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon14;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100800-01.exon14;rank=14 +1 irgsp three_prime_UTR 34125 34453 . + . Parent=transcript:Os01t0100800-01 +### +1 irgsp gene 35623 41136 . + . ID=gene:Os01g0100900;Name=SPHINGOSINE-1-PHOSPHATE LYASE 1%2C Sphingosine-1-Phoshpate Lyase 1;biotype=protein_coding;description=Sphingosine-1-phosphate lyase%2C Disease resistance response (Os01t0100900-01);gene_id=Os01g0100900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 35623 41136 . + . ID=transcript:Os01t0100900-01;Parent=gene:Os01g0100900;biotype=protein_coding;transcript_id=Os01t0100900-01 +1 irgsp five_prime_UTR 35623 35742 . + . Parent=transcript:Os01t0100900-01 +1 irgsp exon 35623 35939 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100900-01.exon1;rank=1 +1 irgsp CDS 35743 35939 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36027 36072 . + . 
Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon2;rank=2 +1 irgsp CDS 36027 36072 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36517 36668 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100900-01.exon3;rank=3 +1 irgsp CDS 36517 36668 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36818 36877 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon4;rank=4 +1 irgsp CDS 36818 36877 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 37594 37818 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon5;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon5;rank=5 +1 irgsp CDS 37594 37818 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 37892 38033 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon6;rank=6 +1 irgsp CDS 37892 38033 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 38276 38326 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100900-01.exon7;rank=7 +1 irgsp CDS 38276 38326 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 38434 38525 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon8;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100900-01.exon8;rank=8 +1 irgsp CDS 38434 38525 . 
+ 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39319 39445 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon9;rank=9 +1 irgsp CDS 39319 39445 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39553 39568 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100900-01.exon10;rank=10 +1 irgsp CDS 39553 39568 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39939 40046 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon11;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100900-01.exon11;rank=11 +1 irgsp CDS 39939 40046 . + 2 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40135 40189 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon12;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0100900-01.exon12;rank=12 +1 irgsp CDS 40135 40189 . + 2 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40456 40602 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon13;rank=13 +1 irgsp CDS 40456 40602 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40703 40781 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon14;rank=14 +1 irgsp CDS 40703 40781 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp CDS 40885 41007 . 
+ 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40885 41136 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon15;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100900-01.exon15;rank=15 +1 irgsp three_prime_UTR 41008 41136 . + . Parent=transcript:Os01t0100900-01 +### +1 irgsp gene 58658 61090 . + . ID=gene:Os01g0101150;biotype=protein_coding;description=Hypothetical conserved gene. (Os01t0101150-00);gene_id=Os01g0101150;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 58658 61090 . + . ID=transcript:Os01t0101150-00;Parent=gene:Os01g0101150;biotype=protein_coding;transcript_id=Os01t0101150-00 +1 irgsp exon 58658 61090 . + . Parent=transcript:Os01t0101150-00;Name=Os01t0101150-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101150-00.exon1;rank=1 +1 irgsp CDS 58658 61090 . + 0 ID=CDS:Os01t0101150-00;Parent=transcript:Os01t0101150-00;protein_id=Os01t0101150-00 +### +1 irgsp gene 62060 65537 . + . ID=gene:Os01g0101200;biotype=protein_coding;description=2%2C3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-01)%3B2%2C3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-02);gene_id=Os01g0101200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 62060 63576 . + . ID=transcript:Os01t0101200-01;Parent=gene:Os01g0101200;biotype=protein_coding;transcript_id=Os01t0101200-01 +1 irgsp five_prime_UTR 62060 62103 . + . Parent=transcript:Os01t0101200-01 +1 irgsp exon 62060 62295 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101200-01.exon1;rank=1 +1 irgsp CDS 62104 62295 . + 0 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 62385 62905 . + . 
Parent=transcript:Os01t0101200-01;Name=Os01t0101200-02.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101200-02.exon2;rank=2 +1 irgsp CDS 62385 62905 . + 0 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 62996 63114 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-02.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0101200-02.exon3;rank=3 +1 irgsp CDS 62996 63114 . + 1 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp CDS 63248 63345 . + 2 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 63248 63576 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0101200-01.exon4;rank=4 +1 irgsp three_prime_UTR 63346 63576 . + . Parent=transcript:Os01t0101200-01 +1 irgsp mRNA 62112 65537 . + . ID=transcript:Os01t0101200-02;Parent=gene:Os01g0101200;biotype=protein_coding;transcript_id=Os01t0101200-02 +1 irgsp five_prime_UTR 62112 62112 . + . Parent=transcript:Os01t0101200-02 +1 irgsp exon 62112 62295 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101200-02.exon1;rank=1 +1 irgsp CDS 62113 62295 . + 0 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 62385 62905 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101200-02.exon2;rank=2 +1 irgsp CDS 62385 62905 . + 0 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 62996 63114 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0101200-02.exon3;rank=3 +1 irgsp CDS 62996 63114 . 
+ 1 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp CDS 63248 63345 . + 2 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 63248 65537 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0101200-02.exon4;rank=4 +1 irgsp three_prime_UTR 63346 65537 . + . Parent=transcript:Os01t0101200-02 +### +1 irgsp gene 63350 66302 . - . ID=gene:Os01g0101300;biotype=protein_coding;description=Similar to MRNA%2C partial cds%2C clone: RAFL22-26-L17. (Fragment). (Os01t0101300-01);gene_id=Os01g0101300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 63350 66302 . - . ID=transcript:Os01t0101300-01;Parent=gene:Os01g0101300;biotype=protein_coding;transcript_id=Os01t0101300-01 +1 irgsp three_prime_UTR 63350 63669 . - . Parent=transcript:Os01t0101300-01 +1 irgsp exon 63350 63783 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101300-01.exon7;rank=7 +1 irgsp CDS 63670 63783 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 63877 64020 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101300-01.exon6;rank=6 +1 irgsp CDS 63877 64020 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64339 64431 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101300-01.exon5;rank=5 +1 irgsp CDS 64339 64431 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64665 64779 . - . 
Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0101300-01.exon4;rank=4 +1 irgsp CDS 64665 64779 . - 1 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64902 65152 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101300-01.exon3;rank=3 +1 irgsp CDS 64902 65152 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 65248 65431 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0101300-01.exon2;rank=2 +1 irgsp CDS 65248 65431 . - 1 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp CDS 65628 65950 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 65628 66302 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0101300-01.exon1;rank=1 +1 irgsp five_prime_UTR 65951 66302 . - . Parent=transcript:Os01t0101300-01 +### +1 irgsp gene 72816 78349 . + . ID=gene:Os01g0101600;biotype=protein_coding;description=Immunoglobulin-like fold domain containing protein. (Os01t0101600-01)%3BImmunoglobulin-like fold domain containing protein. (Os01t0101600-02)%3BHypothetical conserved gene. (Os01t0101600-03);gene_id=Os01g0101600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 72816 78349 . + . ID=transcript:Os01t0101600-01;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-01 +1 irgsp five_prime_UTR 72816 72902 . + . Parent=transcript:Os01t0101600-01 +1 irgsp exon 72816 73935 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0101600-01.exon1;rank=1 +1 irgsp CDS 72903 73935 . 
+ 0 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp exon 74468 74981 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-02.exon2;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0101600-02.exon2;rank=2 +1 irgsp CDS 74468 74981 . + 2 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp CDS 75619 77008 . + 1 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp exon 75619 77205 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0101600-01.exon3;rank=3 +1 irgsp three_prime_UTR 77009 77205 . + . Parent=transcript:Os01t0101600-01 +1 irgsp exon 77333 78349 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101600-01.exon4;rank=4 +1 irgsp three_prime_UTR 77333 78349 . + . Parent=transcript:Os01t0101600-01 +1 irgsp mRNA 72823 77699 . + . ID=transcript:Os01t0101600-02;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-02 +1 irgsp five_prime_UTR 72823 72902 . + . Parent=transcript:Os01t0101600-02 +1 irgsp exon 72823 73935 . + . Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0101600-02.exon1;rank=1 +1 irgsp CDS 72903 73935 . + 0 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp exon 74468 74981 . + . Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon2;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0101600-02.exon2;rank=2 +1 irgsp CDS 74468 74981 . + 2 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp CDS 75619 77008 . + 1 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp exon 75619 77699 . + . 
Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0101600-02.exon3;rank=3 +1 irgsp three_prime_UTR 77009 77699 . + . Parent=transcript:Os01t0101600-02 +1 irgsp mRNA 75942 77699 . + . ID=transcript:Os01t0101600-03;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-03 +1 irgsp five_prime_UTR 75942 75943 . + . Parent=transcript:Os01t0101600-03 +1 irgsp exon 75942 77699 . + . Parent=transcript:Os01t0101600-03;Name=Os01t0101600-03.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101600-03.exon1;rank=1 +1 irgsp CDS 75944 77008 . + 0 ID=CDS:Os01t0101600-03;Parent=transcript:Os01t0101600-03;protein_id=Os01t0101600-03 +1 irgsp three_prime_UTR 77009 77699 . + . Parent=transcript:Os01t0101600-03 +### +1 irgsp gene 82426 84095 . + . ID=gene:Os01g0101700;Name=DnaJ domain protein C1%2C rice DJC26 homolog;biotype=protein_coding;description=Similar to chaperone protein dnaJ 20. (Os01t0101700-00);gene_id=Os01g0101700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 82426 84095 . + . ID=transcript:Os01t0101700-00;Parent=gene:Os01g0101700;biotype=protein_coding;transcript_id=Os01t0101700-00 +1 irgsp five_prime_UTR 82426 82506 . + . Parent=transcript:Os01t0101700-00 +1 irgsp exon 82426 82932 . + . Parent=transcript:Os01t0101700-00;Name=Os01t0101700-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101700-00.exon1;rank=1 +1 irgsp CDS 82507 82932 . + 0 ID=CDS:Os01t0101700-00;Parent=transcript:Os01t0101700-00;protein_id=Os01t0101700-00 +1 irgsp CDS 83724 83864 . + 0 ID=CDS:Os01t0101700-00;Parent=transcript:Os01t0101700-00;protein_id=Os01t0101700-00 +1 irgsp exon 83724 84095 . + . Parent=transcript:Os01t0101700-00;Name=Os01t0101700-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101700-00.exon2;rank=2 +1 irgsp three_prime_UTR 83865 84095 . + . Parent=transcript:Os01t0101700-00 +### +1 irgsp gene 85337 88844 . + . 
ID=gene:Os01g0101800;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0101800-01);gene_id=Os01g0101800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 85337 88844 . + . ID=transcript:Os01t0101800-01;Parent=gene:Os01g0101800;biotype=protein_coding;transcript_id=Os01t0101800-01 +1 irgsp five_prime_UTR 85337 85378 . + . Parent=transcript:Os01t0101800-01 +1 irgsp exon 85337 85600 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101800-01.exon1;rank=1 +1 irgsp CDS 85379 85600 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 85737 85830 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0101800-01.exon2;rank=2 +1 irgsp CDS 85737 85830 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 85935 86086 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0101800-01.exon3;rank=3 +1 irgsp CDS 85935 86086 . + 2 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 86212 86299 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0101800-01.exon4;rank=4 +1 irgsp CDS 86212 86299 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 86399 87681 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0101800-01.exon5;rank=5 +1 irgsp CDS 86399 87681 . + 2 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 88291 88398 . + . 
Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101800-01.exon6;rank=6 +1 irgsp CDS 88291 88398 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp CDS 88500 88583 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 88500 88844 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101800-01.exon7;rank=7 +1 irgsp three_prime_UTR 88584 88844 . + . Parent=transcript:Os01t0101800-01 +### +1 irgsp gene 86211 88583 . - . ID=gene:Os01g0101850;biotype=protein_coding;description=Hypothetical protein. (Os01t0101850-00);gene_id=Os01g0101850;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 86211 88583 . - . ID=transcript:Os01t0101850-00;Parent=gene:Os01g0101850;biotype=protein_coding;transcript_id=Os01t0101850-00 +1 irgsp exon 86211 86277 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon4;rank=4 +1 irgsp three_prime_UTR 86211 86277 . - . Parent=transcript:Os01t0101850-00 +1 irgsp three_prime_UTR 86384 87326 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 86384 87694 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon3;rank=3 +1 irgsp CDS 87327 87662 . - 0 ID=CDS:Os01t0101850-00;Parent=transcript:Os01t0101850-00;protein_id=Os01t0101850-00 +1 irgsp five_prime_UTR 87663 87694 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 88308 88396 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon2;rank=2 +1 irgsp five_prime_UTR 88308 88396 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 88496 88583 . - . 
Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon1;rank=1 +1 irgsp five_prime_UTR 88496 88583 . - . Parent=transcript:Os01t0101850-00 +### +1 irgsp gene 88883 89228 . - . ID=gene:Os01g0101900;biotype=protein_coding;description=Similar to OSIGBa0075F02.3 protein. (Os01t0101900-00);gene_id=Os01g0101900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 88883 89228 . - . ID=transcript:Os01t0101900-00;Parent=gene:Os01g0101900;biotype=protein_coding;transcript_id=Os01t0101900-00 +1 irgsp three_prime_UTR 88883 88985 . - . Parent=transcript:Os01t0101900-00 +1 irgsp exon 88883 89228 . - . Parent=transcript:Os01t0101900-00;Name=Os01t0101900-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101900-00.exon1;rank=1 +1 irgsp CDS 88986 89204 . - 0 ID=CDS:Os01t0101900-00;Parent=transcript:Os01t0101900-00;protein_id=Os01t0101900-00 +1 irgsp five_prime_UTR 89205 89228 . - . Parent=transcript:Os01t0101900-00 +### +1 irgsp gene 89763 91465 . - . ID=gene:Os01g0102000;Name=NON-SPECIFIC PHOSPHOLIPASE C5;biotype=protein_coding;description=Phosphoesterase family protein. (Os01t0102000-01);gene_id=Os01g0102000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 89763 91465 . - . ID=transcript:Os01t0102000-01;Parent=gene:Os01g0102000;biotype=protein_coding;transcript_id=Os01t0102000-01 +1 irgsp three_prime_UTR 89763 89824 . - . Parent=transcript:Os01t0102000-01 +1 irgsp exon 89763 91465 . - . Parent=transcript:Os01t0102000-01;Name=Os01t0102000-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102000-01.exon1;rank=1 +1 irgsp CDS 89825 91411 . - 0 ID=CDS:Os01t0102000-01;Parent=transcript:Os01t0102000-01;protein_id=Os01t0102000-01 +1 irgsp five_prime_UTR 91412 91465 . - . Parent=transcript:Os01t0102000-01 +### +1 irgsp gene 134300 135439 . + . 
ID=gene:Os01g0102300;Name=OsTLP27;biotype=protein_coding;description=Thylakoid lumen protein%2C Photosynthesis and chloroplast development (Os01t0102300-01);gene_id=Os01g0102300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 134300 135439 . + . ID=transcript:Os01t0102300-01;Parent=gene:Os01g0102300;biotype=protein_coding;transcript_id=Os01t0102300-01 +1 irgsp five_prime_UTR 134300 134310 . + . Parent=transcript:Os01t0102300-01 +1 irgsp exon 134300 134615 . + . Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102300-01.exon1;rank=1 +1 irgsp CDS 134311 134615 . + 0 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp exon 134698 134824 . + . Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102300-01.exon2;rank=2 +1 irgsp CDS 134698 134824 . + 1 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp CDS 134912 135253 . + 0 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp exon 134912 135439 . + . Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102300-01.exon3;rank=3 +1 irgsp three_prime_UTR 135254 135439 . + . Parent=transcript:Os01t0102300-01 +### +1 irgsp gene 139826 141555 . + . ID=gene:Os01g0102400;Name=HAP5H SUBUNIT OF CCAAT-BOX BINDING COMPLEX;biotype=protein_coding;description=Histone-fold domain containing protein. (Os01t0102400-01);gene_id=Os01g0102400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 139826 141555 . + . ID=transcript:Os01t0102400-01;Parent=gene:Os01g0102400;biotype=protein_coding;transcript_id=Os01t0102400-01 +1 irgsp exon 139826 139906 . + . 
Parent=transcript:Os01t0102400-01;Name=Os01t0102400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102400-01.exon1;rank=1 +1 irgsp five_prime_UTR 139826 139906 . + . Parent=transcript:Os01t0102400-01 +1 irgsp five_prime_UTR 140120 140149 . + . Parent=transcript:Os01t0102400-01 +1 irgsp exon 140120 141555 . + . Parent=transcript:Os01t0102400-01;Name=Os01t0102400-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102400-01.exon2;rank=2 +1 irgsp CDS 140150 141415 . + 0 ID=CDS:Os01t0102400-01;Parent=transcript:Os01t0102400-01;protein_id=Os01t0102400-01 +1 irgsp three_prime_UTR 141416 141555 . + . Parent=transcript:Os01t0102400-01 +### +1 irgsp gene 141959 144554 . + . ID=gene:Os01g0102500;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0102500-01);gene_id=Os01g0102500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 141959 144554 . + . ID=transcript:Os01t0102500-01;Parent=gene:Os01g0102500;biotype=protein_coding;transcript_id=Os01t0102500-01 +1 irgsp five_prime_UTR 141959 142083 . + . Parent=transcript:Os01t0102500-01 +1 irgsp exon 141959 142631 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102500-01.exon1;rank=1 +1 irgsp CDS 142084 142631 . + 0 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143191 143431 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102500-01.exon2;rank=2 +1 irgsp CDS 143191 143431 . + 1 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143563 143680 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102500-01.exon3;rank=3 +1 irgsp CDS 143563 143680 . 
+ 0 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp CDS 143817 143908 . + 2 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143817 144554 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0102500-01.exon4;rank=4 +1 irgsp three_prime_UTR 143909 144554 . + . Parent=transcript:Os01t0102500-01 +### +1 irgsp gene 145603 147847 . + . ID=gene:Os01g0102600;Name=Shikimate kinase 4;biotype=protein_coding;description=Shikimate kinase domain containing protein. (Os01t0102600-01)%3BSimilar to shikimate kinase family protein. (Os01t0102600-02);gene_id=Os01g0102600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 145603 147847 . + . ID=transcript:Os01t0102600-01;Parent=gene:Os01g0102600;biotype=protein_coding;transcript_id=Os01t0102600-01 +1 irgsp five_prime_UTR 145603 145644 . + . Parent=transcript:Os01t0102600-01 +1 irgsp exon 145603 145786 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102600-01.exon1;rank=1 +1 irgsp CDS 145645 145786 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 145905 145951 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon2;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-01.exon2;rank=2 +1 irgsp CDS 145905 145951 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146028 146082 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon3;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102600-01.exon3;rank=3 +1 irgsp CDS 146028 146082 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146179 146339 . + . 
Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon4;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-01.exon4;rank=4 +1 irgsp CDS 146179 146339 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146450 146532 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon5;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0102600-01.exon5;rank=5 +1 irgsp CDS 146450 146532 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146611 146719 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon6;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102600-01.exon6;rank=6 +1 irgsp CDS 146611 146719 . + 1 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147106 147184 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon7;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102600-01.exon7;rank=7 +1 irgsp CDS 147106 147184 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147311 147375 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-02.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-02.exon2;rank=8 +1 irgsp CDS 147311 147375 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp CDS 147507 147575 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147507 147847 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon9;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102600-01.exon9;rank=9 +1 irgsp three_prime_UTR 147576 147847 . + . Parent=transcript:Os01t0102600-01 +1 irgsp mRNA 147104 147805 . + . 
ID=transcript:Os01t0102600-02;Parent=gene:Os01g0102600;biotype=protein_coding;transcript_id=Os01t0102600-02 +1 irgsp five_prime_UTR 147104 147105 . + . Parent=transcript:Os01t0102600-02 +1 irgsp exon 147104 147184 . + . Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102600-02.exon1;rank=1 +1 irgsp CDS 147106 147184 . + 0 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp exon 147311 147375 . + . Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-02.exon2;rank=2 +1 irgsp CDS 147311 147375 . + 2 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp CDS 147507 147575 . + 0 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp exon 147507 147805 . + . Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102600-02.exon3;rank=3 +1 irgsp three_prime_UTR 147576 147805 . + . Parent=transcript:Os01t0102600-02 +### +1 irgsp gene 148085 150568 . + . ID=gene:Os01g0102700;biotype=protein_coding;description=Translocon-associated beta family protein. (Os01t0102700-01);gene_id=Os01g0102700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 148085 150568 . + . ID=transcript:Os01t0102700-01;Parent=gene:Os01g0102700;biotype=protein_coding;transcript_id=Os01t0102700-01 +1 irgsp five_prime_UTR 148085 148146 . + . Parent=transcript:Os01t0102700-01 +1 irgsp exon 148085 148313 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102700-01.exon1;rank=1 +1 irgsp CDS 148147 148313 . + 0 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149450 149548 . + . 
Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0102700-01.exon2;rank=2 +1 irgsp CDS 149450 149548 . + 1 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149634 149742 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102700-01.exon3;rank=3 +1 irgsp CDS 149634 149742 . + 1 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149856 149931 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102700-01.exon4;rank=4 +1 irgsp CDS 149856 149931 . + 0 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp CDS 150152 150318 . + 2 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 150152 150568 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0102700-01.exon5;rank=5 +1 irgsp three_prime_UTR 150319 150568 . + . Parent=transcript:Os01t0102700-01 +### +1 irgsp gene 152853 156449 . + . ID=gene:Os01g0102800;Name=Cockayne syndrome WD-repeat protein;biotype=protein_coding;description=Similar to chromatin remodeling complex subunit. (Os01t0102800-01);gene_id=Os01g0102800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 152853 156449 . + . ID=transcript:Os01t0102800-01;Parent=gene:Os01g0102800;biotype=protein_coding;transcript_id=Os01t0102800-01 +1 irgsp five_prime_UTR 152853 152853 . + . Parent=transcript:Os01t0102800-01 +1 irgsp exon 152853 153025 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102800-01.exon1;rank=1 +1 irgsp CDS 152854 153025 . 
+ 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 153178 154646 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102800-01.exon2;rank=2 +1 irgsp CDS 153178 154646 . + 2 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 155010 155450 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0102800-01.exon3;rank=3 +1 irgsp CDS 155010 155450 . + 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp CDS 155543 156214 . + 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 155543 156449 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102800-01.exon4;rank=4 +1 irgsp three_prime_UTR 156215 156449 . + . Parent=transcript:Os01t0102800-01 +### +1 irgsp gene 164577 168921 . + . ID=gene:Os01g0102850;biotype=protein_coding;description=Similar to nitrilase 2. (Os01t0102850-00);gene_id=Os01g0102850;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 164577 168921 . + . ID=transcript:Os01t0102850-00;Parent=gene:Os01g0102850;biotype=protein_coding;transcript_id=Os01t0102850-00 +1 irgsp exon 164577 164905 . + . Parent=transcript:Os01t0102850-00;Name=Os01t0102850-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102850-00.exon1;rank=1 +1 irgsp five_prime_UTR 164577 164905 . + . Parent=transcript:Os01t0102850-00 +1 irgsp five_prime_UTR 168499 168804 . + . Parent=transcript:Os01t0102850-00 +1 irgsp exon 168499 168921 . + . Parent=transcript:Os01t0102850-00;Name=Os01t0102850-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0102850-00.exon2;rank=2 +1 irgsp CDS 168805 168921 . 
+ 0 ID=CDS:Os01t0102850-00;Parent=transcript:Os01t0102850-00;protein_id=Os01t0102850-00 +### +1 irgsp gene 169390 170316 . - . ID=gene:Os01g0102900;Name=LIGHT-REGULATED GENE 1;biotype=protein_coding;description=Light-regulated protein%2C Regulation of light-dependent attachment of LEAF-TYPE FERREDOXIN-NADP+ OXIDOREDUCTASE (LFNR) to the thylakoid membrane (Os01t0102900-01);gene_id=Os01g0102900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 169390 170316 . - . ID=transcript:Os01t0102900-01;Parent=gene:Os01g0102900;biotype=protein_coding;transcript_id=Os01t0102900-01 +1 irgsp three_prime_UTR 169390 169598 . - . Parent=transcript:Os01t0102900-01 +1 irgsp exon 169390 169656 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0102900-01.exon3;rank=3 +1 irgsp CDS 169599 169656 . - 1 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp exon 169751 169909 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0102900-01.exon2;rank=2 +1 irgsp CDS 169751 169909 . - 1 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp CDS 170091 170260 . - 0 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp exon 170091 170316 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102900-01.exon1;rank=1 +1 irgsp five_prime_UTR 170261 170316 . - . Parent=transcript:Os01t0102900-01 +### +1 irgsp gene 170798 173144 . - . ID=gene:Os01g0103000;biotype=protein_coding;description=Snf7 family protein. (Os01t0103000-01);gene_id=Os01g0103000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 170798 173144 . - . 
ID=transcript:Os01t0103000-01;Parent=gene:Os01g0103000;biotype=protein_coding;transcript_id=Os01t0103000-01 +1 irgsp three_prime_UTR 170798 171044 . - . Parent=transcript:Os01t0103000-01 +1 irgsp exon 170798 171095 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0103000-01.exon7;rank=7 +1 irgsp CDS 171045 171095 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 171406 171554 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103000-01.exon6;rank=6 +1 irgsp CDS 171406 171554 . - 2 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 171764 171875 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103000-01.exon5;rank=5 +1 irgsp CDS 171764 171875 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172398 172469 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103000-01.exon4;rank=4 +1 irgsp CDS 172398 172469 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172578 172671 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0103000-01.exon3;rank=3 +1 irgsp CDS 172578 172671 . - 1 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172770 172921 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0103000-01.exon2;rank=2 +1 irgsp CDS 172770 172921 . 
- 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp CDS 173004 173072 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 173004 173144 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103000-01.exon1;rank=1 +1 irgsp five_prime_UTR 173073 173144 . - . Parent=transcript:Os01t0103000-01 +### +1 irgsp gene 178607 180575 . + . ID=gene:Os01g0103100;biotype=protein_coding;description=TGF-beta receptor%2C type I/II extracellular region family protein. (Os01t0103100-01)%3BSimilar to predicted protein. (Os01t0103100-02);gene_id=Os01g0103100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 178607 180548 . + . ID=transcript:Os01t0103100-01;Parent=gene:Os01g0103100;biotype=protein_coding;transcript_id=Os01t0103100-01 +1 irgsp five_prime_UTR 178607 178641 . + . Parent=transcript:Os01t0103100-01 +1 irgsp exon 178607 180548 . + . Parent=transcript:Os01t0103100-01;Name=Os01t0103100-01.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103100-01.exon1;rank=1 +1 irgsp CDS 178642 180462 . + 0 ID=CDS:Os01t0103100-01;Parent=transcript:Os01t0103100-01;protein_id=Os01t0103100-01 +1 irgsp three_prime_UTR 180463 180548 . + . Parent=transcript:Os01t0103100-01 +1 irgsp mRNA 178652 180575 . + . ID=transcript:Os01t0103100-02;Parent=gene:Os01g0103100;biotype=protein_coding;transcript_id=Os01t0103100-02 +1 irgsp five_prime_UTR 178652 178677 . + . Parent=transcript:Os01t0103100-02 +1 irgsp exon 178652 180575 . + . Parent=transcript:Os01t0103100-02;Name=Os01t0103100-02.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103100-02.exon1;rank=1 +1 irgsp CDS 178678 180462 . + 0 ID=CDS:Os01t0103100-02;Parent=transcript:Os01t0103100-02;protein_id=Os01t0103100-02 +1 irgsp three_prime_UTR 180463 180575 . + . Parent=transcript:Os01t0103100-02 +### +1 irgsp gene 178815 180433 . - . 
ID=gene:Os01g0103075;biotype=protein_coding;description=Hypothetical protein. (Os01t0103075-00);gene_id=Os01g0103075;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 178815 180433 . - . ID=transcript:Os01t0103075-00;Parent=gene:Os01g0103075;biotype=protein_coding;transcript_id=Os01t0103075-00 +1 irgsp three_prime_UTR 178815 179511 . - . Parent=transcript:Os01t0103075-00 +1 irgsp exon 178815 180433 . - . Parent=transcript:Os01t0103075-00;Name=Os01t0103075-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103075-00.exon1;rank=1 +1 irgsp CDS 179512 180054 . - 0 ID=CDS:Os01t0103075-00;Parent=transcript:Os01t0103075-00;protein_id=Os01t0103075-00 +1 irgsp five_prime_UTR 180055 180433 . - . Parent=transcript:Os01t0103075-00 +### +1 Ensembl_Plants ncRNA_gene 182074 182154 . + . ID=gene:ENSRNA049442722;Name=tRNA-Leu;biotype=tRNA;description=tRNA-Leu for anticodon AAG;gene_id=ENSRNA049442722;logic_name=trnascan_gene +1 Ensembl_Plants tRNA 182074 182154 . + . ID=transcript:ENSRNA049442722-T1;Parent=gene:ENSRNA049442722;biotype=tRNA;transcript_id=ENSRNA049442722-T1 +1 Ensembl_Plants exon 182074 182154 . + . Parent=transcript:ENSRNA049442722-T1;Name=ENSRNA049442722-E1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSRNA049442722-E1;rank=1 +### +1 irgsp gene 185189 185828 . - . ID=gene:Os01g0103400;biotype=protein_coding;description=Hypothetical gene. (Os01t0103400-01);gene_id=Os01g0103400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 185189 185828 . - . ID=transcript:Os01t0103400-01;Parent=gene:Os01g0103400;biotype=protein_coding;transcript_id=Os01t0103400-01 +1 irgsp three_prime_UTR 185189 185434 . - . Parent=transcript:Os01t0103400-01 +1 irgsp exon 185189 185828 . - . Parent=transcript:Os01t0103400-01;Name=Os01t0103400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103400-01.exon1;rank=1 +1 irgsp CDS 185435 185827 . 
- 0 ID=CDS:Os01t0103400-01;Parent=transcript:Os01t0103400-01;protein_id=Os01t0103400-01 +1 irgsp five_prime_UTR 185828 185828 . - . Parent=transcript:Os01t0103400-01 +### +1 irgsp repeat_region 186000 186100 . + . ID=fakeRepeat2 +### +1 irgsp gene 186250 190904 . - . ID=gene:Os01g0103600;biotype=protein_coding;description=Similar to sterol-8%2C7-isomerase. (Os01t0103600-01)%3BEmopamil-binding family protein. (Os01t0103600-02);gene_id=Os01g0103600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 186250 190262 . - . ID=transcript:Os01t0103600-02;Parent=gene:Os01g0103600;biotype=protein_coding;transcript_id=Os01t0103600-02 +1 irgsp three_prime_UTR 186250 186515 . - . Parent=transcript:Os01t0103600-02 +1 irgsp exon 186250 186771 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0103600-02.exon4;rank=4 +1 irgsp CDS 186516 186771 . - 1 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 189607 189715 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0103600-02.exon3;rank=3 +1 irgsp CDS 189607 189715 . - 2 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 189841 189990 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103600-02.exon2;rank=2 +1 irgsp CDS 189841 189990 . - 2 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp CDS 190087 190231 . - 0 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 190087 190262 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0103600-02.exon1;rank=1 +1 irgsp five_prime_UTR 190232 190262 . - . 
Parent=transcript:Os01t0103600-02 +1 irgsp mRNA 187345 190904 . - . ID=transcript:Os01t0103600-01;Parent=gene:Os01g0103600;biotype=protein_coding;transcript_id=Os01t0103600-01 +1 irgsp three_prime_UTR 187345 189395 . - . Parent=transcript:Os01t0103600-01 +1 irgsp exon 187345 189715 . - . Parent=transcript:Os01t0103600-01;Name=Os01t0103600-01.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103600-01.exon3;rank=3 +1 irgsp CDS 189396 189715 . - 2 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp exon 189841 189990 . - . Parent=transcript:Os01t0103600-01;Name=Os01t0103600-02.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103600-02.exon2;rank=2 +1 irgsp CDS 189841 189990 . - 2 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp CDS 190087 190231 . - 0 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp exon 190087 190904 . - . Parent=transcript:Os01t0103600-01;Name=Os01t0103600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0103600-01.exon1;rank=1 +1 irgsp five_prime_UTR 190232 190904 . - . Parent=transcript:Os01t0103600-01 +### +1 irgsp gene 187545 188586 . + . ID=gene:Os01g0103650;biotype=protein_coding;description=Hypothetical gene. (Os01t0103650-00);gene_id=Os01g0103650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 187545 188586 . + . ID=transcript:Os01t0103650-00;Parent=gene:Os01g0103650;biotype=protein_coding;transcript_id=Os01t0103650-00 +1 irgsp five_prime_UTR 187545 187546 . + . Parent=transcript:Os01t0103650-00 +1 irgsp exon 187545 188020 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon1;rank=1 +1 irgsp CDS 187547 187768 . + 0 ID=CDS:Os01t0103650-00;Parent=transcript:Os01t0103650-00;protein_id=Os01t0103650-00 +1 irgsp three_prime_UTR 187769 188020 . + . 
Parent=transcript:Os01t0103650-00 +1 irgsp exon 188060 188385 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon2;rank=2 +1 irgsp three_prime_UTR 188060 188385 . + . Parent=transcript:Os01t0103650-00 +1 irgsp exon 188455 188586 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon3;rank=3 +1 irgsp three_prime_UTR 188455 188586 . + . Parent=transcript:Os01t0103650-00 +### +1 irgsp gene 191037 196287 . + . ID=gene:Os01g0103700;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0103700-01);gene_id=Os01g0103700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 191037 196287 . + . ID=transcript:Os01t0103700-01;Parent=gene:Os01g0103700;biotype=protein_coding;transcript_id=Os01t0103700-01 +1 irgsp exon 191037 191161 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103700-01.exon1;rank=1 +1 irgsp five_prime_UTR 191037 191161 . + . Parent=transcript:Os01t0103700-01 +1 irgsp five_prime_UTR 191625 191693 . + . Parent=transcript:Os01t0103700-01 +1 irgsp exon 191625 191705 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103700-01.exon2;rank=2 +1 irgsp CDS 191694 191705 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 192399 192506 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103700-01.exon3;rank=3 +1 irgsp CDS 192399 192506 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 192958 193161 . + . 
Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103700-01.exon4;rank=4 +1 irgsp CDS 192958 193161 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 193248 193356 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103700-01.exon5;rank=5 +1 irgsp CDS 193248 193356 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp CDS 193434 193507 . + 2 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 193434 196287 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103700-01.exon6;rank=6 +1 irgsp three_prime_UTR 193508 196287 . + . Parent=transcript:Os01t0103700-01 +### +1 irgsp gene 197647 200803 . + . ID=gene:Os01g0103800;Name=OsDW1-01g;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0103800-01);gene_id=Os01g0103800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 197647 200803 . + . ID=transcript:Os01t0103800-01;Parent=gene:Os01g0103800;biotype=protein_coding;transcript_id=Os01t0103800-01 +1 irgsp exon 197647 197838 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103800-01.exon1;rank=1 +1 irgsp five_prime_UTR 197647 197838 . + . Parent=transcript:Os01t0103800-01 +1 irgsp five_prime_UTR 198034 198129 . + . Parent=transcript:Os01t0103800-01 +1 irgsp exon 198034 198225 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103800-01.exon2;rank=2 +1 irgsp CDS 198130 198225 . + 0 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp exon 198830 200036 . + . 
Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103800-01.exon3;rank=3 +1 irgsp CDS 198830 200036 . + 0 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp CDS 200253 200479 . + 2 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp exon 200253 200803 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103800-01.exon4;rank=4 +1 irgsp three_prime_UTR 200480 200803 . + . Parent=transcript:Os01t0103800-01 +### +1 irgsp gene 201944 206202 . + . ID=gene:Os01g0103900;biotype=protein_coding;description=Polynucleotidyl transferase%2C Ribonuclease H fold domain containing protein. (Os01t0103900-01);gene_id=Os01g0103900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 201944 206202 . + . ID=transcript:Os01t0103900-01;Parent=gene:Os01g0103900;biotype=protein_coding;transcript_id=Os01t0103900-01 +1 irgsp five_prime_UTR 201944 202041 . + . Parent=transcript:Os01t0103900-01 +1 irgsp exon 201944 202110 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103900-01.exon1;rank=1 +1 irgsp CDS 202042 202110 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 202252 202359 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103900-01.exon2;rank=2 +1 irgsp CDS 202252 202359 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203007 203127 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon3;rank=3 +1 irgsp CDS 203007 203127 . 
+ 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203302 203429 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon4;rank=4 +1 irgsp CDS 203302 203429 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203511 203658 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon5;rank=5 +1 irgsp CDS 203511 203658 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203760 203938 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon6;rank=6 +1 irgsp CDS 203760 203938 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204203 204440 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon7;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon7;rank=7 +1 irgsp CDS 204203 204440 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204543 204635 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon8;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103900-01.exon8;rank=8 +1 irgsp CDS 204543 204635 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204730 204875 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon9;rank=9 +1 irgsp CDS 204730 204875 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205042 205149 . + . 
Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103900-01.exon10;rank=10 +1 irgsp CDS 205042 205149 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205290 205378 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0103900-01.exon11;rank=11 +1 irgsp CDS 205290 205378 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp CDS 205534 205543 . + 1 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205534 206202 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0103900-01.exon12;rank=12 +1 irgsp three_prime_UTR 205544 206202 . + . Parent=transcript:Os01t0103900-01 +### +1 irgsp gene 206131 209606 . - . ID=gene:Os01g0104000;biotype=protein_coding;description=C-type lectin domain containing protein. (Os01t0104000-01)%3BSimilar to predicted protein. (Os01t0104000-02);gene_id=Os01g0104000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 206131 209581 . - . ID=transcript:Os01t0104000-02;Parent=gene:Os01g0104000;biotype=protein_coding;transcript_id=Os01t0104000-02 +1 irgsp three_prime_UTR 206131 206449 . - . Parent=transcript:Os01t0104000-02 +1 irgsp exon 206131 207029 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0104000-02.exon4;rank=4 +1 irgsp CDS 206450 207029 . - 1 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 207706 208273 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0104000-02.exon3;rank=3 +1 irgsp CDS 207706 208273 . 
- 2 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 208408 208836 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104000-01.exon2;rank=2 +1 irgsp CDS 208408 208836 . - 2 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp CDS 209438 209525 . - 0 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 209438 209581 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104000-02.exon1;rank=1 +1 irgsp five_prime_UTR 209526 209581 . - . Parent=transcript:Os01t0104000-02 +1 irgsp mRNA 206134 209606 . - . ID=transcript:Os01t0104000-01;Parent=gene:Os01g0104000;biotype=protein_coding;transcript_id=Os01t0104000-01 +1 irgsp three_prime_UTR 206134 206449 . - . Parent=transcript:Os01t0104000-01 +1 irgsp exon 206134 207029 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0104000-01.exon4;rank=4 +1 irgsp CDS 206450 207029 . - 1 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 207706 208276 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0104000-01.exon3;rank=3 +1 irgsp CDS 207706 208276 . - 2 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 208408 208836 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104000-01.exon2;rank=2 +1 irgsp CDS 208408 208836 . - 2 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp CDS 209438 209525 . 
- 0 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 209438 209606 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104000-01.exon1;rank=1 +1 irgsp five_prime_UTR 209526 209606 . - . Parent=transcript:Os01t0104000-01 +### +1 irgsp gene 209771 214173 . + . ID=gene:Os01g0104100;Name=cold-inducible%2C cold-inducible zinc finger protein;biotype=protein_coding;description=Similar to protein binding / zinc ion binding. (Os01t0104100-01)%3BSimilar to protein binding / zinc ion binding. (Os01t0104100-02);gene_id=Os01g0104100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 209771 214173 . + . ID=transcript:Os01t0104100-01;Parent=gene:Os01g0104100;biotype=protein_coding;transcript_id=Os01t0104100-01 +1 irgsp exon 209771 209896 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon1;rank=1 +1 irgsp CDS 209771 209896 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 210244 210563 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon2;rank=2 +1 irgsp CDS 210244 210563 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 210659 210890 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon3;rank=3 +1 irgsp CDS 210659 210890 . + 1 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 211015 211160 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon4;rank=4 +1 irgsp CDS 211015 211160 . 
+ 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 212265 212352 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon5;rank=5 +1 irgsp CDS 212265 212352 . + 1 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 212433 212579 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon6;rank=6 +1 irgsp CDS 212433 212579 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 213490 213639 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon7;rank=7 +1 irgsp CDS 213490 213639 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp CDS 213741 213788 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 213741 214173 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon8;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104100-01.exon8;rank=8 +1 irgsp three_prime_UTR 213789 214173 . + . Parent=transcript:Os01t0104100-01 +1 irgsp mRNA 209794 214147 . + . ID=transcript:Os01t0104100-02;Parent=gene:Os01g0104100;biotype=protein_coding;transcript_id=Os01t0104100-02 +1 irgsp five_prime_UTR 209794 209794 . + . Parent=transcript:Os01t0104100-02 +1 irgsp exon 209794 209896 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104100-02.exon1;rank=1 +1 irgsp CDS 209795 209896 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 210244 210563 . + . 
Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon2;rank=2 +1 irgsp CDS 210244 210563 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 210659 210890 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon3;rank=3 +1 irgsp CDS 210659 210890 . + 1 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 211015 211160 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon4;rank=4 +1 irgsp CDS 211015 211160 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 212265 212352 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon5;rank=5 +1 irgsp CDS 212265 212352 . + 1 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 212433 212579 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon6;rank=6 +1 irgsp CDS 212433 212579 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 213490 213639 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon7;rank=7 +1 irgsp CDS 213490 213639 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp CDS 213741 213788 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 213741 214147 . + . 
Parent=transcript:Os01t0104100-02;Name=Os01t0104100-02.exon8;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104100-02.exon8;rank=8 +1 irgsp three_prime_UTR 213789 214147 . + . Parent=transcript:Os01t0104100-02 +### +1 irgsp gene 216212 217345 . + . ID=gene:Os01g0104200;Name=NAC DOMAIN-CONTAINING PROTEIN 16;biotype=protein_coding;description=No apical meristem (NAM) protein domain containing protein. (Os01t0104200-00);gene_id=Os01g0104200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 216212 217345 . + . ID=transcript:Os01t0104200-00;Parent=gene:Os01g0104200;biotype=protein_coding;transcript_id=Os01t0104200-00 +1 irgsp exon 216212 216769 . + . Parent=transcript:Os01t0104200-00;Name=Os01t0104200-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104200-00.exon1;rank=1 +1 irgsp CDS 216212 216769 . + 0 ID=CDS:Os01t0104200-00;Parent=transcript:Os01t0104200-00;protein_id=Os01t0104200-00 +1 irgsp exon 216884 217345 . + . Parent=transcript:Os01t0104200-00;Name=Os01t0104200-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104200-00.exon2;rank=2 +1 irgsp CDS 216884 217345 . + 0 ID=CDS:Os01t0104200-00;Parent=transcript:Os01t0104200-00;protein_id=Os01t0104200-00 +### +1 irgsp gene 226897 229301 . + . ID=gene:Os01g0104400;biotype=protein_coding;description=Ricin B-related lectin domain containing protein. (Os01t0104400-01)%3BRicin B-related lectin domain containing protein. (Os01t0104400-02)%3BRicin B-related lectin domain containing protein. (Os01t0104400-03);gene_id=Os01g0104400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 226897 229229 . + . ID=transcript:Os01t0104400-01;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-01 +1 irgsp five_prime_UTR 226897 227181 . + . Parent=transcript:Os01t0104400-01 +1 irgsp exon 226897 227634 . + . 
Parent=transcript:Os01t0104400-01;Name=Os01t0104400-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-01.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 227742 227864 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp CDS 228930 228931 . + 2 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 228930 229229 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-01.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229229 . + . Parent=transcript:Os01t0104400-01 +1 irgsp mRNA 227139 229301 . + . ID=transcript:Os01t0104400-02;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-02 +1 irgsp five_prime_UTR 227139 227181 . + . Parent=transcript:Os01t0104400-02 +1 irgsp exon 227139 227634 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-02.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 227742 227864 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . 
+ 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . + 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp CDS 228930 228931 . + 2 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 228930 229301 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-02.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229301 . + . Parent=transcript:Os01t0104400-02 +1 irgsp mRNA 227179 229214 . + . ID=transcript:Os01t0104400-03;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-03 +1 irgsp five_prime_UTR 227179 227181 . + . Parent=transcript:Os01t0104400-03 +1 irgsp exon 227179 227634 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-03.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 227742 227864 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp CDS 228930 228931 . 
+ 2 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 228930 229214 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-03.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229214 . + . Parent=transcript:Os01t0104400-03 +### +1 irgsp gene 241680 243440 . + . ID=gene:Os01g0104500;Name=NAC DOMAIN-CONTAINING PROTEIN 20;biotype=protein_coding;description=No apical meristem (NAM) protein domain containing protein. (Os01t0104500-01);gene_id=Os01g0104500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 241680 243440 . + . ID=transcript:Os01t0104500-01;Parent=gene:Os01g0104500;biotype=protein_coding;transcript_id=Os01t0104500-01 +1 irgsp exon 241680 241702 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104500-01.exon1;rank=1 +1 irgsp five_prime_UTR 241680 241702 . + . Parent=transcript:Os01t0104500-01 +1 irgsp five_prime_UTR 241866 241907 . + . Parent=transcript:Os01t0104500-01 +1 irgsp exon 241866 242091 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104500-01.exon2;rank=2 +1 irgsp CDS 241908 242091 . + 0 ID=CDS:Os01t0104500-01;Parent=transcript:Os01t0104500-01;protein_id=Os01t0104500-01 +1 irgsp CDS 242199 242977 . + 2 ID=CDS:Os01t0104500-01;Parent=transcript:Os01t0104500-01;protein_id=Os01t0104500-01 +1 irgsp exon 242199 243440 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104500-01.exon3;rank=3 +1 irgsp three_prime_UTR 242978 243440 . + . Parent=transcript:Os01t0104500-01 +### +1 irgsp gene 248828 256872 . - . 
ID=gene:Os01g0104600;Name=DE-ETIOLATED1;biotype=protein_coding;description=Homolog of Arabidopsis DE-ETIOLATED1 (DET1)%2C Modulation of the ABA signaling pathway and ABA biosynthesis%2C Regulation of chlorophyll content (Os01t0104600-01)%3BSimilar to Light-mediated development protein DET1 (Deetiolated1 homolog) (tDET1) (High pigmentation protein 2) (Protein dark green). (Os01t0104600-02);gene_id=Os01g0104600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 248828 256571 . - . ID=transcript:Os01t0104600-02;Parent=gene:Os01g0104600;biotype=protein_coding;transcript_id=Os01t0104600-02 +1 irgsp three_prime_UTR 248828 248970 . - . Parent=transcript:Os01t0104600-02 +1 irgsp exon 248828 249107 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104600-01.exon11;rank=11 +1 irgsp CDS 248971 249107 . - 2 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 249369 249468 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104600-01.exon10;rank=10 +1 irgsp CDS 249369 249468 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 249861 249956 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon9;rank=9 +1 irgsp CDS 249861 249956 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 250617 250781 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon8;rank=8 +1 irgsp CDS 250617 250781 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 250860 250940 . - . 
Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon7;rank=7 +1 irgsp CDS 250860 250940 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251026 251082 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon6;rank=6 +1 irgsp CDS 251026 251082 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251316 251384 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon5;rank=5 +1 irgsp CDS 251316 251384 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251695 251790 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon4;rank=4 +1 irgsp CDS 251695 251790 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 255325 255553 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104600-01.exon3;rank=3 +1 irgsp CDS 255325 255553 . - 1 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 255674 256098 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104600-01.exon2;rank=2 +1 irgsp CDS 255674 256098 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp CDS 256361 256441 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 256361 256571 . - . 
Parent=transcript:Os01t0104600-02;Name=Os01t0104600-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104600-02.exon1;rank=1 +1 irgsp five_prime_UTR 256442 256571 . - . Parent=transcript:Os01t0104600-02 +1 irgsp mRNA 248828 256872 . - . ID=transcript:Os01t0104600-01;Parent=gene:Os01g0104600;biotype=protein_coding;transcript_id=Os01t0104600-01 +1 irgsp three_prime_UTR 248828 248970 . - . Parent=transcript:Os01t0104600-01 +1 irgsp exon 248828 249107 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104600-01.exon11;rank=11 +1 irgsp CDS 248971 249107 . - 2 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 249369 249468 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104600-01.exon10;rank=10 +1 irgsp CDS 249369 249468 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 249861 249956 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon9;rank=9 +1 irgsp CDS 249861 249956 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 250617 250781 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon8;rank=8 +1 irgsp CDS 250617 250781 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 250860 250940 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon7;rank=7 +1 irgsp CDS 250860 250940 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251026 251082 . - . 
Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon6;rank=6 +1 irgsp CDS 251026 251082 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251316 251384 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon5;rank=5 +1 irgsp CDS 251316 251384 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251695 251790 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon4;rank=4 +1 irgsp CDS 251695 251790 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 255325 255553 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104600-01.exon3;rank=3 +1 irgsp CDS 255325 255553 . - 1 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 255674 256098 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104600-01.exon2;rank=2 +1 irgsp CDS 255674 256098 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp CDS 256361 256441 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 256361 256872 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104600-01.exon1;rank=1 +1 irgsp five_prime_UTR 256442 256872 . - . Parent=transcript:Os01t0104600-01 +### +1 irgsp gene 261530 268145 . + . ID=gene:Os01g0104800;biotype=protein_coding;description=Sas10/Utp3 family protein. 
(Os01t0104800-01)%3BHypothetical conserved gene. (Os01t0104800-02);gene_id=Os01g0104800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 261530 268145 . + . ID=transcript:Os01t0104800-01;Parent=gene:Os01g0104800;biotype=protein_coding;transcript_id=Os01t0104800-01 +1 irgsp five_prime_UTR 261530 261561 . + . Parent=transcript:Os01t0104800-01 +1 irgsp exon 261530 261661 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104800-01.exon1;rank=1 +1 irgsp CDS 261562 261661 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 261767 261805 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon2;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104800-01.exon2;rank=2 +1 irgsp CDS 261767 261805 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 261895 261941 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon3;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0104800-01.exon3;rank=3 +1 irgsp CDS 261895 261941 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 262582 262681 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon4;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104800-01.exon4;rank=4 +1 irgsp CDS 262582 262681 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 262925 263181 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon5;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0104800-01.exon5;rank=5 +1 irgsp CDS 262925 263181 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 263525 263640 . + . 
Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon6;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon6;rank=6 +1 irgsp CDS 263525 263640 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 264014 264098 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon7;rank=7 +1 irgsp CDS 264014 264098 . + 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265236 265415 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon8;rank=8 +1 irgsp CDS 265236 265415 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265506 265649 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon9;rank=9 +1 irgsp CDS 265506 265649 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265740 265817 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon10;rank=10 +1 irgsp CDS 265740 265817 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265909 266045 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon11;rank=11 +1 irgsp CDS 265909 266045 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 266138 266246 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon12;rank=12 +1 irgsp CDS 266138 266246 . 
+ 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267237 267514 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon13;rank=13 +1 irgsp CDS 267237 267514 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267591 267657 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon14;rank=14 +1 irgsp CDS 267591 267657 . + 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267734 267802 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon15;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon15;rank=15 +1 irgsp CDS 267734 267802 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp CDS 267880 268011 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267880 268145 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon16;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104800-01.exon16;rank=16 +1 irgsp three_prime_UTR 268012 268145 . + . Parent=transcript:Os01t0104800-01 +1 irgsp mRNA 263523 268120 . + . ID=transcript:Os01t0104800-02;Parent=gene:Os01g0104800;biotype=protein_coding;transcript_id=Os01t0104800-02 +1 irgsp five_prime_UTR 263523 263524 . + . Parent=transcript:Os01t0104800-02 +1 irgsp exon 263523 263640 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-02.exon1;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0104800-02.exon1;rank=1 +1 irgsp CDS 263525 263640 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 264014 264098 . + . 
Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon7;rank=2 +1 irgsp CDS 264014 264098 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265236 265415 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon8;rank=3 +1 irgsp CDS 265236 265415 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265506 265649 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon9;rank=4 +1 irgsp CDS 265506 265649 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265740 265817 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon10;rank=5 +1 irgsp CDS 265740 265817 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265909 266045 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon11;rank=6 +1 irgsp CDS 265909 266045 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 266138 266246 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon12;rank=7 +1 irgsp CDS 266138 266246 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267237 267514 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon13;rank=8 +1 irgsp CDS 267237 267514 . 
+ 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267591 267657 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon14;rank=9 +1 irgsp CDS 267591 267657 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267734 267802 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon15;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon15;rank=10 +1 irgsp CDS 267734 267802 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp CDS 267880 268011 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267880 268120 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-02.exon11;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104800-02.exon11;rank=11 +1 irgsp three_prime_UTR 268012 268120 . + . Parent=transcript:Os01t0104800-02 +### +1 irgsp gene 270179 275084 . - . ID=gene:Os01g0104900;biotype=protein_coding;description=Transferase family protein. (Os01t0104900-01)%3BHypothetical conserved gene. (Os01t0104900-02);gene_id=Os01g0104900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 270179 275084 . - . ID=transcript:Os01t0104900-01;Parent=gene:Os01g0104900;biotype=protein_coding;transcript_id=Os01t0104900-01 +1 irgsp three_prime_UTR 270179 270355 . - . Parent=transcript:Os01t0104900-01 +1 irgsp exon 270179 271333 . - . Parent=transcript:Os01t0104900-01;Name=Os01t0104900-01.exon2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104900-01.exon2;rank=2 +1 irgsp CDS 270356 271333 . - 0 ID=CDS:Os01t0104900-01;Parent=transcript:Os01t0104900-01;protein_id=Os01t0104900-01 +1 irgsp CDS 274529 274957 . - 0 ID=CDS:Os01t0104900-01;Parent=transcript:Os01t0104900-01;protein_id=Os01t0104900-01 +1 irgsp exon 274529 275084 . - . 
Parent=transcript:Os01t0104900-01;Name=Os01t0104900-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104900-01.exon1;rank=1 +1 irgsp five_prime_UTR 274958 275084 . - . Parent=transcript:Os01t0104900-01 +1 irgsp mRNA 270250 271518 . - . ID=transcript:Os01t0104900-02;Parent=gene:Os01g0104900;biotype=protein_coding;transcript_id=Os01t0104900-02 +1 irgsp three_prime_UTR 270250 270355 . - . Parent=transcript:Os01t0104900-02 +1 irgsp exon 270250 271333 . - . Parent=transcript:Os01t0104900-02;Name=Os01t0104900-02.exon2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104900-02.exon2;rank=2 +1 irgsp CDS 270356 271309 . - 0 ID=CDS:Os01t0104900-02;Parent=transcript:Os01t0104900-02;protein_id=Os01t0104900-02 +1 irgsp five_prime_UTR 271310 271333 . - . Parent=transcript:Os01t0104900-02 +1 irgsp exon 271457 271518 . - . Parent=transcript:Os01t0104900-02;Name=Os01t0104900-02.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104900-02.exon1;rank=1 +1 irgsp five_prime_UTR 271457 271518 . - . Parent=transcript:Os01t0104900-02 +### +1 irgsp gene 284762 291892 . - . ID=gene:Os01g0105300;biotype=protein_coding;description=Similar to HAT family dimerisation domain containing protein%2C expressed. (Os01t0105300-01);gene_id=Os01g0105300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 284762 291892 . - . ID=transcript:Os01t0105300-01;Parent=gene:Os01g0105300;biotype=protein_coding;transcript_id=Os01t0105300-01 +1 irgsp three_prime_UTR 284762 284930 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 284762 287047 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon5;rank=5 +1 irgsp CDS 284931 285020 . - 0 ID=CDS:Os01t0105300-01;Parent=transcript:Os01t0105300-01;protein_id=Os01t0105300-01 +1 irgsp five_prime_UTR 285021 287047 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291398 291436 . - . 
Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon4;rank=4 +1 irgsp five_prime_UTR 291398 291436 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291520 291534 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon3;rank=3 +1 irgsp five_prime_UTR 291520 291534 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291678 291738 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon2;rank=2 +1 irgsp five_prime_UTR 291678 291738 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291838 291892 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon1;rank=1 +1 irgsp five_prime_UTR 291838 291892 . - . Parent=transcript:Os01t0105300-01 +### +1 irgsp gene 288372 292296 . + . ID=gene:Os01g0105400;biotype=protein_coding;description=Similar to Kinesin heavy chain. (Os01t0105400-01);gene_id=Os01g0105400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 288372 292296 . + . ID=transcript:Os01t0105400-01;Parent=gene:Os01g0105400;biotype=protein_coding;transcript_id=Os01t0105400-01 +1 irgsp exon 288372 288846 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon1;rank=1 +1 irgsp five_prime_UTR 288372 288846 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 288950 289116 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon2;rank=2 +1 irgsp five_prime_UTR 288950 289116 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 289202 289572 . + . 
Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon3;rank=3 +1 irgsp five_prime_UTR 289202 289572 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 289661 289830 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon4;rank=4 +1 irgsp five_prime_UTR 289661 289830 . + . Parent=transcript:Os01t0105400-01 +1 irgsp five_prime_UTR 290395 290432 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 290395 290512 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon5;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0105400-01.exon5;rank=5 +1 irgsp CDS 290433 290512 . + 0 ID=CDS:Os01t0105400-01;Parent=transcript:Os01t0105400-01;protein_id=Os01t0105400-01 +1 irgsp CDS 291372 291558 . + 1 ID=CDS:Os01t0105400-01;Parent=transcript:Os01t0105400-01;protein_id=Os01t0105400-01 +1 irgsp exon 291372 291574 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0105400-01.exon6;rank=6 +1 irgsp three_prime_UTR 291559 291574 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 291648 291779 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon7;rank=7 +1 irgsp three_prime_UTR 291648 291779 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 291859 291948 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon8;rank=8 +1 irgsp three_prime_UTR 291859 291948 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 292073 292296 . + . 
Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon9;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon9;rank=9 +1 irgsp three_prime_UTR 292073 292296 . + . Parent=transcript:Os01t0105400-01 +### +1 irgsp gene 303233 306736 . + . ID=gene:Os01g0105700;Name=basic helix-loop-helix protein 071;biotype=protein_coding;description=Basic helix-loop-helix dimerisation region bHLH domain containing protein. (Os01t0105700-01);gene_id=Os01g0105700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 303233 306736 . + . ID=transcript:Os01t0105700-01;Parent=gene:Os01g0105700;biotype=protein_coding;transcript_id=Os01t0105700-01 +1 irgsp five_prime_UTR 303233 303328 . + . Parent=transcript:Os01t0105700-01 +1 irgsp exon 303233 303471 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0105700-01.exon1;rank=1 +1 irgsp CDS 303329 303471 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 303981 304509 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0105700-01.exon2;rank=2 +1 irgsp CDS 303981 304509 . + 1 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305572 305718 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon3;rank=3 +1 irgsp CDS 305572 305718 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305834 305899 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon4;rank=4 +1 irgsp CDS 305834 305899 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305993 306058 . + . 
Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon5;rank=5 +1 irgsp CDS 305993 306058 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 306171 306245 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon6;rank=6 +1 irgsp CDS 306171 306245 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp CDS 306353 306493 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 306353 306736 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0105700-01.exon7;rank=7 +1 irgsp three_prime_UTR 306494 306736 . + . Parent=transcript:Os01t0105700-01 +### +1 irgsp gene 306871 308842 . - . ID=gene:Os01g0105800;Name=IRON-SULFUR CLUSTER PROTEIN 9;biotype=protein_coding;description=Similar to Iron sulfur assembly protein 1. (Os01t0105800-01);gene_id=Os01g0105800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 306871 308842 . - . ID=transcript:Os01t0105800-01;Parent=gene:Os01g0105800;biotype=protein_coding;transcript_id=Os01t0105800-01 +1 irgsp three_prime_UTR 306871 307123 . - . Parent=transcript:Os01t0105800-01 +1 irgsp exon 306871 307217 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0105800-01.exon4;rank=4 +1 irgsp CDS 307124 307217 . - 1 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp exon 307296 307413 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0105800-01.exon3;rank=3 +1 irgsp CDS 307296 307413 . 
- 2 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp CDS 308397 308601 . - 0 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp exon 308397 308626 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0105800-01.exon2;rank=2 +1 irgsp five_prime_UTR 308602 308626 . - . Parent=transcript:Os01t0105800-01 +1 irgsp exon 308703 308842 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105800-01.exon1;rank=1 +1 irgsp five_prime_UTR 308703 308842 . - . Parent=transcript:Os01t0105800-01 +### +1 irgsp gene 309520 313170 . - . ID=gene:Os01g0105900;biotype=protein_coding;description=Carbohydrate/purine kinase domain containing protein. (Os01t0105900-01);gene_id=Os01g0105900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 309520 313170 . - . ID=transcript:Os01t0105900-01;Parent=gene:Os01g0105900;biotype=protein_coding;transcript_id=Os01t0105900-01 +1 irgsp three_prime_UTR 309520 309821 . - . Parent=transcript:Os01t0105900-01 +1 irgsp exon 309520 310070 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0105900-01.exon8;rank=8 +1 irgsp CDS 309822 310070 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310256 310367 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0105900-01.exon7;rank=7 +1 irgsp CDS 310256 310367 . - 1 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310455 310552 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon6;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0105900-01.exon6;rank=6 +1 irgsp CDS 310455 310552 . 
- 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310632 310739 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon5;rank=5 +1 irgsp CDS 310632 310739 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310880 310918 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon4;rank=4 +1 irgsp CDS 310880 310918 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 311002 311073 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon3;rank=3 +1 irgsp CDS 311002 311073 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 311163 311426 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon2;rank=2 +1 irgsp CDS 311163 311426 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp CDS 312867 313064 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 312867 313170 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0105900-01.exon1;rank=1 +1 irgsp five_prime_UTR 313065 313170 . - . Parent=transcript:Os01t0105900-01 +### +1 irgsp gene 319754 322205 . + . ID=gene:Os01g0106200;biotype=protein_coding;description=Similar to RER1A protein (AtRER1A). (Os01t0106200-01);gene_id=Os01g0106200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 319754 322205 . + . 
ID=transcript:Os01t0106200-01;Parent=gene:Os01g0106200;biotype=protein_coding;transcript_id=Os01t0106200-01 +1 irgsp five_prime_UTR 319754 319874 . + . Parent=transcript:Os01t0106200-01 +1 irgsp exon 319754 320236 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0106200-01.exon1;rank=1 +1 irgsp CDS 319875 320236 . + 0 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp exon 321468 321648 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0106200-01.exon2;rank=2 +1 irgsp CDS 321468 321648 . + 1 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp CDS 321928 321975 . + 0 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp exon 321928 322205 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0106200-01.exon3;rank=3 +1 irgsp three_prime_UTR 321976 322205 . + . Parent=transcript:Os01t0106200-01 +### +1 irgsp gene 322591 323923 . - . ID=gene:Os01g0106300;biotype=protein_coding;description=Similar to Isoflavone reductase homolog IRL (EC 1.3.1.-). (Os01t0106300-01);gene_id=Os01g0106300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 322591 323923 . - . ID=transcript:Os01t0106300-01;Parent=gene:Os01g0106300;biotype=protein_coding;transcript_id=Os01t0106300-01 +1 irgsp three_prime_UTR 322591 322809 . - . Parent=transcript:Os01t0106300-01 +1 irgsp exon 322591 322973 . - . 
Parent=transcript:Os01t0106300-01;Name=Os01t0106300-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0106300-01.exon2;rank=2 diff --git a/src/agat/agat_sq_stat_basic/test_data/agat_sq_stat_basic_1.gff b/src/agat/agat_sq_stat_basic/test_data/agat_sq_stat_basic_1.gff new file mode 100644 index 00000000..d8fc1f4e --- /dev/null +++ b/src/agat/agat_sq_stat_basic/test_data/agat_sq_stat_basic_1.gff @@ -0,0 +1,12 @@ +Type (3rd column) Number Size total (kb) Size mean (bp) /!\Results are rounding to two decimal places +cds 290 69.69 240.30 +chromosome 1 43270.92 43270923.00 +exon 320 107.30 335.32 +five_prime_utr 79 11.77 149.03 +gene 52 158.83 3054.40 +mrna 65 197.99 3045.94 +ncrna_gene 1 0.08 81.00 +repeat_region 2 0.20 101.00 +three_prime_utr 70 25.60 365.66 +trna 1 0.08 81.00 +Total 881 43842.46 49764.43 diff --git a/src/agat/agat_sq_stat_basic/test_data/script.sh b/src/agat/agat_sq_stat_basic/test_data/script.sh new file mode 100755 index 00000000..5527955d --- /dev/null +++ b/src/agat/agat_sq_stat_basic/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/1.gff src/agat/agat_sq_stat_basic/test_data/ +cp -r /tmp/agat_source/t/scripts_output/out/agat_sq_stat_basic_1.gff src/agat/agat_sq_stat_basic/test_data/ \ No newline at end of file From 06005a79b49911f1197ccfddf066fc566d5b1def Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 2 Nov 2024 10:29:37 +0100 Subject: [PATCH 15/15] Add agat convert mfannot2gff (#112) * add help * add config * add run script * add test data and expected output + script to fetch them * add test * update changelog * cleanup * create temporary directory and clean up on exit * add requirements * update keywords * update --config description * add set -eo pipefail to script and test files * fxi create temporary directory * cleanup changelog * cleanup changelog --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 1 + .../agat_convert_mfannot2gff/config.vsh.yaml | 66 + src/agat/agat_convert_mfannot2gff/help.txt | 67 + src/agat/agat_convert_mfannot2gff/script.sh | 11 + src/agat/agat_convert_mfannot2gff/test.sh | 35 + .../test_data/agat_convert_mfannot2gff_1.gff | 240 ++ .../test_data/script.sh | 10 + .../test_data/test.mfannot | 2914 +++++++++++++++++ 8 files changed, 3344 insertions(+) create mode 100644 src/agat/agat_convert_mfannot2gff/config.vsh.yaml create mode 100644 src/agat/agat_convert_mfannot2gff/help.txt create mode 100644 src/agat/agat_convert_mfannot2gff/script.sh create mode 100644 src/agat/agat_convert_mfannot2gff/test.sh create mode 100644 src/agat/agat_convert_mfannot2gff/test_data/agat_convert_mfannot2gff_1.gff create mode 100755 src/agat/agat_convert_mfannot2gff/test_data/script.sh create mode 100644 src/agat/agat_convert_mfannot2gff/test_data/test.mfannot diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d86fa5..35aa33b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-65,6 +65,7 @@ - `agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat/agat_convert_mfannot2gff`: convert MFannot "masterfile" annotation to gff format (PR #112). - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). diff --git a/src/agat/agat_convert_mfannot2gff/config.vsh.yaml b/src/agat/agat_convert_mfannot2gff/config.vsh.yaml new file mode 100644 index 00000000..625c4613 --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/config.vsh.yaml @@ -0,0 +1,66 @@ +name: agat_convert_mfannot2gff +namespace: agat +description: | + Conversion utility for MFannot "masterfile" annotation produced by the + [MFannot pipeline](http://megasun.bch.umontreal.ca/RNAweasel/). Reports + GFF3 format. +keywords: [gene annotations, GFF , Mfannot] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_mfannot2gff.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3. +requirements: + - command: [agat] +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --mfannot + alternatives: [-m, -i] + description: The mfannot input file. + type: file + required: true + direction: input + example: input.mfannot + - name: Outputs + arguments: + - name: --gff + alternatives: [-g, -o] + description: The GFF output file. 
+ type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + AGAT config file. By default AGAT takes the original agat_config.yaml shipped with AGAT. The `--config` option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_mfannot2gff/help.txt b/src/agat/agat_convert_mfannot2gff/help.txt new file mode 100644 index 00000000..83536c5a --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/help.txt @@ -0,0 +1,67 @@ +```sh +agat_convert_mfannot2gff.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_mfannot2gff.pl + +Description: + Conversion utility for MFannot "masterfile" annotation produced by the + MFannot pipeline (http://megasun.bch.umontreal.ca/RNAweasel/). Reports + GFF3 format. + +Usage: + agat_convert_mfannot2gff.pl -m -o + agat_convert_mfannot2gff.pl --help + +Copyright and License: + Copyright (C) 2015, Brandon Seah (kbseah@mpi-bremen.de) ... GPL-3 ... 
+ modified by jacques dainat 2017-11 + +Options: + -m or -i or --mfannot + The mfannot input file + + -g or -o or --gff + the gff output file + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. 
+ + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_convert_mfannot2gff/script.sh b/src/agat/agat_convert_mfannot2gff/script.sh new file mode 100644 index 00000000..e4a32b1e --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +agat_convert_mfannot2gff.pl \ + --mfannot "$par_mfannot" \ + --gff "$par_gff" \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_convert_mfannot2gff/test.sh b/src/agat/agat_convert_mfannot2gff/test.sh new file mode 100644 index 00000000..19f79b6d --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/test.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --mfannot "$test_dir/test.mfannot" \ + --gff "$TMPDIR/output.gff" + +echo ">> Checking output" +[ ! -f "$TMPDIR/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$TMPDIR/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$TMPDIR/output.gff" "$test_dir/agat_convert_mfannot2gff_1.gff" +if [ $? 
-ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_mfannot2gff/test_data/agat_convert_mfannot2gff_1.gff b/src/agat/agat_convert_mfannot2gff/test_data/agat_convert_mfannot2gff_1.gff new file mode 100644 index 00000000..6c6c6e2f --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/test_data/agat_convert_mfannot2gff_1.gff @@ -0,0 +1,240 @@ +##gff-version 3 +tig00000088 mfannot mRNA 375 3557 . - . ID=mRNA_1;Name=atp1;gene=atp1;transl_table=4 +tig00000088 mfannot exon 375 3557 . - . ID=exon_1;Parent=atp1;Name=atp1;gene=atp1;transl_table=4 +tig00000088 mfannot mRNA 2947 3618 . + . ID=mRNA_2;Name=orf223;gene=orf223;transl_table=4 +tig00000088 mfannot exon 2947 3618 . + . ID=exon_2;Parent=orf223;Name=orf223;gene=orf223;transl_table=4 +tig00000088 mfannot mRNA 3948 8683 . - . ID=mRNA_3;Name=cox3;gene=cox3;transl_table=4 +tig00000088 mfannot exon 3948 8683 . - . ID=exon_3;Parent=cox3;Name=cox3;gene=cox3;transl_table=4 +tig00000088 mfannot group_II_intron 8789 9291 . + . ID=group_II_intron_1;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 9292 9432 . - . ID=mRNA_4;Name=nad9;gene=nad9;transl_table=4 +tig00000088 mfannot exon 9292 9432 . - . ID=exon_4;Parent=nad9;Name=nad9;gene=nad9;transl_table=4 +tig00000088 mfannot group_II_intron 9491 9970 . + . ID=group_II_intron_2;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot mRNA 9971 10423 . - . ID=mRNA_5;Name=nad9;gene=nad9;transl_table=4 +tig00000088 mfannot exon 9971 10423 . - . ID=exon_5;Parent=nad9;Name=nad9;gene=nad9;transl_table=4 +tig00000088 mfannot mRNA 10429 10545 . - . ID=mRNA_6;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot exon 10429 10545 . - . ID=exon_6;Parent=cox2;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot group_II_intron 10613 11201 . + . 
ID=group_II_intron_3;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 11202 11519 . - . ID=mRNA_7;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot exon 11202 11519 . - . ID=exon_7;Parent=cox2;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot group_II_intron 11584 12755 . + . ID=group_II_intron_4;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot mRNA 12756 13190 . - . ID=mRNA_8;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot exon 12756 13190 . - . ID=exon_8;Parent=cox2;Name=cox2;gene=cox2;transl_table=4 +tig00000088 mfannot mRNA 13595 15460 . - . ID=mRNA_9;Name=orf621;gene=orf621;transl_table=4 +tig00000088 mfannot exon 13595 15460 . - . ID=exon_9;Parent=orf621;Name=orf621;gene=orf621;transl_table=4 +tig00000088 mfannot mRNA 15841 33346 . - . ID=mRNA_10;Name=cox1;gene=cox1;transl_table=4 +tig00000088 mfannot exon 15841 33346 . - . ID=exon_10;Parent=cox1;Name=cox1;gene=cox1;transl_table=4 +tig00000088 mfannot group_II_intron 33462 34862 . + . ID=group_II_intron_5;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot group_II_intron 35352 35430 . + . ID=group_II_intron_6;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot mRNA 35431 37011 . - . ID=mRNA_11;Name=orf526;gene=orf526;transl_table=4 +tig00000088 mfannot exon 35431 37011 . - . ID=exon_11;Parent=orf526;Name=orf526;gene=orf526;transl_table=4 +tig00000088 mfannot mRNA 37784 38089 . - . ID=mRNA_12;Name=nad4L;gene=nad4L;transl_table=4 +tig00000088 mfannot exon 37784 38089 . - . ID=exon_12;Parent=nad4L;Name=nad4L;gene=nad4L;transl_table=4 +tig00000088 mfannot group_II_intron 38283 38632 . + . ID=group_II_intron_7;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot mRNA 38633 40147 . - . ID=mRNA_13;Name=orf504;gene=orf504;transl_table=4 +tig00000088 mfannot exon 38633 40147 . - . 
ID=exon_13;Parent=orf504;Name=orf504;gene=orf504;transl_table=4 +tig00000088 mfannot mRNA 43290 43955 . - . ID=mRNA_14;Name=nad1;gene=nad1;transl_table=4 +tig00000088 mfannot exon 43290 43955 . - . ID=exon_14;Parent=nad1;Name=nad1;gene=nad1;transl_table=4 +tig00000088 mfannot group_II_intron 44168 44599 . + . ID=group_II_intron_8;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 44600 53026 . - . ID=mRNA_15;Name=cob;gene=cob;transl_table=4 +tig00000088 mfannot exon 44600 53026 . - . ID=exon_15;Parent=cob;Name=cob;gene=cob;transl_table=4 +tig00000088 mfannot mRNA 54956 55507 . - . ID=mRNA_16;Name=rpl5;gene=rpl5;transl_table=4 +tig00000088 mfannot exon 54956 55507 . - . ID=exon_16;Parent=rpl5;Name=rpl5;gene=rpl5;transl_table=4 +tig00000088 mfannot mRNA 55526 55897 . - . ID=mRNA_17;Name=rpl14;gene=rpl14;transl_table=4 +tig00000088 mfannot exon 55526 55897 . - . ID=exon_17;Parent=rpl14;Name=rpl14;gene=rpl14;transl_table=4 +tig00000088 mfannot mRNA 56168 56542 . - . ID=mRNA_18;Name=atp8;gene=atp8;transl_table=4 +tig00000088 mfannot exon 56168 56542 . - . ID=exon_18;Parent=atp8;Name=atp8;gene=atp8;transl_table=4 +tig00000088 mfannot mRNA 57298 58023 . - . ID=mRNA_19;Name=orf241;gene=orf241;transl_table=4 +tig00000088 mfannot exon 57298 58023 . - . ID=exon_19;Parent=orf241;Name=orf241;gene=orf241;transl_table=4 +tig00000088 mfannot mRNA 58024 58434 . - . ID=mRNA_20;Name=rpl16;gene=rpl16;transl_table=4 +tig00000088 mfannot exon 58024 58434 . - . ID=exon_20;Parent=rpl16;Name=rpl16;gene=rpl16;transl_table=4 +tig00000088 mfannot mRNA 58447 59346 . - . ID=mRNA_21;Name=rps3;gene=rps3;transl_table=4 +tig00000088 mfannot exon 58447 59346 . - . ID=exon_21;Parent=rps3;Name=rps3;gene=rps3;transl_table=4 +tig00000088 mfannot mRNA 58447 59430 . - . ID=mRNA_22;Name=orf327;gene=orf327;transl_table=4 +tig00000088 mfannot exon 58447 59430 . - . ID=exon_22;Parent=orf327;Name=orf327;gene=orf327;transl_table=4 +tig00000088 mfannot mRNA 59324 59578 . - . 
ID=mRNA_23;Name=rps19;gene=rps19;transl_table=4 +tig00000088 mfannot exon 59324 59578 . - . ID=exon_23;Parent=rps19;Name=rps19;gene=rps19;transl_table=4 +tig00000088 mfannot mRNA 62407 64761 . - . ID=mRNA_24;Name=orf784;gene=orf784;transl_table=4 +tig00000088 mfannot exon 62407 64761 . - . ID=exon_24;Parent=orf784;Name=orf784;gene=orf784;transl_table=4 +tig00000088 mfannot mRNA 62484 64694 . - . ID=mRNA_25;Name=orf736;gene=orf736;transl_table=4 +tig00000088 mfannot exon 62484 64694 . - . ID=exon_25;Parent=orf736;Name=orf736;gene=orf736;transl_table=4 +tig00000088 mfannot mRNA 62497 64800 . + . ID=mRNA_26;Name=orf767;gene=orf767;transl_table=4 +tig00000088 mfannot exon 62497 64800 . + . ID=exon_26;Parent=orf767;Name=orf767;gene=orf767;transl_table=4 +tig00000088 mfannot mRNA 62505 64790 . + . ID=mRNA_27;Name=orf761;gene=orf761;transl_table=4 +tig00000088 mfannot exon 62505 64790 . + . ID=exon_27;Parent=orf761;Name=orf761;gene=orf761;transl_table=4 +tig00000088 mfannot mRNA 62579 64786 . + . ID=mRNA_28;Name=orf735;gene=orf735;transl_table=4 +tig00000088 mfannot exon 62579 64786 . + . ID=exon_28;Parent=orf735;Name=orf735;gene=orf735;transl_table=4 +tig00000088 mfannot mRNA 67403 71938 . - . ID=mRNA_29;Name=orf1511;gene=orf1511;transl_table=4 +tig00000088 mfannot exon 67403 71938 . - . ID=exon_29;Parent=orf1511;Name=orf1511;gene=orf1511;transl_table=4 +tig00000088 mfannot mRNA 67413 71873 . - . ID=mRNA_30;Name=orf1486;gene=orf1486;transl_table=4 +tig00000088 mfannot exon 67413 71873 . - . ID=exon_30;Parent=orf1486;Name=orf1486;gene=orf1486;transl_table=4 +tig00000088 mfannot mRNA 67417 71835 . - . ID=mRNA_31;Name=orf1472;gene=orf1472;transl_table=4 +tig00000088 mfannot exon 67417 71835 . - . ID=exon_31;Parent=orf1472;Name=orf1472;gene=orf1472;transl_table=4 +tig00000088 mfannot mRNA 68331 70100 . + . ID=mRNA_32;Name=orf589;gene=orf589;transl_table=4 +tig00000088 mfannot exon 68331 70100 . + . 
ID=exon_32;Parent=orf589;Name=orf589;gene=orf589;transl_table=4 +tig00000088 mfannot mRNA 68495 70594 . + . ID=mRNA_33;Name=orf699;gene=orf699;transl_table=4 +tig00000088 mfannot exon 68495 70594 . + . ID=exon_33;Parent=orf699;Name=orf699;gene=orf699;transl_table=4 +tig00000088 mfannot mRNA 69979 71091 . + . ID=mRNA_34;Name=orf370;gene=orf370;transl_table=4 +tig00000088 mfannot exon 69979 71091 . + . ID=exon_34;Parent=orf370;Name=orf370;gene=orf370;transl_table=4 +tig00000088 mfannot tRNA 72094 72164 . + . ID=tRNA_1;Name=trnW(uca)_1;gene=trnW(uca)_1;transl_table=4 +tig00000088 mfannot exon 72094 72164 . + . ID=exon_35;Parent=tRNA_1;Name=trnW(uca)_1;gene=trnW(uca)_1;transl_table=4 +tig00000088 mfannot mRNA 72179 72577 . + . ID=mRNA_35;Name=rps13_1;gene=rps13_1;transl_table=4 +tig00000088 mfannot exon 72179 72577 . + . ID=exon_36;Parent=rps13_1;Name=rps13_1;gene=rps13_1;transl_table=4 +tig00000088 mfannot mRNA 72669 91559 . + . ID=mRNA_36;Name=rps11;gene=rps11;transl_table=4 +tig00000088 mfannot exon 72669 91559 . + . ID=exon_37;Parent=rps11;Name=rps11;gene=rps11;transl_table=4 +tig00000088 mfannot mRNA 72981 73280 . + . ID=mRNA_37;Name=rps14_1;gene=rps14_1;transl_table=4 +tig00000088 mfannot exon 72981 73280 . + . ID=exon_38;Parent=rps14_1;Name=rps14_1;gene=rps14_1;transl_table=4 +tig00000088 mfannot mRNA 73309 74238 . + . ID=mRNA_38;Name=rps8_1;gene=rps8_1;transl_table=4 +tig00000088 mfannot exon 73309 74238 . + . ID=exon_39;Parent=rps8_1;Name=rps8_1;gene=rps8_1;transl_table=4 +tig00000088 mfannot mRNA 73708 74238 . + . ID=mRNA_39;Name=rpl6_1;gene=rpl6_1;transl_table=4 +tig00000088 mfannot exon 73708 74238 . + . ID=exon_40;Parent=rpl6_1;Name=rpl6_1;gene=rpl6_1;transl_table=4 +tig00000088 mfannot mRNA 74288 74656 . + . ID=mRNA_40;Name=rps12_1;gene=rps12_1;transl_table=4 +tig00000088 mfannot exon 74288 74656 . + . ID=exon_41;Parent=rps12_1;Name=rps12_1;gene=rps12_1;transl_table=4 +tig00000088 mfannot mRNA 74597 74917 . - . 
ID=mRNA_41;Name=orf106;gene=orf106;transl_table=4 +tig00000088 mfannot exon 74597 74917 . - . ID=exon_42;Parent=orf106;Name=orf106;gene=orf106;transl_table=4 +tig00000088 mfannot tRNA 75137 75208 . + . ID=tRNA_2;Name=trnP(ugg)_1;gene=trnP(ugg)_1;transl_table=4 +tig00000088 mfannot exon 75137 75208 . + . ID=exon_43;Parent=tRNA_2;Name=trnP(ugg)_1;gene=trnP(ugg)_1;transl_table=4 +tig00000088 mfannot mRNA 76605 77011 . - . ID=mRNA_42;Name=rpl16;gene=rpl16;transl_table=4 +tig00000088 mfannot exon 76605 77011 . - . ID=exon_44;Parent=rpl16;Name=rpl16;gene=rpl16;transl_table=4 +tig00000088 mfannot mRNA 81073 83373 . + . ID=mRNA_43;Name=orf766;gene=orf766;transl_table=4 +tig00000088 mfannot exon 81073 83373 . + . ID=exon_45;Parent=orf766;Name=orf766;gene=orf766;transl_table=4 +tig00000088 mfannot mRNA 81081 83363 . + . ID=mRNA_44;Name=orf760;gene=orf760;transl_table=4 +tig00000088 mfannot exon 81081 83363 . + . ID=exon_46;Parent=orf760;Name=orf760;gene=orf760;transl_table=4 +tig00000088 mfannot mRNA 81155 83359 . + . ID=mRNA_45;Name=orf734;gene=orf734;transl_table=4 +tig00000088 mfannot exon 81155 83359 . + . ID=exon_47;Parent=orf734;Name=orf734;gene=orf734;transl_table=4 +tig00000088 mfannot mRNA 81661 82935 . - . ID=mRNA_46;Name=orf424;gene=orf424;transl_table=4 +tig00000088 mfannot exon 81661 82935 . - . ID=exon_48;Parent=orf424;Name=orf424;gene=orf424;transl_table=4 +tig00000088 mfannot mRNA 82320 83267 . - . ID=mRNA_47;Name=orf315;gene=orf315;transl_table=4 +tig00000088 mfannot exon 82320 83267 . - . ID=exon_49;Parent=orf315;Name=orf315;gene=orf315;transl_table=4 +tig00000088 mfannot mRNA 85976 90457 . - . ID=mRNA_48;Name=orf1493;gene=orf1493;transl_table=4 +tig00000088 mfannot exon 85976 90457 . - . ID=exon_50;Parent=orf1493;Name=orf1493;gene=orf1493;transl_table=4 +tig00000088 mfannot mRNA 85986 90419 . - . ID=mRNA_49;Name=orf1477;gene=orf1477;transl_table=4 +tig00000088 mfannot exon 85986 90419 . - . 
ID=exon_51;Parent=orf1477;Name=orf1477;gene=orf1477;transl_table=4 +tig00000088 mfannot mRNA 85990 90522 . - . ID=mRNA_50;Name=orf1510;gene=orf1510;transl_table=4 +tig00000088 mfannot exon 85990 90522 . - . ID=exon_52;Parent=orf1510;Name=orf1510;gene=orf1510;transl_table=4 +tig00000088 mfannot mRNA 86082 89342 . + . ID=mRNA_51;Name=orf1086;gene=orf1086;transl_table=4 +tig00000088 mfannot exon 86082 89342 . + . ID=exon_53;Parent=orf1086;Name=orf1086;gene=orf1086;transl_table=4 +tig00000088 mfannot mRNA 86161 89838 . + . ID=mRNA_52;Name=orf1225;gene=orf1225;transl_table=4 +tig00000088 mfannot exon 86161 89838 . + . ID=exon_54;Parent=orf1225;Name=orf1225;gene=orf1225;transl_table=4 +tig00000088 mfannot mRNA 89216 90571 . + . ID=mRNA_53;Name=orf451;gene=orf451;transl_table=4 +tig00000088 mfannot exon 89216 90571 . + . ID=exon_55;Parent=orf451;Name=orf451;gene=orf451;transl_table=4 +tig00000088 mfannot tRNA 90678 90748 . + . ID=tRNA_3;Name=trnW(uca)_2;gene=trnW(uca)_2;transl_table=4 +tig00000088 mfannot exon 90678 90748 . + . ID=exon_56;Parent=tRNA_3;Name=trnW(uca)_2;gene=trnW(uca)_2;transl_table=4 +tig00000088 mfannot mRNA 90763 91161 . + . ID=mRNA_54;Name=rps13_2;gene=rps13_2;transl_table=4 +tig00000088 mfannot exon 90763 91161 . + . ID=exon_57;Parent=rps13_2;Name=rps13_2;gene=rps13_2;transl_table=4 +tig00000088 mfannot mRNA 91566 91865 . + . ID=mRNA_55;Name=rps14_2;gene=rps14_2;transl_table=4 +tig00000088 mfannot exon 91566 91865 . + . ID=exon_58;Parent=rps14_2;Name=rps14_2;gene=rps14_2;transl_table=4 +tig00000088 mfannot mRNA 91894 92277 . + . ID=mRNA_56;Name=rps8_2;gene=rps8_2;transl_table=4 +tig00000088 mfannot exon 91894 92277 . + . ID=exon_59;Parent=rps8_2;Name=rps8_2;gene=rps8_2;transl_table=4 +tig00000088 mfannot mRNA 92295 92825 . + . ID=mRNA_57;Name=rpl6_2;gene=rpl6_2;transl_table=4 +tig00000088 mfannot exon 92295 92825 . + . ID=exon_60;Parent=rpl6_2;Name=rpl6_2;gene=rpl6_2;transl_table=4 +tig00000088 mfannot mRNA 92875 93243 . + . 
ID=mRNA_58;Name=rps12_2;gene=rps12_2;transl_table=4 +tig00000088 mfannot exon 92875 93243 . + . ID=exon_61;Parent=rps12_2;Name=rps12_2;gene=rps12_2;transl_table=4 +tig00000088 mfannot mRNA 93224 93682 . + . ID=mRNA_59;Name=rps7;gene=rps7;transl_table=4 +tig00000088 mfannot exon 93224 93682 . + . ID=exon_62;Parent=rps7;Name=rps7;gene=rps7;transl_table=4 +tig00000088 mfannot tRNA 93720 93791 . + . ID=tRNA_4;Name=trnP(ugg)_2;gene=trnP(ugg)_2;transl_table=4 +tig00000088 mfannot exon 93720 93791 . + . ID=exon_63;Parent=tRNA_4;Name=trnP(ugg)_2;gene=trnP(ugg)_2;transl_table=4 +tig00000088 mfannot mRNA 93823 94440 . + . ID=mRNA_60;Name=rps4;gene=rps4;transl_table=4 +tig00000088 mfannot exon 93823 94440 . + . ID=exon_64;Parent=rps4;Name=rps4;gene=rps4;transl_table=4 +tig00000088 mfannot mRNA 95255 96652 . + . ID=mRNA_61;Name=orf465;gene=orf465;transl_table=4 +tig00000088 mfannot exon 95255 96652 . + . ID=exon_65;Parent=orf465;Name=orf465;gene=orf465;transl_table=4 +tig00000088 mfannot group_II_intron 96715 97278 . + . ID=group_II_intron_9;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot group_II_intron 97835 97857 . + . ID=group_II_intron_10;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 97858 100740 . + . ID=mRNA_62;Name=nad5;gene=nad5;transl_table=4 +tig00000088 mfannot exon 97858 100740 . + . ID=exon_66;Parent=nad5;Name=nad5;gene=nad5;transl_table=4 +tig00000088 mfannot mRNA 100756 100971 . + . ID=mRNA_63;Name=nad6;gene=nad6;transl_table=4 +tig00000088 mfannot exon 100756 100971 . + . ID=exon_67;Parent=nad6;Name=nad6;gene=nad6;transl_table=4 +tig00000088 mfannot mRNA 101416 103482 . + . ID=mRNA_64;Name=orf688;gene=orf688;transl_table=4 +tig00000088 mfannot exon 101416 103482 . + . ID=exon_68;Parent=orf688;Name=orf688;gene=orf688;transl_table=4 +tig00000088 mfannot group_II_intron 103569 103575 . + . ID=group_II_intron_11;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 103576 103974 . + . 
ID=mRNA_65;Name=orf132;gene=orf132;transl_table=4 +tig00000088 mfannot exon 103576 103974 . + . ID=exon_69;Parent=orf132;Name=orf132;gene=orf132;transl_table=4 +tig00000088 mfannot tRNA 104056 104128 . + . ID=tRNA_5;Name=trnR(ucu);gene=trnR(ucu);transl_table=4 +tig00000088 mfannot exon 104056 104128 . + . ID=exon_70;Parent=tRNA_5;Name=trnR(ucu);gene=trnR(ucu);transl_table=4 +tig00000088 mfannot mRNA 104153 104224 . - . ID=mRNA_66;Name=nad3;gene=nad3;transl_table=4 +tig00000088 mfannot exon 104153 104224 . - . ID=exon_71;Parent=nad3;Name=nad3;gene=nad3;transl_table=4 +tig00000088 mfannot group_II_intron 104436 105029 . + . ID=group_II_intron_12;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot mRNA 105030 107969 . - . ID=mRNA_67;Name=atp6;gene=atp6;transl_table=4 +tig00000088 mfannot exon 105030 107969 . - . ID=exon_72;Parent=atp6;Name=atp6;gene=atp6;transl_table=4 +tig00000088 mfannot mRNA 108059 108412 . - . ID=mRNA_68;Name=rps10;gene=rps10;transl_table=4 +tig00000088 mfannot exon 108059 108412 . - . ID=exon_73;Parent=rps10;Name=rps10;gene=rps10;transl_table=4 +tig00000088 mfannot mRNA 108421 109893 . - . ID=mRNA_69;Name=nad2;gene=nad2;transl_table=4 +tig00000088 mfannot exon 108421 109893 . - . ID=exon_74;Parent=nad2;Name=nad2;gene=nad2;transl_table=4 +tig00000088 mfannot mRNA 110001 118556 . + . ID=mRNA_70;Name=nad7;gene=nad7;transl_table=4 +tig00000088 mfannot exon 110001 118556 . + . ID=exon_75;Parent=nad7;Name=nad7;gene=nad7;transl_table=4 +tig00000088 mfannot group_II_intron 119144 119308 . + . ID=group_II_intron_13;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot mRNA 119309 121269 . + . ID=mRNA_71;Name=nad4;gene=nad4;transl_table=4 +tig00000088 mfannot exon 119309 121269 . + . ID=exon_76;Parent=nad4;Name=nad4;gene=nad4;transl_table=4 +tig00000088 mfannot mRNA 121551 121778 . + . ID=mRNA_72;Name=atp9;gene=atp9;transl_table=4 +tig00000088 mfannot exon 121551 121778 . + . 
ID=exon_77;Parent=atp9;Name=atp9;gene=atp9;transl_table=4 +tig00000088 mfannot tRNA 121887 121959 . + . ID=tRNA_6;Name=trnD(guc);gene=trnD(guc);transl_table=4 +tig00000088 mfannot exon 121887 121959 . + . ID=exon_78;Parent=tRNA_6;Name=trnD(guc);gene=trnD(guc);transl_table=4 +tig00000088 mfannot tRNA 121962 122033 . + . ID=tRNA_7;Name=trnC(gca);gene=trnC(gca);transl_table=4 +tig00000088 mfannot exon 121962 122033 . + . ID=exon_79;Parent=tRNA_7;Name=trnC(gca);gene=trnC(gca);transl_table=4 +tig00000088 mfannot tRNA 122051 122123 . + . ID=tRNA_8;Name=trnH(gug);gene=trnH(gug);transl_table=4 +tig00000088 mfannot exon 122051 122123 . + . ID=exon_80;Parent=tRNA_8;Name=trnH(gug);gene=trnH(gug);transl_table=4 +tig00000088 mfannot tRNA 122142 122214 . + . ID=tRNA_9;Name=trnV(uac);gene=trnV(uac);transl_table=4 +tig00000088 mfannot exon 122142 122214 . + . ID=exon_81;Parent=tRNA_9;Name=trnV(uac);gene=trnV(uac);transl_table=4 +tig00000088 mfannot mRNA 122234 122446 . + . ID=mRNA_73;Name=rnpB;gene=rnpB;transl_table=4 +tig00000088 mfannot exon 122234 122446 . + . ID=exon_82;Parent=rnpB;Name=rnpB;gene=rnpB;transl_table=4 +tig00000088 mfannot rRNA 122544 123762 . + . ID=rRNA_1;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 122544 123762 . + . ID=exon_83;Parent=rRNA_1;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot group_II_intron 123576 123762 . + . ID=group_II_intron_14;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot rRNA 123763 124009 . + . ID=rRNA_2;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 123763 124009 . + . ID=exon_84;Parent=rRNA_2;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot rRNA 124010 124127 . + . ID=rRNA_3;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 124010 124127 . + . ID=exon_85;Parent=rRNA_3;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot rRNA 124128 124832 . + . ID=rRNA_4;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 124128 124832 . + . 
ID=exon_86;Parent=rRNA_4;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot mRNA 124833 125279 . + . ID=mRNA_74;Name=orf148;gene=orf148;transl_table=4 +tig00000088 mfannot exon 124833 125279 . + . ID=exon_87;Parent=orf148;Name=orf148;gene=orf148;transl_table=4 +tig00000088 mfannot group_II_intron 124847 124962 . + . ID=group_II_intron_15;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot rRNA 124963 125117 . + . ID=rRNA_5;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 124963 125117 . + . ID=exon_88;Parent=rRNA_5;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot rRNA 125118 125231 . + . ID=rRNA_6;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 125118 125231 . + . ID=exon_89;Parent=rRNA_6;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot rRNA 125232 125279 . + . ID=rRNA_7;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 125232 125279 . + . ID=exon_90;Parent=rRNA_7;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot rRNA 125493 125529 . + . ID=rRNA_8;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot exon 125493 125529 . + . ID=exon_91;Parent=rRNA_8;Name=rns;gene=rns;transl_table=4 +tig00000088 mfannot mRNA 125530 125635 . + . ID=mRNA_75;Name=rrn5;gene=rrn5;transl_table=4 +tig00000088 mfannot exon 125530 125635 . + . ID=exon_92;Parent=rrn5;Name=rrn5;gene=rrn5;transl_table=4 +tig00000088 mfannot tRNA 125644 125715 . + . ID=tRNA_10;Name=trnF(gaa);gene=trnF(gaa);transl_table=4 +tig00000088 mfannot exon 125644 125715 . + . ID=exon_93;Parent=tRNA_10;Name=trnF(gaa);gene=trnF(gaa);transl_table=4 +tig00000088 mfannot tRNA 125734 125806 . + . ID=tRNA_11;Name=trnK(uuu);gene=trnK(uuu);transl_table=4 +tig00000088 mfannot exon 125734 125806 . + . ID=exon_94;Parent=tRNA_11;Name=trnK(uuu);gene=trnK(uuu);transl_table=4 +tig00000088 mfannot tRNA 126093 126165 . + . ID=tRNA_12;Name=trnT(ugu);gene=trnT(ugu);transl_table=4 +tig00000088 mfannot exon 126093 126165 . + . 
ID=exon_95;Parent=tRNA_12;Name=trnT(ugu);gene=trnT(ugu);transl_table=4 +tig00000088 mfannot tRNA 126180 126251 . + . ID=tRNA_13;Name=trnM(cau)_1;gene=trnM(cau)_1;transl_table=4 +tig00000088 mfannot exon 126180 126251 . + . ID=exon_96;Parent=tRNA_13;Name=trnM(cau)_1;gene=trnM(cau)_1;transl_table=4 +tig00000088 mfannot tRNA 126284 126356 . + . ID=tRNA_14;Name=trnM(cau)_2;gene=trnM(cau)_2;transl_table=4 +tig00000088 mfannot exon 126284 126356 . + . ID=exon_97;Parent=tRNA_14;Name=trnM(cau)_2;gene=trnM(cau)_2;transl_table=4 +tig00000088 mfannot tRNA 126364 126435 . + . ID=tRNA_15;Name=trnA(ugc);gene=trnA(ugc);transl_table=4 +tig00000088 mfannot exon 126364 126435 . + . ID=exon_98;Parent=tRNA_15;Name=trnA(ugc);gene=trnA(ugc);transl_table=4 +tig00000088 mfannot tRNA 126453 126525 . + . ID=tRNA_16;Name=trnR(ucg);gene=trnR(ucg);transl_table=4 +tig00000088 mfannot exon 126453 126525 . + . ID=exon_99;Parent=tRNA_16;Name=trnR(ucg);gene=trnR(ucg);transl_table=4 +tig00000088 mfannot tRNA 126528 126600 . + . ID=tRNA_17;Name=trnI(gau);gene=trnI(gau);transl_table=4 +tig00000088 mfannot exon 126528 126600 . + . ID=exon_100;Parent=tRNA_17;Name=trnI(gau);gene=trnI(gau);transl_table=4 +tig00000088 mfannot tRNA 126629 126710 . + . ID=tRNA_18;Name=trnL(uag);gene=trnL(uag);transl_table=4 +tig00000088 mfannot exon 126629 126710 . + . ID=exon_101;Parent=tRNA_18;Name=trnL(uag);gene=trnL(uag);transl_table=4 +tig00000088 mfannot tRNA 126724 126796 . + . ID=tRNA_19;Name=trnN(guu);gene=trnN(guu);transl_table=4 +tig00000088 mfannot exon 126724 126796 . + . ID=exon_102;Parent=tRNA_19;Name=trnN(guu);gene=trnN(guu);transl_table=4 +tig00000088 mfannot tRNA 126797 126881 . + . ID=tRNA_20;Name=trnY(gua);gene=trnY(gua);transl_table=4 +tig00000088 mfannot exon 126797 126881 . + . ID=exon_103;Parent=tRNA_20;Name=trnY(gua);gene=trnY(gua);transl_table=4 +tig00000088 mfannot tRNA 126907 126978 . + . ID=tRNA_21;Name=trnE(uuc);gene=trnE(uuc);transl_table=4 +tig00000088 mfannot exon 126907 126978 . + . 
ID=exon_104;Parent=tRNA_21;Name=trnE(uuc);gene=trnE(uuc);transl_table=4 +tig00000088 mfannot tRNA 127002 127072 . + . ID=tRNA_22;Name=trnQ(uug);gene=trnQ(uug);transl_table=4 +tig00000088 mfannot exon 127002 127072 . + . ID=exon_105;Parent=tRNA_22;Name=trnQ(uug);gene=trnQ(uug);transl_table=4 +tig00000088 mfannot tRNA 127097 127167 . + . ID=tRNA_23;Name=trnG(ucc);gene=trnG(ucc);transl_table=4 +tig00000088 mfannot exon 127097 127167 . + . ID=exon_106;Parent=tRNA_23;Name=trnG(ucc);gene=trnG(ucc);transl_table=4 +tig00000088 mfannot rRNA 127170 132900 . + . ID=rRNA_9;Name=rnl;gene=rnl;transl_table=4 +tig00000088 mfannot exon 127170 132900 . + . ID=exon_107;Parent=rRNA_9;Name=rnl;gene=rnl;transl_table=4 +tig00000088 mfannot group_II_intron 128101 130559 . + . ID=group_II_intron_16;Name=group%3DII;gene=group%3DII;transl_table=4 +tig00000088 mfannot group_II_intron 132446 132900 . + . ID=group_II_intron_17;Name=group%3DII(derived);gene=group%3DII(derived);transl_table=4 +tig00000088 mfannot rRNA 132901 132923 . + . ID=rRNA_10;Name=rnl;gene=rnl;transl_table=4 +tig00000088 mfannot exon 132901 132923 . + . ID=exon_108;Parent=rRNA_10;Name=rnl;gene=rnl;transl_table=4 +tig00000088 mfannot tRNA 132924 133010 . + . ID=tRNA_24;Name=trnS(gcu);gene=trnS(gcu);transl_table=4 +tig00000088 mfannot exon 132924 133010 . + . ID=exon_109;Parent=tRNA_24;Name=trnS(gcu);gene=trnS(gcu);transl_table=4 +tig00000088 mfannot tRNA 133023 133103 . + . ID=tRNA_25;Name=trnL(uaa);gene=trnL(uaa);transl_table=4 +tig00000088 mfannot exon 133023 133103 . + . ID=exon_110;Parent=tRNA_25;Name=trnL(uaa);gene=trnL(uaa);transl_table=4 +tig00000088 mfannot tRNA 133131 133218 . + . ID=tRNA_26;Name=trnS(uga);gene=trnS(uga);transl_table=4 +tig00000088 mfannot exon 133131 133218 . + . 
ID=exon_111;Parent=tRNA_26;Name=trnS(uga);gene=trnS(uga);transl_table=4 diff --git a/src/agat/agat_convert_mfannot2gff/test_data/script.sh b/src/agat/agat_convert_mfannot2gff/test_data/script.sh new file mode 100755 index 00000000..f60aa8dd --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/test.mfannot src/agat/agat_convert_mfannot2gff/test_data/ +cp -r /tmp/agat_source/t/scripts_output/out/agat_convert_mfannot2gff_1.gff src/agat/agat_convert_mfannot2gff/test_data/ \ No newline at end of file diff --git a/src/agat/agat_convert_mfannot2gff/test_data/test.mfannot b/src/agat/agat_convert_mfannot2gff/test_data/test.mfannot new file mode 100644 index 00000000..7a33b19a --- /dev/null +++ b/src/agat/agat_convert_mfannot2gff/test_data/test.mfannot @@ -0,0 +1,2914 @@ +;; Masterfile modified automatically by mfannot version 1.33 +;; - Gene Totals: 106 +;; - List of genes added: +;; atp1 (3 introns) atp6 (1 introns) atp8 +;; atp9 cob (6 introns) cox1 (11 introns) +;; cox3 (3 introns) nad1 nad2 +;; nad3 nad4 (1 introns) nad4L +;; nad5 (2 introns) nad7 (6 introns) orf101 +;; orf106 orf1086 orf119 +;; orf1225 orf123 orf132 +;; orf1472 orf1477 orf148 +;; orf1486 orf149 orf1493 +;; orf1510 orf1511 orf158 +;; orf204 orf223 orf240 +;; orf241 orf259 orf269 +;; orf315 orf327 orf353 +;; orf370 orf385 orf424 +;; orf451 orf465 orf499 +;; orf504 orf505 orf511 +;; orf526 orf550 orf580 +;; orf589 orf621 orf671 +;; orf673 orf676 orf688 +;; orf699 orf734 orf735 +;; orf736 orf750 orf760 +;; orf761 orf766 orf767 +;; orf784 rnpB rpl14 +;; rpl16 rpl5 rpl6 +;; rps10 rps11 rps12 +;; rps13 rps14 rps19 +;; rps3 rps4 rps7 +;; rps8 rrn5 trnA(ugc) +;; trnC(gca) trnD(guc) trnE(uuc) +;; trnF(gaa) trnG(ucc) trnH(gug) +;; trnI(gau) 
trnK(uuu) trnL(uaa) +;; trnL(uag) trnM(cau) trnN(guu) +;; trnP(ugg) trnQ(uug) trnR(ucg) +;; trnR(ucu) trnS(gcu) trnS(uga) +;; trnT(ugu) trnV(uac) trnW(uca) +;; trnY(gua) +;; +;; end mfannot +;; + + +>tig00000088 gc=4 + 1 GAATTTTAAGTTTATCTAAAATATAGAAAATAAAAATATATTTTTATTTTATGCAGTTTT + 61 TGTATATCATAAATCTTAAGTGTTATTTAACATTTATTTTAGTAAATTTAAGAATAGATT + 121 TTTAAAATAACAAATATAATAATGAACCAGTTATTATTTATAAATTATTTGTAGTAATAA + 181 GATAAATTAACTTTATATTTTAGTTATATAGTTATAATTAGTATAGTATGTATAAATTGG + 241 CATTTATAATATTAGTTACATTAACTATAAAATTAATTTTATATGTTTTTTGATTTTTTC + 301 TAAAAAAATTTGTATCATTTGGAGAAATCTAAGATGAGTTGGTATTAACTAATGATGGTT + 361 ATTGGTTAAAAATA +; G-atp1 <== end +; G-atp1-E4 <== end + 375 TTAAAGATTAATTTCAGAGGTAAGCAGAGATTGTAATTTTTTTTTAAGCTCGGGAGAAAT + 435 TTTTTTTTGTTCTTTAATTTCATTTAAAATTTTTGTATGTTTTGTTTTTAAAAGGTTTAA + 495 AAGTTTTTGTTCAAAATTTGATACTTTGTTTGTAGCAATTTTATCTAAAAACCCATTCAT + 555 CCCAGCGAAAATTATAACAACTTGGTATTCAATTGGCATTGGTATGAATTGATTTTGTTT + 615 TAACAATTCGATTAGACGAGAACCTCGATTTAATACGTGTTGTGTAGATGCATCTAAATC + 675 AGACCCGAATTGAGCAAAAGCTTCAACTTCACGGTATTGGGCTAGTTCTAGTTTTAAACC + 735 CCCGGCAACTTGTCTCATTGCTGGAATTTGAGCAGCAGAACCAACACGACTTACTGATAA + 795 ACCCACATTAATTGCGGGCCGAATTCCTTTATAAAAAAGTTCAGCTTCTAGAAAGATTTG + 855 ACCATCTGTAA +; G-atp1-E4 <== start +; G-atp1-I3 <== end + 866 aatgggtagataaatattgttaattattatatcccccaatgtaaactgtacatgatagtt + 926 agttatcatacagcttcttttaagagaaaagaattatgtttataaattaaaatatatttt + 986 gataagttataaactacacataattatccagttaggtataattatgtgtagtttattata + 1046 caatttattttattaaaaaataatatttactataaaaacttcccctcacactgttcagtt + 1106 tgattgtttataaaaacaatttttttaagaaaaatgatacagttttcatgccttttttaa + 1166 aaaaaagcttttatttttacaaaaatttgtacttaattttttggaaaaaatatccaatga + 1226 tagctgatatcaaaatttaatattgtttattttggttaaaaagttttaaataaaaattaa + 1286 ttttctattaaaaatagatttttctaaaaaaatttttttttcagtttaatgtttttctat + 1346 aaatgaatttttaatattattatattatgataattaaatatgtataattagataatgtaa + 1406 tttgatgtaaaaattacaagttttcatatataaaattttttaaaaaaaatttcttatcat + 1466 
aatttatataattttatacaaattgatgtaattacaaataactgccc +; G-atp1-I3 <== start /group=II ;; mfannot: splice boundaries uncertain +; G-atp1-E3 <== end + 1513 TAGAAATAACATTTGTTGGAATATAAGCTGAAACATCTCCAGCTTGTGTTTCTATTATAG + 1573 GAAGCGCGGTTAATGATCCAGCCCCATAGTCTTTATTTAATTTAGCTGCACGTTCTAATA + 1633 AACGAGAATGTAGATAAA +; G-atp1-E3 <== start +; G-atp1-I2 <== end + 1651 attgagtcaaaaacttattagtaatgttaattactgttgtatgctcttagagctttacaa + 1711 aataattacttattataaagctctcttttcgttgaaaaattggattttgtgaattattaa + 1771 ttaagtttaatattttttgatgtaaaaaaatatttataaattttatcgaaataaattcat + 1831 attatgaattttataaaattttattgttttataaaattacataaaaagaattgtctgttc + 1891 tatattattttatacaatataaactctagtattaggaactttatgaaaaagttttaaaca + 1951 aaaaaataattatgaatttgtcatatttttgcttgaaatgtttatgaaatacgtcaaaat + 2011 ttctctataactattttttcttagcggtaaagatatgtatatatattaaaaagtttattt + 2071 tatttttgaataaaactttttgacaaacacataaatagttatttatttaaatatacattt + 2131 atgaatatactgtatatttaaaattttttggaaaaaatttacctaattaactaaataccc +; G-atp1-I2 <== start /group=II(derived) +; G-atp1-E2 <== end + 2191 AAACGTCTCCGGGATATGCTTCACGACCTGGTGGTCGTCTTAATAGTAAAGACATTTGTC + 2251 TATAAGCTACTGCCTGTTTACTTAAATCATCATAAATGATTAACGCATGCTTTTTATTAT + 2311 CGCGAAAATATTCTCCTATTGTACACCCAGTATATG +; G-atp1-E2 <== start +; G-atp1-I1 <== end + 2347 tttggatagaaaaaatttcttccacaaaacttaacgtataaatttctttatattaagctt + 2407 aatgaaaaaatttctagttaaattattaataacctaataaatacatttaatgtagatgtg + 2467 atatacgtctaaaatttggtatttataaattagatttttaaagaaattttttcaaaaact + 2527 gtttttactttaaagtaatttcagattcaaaattataaaattaattataaacttaactag + 2587 tttcttttatactatttataattaaagcgaatttttttagtagataatatttaatttttt + 2647 tgcattgttttatgataatagcaccttttaatcaaaaagtttttatataaatatttataa + 2707 ttgatttgttatataacaaacgtatacatatacttattaatataagtattaaactacctt + 2767 aaatttggggtttagtaatatataaaaagaaacgattgttaaatac +; G-atp1-I1 <== start /group=II(derived) ;; mfannot: splice boundaries uncertain +; G-atp1-E1 <== end + 2813 GTGCTAAAAACTGTAATGGAGCGGCTTCAGATGCCGTTGCAGCTACAATGATTGTATATG + 2873 
AAAATGCGTTTTCTTTTTCTAATATAGATACTAGTTGAGCAACTGTTGAACGTTTTTGTC + 2933 CGATTGCGACATAA +; G-orf223 ==> start + 2947 ATGCAGTATAACTTATCGGAATCGTTTAGCTCATTATTTTGATATTTTTGATTTAAAATG + 3007 GTGTCAATTGCAATTGCAGTTTTTCCAGTTTGCCTGTCACCAATGATTAGTTCCCGTTGA + 3067 CCACGTCCAATAGGAACTAAACTGTCAACAGCTTTTAATCCAGTTTGCATTGGCTCAGAA + 3127 ACTGATTTTCTTGGAATAATTCCGGGTGCTTTAACTTCTACCCGCCGAGTTTCATTACTT + 3187 TTAATTGCTCCTTTTCCGTCGATAGGGGCACCTAAAGCGTTAATTGCTCGGCCTAAAAGG + 3247 TCTGTACCTACAGGCACACTAACAATATTTTTAGTACGTTTTACGGATTCTCCTTCTGAA + 3307 ACAAATTTGTCGTTTCCAAAAATAACAATTCCTGCATTATCATTTTCTAAATTTAGAGCC + 3367 ATTCCTTTTAAACCGGAACTAAATTCGACCATTTCACCAGCTTTTAAATTTTGTAATCCA + 3427 AAAACTCGAGCAATTCCGTCTCCTACAGTTAATACTTTTCCTTTTTCAGTGAAGGAATTT + 3487 TTATTAATTCCTGTTGTTGCTATTTGAATTTCTAATAATTGAGATAATTCGTTTATATGT + 3547 AGTTTTTGCAT +; G-atp1-E1 <== start +; G-atp1 <== start + 3558 TTCTGCTATAAGTTTTGTATTAAAATTTAAAGTATTTTTTTGTATAATTTTTTTTAACTA + 3618 A +; G-orf223 ==> end + 3619 AACCGCTAATTTGATAAAAAATTGTTAAGAAATTTATTCATAAAATCTAGAAAACTAAAA + 3679 GAATTTTCCAGTAAAGGAAAAAGTTATTTAATATAAAATTTTTTACATATTAAAAATAAT + 3739 AATATAATTTATATTTTATTTAATTTTTAAGATTTTAAAATTAATGATCCTTTTTTAAAA + 3799 AATGTAGAATTTTATTAAAAATTGAATATCCCATAAACTTATGGTTTATGGGATAATTTT + 3859 CTTACCCATGAAAAATAAGTTTTTAACTTAATCAAAATATTAATATATAATATTTATATA + 3919 TTTTTCATGTAAGTGAACACTAGTCAAGT +; G-cox3 <== end +; G-cox3-E4 <== end + 3948 TTAAGCTTTATTTCCCCATATATATATGGAAATGAATAAAAAAAGTCAAACAACATATA +; G-cox3-E4 <== start +; G-cox3-I3 <== end + 4007 aacttaaaaatatgcttttttagattttgatttttgcgcatattaatagtttttgaacca + 4067 aacgtaataatttcttattattaggctcttatacaaattaaatatttgtctttatactgt + 4127 atatgtttaatcgtgtagtataaaaaattgcaggaggtaggtaattaaattttaattttt + 4187 ttaaaaaaacatatttaatttgtttagaaggtgttctaatcatttttaaatttctaaaaa + 4247 agaaaagaatatcagcatcaataaacataattatataaatataattatgtttttgctaaa + 4307 atttttgttaagtgtacagtaatatgttttaaactttctaaaagaaagtattttacataa + 4367 gttttattttattctatttgttaaaaatgaattttttttttgaaaaacgtataattagaa + 4427 
gtctttaaaagagattttggcttaaaaagtcaatttcaatataatgttgaatttttgatc + 4487 tttttaaagcaacttctatctaattaagaaaaaggacctaactataaatttataagcaca + 4547 caccaacaaaatatattaatcgatatgctttgaagcaataagcgttaattcacacatggc + 4607 gtgcaaattgctaaacaccatgcgttttttatgaaatactttaaatttaaaaaatttttt + 4667 ttcataaataagatacttttaaagcgaagtatcttgcaatactaatttagtgtattagta + 4727 aaataatgctttacattttttttaatttaaaaatctgtttttagattagagtaaattttt + 4787 tggttaaagaagaaatatattggctataatattttttctttaaaaactttggtgtaaaaa + 4847 atatattaaaaacagtatactatatttttatataaaagtattatatattcaaatgcaaag + 4907 aaatat +; G-cox3-I3 <== start /group=II(derived) ;; mfannot: splice boundaries uncertain +; G-cox3-E3 <== end + 4913 TCGCCCGCCAATATCATGCTGCTGCTTCGAAAGCAAAATGATGATTATCCGTAAAATGAT + 4973 GTTTTATTAAACGTATTAAACAAATACCCAAAAAAATACTTCCTATTAAAA +; G-cox3-E3 <== start +; G-cox3-I2 <== end + 5024 tttgagattaattataaattattaattttctcttagaactgtacatataattttattata + 5084 tacggctcaacataataaattgttattgtgcatacaaaattgatggaattagtattatgc + 5144 aatacatttttattataaatagtgtagtacttaagaattcctattatagggaagcgtagt + 5204 aaatattataaaatttttttgttataatactgttgtttactattttcatatttcattttt + 5264 ttatttaaaaaaaaatgaaaaagtttataatgcatatttgtttttttaaatgcaaattta + 5324 gatatttattattgttataatttttaatatcaaaaatgcaataaaatttgtttgtattag + 5384 aattttcatgtcaaaagaaatatttacaactttaaaaaatatactaaaatatttttatta + 5444 aacaaatacaataaaaaccgtac +; G-cox3-I2 <== start /group=II(derived) +; G-cox3-E2 <== end + 5467 CATGAAATCCGTGAAAACCTGTTGCTAAATAAAAAGTTGAACCATAAATACTATCTGAAA + 5527 TATCAAAATCAGCATTTCAATATTCGAAAATCTGTAAAGTAGTAAATATAAATGCTAATA + 5587 TTACTGTCAACAGTAAGCTAATTATAGCTTCTTCTCTAAACCTTTTTAAAATAGTATGAT + 5647 GGCACCACGTTACGCTGCATCCAGATAATAATAAAATTCCAGTGTTTAAAGCAGGCACAT + 5707 ATTTAGCGCTTAAAGAAAAAATACCAAGAGGGGGCCATTTAGTGCCAAGTTCAATAATTG + 5767 GGGCAAAGCTTGAAGTG +; G-cox3-E2 <== start +; G-cox3-I1 <== end + 5784 catcaaataatattaattagttatttgtgctcaaaaccgtatgaacttattgtactaagt + 5844 attacggctcccaggaaaaaacaacgtttttaaaa +; G-cox3-I1-orf673 <== end + 5879 
ttaattaatatgcctgatgcggtattctatatctttaatgcgtaattgttttacattacc + 5939 gcattgtactattcctttttttgagaaaaagttctgatatatataataacgaagttgtgg + 5999 ttttgatccaaactttgtaattaaataattaagaaaatagcaagaagataaaaaatcaaa + 6059 ttgctttaattggaaaacaatagttttacttagaccaaaatatataataactttaattaa + 6119 ccattgattatatttttgaattaaaacgtttagtggtaattttaattgtattggggcaaa + 6179 tatatttctaagatcttctttaaggttagaaaaagttttagagcatatagtaatagttat + 6239 attattgtaataattaaataaaatttttctataaaagaaattttcttgatttagatatct + 6299 agtgcattgaatttttcgattaaacatattagtaaacttaaaacctaaatattcaaaaaa + 6359 catatttggatataatatttgtattgaagttacgtttttatctacttgaataaatttttt + 6419 ttttaaaaaaatcaataatcgataataaattattaaaaaatatgaaaaattagcagtaaa + 6479 atctactattaaaatgttacctaaaaatctataaatttgtgtatttaacttaaaatattg + 6539 taaaataagtgtactctgttgataatttttatttaaaaaacaattagaacggtcatttaa + 6599 tttttcggttaatttaaatgttaacggtaacaatacaaatgattccatattatttaacat + 6659 aacatttgcaattaatgcacccaaaattgtatttcataattttttaatatgtaatgtatt + 6719 atgtattcttctttcaagaagatatttatataaaaaaggataacatcagataattataag + 6779 ggagcggtatttgttacaaatgggcatatgttttgccatgacaagataagaattcatatt + 6839 taagttcttaaaaatatctatattaacaaattttttataaaaaatagttttataaaataa + 6899 ctttaattttattttcatattatgaatataatataaatatggtttgttttgctggtctaa + 6959 tttagaaattcaaaaattatacaatttttgtttaaaattaaaaaattttttatattttaa + 7019 aattaatagttttttataaaaaattaaataatatttaatttttcgagaaaactgtaaata + 7079 ccgaattaatgattttactaaaaatgtcttagatgaattagagaaagttgtaaattgttg + 7139 aatatttttctgccataaaattataggcaataatgctacataaacaattttttgtagtat + 7199 acgatcctgtattaaaatgttcggtaacacattatactttttataaaattgtaaatacct + 7259 ggcagcacttttatagtctaatcagaaattagaaatattatgtcttttaagaagacatca + 7319 gtttaattctttcctttttcttaaaatttgctggaatgtgcaattaattaattttctaat + 7379 tgaatataattcagttatagattttttagatttacaaaattgagatttattgcaatttga + 7439 atttttaccactgctttttacataacatcattttgattttaatatagaaaaagtaaacgt + 7499 tttgttatttaaatttgaaattgtacttgcttcttgacattgtatattatatataatata + 7559 tcatcgaattgctggtgattctaaaatccattgttggagtaatttaactttgaagggaag + 7619 
agatgcagctgtaacagcatataaataattaattccttgactttgttgttgtaatagtaa + 7679 taataccatataattataaatactaattgcagtatttatacatttcatgagctcgacaaa + 7739 cgaatgttgttttaacaaccgaatattttcgttatttttatgagcgtatgctttaaggat + 7799 cagattattcaatcgaattaaatcttttttccaaaatttaagcgtatcaaaacttttcct + 7859 accaaaaaattttataagtttttttccatgaaaaatatacat +; G-cox3-I1-orf673 <== start + 7901 ttttttaattttctaaaatatttttttgtattttttttaaattgattagaaaaaatctta + 7961 tttttttattagattctgtctgaattaagaatacaaatgatgtatgtttacataacataa + 8021 aatttaataaaatatatattttattaaattttattttaataaatattgattatactgcaa + 8081 taaaagactattattgattaatttctgaaaaatccacacataattcaaaaaaagactact + 8141 tacgagagtaacttttaaaaccaatttttatacattatttatcaaatacatcacatatac + 8201 tacatgtattttgttaaaatgcgtacgtgaaatattttataaaataataattataaaata + 8261 tctcttcttacaattatttattaataaccaatttatctatatgctagatatgtatcttgc + 8321 cgacaaattcagagtatacccatgg +; G-cox3-I1 <== start ;; mfannot: no intron type identified +; G-cox3-E1 <== end + 8346 AAAAAGGCTCAAAAAAAAGCAAAAAAAAATAAAACTTCTGAAAGAATGAAAAGCGCCATT + 8406 CCAAAACTCAAACCAGTCTGTACTATTTGTGTATGCTGACCTTCGAAAGTTGATTCACGG + 8466 ATTACATCTCGCCATCAACATGTAATGCAAAAAATTATTGCGATTAACCCAAAAAGAACA + 8526 AACATATTACTATATTTATACGAATGCAAATAACTTACAAATCCACTTGTAAAAATTCAG + 8586 GCAGCGCAAGCCGTGAAAATTGGCCATGGGCTAGAATCCACTAAATGAAAACCATGAGTA + 8646 CATGTTAAAATTTTTTTTTTTAAAGATTTTAATAACAC +; G-cox3-E1 <== start +; G-cox3 <== start + 8684 TTAAATCCAAGTTTTATTACAAATTTTTACAACAATTGTTAGTTGACCTAAACTATTATG + 8744 A +;; mfannot: + 8745 tccctaattcgaaactatgcgtggtattttctaccacatagctt +;; mfannot: /group=II + 8789 CTTTATTGTAAGTAAAACTCTCCTACTTAATGTACCATTATTACTTACAATAATAAACAA + 8849 ACTTTGCATAGTATTGCTTAATCCATTCTTTTAATATTGATACAATTTCGCTTAATTTTT + 8909 CATGTTTTAAAATTTTAAACTTAGGATTTATATTTTATACCAAATAGATTTTTTCTTTAT + 8969 TTATACTTATGTTTTACGACATAAATACTATCTCTAATAAATAAATTTAAAAAATTTTTT + 9029 TTAAAGTAAATCAATAATAATAATTTAAAATTTATCCATTTTCAAAATATTAATATTGTA + 9089 GCAAATACATTAAATTTTGTTAAAACCTAATATATTTACTACAAATAATTAATTCTACTA + 9149 
ATTACAGATTATCATTATTAATTAAAAATATAAAAACTAATATGTAACCTTTTATAGAAA + 9209 TAACAAAATACTAGAAAAATTTTATAAAATTAGCCTACTACATAAAATACTCAATTTATT + 9269 TGATAATAGTTTTGAATTAGAAT +;; G-nad9 <== end + 9292 TTATAA +;; G-nad9 <== end + 9298 AAAATCGAAATCCCGATACTCTTGAGCCATTTCTAAGGATTCAGTTAAAATACGTTTTTG + 9358 ATTTTCATCATACCGTACCTCAACATATCCACTTAAAGGAAAATCTTTCCGAAAAGGATG + 9418 TCCATCGGTAGGATA +;; G-nad9 <== start ;; 138,182 + 9433 GGAAATTCTATA +;; mfannot: + 9445 aaccctccactaaaaccacgcatacaatttatattataagtggctt +;; mfannot: /group=II(derived) + 9491 TCGTTAAATTTACTTTTTTCAATCAAAAAATTTCTATAAAATTTATAAACAGTATATACT + 9551 GTTTCCATTTTTTGGAAAAAAAGTAATTTAAACTTTTATCAAATTATACTCTAAATGATT + 9611 ACTCCAATTCGTACACAATAATTTATATTATCTAGTAAAAACGAATCCATATTTCAAATT + 9671 TAATATTTTTTGTTTTCAATATTTTTATATTAATTTAGTATAAAAAACAGGAAAACTAAT + 9731 AAATACCTTTTTTTGATTAAAAACTTATTATAAACTATAGAAACTAGTCTCTGTTTTCCT + 9791 TTTTAACATAAAAATGTTATTATTTAATCATTATACAGCAAATTCACAAACTATTATTGT + 9851 ATTTATATTTTATTAAAACCCATTTTAGCCAATTATCCTTTTATATTAAATAATATTATA + 9911 TATTTTTATTTAACTGTATTTATTAAGAATAAATAGGGAATAATAATTAAATTTTTAAAA +;; G-nad9 <== end + 9971 ATGGCTAACAAAACCGTAATCTGTTAGAATACGTCGTAAGTCAAAATTATTTATAAAAAA + 10031 AATACCAAACATATCCCACACTTCCCTTTCAAATCAAACTGCTGCTGGATAAATTAATGA + 10091 GATTGAATTAATTGTTGCTAATAAAGTTAAATTACTTTTTAAAAAAAATCTAGAATTTCG + 10151 GGATATACTTAAAAAATTATATATAATCTCAAAACGTTTTAATTTTGAAAGATAATCTAC + 10211 AGCAATAATATCAATTAAAATTTTATATTGTGTAAGTGTATGATTTTTTAAAAAAATAGA + 10271 AATGGGTTGGATAAATTCGTTTCAAACCCCCATGGCTATAATTTTTCTGTTTACGCATAC + 10331 AGAAATAATTCCACGCAAACAAGACTTTACTATATTTAAAGTATACTTTTCTAT +;; G-nad9 <== start ;; 6,143 + 10385 TAATTTATGTAATTGTCCAACTTTCAAAACTTTTTCCAT +;; G-nad9 <== start + 10424 CGTTT +;; G-cox2 <== end + 10429 TTAAATTAATTCTCCATTTGAATC +;; G-cox2 <== end + 10453 TTCAACATATTTGAAGAAAATTCAAGATACATATTCTTTAAAAGGTACAGCTTCAAGCGC + 10513 AATTGGCATAAATCCATGATTAATACCACTTAG +;; G-cox2 <== start ;; 238,268 + 10546 GTAGATATTATATAAAAATAATTA +;; mfannot: + 10570 cccctaattgaacttaacaagcgcttctcaacgcattaagctc +;; mfannot: 
/group=II + 10613 GATTTCAATCTAAAATCTTAGTGACGAAATTTCACAATATTTTTTATATATTTATCTTTT + 10673 GGGACATTGTATTTATTTTTACAAAAATAATTTAGTCATAATAAACATATAAACAGACTA + 10733 TATCTAAAAAAAAAATATTCTATGTAAAATTTAAAAAATATATTAAAGAAAGATGTACAG + 10793 TTTTTAAAAATATTTAGTTATCTAAGATTTTCCAAACTGTATCTTATTCACTTATAATCT + 10853 TAATATTAAAATAAAAGCAAAAAGAAATATCTTAATACATTTTTATAATATTAAAATTTT + 10913 AAATGAAATTTTTATAGCTACATTTATTACTAAAATTAGTATATAATTTATATATCACAG + 10973 TATTCCCAACATCTGTAATTTCAACTGAAAAAACTTACTCAATAAATACAATCTGATATA + 11033 TATTTATTTTTTAGAAAATATTTACGTAAATTTGATAAAATTTTAACTGTTGGCTCTAAA + 11093 GTTTTATAGATTTCCCAAAGCTAGTGCACTATAATATTTTTATATTACACATAGGAAATC + 11153 GACTTGTTTCTTTTCTAAACAAAAATTTAATAAATTAACTATACCGCCA +;; G-cox2 <== end + 11202 ACAGATCTCACTACACTGGCCATAATAAACACCAGGACGATCGATAAAAACTAGCACTTG + 11262 ATTTAATCTACCAGGACATGCATCGATTTTAATGCCTAACGAAGGTAATGCTCAACTATG + 11322 TAAAACATCAGTTGACGTTACAATCGCACGGATATTTGTATATATAGGTAAAATAATTCG + 11382 TTTATCTACTTCTAATAATCGAAAACTCCCTTCTTGTAAATCATCATCCCCTATAAGGTA + 11442 ACTATCAAATAAGAACGATACATCTGTTGGTAAATTAACCACTGTATAATCTGAATACTC + 11502 ATAACTTCACTGCCAATA +;; G-cox2 <== start ;; 137,239 + 11520 AGTAATTTCACAAGGAA +;; mfannot: + 11537 ttttctttggcaagaaccgtacaagcgttttgcaacgcatacggctc +;; mfannot: /group=II(derived) + 11584 TAGTAAATTTCTACGTAAACGTATAAACACATAAAATAGGAATTATAATTTGCAGACTGT + 11644 ATTATTTTTTTAAATTAATAACTACTAACTCTGAAAAAATTTTCAATATAATAATCATAT + 11704 TTTTTTTGAAAAATTTGAAATATACCTTAAGCTCTATACGTATTTGATAAATTCATACTG + 11764 ATTTATATGGCAAAAAAAATTCAAATTTTCTGAAAAAACACTGAATTTTAAAAACATATT + 11824 TTTATAAAAGAATTTTAATAAAAAATTAATATTATTTTCATTAAACAAAATAAATACAAT + 11884 TTTGAAGATTATAAACTATAAAGGCATTTATTTAAAATTTTTTCAAAAAACTAATATTAA + 11944 TTTTATATTAAATTTTTTTTTCCTTCAAAAAATCGAATTCATTTTACTTTGTAAAAAAAT + 12004 ATTTTTTTCTAAATATTTTTTCTGCTAAATCAATCCTCCTATTATTATTTTTATCTAAAA + 12064 ATAAAATACAATTATAAATAATTATTTCTACTTAAAAATAGAATATAACTTACTCTCTGA + 12124 ACTACCATAACCTACTTATGCAGATTTTTTTAGCTATTAACCTTTTTGTAAATTTTTTAA + 12184 TTATACTAAAAACACGTACGTTATTTTCAAACTAAATAAAATTTCTTTAGTGTCTAATTT + 
12244 ACCAGAAATAACAAATTTCTTTTCATAAATTTGACCATTTCATCGTAATCTTAAACTTAA + 12304 TTTTTTATAAGACTATAAGTTTAAATTATAAAAAATTATAATATTAAGCTTAAGAAAAAC + 12364 TCAACTTCTATCCTAAAATACTTATATCGAATTCAATAAATACCTATGGTTTCAACAAAG + 12424 TAAAATTAAACTTTGTTTTTAATCTTTTGTACATTATTTTTGTACGAAATTATATTCATT + 12484 TCTTACATAAAGATATACTTATACACCGACCAATCAATACTTTTAATTTTATTATCACCT + 12544 TCGAACAAAAAACTTGTGTTTAAGGATTTTAATTTAATAATACAACCAATTTACTTACTA + 12604 CATTTAAACTTTTAATTCTATTATTAAATAACTCAAGCATAAGAATACGTATTAATACGA + 12664 CATTAACGTAAACCCTTATTCAAAAACTTTGAAACCTTATACATGTATTACGTATTTTCT + 12724 ATAATTTAGAAAATTTAATGAATTTCCCACTC +;; G-cox2 <== end + 12756 ATACCATTGGTGACCAATAACCTTTAAAGTTAGAACTGGATCTATAATTTCATCTATTGA + 12816 ATAAAGTATAGCTAAAGAAGAACTCATCACTCCTACTAAAAGTAACGCAGGTATTAAAAC + 12876 CCATAAAAACTCTAATATCATAATAACCCGATCTGACATATGTCATTCAGTTGTTCTAGG + 12936 GCTTGTAACATCATATTGCTTAGCTATACAAAATAAAATCCACATAACAATTCCTAAAAT + 12996 TAAAAATGCTATGAAAAATAAATCTTGGTACAGTGTAACAATACCATCCATAATAGGAGA + 13056 TGCAGAATCTTGAAATTCAACTTGCCAATTTTCAGCAGAATCAGCAAATAACTCATACCG + 13116 AAAAAGATCCAAAAAAAATATAAATATTAATATAAAATTAAAATT +;; G-cox2 <== start ;; 10,139 + 13161 ACGAAAAAACGAACTTTTTAATAAACACAT +;; G-cox2 <== start + 13191 AGAACACATAGATATCATATTTTTATGCTTTATACACAAACCCTAAAGTTTTTCTCTCTT + 13251 CAAATTTTTTACCAGACGTTAAAGTTTTATAAACAAGGACAAAAAATACTATTAACGAAA + 13311 TTACTGAAATATATGAACCTAGTGACGCAACTCAATTTCAATGAATAAACGCATCAGGAT + 13371 AATAGTACACTAGTCTAAATAAACCGTGCCCAAAACCGTATAAACTTATTATGCTAAGAA + 13431 TTACGGCTTCGAAGAAAGTAAAATTTAACATTTCTACCGTTATACGATAAATATATATGT + 13491 TTATTATATTAATTATATCAAAAATTATATACTATTATTTAATAACTATTTTTTATAATT + 13551 TTAACATCCGAAGTTATCCTGTATTATTAATTTAATAATATAAC +; G-orf621 <== end + 13595 TTATACTAAATTAATTTTAATAAATCTACTATAAGTAGATAAATATCGATATATATACGT + 13655 ACGTATTTTAGATACTGAATTATATTTTTTATACAAAAATTTTAAAATTCTTTTATAAAG + 13715 AATATAACTTAAAATTATTAACTGCCTATGTAGACTCTCAAAATAACGATAATATTGTAA + 13775 TATTTTACTCATAAACCTATTTACTTCATTTACAAGAATTTTTAAATTGAGTAATAGATT + 13835 
CTTACAAGAAAATAACTGTAAAATCAATCTTCTAACCCGGTTAAAAAAATTTATATTCAA + 13895 TGAAACAGTCCACTTACTCAAAATTTTTTCATAAAAAGATAAATAATTTTTACATATTAT + 13955 AAAATTTTTAAAATTATATTCATTATTTTTAAAAAAAATGCTATTCAAACAATTAAACTT + 14015 CAATCCTGATAAATTCAACGTTGCATTTGGCCTACAATATTTAAATTTCACTATATTTGA + 14075 GCAATTTAACACACTTAATCCACATTTTATCAAAAATTTCACAAAAAATTCATAAAATCG + 14135 AATAAAATATTTACAACTTTTTTTTCCAAAAATTAAAAGTCTACCAGAATAATATACTAT + 14195 TTGTACCATTTGTCGATACTTCATTAATTCTAAATATAAATTACTCTTCTTTAACTCTAA + 14255 ACCACCGTTACTATTAGTAGTAATTTCTCTTGTGAATAAAAAAAATTCAAATAAAATAAA + 14315 TAAAAAAAAACTTAATAAACTTCTTAAAAATACATTTCACAGCATTTCATATTCAAAATG + 14375 CACTCCTGCGGCCTTCAATAAAACTTTATTAACATGGTTAAAAGCAAATGCACCAACTCA + 14435 AATTTTCGATAAAAAAAAATATTTTTTGCAAACTAGTATATTCTCCATAATAGGTAGACA + 14495 AGATATAAAACCCAAATATCTATTAATATTAATATCAATATATTTAGAAAAAGTCATAAT + 14555 ACCACTTATAATTAAACGTTTTCTCCGAACAGATAAATATCAAAATCAAAACTTATAACT + 14615 AATTTTACTTTGCCCAGTAGTTTTTTCGTGTAAAAAATTGTTAGAATTAACAAAAAAATA + 14675 TGAAATATTACAATTTCTTAAATAAAACAAATCCATATCATTAGACATGATACAGTTTGC + 14735 TAATTTTATATAACTTGGTATTAAATTATTTTTTTGTAAGTCTAAAACCTTACCTAAATT + 14795 TAAACAAATTCGAGAATACTCTAAATAGGGACGTAAAGAAAAACCAAAAATCTTTTGCAT + 14855 AATACAATCTTTTAATATAAATAAATAAATAATACAAAATTTCCTAAAATCGTATCTAAA + 14915 ATTAATAAGAGTTTCTTTAAAAGAAATTAACTTATAATAACATAAATAATACTTACAACT + 14975 ACAAATTAAACTTAACAACTGCAAACATCAATAATTAACTTTTTTAAATTTGATTAAAAG + 15035 ACAAATGAATTTTTTTTTTATACTATAGATTTTTTTTGGAAAATCTTGCCTTAACCTAAT + 15095 ATTTTTTTTAGATTTACCATATTTAGTTGTACTCAAAAATTTTTCTAAAAGACTTTTATT + 15155 ATAAACATTCTTAGAAGATAAAAATATAATATTTTTATTACCCATTACTTCACTAAGTGG + 15215 AAGTAACTGCGTATATCAAATTAAATACATTCGTACACTGAATGATTCCATAAATATAAT + 15275 TTGCATTATCACAAAAAATGTAGGTAATTTGAAACTTCCAAAATAATTAAGATTAGCCCC + 15335 ACCATATATTAACATCATTTTAAATACAACATGATTATAAAAAATAATTAAACTTTCTAA + 15395 TTTCAAAATTATCTCTATTTTAAATAAATTCGTATCTTTTCGTAGAAAAATATATCGTAA + 15455 CCGCAT +; G-orf621 <== start + 15461 ATCTACTATATGACAGCATCTAACTCAATACCTTAATAAATTAAAACTTTCTTTACAACT + 15521 
AATAATATCTTTTTTTAAAACCTACCATTCTACGATAGTGCTTCAATTAACATTTTTAGC + 15581 TATTCTATCTGAAAAAAATTACAATAATTAACAAAAATCTTACCTAATCACAAATAATTT + 15641 TGCAAATTAAATATAACATTGGTATATACATAACAATAAAACTAACTTTTATCAATTTTT + 15701 TATACACGTCAACTTAACTGTAAACTATCCAATCGAAAACAGTACTTATCTTAATAAAAA + 15761 ATTTATAATATACTATTCTTATACATTACCATTATTTTAAGTATATTTGCTTGATTTAAG + 15821 GTATTATAATTATCTTCATA +; G-cox1 <== end +; G-cox1-E12 <== end + 15841 TTATCTACAATAAGAAAATAAATCAAGCTCGTTAACAATTTTTTTAATTTCAGACTTTAC + 15901 TCCTGGAATTCGTCTTGGCATTCCGGCTAAACCTAAAGCATGCATTGGAAAAAAAGTTAT + 15961 ATTTACACCAAAAAAAAATGTTCAAAAATGTATTTTACCTAATCTTTCAGGATATTTATA + 16021 TCCACTAATTTTACCAATTCATAAATAAAATCCGGCAAATA +; G-cox1-E12 <== start +; G-cox1-I11 <== end + 16062 ctccacaaagaatagaatttttaaaaacaaatccttctaaaaaactgcacatacaactta + 16122 attttgtatacagcttatttttataactaataaggcactatattatccattaaaaaatat + 16182 aaaataaatttaattaagtactttacttaacacttttattttattaagtttatcgattta + 16242 tactaaatttataattttaaaactaaccttttttctatttacctgtgcttatattaatta + 16302 ataaaaatatattaaaataataactacatatattcaataatctttcctttttaaaaaagt + 16362 ataacctaacgtattaatacaactatgttactaataaaaagctcctgtaatcctattgaa + 16422 ttaatttttttgtttactacaaataaaaatatgaataaaaatgaatttatatatttctag + 16482 attataatattataatatgtttacttataatttcaaagaatttactttatatagttatta + 16542 ttcaaaaaacaaagcaaatcttcaatacatactaaaatatactgaaatacattaaaattc + 16602 ttaaacgtaaaatttttacctttttcatatttaattgaattctatgattttacgctaaaa + 16662 tatatacatataaaccttataaaaaaattatcatatatgcacacgtccatctcttataaa + 16722 ttttttattaacttgttaaacatagatacatatattcaaaaattctactattcaaatact + 16782 tttttcaaaatcattattaacatccaattgattttaggt +; G-cox1-I11 <== start /group=II(derived) ;; mfannot: splice boundaries uncertain +; G-cox1-E11 <== end + 16821 ACAAAAACATAGCCCCCATAGATAAATAATACTTCGAATGCTGAA +; G-cox1-E11 <== start +; G-cox1-I10 <== end + 16866 aagcaagctgtattccacacataacaagcgatttttcaacggcattatgcgttctgatga + 16926 aacaaagcaatatttttatcacaatagtataatatcatctaaacaaataat +; G-cox1-I10-orf671 <== end + 16977 
ttaacttttattaaaattaactttcaataatttttgtaatgaaaatccatcatatttacc + 17037 tgaacatattcatatataatgaattttacataaaaataattgtttaacaaaaaaaagtga + 17097 attctttcaaagtatctctgcagtcttatcaactttttcttcccgcgaagaaaaatatca + 17157 ttcagtagcttctaataaacaattcggaatacaacaacgaagtttctctactgaagtttc + 17217 taatgaaaaatcagacaaaattggaaaaaatactgaatctggagaaaaattagttaaaca + 17277 ccttttgtaaaaatttgttctctcagttggtatatataaaagaattactttacgggaatc + 17337 tttagaacatcgaactgttagatcaataccaaatttcttaaatgcccagtttgcagactg + 17397 ttttttataccgatgtgctaatgttaacgcagcacttcgttttaatgcatgtcaaaattt + 17457 gaaaaaaatctttcgagcaaaaataaaataataattttcaatattttgtataataagatt + 17517 ataccgatagacaacctctcaatctgaagcaaaggcaagtcatttatcttgacatttccc + 17577 aacaaatttaatacgtttgcctaaacgattaatcctaaaaaatccgaattcaacatactg + 17637 tttaaataatttcattattggaatgttaaattgtaattgaaacttagaaaattcttgaaa + 17697 tatatttatattagtactaatacaaaattgataatttttatgtagataataattcaaaaa + 17757 aaaaatctttttttcagaatattttataaaaatattaaatttcaaatcaatacctaaaga + 17817 acaacttatataattagataaacacaccaacactgcatgcataagttcttttttaccagc + 17877 aatacctaataaaatacaatttaaattccgcacataatataatttattatgataccataa + 17937 atattccttatttaataaatttaatgaatttttcattgttaaatatttgtgaattcgaaa + 17997 ttcgacaaacttatcaagctcccgaaaaaaaatatcataaaataacaagtttaacataaa + 18057 atcttgcaaatgtaatacactattataatagtaatcaccatcatataaattttcaaaaaa + 18117 aatataaccacaattccaaaatttattaattaacgaaactactcaataatcatttaaatg + 18177 atgactaataacgcttaaaaaaaaagtacaatttttaaaatcaaatatttgaataaattc + 18237 actcttgataaaccaagttacccccttccacttatctttaatatgttgcaaaaataaatg + 18297 attcttagtagtctcaaaaaacattttaaaagaaaatggcttaaacactacttctaaaag + 18357 tattttcaatgcctgctgaattaacttatctcgcataggtattaaactaaacaattttat + 18417 actaccataaacgttacttccaaaaaatcgcttaattggatgcggattataacattttga + 18477 ctctaattcttctgaaagctttacaatttgctctaaggtaagatttatcgaaaataactt + 18537 aactttcttgctattataataggatttcaaaaaataattacaataacaaattaacaaata + 18597 acgtggatcacataatattctatatataaaaaattgagtatcaacaatatttttactatt + 18657 aacgccagatccaatgtgaattaaaaaatgatctaaatctttaaaaaagattgtcaattc + 
18717 tttaagagaaaaaatactttgaattttcttatcaataactacgcgcttaaacttattttt + 18777 gtaaaattttaacaaataccatgcttcactaatttgtttcatctgagatatttcacaaac + 18837 ttgatcgtaaattatatatcaaacatttatacttactaatacatccgctactgctatctc + 18897 gcttctaccaagtgtagaaaattcatatgtacacttgacatgtcttaactgcttagctca + 18957 agctaaaaaactatttaggtaacttattctaaacat +; G-cox1-I10-orf671 <== start + 18993 cctctttgaaaactattaacttttaaataaaattcgattaaaaacatttgtttattcgaa + 19053 taaaacattgtaatcagtgtacactccatatattatttattttagaataattaataacgc + 19113 aaccatataatcatattttaaaaagctcttataggtttaacctactaaaaaatatttact + 19173 aagaaaaatgttcatcttagattagcctttcaagcgttaatcgcccaacgtaatgaaaat + 19233 gtgctacaacataattgggatagtcaattttgctaagctatgtgtactcaactacattta + 19293 ctcccaatgaacatagcaaactaattacttagtactatgctctattatccgttttctata + 19353 tcctatttttcataaaaaaatgacttcataataaaacaaccaataatattaaaagcatag + 19413 tataatatttaaattaacaatgcacttagcctatttttatgaaaaatttaacttacataa + 19473 gatttactacgacagtaagtacttttataaaataaaataacgcattactaatttcaaaaa + 19533 ttcttaaaattaatctttaattcaattttctaaaaagaaattgtattaactaccctcaaa + 19593 ttactttgaactactaaatataaccagaaacgaaattacttcatataaaaatataacaat + 19653 ttcgacaaaacaatatacttcaatcaattatataatttatatttgctatatattaataaa + 19713 ataatgtttagaaatctctcacttaaattaatttaaaacataaaaattaaaccgttcata + 19773 ctttttctaatccatattattaaattaattatacatacttatctaataattagcgtatat + 19833 ttaatcttttctttctatcaatcagttttagaaaacacaaagttattggaattag +; G-cox1-I10 <== start /group=II ;; mfannot: splice boundaries uncertain +; G-cox1-E10 <== end + 19888 CGAAACCATAAGTATCATGAAGTGCAATATCAATTCCCGAATTTGCTAAAATAACCCCCG + 19948 TTAGTCCTCCAATAGTAAATAAAAATAAAAATCCAAAAGTAAATAAAACAGACGTATTAA + 20008 ATTGTATAACACCTCCTCACATAGTAACTAATCAACTAAA +; G-cox1-E10 <== start +; G-cox1-I9 <== end + 20048 ttagattggataaaaccataaattttcgccttttggttttaaactctattgaactttacg + 20108 caatacatcctatactataaagctcacatatataatttaaaacagcttactattaattta + 20168 ttaaaaaaacttttaataccctaagaacatatatgaatatttaagcattcaaaaatattt + 20228 agtaatttttttaaaatctaattaaataatatattgaacataaaaaaaatttagtaaaac + 20288 
actaagataaattttttagtcatcttatatcataagatatgaattaaaaataataaaaag + 20348 cgaaatataaacaacgtactcaacatagccttagttatacatacttaaaatatttataaa + 20408 attaaatacttatattcaatattattcatttcccaaataaaagcccatcagtatatcaca + 20468 taattgcatcttacaataagatacttcctatttttagaaaataaattttttaacttattt + 20528 cttataaagaaatttcaaacaaaaacacataacataaattttcactattatgtaaactaa + 20588 taatagacagtaaacactactaccactctctatttttccttttgttactttaccaatctt + 20648 ttttcaaaaatatcttaccctaaatttttttctaaaaatacaatccattaacaccacatt + 20708 ctatcactacccactaatccaatctacaataaatttaaactaaatttctttgcatattaa + 20768 acgaaaataaatgatctaaattaaataaattattaataatgacatactgaacgtactact + 20828 ccca +; G-cox1-I9 <== start ;; mfannot: no intron type identified +; G-cox1-E9 <== end + 20832 AACTTTGATACCTGTAGGAACCGCAATAAT +; G-cox1-E9 <== start +; G-cox1-I8 <== end + 20862 aaggatgatcgatataaattttatctaccccttccgaaccttccaagctaattactcagc + 20922 ataaggctctgtaatgaaaacaactcgacttacgacttgttgataaaaagctaaaaaaaa + 20982 taatttctttactatatacaatatattgagaatataaaataaaattatccaaaataaata + 21042 caataggaacatcttacctacatgttaaccaataacgtaaaataaacatattaaaacaaa + 21102 tcttaagcctataaggacattaaacatatttaatatattttaactaa +; G-cox1-I8-orf385 <== end + 21149 ctaaactaaaataggtttatttaatgtaaaaaacaataacaatcccgtttggtaatatga + 21209 gatttctaaaaaaaatattggtacattactagtatgttgtatcgacgcaaaccgtcttaa + 21269 ttttaaaaacatattaagccaaatacaataacaatcagtagctattttagaatctgactt + 21329 aacacgtaaatgccaaggtaatccataaggagaacagtttgcttgatttttcaaaaaatt + 21389 acgtataacccaagtagagcgccgtaaaccctctaatcgaaacttctgaattaaatactt + 21449 tttaaatactttatatattagtttgtcaaaaagttttaatttatttgaaatacctactct + 21509 cacaaaataactaaaaatcttttgaataataacattaacttttaatattactatttttgt + 21569 agctaaaaccaaaagacttttaatagtttgaatcaaacactgttttaataaactaaaaac + 21629 ttgtcaagtagggtatatagacactattctcgaagaataaaaacaatctgaaattaaaaa + 21689 attcttacttaccgctccaaatttaataaaaaatataaagcctaaatagaaacaataaat + 21749 tttacttaacctctgaaaaaaataaggctgcactctaatatatatcaaaccaacttgata + 21809 aaacaataactgaagttttgaccaaaaggccataatatttatgttatctgtaacaagtaa + 21869 
taaaatactcccattacaataaaataattctataggaacataactataatctatttttcc + 21929 taaaaaataacatcttttataaacccccctaattttcttatcattaaatttaatacagca + 21989 cttaaccaaaaaatcattaaatatccatcaaattacaacattttgcactaacaaccaaac + 22049 acgcaatcaaatttctaaatttatatcgcaagaaaactgtagtttgcattggctagtgta + 22109 caaactaatccactgctttaatagatatctcaaatacaaaggaatgtgaaaaaaacatct + 22169 attaattaaatctgaatttaatttctctgcgtatttaaataatttgattttaaaaaaagt + 22229 taaacgtttattacccatcatttccttcaatgttctaaaagcacacgtagcatttcgacc + 22289 ttttcgatttgaatacat +; G-cox1-I8-orf385 <== start + 22307 attccgtgtaaactttgcttcataatagggttcaattaaataacaaaaaataacctgtac + 22367 tacattatctggaaagtttctttctacataacctattcatttttttttatttcaattaat + 22427 tcaaaatcattttcgaaataaacgaaatgtgcttaataaaatacaataatttgattcttt + 22487 aataccattaatttttccagtaaattttctaattctagaaaaaatatttcctatgtctgt + 22547 taccgccaaataataaatacaaatattaataataaatttatttattaatacaaacgaact + 22607 agcatcttggttattagatgaaagacgacaaatttctcgttgtaatttatacaatatccc + 22667 caaaataaatttatgaaattttatacccaattgccaaaccccgcactcaaggcacttaac + 22727 taattgatttgtatagaatttaaactgtgataaaaaaaaccgttctattttttggcctaa + 22787 atgcacctccctaaaaatcaaattttcataaaaaatctacaattatttctcttaacccta + 22847 catttataaatacccttaaatctagttaaaagcattcaatctgaataagtatataaacct + 22907 acttataaaatttacctaaaatctaaaaatcaaatactcatagtatacatatttaattta + 22967 agatatttttgctgttttacttaatttgaatttactgtttaatcaacacattgaatcctt + 23027 aataaactatattaatttttagacaaaaattatataatattttatcacacaaaaagcaat + 23087 attctaacagtacaccaatatcacaaaatattttaagtgcatcaaatcaatcaaaaattt + 23147 gaaatttatataatttaatccaaaattaaaacaatttttccaagaattataaatagaaaa + 23207 attatgtatttaatttatttctaataattaacttagaaataaccaccaacatcgcaaaaa + 23267 aagtatctttaataatctatcat +; G-cox1-I8 <== start /group=II ;; mfannot: splice boundaries uncertain +; G-cox1-E8 <== end + 23290 CATAGTTGCTGCTGTAAAATAAGCACGAGTATCTACATCTAAACCTACTGTATACATATG +; G-cox1-E8 <== start +; G-cox1-I7 <== end + 23350 gttaagatcgaggtacataacctctccctcttgaactgtgcatgccagttagccagcaca + 23410 
cagctcacaataaaaaaaaatctttttaaacgattactaattaaacaaggtttaattacc + 23470 tttaattattttgtaattttttcactaaaataatacataatatagcacaaatattaatta + 23530 gttaagttataaagtaaatctaataatatttttaaaatcttccaattacaaaaattttct + 23590 cttaacgctctgatattatttttcctaaaaaaaaaataaattcaaaatcctaaattccat + 23650 tttacaatatccatataccacatttctaaagtttaaatagaacattaactgacttctaat + 23710 tagaatcgctatatgtaatttgtactatataaaaagcaaaatccaacggatcctcttaca + 23770 aactcacttgcataaaccaaagattcgcattagccacataaaactaatcatttggtacta + 23830 gtatatgcaaaatcgttttacaaacatttcagagaataccactactttcctctgcttctt + 23890 cttactctatcgctggataactactcaattagctgctaatttatacactcacaatagttt + 23950 attttttttacaaaactatcttcataaattaacaaaaattttaataaaaaatttaattaa + 24010 ttctaataaaattccagtcgaac +; G-cox1-I7 <== start /group=II +; G-cox1-E7 <== end + 24033 ATGCGCTCATACAATAAATCCTAAAAAACCAATACAAAGCATA +; G-cox1-E7 <== start +; G-cox1-I6 <== end + 24076 attgttataggtaataaatcaaattctatgattcataaaccccaactcagatccgtacac + 24136 gcaaatctctaagcatacggctctttaaatctaaaatagcaaattttctatctttcattt + 24196 tcttttaacaataccaaattaataaagcattagatatttacaacatatactaaattaaca + 24256 tttcttctttaatcaacttatagaaacaatctttttatcttttatacgcatattaattaa + 24316 caactgacactttaaaaattctcttatataagatatcaaaagcacattaaaaataaaaaa + 24376 atctcataaatt +; G-cox1-I6-orf676 <== end + 24388 ttatcgtcgcatcaaaaattttttatacagttttcaagaataccaatccaaacttctacg + 24448 aacttcacgttttccaattaacggtaaaaaataataaaaccaattactaagcaacaatca + 24508 aagttttgttttcaatatacttattgaaaatctaccacttgataacacattagaatataa + 24568 gtaacgtattttacatcgtagtgtaacaataccactgtttataggatataagctaaaatt + 24628 accgcaacataaaaacatacttaatataccccaatttactatagtcggaatacgtaccca + 24688 tttaaataagtgtcaataaaaaattcagatataaaaattaaaacaattcctatatgaata + 24748 ctctcatttaaaaatatctaattgagactctaacaacgtaaaattacgttttcaaaaata + 24808 taaacttatcctaaactttaaatttctaatctctgctaacgtacgcaagttagttataat + 24868 aagcctattcccatactgtatatgaaaaattttcttaaaataatcctggcacaaacagta + 24928 attaaatcaattataggaaaaatgcgacaattctattcgtcaattttttattctcctgta + 24988 
attacatatatatataaaatgaaaattcaataaagaataatatcaaccaacctccctaat + 25048 acttattagtaaataatttatgaacacaaaacctaaaatctttattgaaaaagcgcatac + 25108 ccaattagtttttaatttttcatcaacacccagaatataattcacacctaacattcatcg + 25168 aaaatttttaaaatttgatacctttctaaaataatattgtaattttttcgagatatttat + 25228 gcaatttgtaaaccaactaaaatcagcacaactaaaataatttttaaaattaatatcaaa + 25288 tatataaaatttctgaaaaaacccaatacaactttgattatcaatattattatattctga + 25348 taaagatttaattttttgcaaaaaaataatttcgccactttcacttaaaccataattaaa + 25408 tcataaactatactgctgaatatcatagctaattttaaaaattgtaggaaatcattgaca + 25468 attttgtcgaagtgagaatccggaaaaaaacacattatttaatacttctaataagggctc + 25528 aatgttaaattgaactaatttttgcaataacttctcaaatttagaaaattggtaaaaaat + 25588 ttgaaactttttatatttatctataaaaaaataaactaattttaaattctttaagtattg + 25648 acaaaatatagaattagtataaatactattaaataatccacatttatgtgccgtattcaa + 25708 acgcatcaaaaaaattctgtgatgcatcaaaaattttattggtcctttaaaaaaaattca + 25768 ttttttttcaaatgaatctaacattcattttaactttaaattatcataatttaattcatt + 25828 aaaaaaccccaataacatcagcgtgtaccgagccttagttgaaaatattttaaacctaac + 25888 gcgtcaatttacacaaaacttcaaatatctttcatgaatgctattaatattttttataaa + 25948 agaatcttttgaaatttgctcaattatgtaaattttccaaataaatgaattgatattaga + 26008 ttgtattcgtaccaatcaccctttccaaaaaaaattaggtaaatctaataaaacaagaca + 26068 aaatcgccataattgacagcgcttgagaaaaaaaactgaatctataataaatttagaaaa + 26128 aatatctgaaaataatataataattctttgaaataaaaatctaaaattcatatcaatttg + 26188 ctgaacaatccgctgtgcacaccacgcgtaaatcataggtcatatttcttttttaaccaa + 26248 tcaaaatacttctaaaattaattttgattcttctaaaaaacattgatttttaaaagtatt + 26308 tacaaaaatatttttaaaaattgaatgatgatatcttacttgaaaacagctattatatag + 26368 atttcaaaatttattaactttatctaaccgacttttaatcgactgtgccat +; G-cox1-I6-orf676 <== start + 26419 cttattctaaaatacgttttccttttatctttacatgtctaatctcgttacttttaaata + 26479 aacaatacgctatttatattttcttttattttatacattcataaaatttaatctattttc + 26539 taatatattttagcgttcacggaaatatgtgcgaaataactactcatgcaaataaaactt + 26599 ttggttaaatagtatttaaattatttctaaaaattaaccattataaccctaaattttgtt + 26659 
tttatattcaaataaaattaaattgaactgctactatttttagagttgtataaacaccac + 26719 ac +; G-cox1-I6 <== start /group=II +; G-cox1-E6 <== end + 26721 GCATAAACCATACCTAAAAAACCAAAAATACGTTTTTTTGAAAATAATTCTATAGTTTGA + 26781 CTTATTGTACCAAATGCTGGTAAAATTAAAATATATACTTCTGG +; G-cox1-E6 <== start +; G-cox1-I5 <== end + 26825 agtatgatagattacaatatttactaatgtagccccataccgaactgcacaagcaattta + 26885 cactgcaaacagctcttaacaaacaaattataactttt +; G-cox1-I5-orf550 <== end + 26923 ttataaaaaaaatcttttattcaaactataaataataaaatgccaagcaaatcaaataga + 26983 atggctaaataaactaaaagtatacgaaccaaatcaattattcgcttttaaaaaaaaact + 27043 cctaaatatcttacacattttgcataaaaattgaattagtactttacgaaattctataca + 27103 aactttcttattaactaaaatacaactcgatacagaaaaatctatatatatcacaaattt + 27163 taaccttaaacttctcaaaaaactccttactatttgaacatagcgttgaaaaacaaatcc + 27223 taaaaaatatatactaatgtttttataccataccaaccactcaatagttcgaatcactac + 27283 agttaacccacgcgccttagaaaacactttaaaacgttcttgtataaaacttaaacaact + 27343 tttcttcctaataacaacaataaaaacatctttataacgaaccaacaaccataacctctc + 27403 ttctaactgcttgcgtaaatataatgttgaatttaaacaatttcttccttttttttgacc + 27463 gttgagtctctcaatttcaacattaaaaatatcccttaacccatctaatataaaatttac + 27523 taaagaaactccaattctgcttttggaaaaaaatcgatttcaaatataacctcctgtttt + 27583 caaccagaatgacagatttcttaatcaatatattataatatttttaaaaaaatagggcat + 27643 tggaaaattaattcgaatccaattacagccctttgtatcaaaaaaatttacaaaataccc + 27703 gcataaaatatgtttaacttctaataagttagtagaaaacctaaataacctcttttgggt + 27763 acaaatcgtattttctgacaaaatcgtatacaagtgtagaagagcacgcggcgcattccg + 27823 tcctacacaataaccataattatcaaaatcagcatgtacatcaactactggttctaaaag + 27883 ctgcataaacaattcttgcacaattttatcgtaaataaaaaattttactgagaatttact + 27943 aatccgattataatttaactgcttagagtagaaagcaactaaatttttctttgtaatttt + 28003 cgtaaaccaagtgaatttatgcgttgaggcttttagatacagttttggaaccaaatattg + 28063 aaacgacttacttacaatttcaatcgcagctaaacatacatctggctttaaaactcaatc + 28123 cattatcagggattgaactaagattgatcgcataccatgtttataagaaagtaatgatat + 28183 gtatttttgacgtaattttactaattctaaaatttccatgctatagctacgcatgggtca + 28243 
taattttaaacttaacatagtaactaattttggtattaattttttacttttattcataat + 28303 tctgaacctgtattgtagtacaatacctcttatactatgaattaaatagaaacgtttaat + 28363 tctttgctcattataatgacaaatgccaatatgccatttaatatttcatctatcaacata + 28423 tttgctaacagcaacgcttcgatattttccactagaaaaccacagcttaccgtagactac + 28483 aatatgcagtagaaacgtactctttctgaataagctgcttataataaactttgagtaaaa + 28543 cataataccatcgtgtgatccaccctttaacat +; G-cox1-I5-orf550 <== start + 28576 atttacctaattcggaaaaagcttcctttacatatactttggcactaaagtatttgacta + 28636 attacctttttaaaagaaacaaatctaaatgataaatattatataatattaccaaccatt + 28696 catttaaattcgatgtataaaaatgtgtcttccagcagatttttgctctttctaattaca + 28756 aaatgataaagggaatatccactaaaatatattcaaaatgttacgtcgccc +; G-cox1-I5 <== start /group=II +; G-cox1-E5 <== end + 28807 ATGACCAAAAAACCAAAAAAGATGCTGAAATAATACAGGATCACCACCACC +; G-cox1-E5 <== start +; G-cox1-I4 <== end + 28858 atttaaaatagaatctttttactaaaattctttaaaaaaaactgtacttgttacttatta + 28918 acatacagcttaactaaataaatataatttactcatattgaaatacttgctcttaaaatt + 28978 atacaccaccagctaaaagaggattcttttacaagcatcacaattatcttcttttcatag + 29038 aattcttttaaaaaaataattaacaatcattattaaaaaaatttcatgaaattactgtat + 29098 aaatttttaaacacactctcagatactactacaagaaaatctaaattaaattaattaata + 29158 tgaaacgcaaaatttatccaacaataataaatttttcccatattgaaaaatcaaatacaa + 29218 tttttatttattaaaatctctaatcttcattaagtacattactaattataaacaaattac + 29278 aattcaactaactgatatcgtcatctttttcttttttcacacaacataacgactaaatac + 29338 tacaattttaattaaaaaactaatctaaaaattctaaaaccaactaagattataatttta + 29398 atgataaaataacacatcacac +; G-cox1-I4 <== start /group=II(derived) +; G-cox1-E4 <== end + 29420 TGCCGGATCA +; G-cox1-E4 <== start +; G-cox1-I3 <== end + 29430 ttcttgatagaatttattttatttatataaataccccaattaaaacttattaagctaatc + 29490 tcttagcaataagctctttaaattttttcttaaaaaatttccgtaaatacataaattttc + 29550 taacgtatatacaacttttataatctctaaaaaaaaattatttttacccgtttttataag + 29610 taaaacattttattgctcaagtaaacaaaatcttaatttatttcgctttaatcgctaaca + 29670 cactttgtttactactaacaataaaaccgccattattcccatacttaatcttcacacttg + 29730 
taaataagcgaaactaaatctaaaaattttgttattctgcgttgtttaacgttttattta + 29790 cttaaaagataatattatatagctctgaattttttctcaatatcaactaatatatacact + 29850 atatctatttaaattaaacataatcaaaaatttacttctactcaaaaaatacataatttc + 29910 aactaaaaacaacactataatccacatttaaacactaactgcttcgatgtcataaaaaaa + 29970 atttttactttaaaaaagaaaattgaatcggcgcac +; G-cox1-I3 <== start ;; mfannot: no intron type identified +; G-cox1-E3 <== end + 30006 AGAAACGTTGTATTAAAATTTCTATCAGTTAAAAGA +; G-cox1-E3 <== start +; G-cox1-I2 <== end + 30042 taatagttcgatcagttattctttaaattaaccagcgctatttcacacagaaacaagcta + 30102 atttcttagcatatctgcgttccgataattctattgagaaatgttttctaaacaaatgaa + 30162 aacctaaaaaatctataatataaaatttatacacaactgctaattgtttaaaagtattat + 30222 taaattttaatgaatctaaacacacacgcataccaaactcacaaactaaagtaacctaaa + 30282 tcttaatatctaaattttttataattatt +; G-cox1-I2-orf580 <== end + 30311 ttatctatctttaaattctgagacaaaatccataattaaatttgtaccaaaacgaacata + 30371 aacctttttagcagattttagcttataccaatgcgctatcgtcaacgctaaacatctctt + 30431 caaaagataaaaaatttcatataaaataaccgtattactcgtaattttgtaataaagttt + 30491 aatagctatccataatctaccaaaccatcttgtaatagcgtcaatagaacctaaagctaa + 30551 taatttatcacaacgccgagcaacatattttatatgatttgttttgcgcgcaatttgaaa + 30611 aaaacctaattttgtataatacttatataactgagataaaggtactttaaaaaaaatacc + 30671 accagcacaaacttttttaaccaaaccaggcgcagtatttaaaagaattccattttttac + 30731 taagttatatcccaaaaaatgagtacctatcccactacaacaataaattccagttttagc + 30791 agtacatatttgtaaaaaaagtttcgtttcaataaaaaaaacaatttgttgcaatatcaa + 30851 taaagcttcctgttcagagcctataaaatataaaagaatttcgtttgaataacgataata + 30911 atgtaaactactacgcgaataaattcccttagtatgaattctcgtcaacttataaaaata + 30971 tttaacaaacttccttttaatcaatctagaccaaaaacaatttattttaaagaacatgcc + 31031 tcttcaaaaaaatgcagacacaatatctgaaatttcaaatctaccagtcaaacttctatt + 31091 aaattgaggaattaagctgctataaatcattatatctaattcatgtaaacaaatattcaa + 31151 aataaaaaaagaaaaaatatgttttaaaaaaaactttttacaatattttacagtataatt + 31211 attcgaaaacactacaaaattatccttgaaaacttgtattattaattgaattaaagaata + 31271 ttcacaaagtttactatagagaatacaaaataaagattgaacagtaaaaatactatctga 
+ 31331 cattccaacaatattcaagttaattgatcaaattggagatttagttgttgagcgaatacg + 31391 tgataaacaagaaaacacattacgtctataccgaaaaccaaaagaaacattcaaaaatct + 31451 atactcataaatgggccctaataataaaattatagcctgttgaataattacctctgaaag + 31511 atgattaaattcaaacttatcactgtaaaaatttcgtgaattaacaaataacctcttagc + 31571 acaacaatatgtacctaaccgaatactttctgctaagtaaacaatcccacctaaagtagc + 31631 tttcatcggaaaatttgaaatacctcgaattctaatccccctataaacataaatcaaaaa + 31691 attaggatctatgagtaatttaaataaaccactacttttaacagacaaacagcgtttgca + 31751 attaagaacaaaagcattgtattcatataaaatgccaattaacctctcctctgaaaaatg + 31811 ctttcggattttagcagaaactctatttaatttaggtaaaatttcactatataaacgaaa + 31871 accttcccaatataattgatttatctgaggaacttccctcttcgaacaataaaccgttgc + 31931 aataaccatatgctttaaaactcctatgtttttaaacggatgccgataatttagtactcc + 31991 aacctcgcttttacattttaaagaaaattttctttgaactagcattaacttaccaaaatg + 32051 cat +; G-cox1-I2-orf580 <== start + 32054 atcatttcttcttcaagtatattttattttatcataaaataaatgattttagactttatg + 32114 cgattcataaatactacatacatgtatcattccaactacaacaatccagagacacaatgc + 32174 gacacactgatgttgaatttaatataatattaatcaaaaatttataataaaaatatattg + 32234 tgaaattaaaactgaaccgtcccaaatacctcaacgtacccttaaccatttaaaaataac + 32294 tctaaatgcttttttaaacgaaaatcaaaccaatgccacttcgtataaaatttagagcat + 32354 tctaataactcacaagtattaataagtatactttaatagaaattcgcccc +; G-cox1-I2 <== start ;; mfannot: no intron type identified +; G-cox1-E2 <== end + 32404 ATTGTAATTCCTCCTGCAAAAACTGGAAGAGATAATAATAATAAAAATGCAGTGATGAAT + 32464 ACTGATCAAACAAATAAAGGAAGGCGTTGTCAATTCATACCTAACAACCTCATATTAACT + 32524 ATCGTAGTTATAAAATTAATTGCACCTAAAATTGATGAAATTCCTGATAAATGTAAACTA + 32584 AAAATAGCCATATCTACAGACGGTCCTGAGTGCGATTGTTCTGCAGATAATGGAGGATAA + 32644 ACCGTTCACCCGGTACCAGCACCTACTTCCACTAAAGATGAACCCAATAATAACAAAAGA + 32704 GACGGAGGTAGTAATCAAAAGCTTACGTTAT +; G-cox1-E2 <== start +; G-cox1-I1 <== end + 32735 aaaaagtaaaacgttggatagaaaagcccctttttctattttatttaattctaagcccaa + 32795 cagagttctcataaaactttacgtatcaatcccttattataaagcttttttatatatcaa + 32855 atttttaaaattgtacctaactttatattatgttaaataatataagaaccaatacacaat + 32915 
attcaaacagatatacattttttagtttctaccaactggtatcatactatgtataaacat + 32975 tctaacaataatactttaattaagtaaaattcttattcgttgcggtattccaataacttt + 33035 taccatttaccttttatctaaaaaaattattaaactgttgtcttaaaatttccaaaatat + 33095 ttcctcacctgaaaatattataaattaaattattgattaaacaaataaaagtattacgta + 33155 aaatcaaaactgcaaaacacagttttaattacctaaatataccccgtaaagtaactaaat + 33215 ttttagattcaacgctactattcaacttttcaacaaaattagataacaaaatatattaaa + 33275 taaactgaacatataaaatacacatatattacagaatgcattcgcaaccacct +; G-cox1-I1 <== start ;; mfannot: no intron type identified +; G-cox1-E1 <== end + 33328 TTAATCGAGGAAATGCCAT +; G-cox1-E1 <== start +; G-cox1 <== start + 33347 ATCAGGAGCACCAATATAGATAGGAACAAATCAATTTCCATTTGAGATAGAGCATAT +;; mfannot: G-cox1 <== start Def by similarity + 33404 TTAACCAGTTAAAAC +;; mfannot: + 33419 cccctcaaagaaccatatttgcaaagcaatccacacatggctc +;; mfannot: /group=II + 33462 AATATTATACATATCGCTAATGCACAATACTATCTCGAATTTTACAATAAAAAATTAATC + 33522 CGATTATCTACTTCCTTGCGATACAAAATTAAGTAAACAATATTGACTTCCTACTTTTAT + 33582 GCGCTATTAAAAAAATAAATATATTTAACTAAAAACCTTACTATTCTTAACAACAACATA + 33642 AATTATAACCAGTACTTACAATTACATACTTTCACCGCAAATTTACCTTAAAAATATGTA + 33702 AATTTTTTAATTTACTTCTAAATTATAGATATTATTAGCCCCTTTCAGTCTAATTTAAAC + 33762 ATATTTTAACTAAATTCTTAGCAAATCAAAATAGCTTTAAATTTAATCTGCTAAATACTT + 33822 ATTTCTTATTTGTCTCACTATGCATAAAGATTAATTTTTAAAAATATATTTGTATTAAAC + 33882 TAAAATATCTATTTACTTTATCCTTAGCTGCTTAACGTAATTAACCTGAATAATAGATTA + 33942 ATACTAAAAAATACACTAAACAAACACTAAAATACATATCTTTTTGTAACATAAAAAGCC + 34002 ACCAATTAAAACTGGCATAAGCATAAAAAATATTTGAGTCAAATATTTATAGCTTGAATT + 34062 TGCTCATAGAATCAAACATGATAATTGCATACCATTTAACTCCACATTTTTACTTTAAGA + 34122 AGAATTTTAATTTAATAAAAAATACTAAATATTCCTTAAAATTGAAAATTTTTATTTTAA + 34182 TCTACAACAATCATGTATAAATTATGAATGCGCGATAAATCAATCCATTTTTACCCTATT + 34242 TTAAGCGATTTTATCACCGCGTTTTGCCAGAGAATAAAACTCTATCTTATATACTATTAT + 34302 TTACACAAACTCTTCTAACACCATTAAATCAATACATACTAATAATAAATAGAAAAGTTA + 34362 GTTCCAACGCCCCCTGCCAATTTATCCCATATAACTATAAATAAACTAACCAAAACCGAA + 34422 
CTATATATTACAATAATATACAAAATTAAATTAGTTTTATTTTTGAAAACAATGTATCAG + 34482 GTTTGTAATTCTAAACGTAAAAACTTTTTAAAAATCTTTAGCACTTGTTATCTCTTTTCT + 34542 AGCTTTTTCTATGATATAATAAATCCTACAAGTTTTAACTTAATACAAGTCATATCTATT + 34602 ATTAATTAAATTAAAATAGTATATAAATAAAAATAAACTTACTCATAATAAAAGCATGTG + 34662 CCGTAACAACAACGTTATAAAATTGATGATTTCCTAATAAAATCTGGTTTCCTGGGTAAG + 34722 CCAATTCAGCTCGTATTAAAATTGATAACGTTGTACCAATAACACCAGAAAATGCCCCAA + 34782 ACAATAAATATAAAGGTCAGGTAGGTACATATATTTTTT +;; mfannot: + 34821 ccctcctattaatctgtacatgcgttacaacgcatacagctt +;; mfannot: /group=II + 34863 AGTTTAATTTAGAATTTATCAACAGTATAATATAAATAATGATAAAATAATAAATATTAA + 34923 AATCATTACCTTTTACAAAAAATTCAGATCAGATTTCCCTTTATTCAAAAAACACTATCT + 34983 TGAAACACCCAATTATATTTTTCATGAAAATTTTTATATTAAATTCTTGATACCATACAA + 35043 AAAGTTATTAATAAAAATTCAATTTTTCAATTTGGAACCCTTGATTTATTATAAAACCTA + 35103 TATAAAAAAATTAACTAAATTAAAATTTCATATCAAAATTTTTTCATACGGTTTAACATT + 35163 AGTATCCTTATAAATATAATTACAGATACTCCTAAAAAATACTACAAACTATTAAAATTT + 35223 TCTTTAAAAATTAAAAAAATACACAACTGAAAAACACAGTAATCGACACACTTCCAAATT + 35283 GGATAGAATTGGCAACTAAGCAA +;; mfannot: + 35306 atcccccaagtaaactgtacatatgaattaacttcacatacagctt +;; mfannot: /group=II(derived) + 35352 AATTCAGGTTAAAAAAAATTATTATTACCACTGTCCATTTAAAAAAATGCTATATAATAA + 35412 AGAAGTTACAGTACAATTA +; G-orf526 <== end + 35431 TTACATTAACCACTGCAAATATGCTCTAATATTTGAATGAAATGCATGGATTCTACGAAA + 35491 TTTAACAGGAAGAAAAAAACTATAAATTGATAAACAATTTGTATCTACAATAGGCGACCA + 35551 CAAATACACAATCTCAGTATTGTAATTTAAATTACTTTGTAATTTATTTTTAATTAAACC + 35611 TTTAAAAACTCATTTGCGCTTAGAAAATTTAAATGTAGTTTGTAAAAAATATTGATACGC + 35671 TATCATATTTTTTCCCCACTTCGGATGTTTTCTGCGAGCCCAATTTCAACATAATTTAAA + 35731 TAAATAATTATCTAACTTCAAACGATATCAAAATGAATATCCAAAAGAATAATATTGGCA + 35791 TCACCGCATAATTAAAGGATTAACCTGCGTTATTAATTCAAAGGCTGTTTTATGTGTTTG + 35851 ATAATAAAAAATATCATGCAATTGCCTACAAATAACTACAAATTTACTAAACGTTGGAAA + 35911 TAAAATAAAAAAAAACATATTTACATTTTTACAAGTATAACCAAAATCATACCCCAAAAA + 35971 CGATAAATTCTCATTTTTTAATGAAAATAATCTTAAAATATACTGTGTTGCATGTACTCC + 36031 
CCTAAATTGTAAAAAATTAACTATAAATGATCTTAAATTTAAAACTTGTAACCACCATAA + 36091 ATTCCCAATTATAATAAATTCCCCAGCATATCTAATAAACTGAAAAGTATCAATAACCTG + 36151 GCTTAAATTACAATGCTTATTTAAATTACTACTAATAGATAAAGATTTTAAATAAAAAAA + 36211 CCGTCCTCACCGCTTTAATTTTTTCTCCAAATTATTTAAAATAAAATTAATAACAGTATT + 36271 GGTTAAAATTCCATTTACAAAAACCCCACCTTCTGCTGAAGAGGTAGTTAAGGCTTTTCG + 36331 GCGCAATAGGCCGGAACATAATCAATTATGCAATAACGGAACACATCTAATAGGTACCGG + 36391 TAAATATTTTAATATTCACGTAGAAACAGAAAAAGTAAAAAAATTCAAAACATTACATTT + 36451 TAAGATTCCTACTTCTTTTTTAAATTTTGATTGGATAGCAACATAAACATCTGAAATAGC + 36511 TTGCTGCTGTGAACGATGTCTTCGGAATCCGTAATTATTATAATCAGAAATCGATTCCAC + 36571 AATAGGCTCAAGTATTAAATTAAATAAACTTTGAGCTGCACGTTCTTCTAAAGTAACTAC + 36631 ATATGAAACACAAGTTTTTTTTTTGCTAAACTTAGAAAAAATTTTATATTTTACTAATTC + 36691 AAATTTCAAATCTGAAAAAGAATTTAATTTAGCTACTAACTTAAGCTTATCTATATTTCG + 36751 ACAGACAAAAGATTTCCTATTTTGAACTAATTTAATATCTTTACTCTCTACAACACGACG + 36811 AACTGCTACTAACTTAAACACCAAAGACGAAAGTAAATAATTTTGATATTGTTGTACTAC + 36871 TACATGACGTGATCCATATAAAACTGTTAACTTGGCTAAATTCATCTGCCTTAAATAAAC + 36931 AAGTCGCTCAATTCGACCTCAATATTTAGGCCAACTAAAAATTGAATTGTTGTAAATCAA + 36991 AAATCAATTTCATCCTAACAT +; G-orf526 <== start + 37012 ACTTAAATTTAATTTTGCTAAATACGACAAATTACTTAATTATTAATATTTTAAAAGCAT + 37072 AATTTATACTTTATTAGATTACAAAGTATATTATATTTTGTAAAAAAAAATAACTAAAAC + 37132 TTATATTAACTAGTAAATCATAACTGTTACAAAACCCGAAAATCTGAAAAAATCATTTTT + 37192 ACATCAAATTATAACATTTTTTCAAAAATCTTTCTATGCATAAAACAACCTACATTACAC + 37252 CTGTATTTTATCCTCTCTCTTCAAAAATAAAGTATTAAACTATAAAAGTCTAAAATAAAA + 37312 TCCAAGTTAATTACAAAACGCCCATGCATACTTTCTATGATTAGAAAAAAATTAACAATT + 37372 ATTTAAAATAATCGTTACATAAAATACGAATTTTTCTATAATTTACAAACATAATTAAAA + 37432 AAAACGATATCCAATATTTTTTATTTAAAAAAACAAATTACCGCCATATATTACTGCTAT + 37492 TAAAAAAATTCAAATATTAATCGCATTCACATCTAAAAATAAAAATAACTTAATAACAGT + 37552 ACTAGCATAATTACATTTTAATTATAATAGTACAAAATAATCTTCTCGCATATACATTAA + 37612 AATATGCTACAATAAATACACTTCTAACCTCTATAATACAATATCCTTATGATTAGTTGA + 37672 GAAAAATCACCGCATTAATGAAAAATTGCTTGGAATCAAAAACATTAAAAAAAAAATTCA + 37732 
AATAAATAAATAACTATAAACTTATTAACCCAATTCAAAAATAACATAACGA +; G-nad4L <== end + 37784 CTACCCACGTAATCCATAAATAAAATCAAAATCAATATTTTGATGTTTTTTATAAAATAT + 37844 AACAAGAATAGCTAATCCAATAGCAGATTCTGAAGCAGCAACCGTTAAAATCAACAAAGA + 37904 AAAAACCTGCCCTTTTAAATCATCCATAAAAATAGAAAAAAAAATAAAATTTAAACTTGC + 37964 CCCCAATAATAAAATTTCAACTGCCATAATTAATATTATCACATTTTTTCTATTAAGAAC + 38024 TATACCCCATAAACCAATTGCAAACATAAATATTGAAAAAACTAAACACTGAAATGAAAT + 38084 TAACAT +; G-nad4L <== start + 38090 GCTTACGTATTTTTATAAACTAAACATAATATTAGGTAGACCATTATACAAGATCAAAAA + 38150 ACTAGAAACAATTATAATCAAACTAATAGATACTGGGAGAAACGACTTTCAACCTAACTG + 38210 CATTAATTGAGGACAAGTAAGAAAATTT +;; mfannot: + 38238 tcttctttaagaaactgtacatgataattacttatcatacagctt +;; mfannot: /group=II(derived) + 38283 CACTCACATACTTCCTAAAATAAAAAAATAAATTAAAAAAAAATAATAACACTAAAACTA + 38343 ATCTTTTTTTAAAAATATATTCCAATTTAAATCAAAAAAACAACACAAAAGAACAATTCT + 38403 TTTAATGGATTGATTTAATATAACATCTGCTCATTTATTAAATTTAAAATATTTCGCTAA + 38463 AATAGACAACTAACCAATACAATTAAAATAAACATAATTGTATACAAAAATAAAGTTCCT + 38523 AATGATTTGAAAAAAAAATTTTGTTAACAAATTAAACTTTCATAATTTTTAAAGAATTAT + 38583 CACTATATAACATAAAAAATAATAAAAAAACTATTATCAAAATAAAGTAT +; G-orf504 <== end + 38633 TTAAATAAATATTCCATTAACACATTTATAATAAGTTTCTAAATTTCAATTTTTTCTATA + 38693 AAATATAAGTATATCCATTATAAAAAAACTTTTATAAACTATAATAAATTTTTTTATAAA + 38753 ATTAAAAAAATACTTAAGTAACAAAATATCAAAAATCTTATTTTTATACAAAAAAATATT + 38813 ATATGCTAGTAAACAAATATCTAAATTTTGTAAATATCCATATCTCACAAATTTTTTAAT + 38873 TGCTTTATTAAAAACTATCTCTTTCAATAAAGCAATTAAAAAAAACTTACCCACTTTATA + 38933 TATAATAATTCCAAACAATTTAACAAAACACCTAAATACTGCACTTCTCATAAAATCTAT + 38993 ACATGTTTTTTTGCAAAAAATATAATTGAAACAAAATTTACTAAAAACTTTAAATAAACC + 39053 CCCTTTCCGTAACACAGAAATATTCTGGTATTTGATTAATTTTATTTTCTGATTAAAATT + 39113 CAATACCTGCTGTTTTAAAAAAAAAATACGATCAATAAAAGCGTTAATTAAAGTATCCCT + 39173 TATATTTTTAGAATTTAATTGAAAAACACCCAATAAAAAAGAAAAAATTATTTTCTTAAT + 39233 TTTCAAAGCAAACTCCTTATCACCATAAATTCCAATTAAACAATTTTGAGTATTTCGAAT + 39293 ATACTTAACACTAATAAATTCATGGCTATTTATCAAATTGTATTTAACCTTACCAAAACA + 39353 
CTTCGCATAAAAGCATTTTCGCTTTAGTTTATCTATAAAATCATCTAAAATTAATAAAAA + 39413 AAAAGATCTAAATAAATCAGTTGATCTTACATTTAAATGGCAGTTATTTTCAAACTCTAA + 39473 GACTAATTTTTCAGGAAAAATTTCCCTGTTACGCAATTGAACAGTAAATACTCTTCTCCA + 39533 GCTATACATAATATAAGGAAATAAAACTAACAATTCATCTTGTAAATATTTATTTACCCA + 39593 ATCCATTAAAACATTATAATTAAAAAATTTTAAAATTTTTTGAAAATTAAATTTTATATA + 39653 TCAGCGTGAAGCAACTCAGTGAAATTTCAAAGCTTTATAGAAAAATTGATTTCCTACCGT + 39713 CTGGACAATAAAACGGGTTTTTAAAAATAATTGCTTTGTTCAAATTTCCATTAAAATTAT + 39773 ATAAAGACTTTGTTCTATAAACGTATAAATTAAATACAATTCAATTACCTTACAACCTCC + 39833 CATCTTTCATTTAATAGATCTAAATCACCATAAAAATCTGCAATATAATTGATTTCCTAA + 39893 TTTCAAAAAAGTTTCTATTTTTAAACCAAAATAATATTTGTAATACTTATTTTTTACATT + 39953 TCTATATTTCTGGTTGTATACAAACCACAAAAATGTTGGTTGCATTAATAAACCGGATAA + 40013 TAACAACTTAAATTTACCATTTTTCTTTTCTAGGTTAGATAACGTAAAAAAAGGCTCATA + 40073 CTTTCCTAAACAAAAAAAATCCATTTCCCCATATAACACATATAATCAATTTTTAATAAA + 40133 TCGAAAAAAATACAT +; G-orf504 <== start + 40148 ATCTTTACTACTTACACATGTATTTACTTCAACAAAAATACACAAGCACTTTTATTACAA + 40208 TTTCTGTGAGTCTTAGATCTTCTAAAATATATTAATTTTACTTAAACAAAAAATGTATAT + 40268 CAAAACTATCTTCTGAATTCCCTTTTCTTGTTTTTGTTATTATTAAAATCCAAAAATCCT + 40328 CCTTTATTCTATTTACACCTAAATACATTCAAAGTATGTAAAAGAATTTCAAGAAATTAT + 40388 ACTTAACGTAATATATTCACTTATAGTATCCTAAAAATTTTTATTAATAAAACTTTTAAC + 40448 TTCTACTTATATCTATAGCTAACACCTTTATAAAATATTATTTTTTACAATTATATTATC + 40508 ACCTTTCCAAGTCATGATCAAACCATACTTCAAATTCTTTAATTAATAAAAAAAAACTAT + 40568 TTAAAATCAATAAAATTTACATTAAATAGTCTAAACTAAATAACTTCTTATACTAAATTA + 40628 TGAAAATTCTTAAATTTTAGCTTAATATCAATACTCAATATTGATACCTAAAAAATTTTT + 40688 TAAGTCTATAAATAAAAAATAAACTATACGCAATGTCTTCTATCTCGCACTCATAACGTA + 40748 ACCTAGGGTAGGTTGCACGTACTCAAACAAAACAAAATGAAAAAACGGAAATTTTTATAC + 40808 CAAACCACACCAATAATTCAACTGTCATATTAAAAACGGAAAACCATCCTCCACAAAAAA + 40868 ATAAAGAAAGCAATACACACATAACAAGATATTAGTTCGAAACGCTAAAATGTAATTTTA + 40928 ACGTGCGCTAATTAACACATCACATGCGAGTTTCCAAGCATTATGCGTTTCGCTGTTTTA + 40988 CTAAAACTAGAAAAAAATTTGCATTAAAAATTGCGTTAATACGTATTAATAATGCAATTT + 41048 
TTTTATTTCTTAAAAATACTTCTAATTAATAATGCAATAATACATTCTACAAAAAATACA + 41108 TCTATAGTATATACACATAATATACTATTCAATTGATGGAATGGATGCAAAATATTTTGA + 41168 TACAGCAAAAATTTTTCGATTTACTAACTACTTAATTTTAGTTTTAGATATCTACGTATA + 41228 CTGCACTAAACCGTAACATCCATAATTATACTATAATTTTACTTTGTAGATATTTTAATA + 41288 AATTAATATAAAAATACATACGCCATAGGTTTATTTACAACAACACACTCAAATAAATTC + 41348 GTATAATCAACTCTTCTTTAATTATCTGCAACAAATTACTAAATTGAATAATCGCTAAAT + 41408 TAAAATATACATATGCACCTTTCAATATATATGCAACTATAATTAAATACAATTAATATA + 41468 TGTTATCATACACCAACCGTTAAGAATTAACTAAATTAAGCATTTATACCTATGATTAGT + 41528 GAAAAGCTCTAAATATTTAAATAATTTTTAAATACTAAACATTAACCAAAATATTAGCTG + 41588 CCACTGCTAATACAATTATTGATAAAATACCTATTCGCCCCATATTTGAATACTCACCTA + 41648 AAAAAAATAAAGCAAACGCCATCGCTGAATATTCAACAAAATAACCCGCTACTAATTGGT + 41708 AGGCTAAAATCAATTTTAATTCCTGATTTTCCCGTAAAACATGACGAACAATTTTCACTG + 41768 TATCCTGCTTCAATACTATCTACTTTATCTTCGCTGCAATAACTAAATAGATTTTATTTG + 41828 AAATTTTTTTAAAATAAAAATTATATTTGACAAAAAATACCTTTTATATATATTTATTTT + 41888 TAATTTAATTCTTTAAAAAAAAAGAAAATAACAATAAAAAAAATTTAATCTAGTTGCGAA + 41948 TTTAAATAAATACTTATCGGTATATATATTTAGTAGTAACCATTAAAAAACATCATTAAA + 42008 ATCTAAATCAGCCTATAATATAGACTTATCTTATATACCATCCCTGCGATATTACATACT + 42068 CTATATACCAAAACATTTCAAAAACAATTGAATATTCTCTTTATAAATAAAAAAAATAAC + 42128 TAATCGCCAAACCTATCGTAAAATATATATATTGTATGTTATCCACTTAGCCTATTATTA + 42188 ATATTTATAATCTCATTTTAGTATTATAATACAGTATATAAGTTATTTTTGAAAATCTCA + 42248 ATTTCTAATTTTTTTATATATATATTAGAATAAGCAGCCTACAAAAAATACTTAAATTAT + 42308 AATATTACAAAATTAGCCAAATATGCTAATCACCTTAAATACGCATTATATATACTTAAA + 42368 TTTTAATTTTTAAAATTAAAAACATCATCTCTGAAAATTTGTTTTCTATCAGTACTTCAT + 42428 AAAAAATGAAAATGAATTAAACAATTATAAATATTTATTAGCGCAATACCCCTGCTTCAG + 42488 CCTCTGCTAAATCATTCGAGATGGAATTTAAAATCCCCCCAAAGAACCACAAATATTAAC + 42548 CACTTAATATGCAGCTCAGCTTGAAAACTCTCCGTTGAATTAAATAGAAAAACTTAAATA + 42608 ATTGTAACACTGCCTCCTATCATCATTTTCCTTTTTCTACTAAAAATTATTTTTATAACT + 42668 TAATATCTATTTTTTTTCTCCACAGACCAATTTTCCAAGCATAAACAAAAAAAAATAATA + 42728 AATATATAAATATTTTTTTTACTTAAATCAGAATCTTATATACCTAATTTATTATTAACA + 
42788 TAAAATTATATACACTATTACCTAAACTCAATACAAAACTAAATCTTTTTAGAAGCTATA + 42848 TTCTATTTACTTCATTATATATATAAAAACTTCCTTCTTAAGTTTTTTAACTATTTTCTA + 42908 TAAAAAATTTATATAAACCCAAAGTTGAAAACATATATTTTTTTATAAATACCTATACAA + 42968 GTACAGACTAGCCTTCACCCCCCTGTGCACACAAAAAACAACACTTTCATTTTAAGCTAA + 43028 CTCTAATTTAACCGTAATTCATTAAAATATCTTTCCATAATATAAATATTTCCTATAAAA + 43088 ATGTAAAACATTACATTTACATTTTCAACTATCTACAAAAAACTAATATAAATTTTAATC + 43148 TAAAATATACATAAATAATAATACTATAGCATTAAAAAATTCATAAATAATTAATATTAG + 43208 CCTAGTATAACAAAAATTGATAATTAACAACAAATATCAATTTGCTAAAAACTGCACGTT + 43268 TCACTTTCCAATAAACTTCAGA +; G-nad1 <== end + 43290 TTATGAAATGACTATAATCGTAGATAAATCAATATACCGTAAAAAAGGAGCTCGATTTGT + 43350 TTCCGCTAACGCAGAAAAAAAAAATAAAAAAAATTGTGGTCATAAATACCAACAATGTCA + 43410 AAAAAAAAAATTATTCTGATGTAATATCAACTGATACAAATTAGCCGAGCCTACACAAAT + 43470 TAAAACCGAAATTATAATAAAACCAATTGATACTTCATAAGAAATCATCTGTGCAGCCGC + 43530 TCGTAACGAACCTAAAAACGCATATTTAGAATTACTTGACCAACCAGCAAAAATTATACC + 43590 GTAAACACCAAATGATGATATTGCAAGAATAAATAAAACACCAGTTTCTACATCCACCAA + 43650 AGAACCATAATTTGTATATGGTATTAATGATCAACTTGCTAAACTAACAACAAATGTCAA + 43710 CATTGGTGCTAGATTAAATAAAAATCCAGTTGCATTGGTTGGTACAACAAGCTCTTTTAC + 43770 CAATAATTTTAAACCATCGGCTAAAGGCTGAAGCAATCCCCAAATACCAACAACATTAGG + 43830 CCCACGTCTACGCTGCATGCTAGCCATCACTTTACGATCTAACAATGTAAAATACGCAAC + 43890 AGCTATTAAAACACAAACTACTATTAATAAACTATAAATAACAATATAAATAAAATAACT + 43950 AAACAT +; G-nad1 <== start + 43956 CTGTATGTTATACCTCTAATACACAAAATTTATAATCTAAACTGAAACGTTTCAGAACAA + 44016 AATTCAGCATCCTCTATTAATGGGAATATAACTAAGAAAATAAAAAAATACAAACTAGTC + 44076 GCTATTTGACCAATAATCATATAAGGAGTACACTAGAAATCTAATATTT +;; mfannot: + 44125 ccgtgcctaaaactgtagaaacttatcactaagaatacagctc +;; mfannot: /group=II + 44168 CAAAGAAATTTCAATAAAAAATTATTCTGAATTAAAATAATAAATTGTATAAGTTAAAAA + 44228 AACAAATTATAACAGTCAAAAAAACATTAATTTACGTAAAATAAATTTTTAAAATAACAT + 44288 TAAAAAAATATCAATTATTATATAATAATAAACAGATTTGAAATTTCTAATAAAACTATT + 44348 ATTTTTTATTTTTAAAAAACAATTTAAATTTAAAAAAATCTATTACATTTTTAATCTTAT + 44408 
TGGTTTCCAAGAATATACATATTCGGATATAATTTAATCCAAAAAATTCATAACCTACTT + 44468 AAACAATTTACTTTTAAAGTATGCATATAAATTAAAAATTAAGTAACCACCACACCTATA + 44528 TGTAAGGTTATTTTTGTAATTATCCAGTTATTCTCTAAAAAGGCAAATATTTATACTTTT + 44588 TAATCTATATAA +; G-cob <== end +; G-cob-E7 <== end + 44600 CTAAACAAAATCGTTATTGCCACTATATATTCAATTACTAAATTCCAATTTATACTCATA + 44660 CTCAACAGGTTTCCCTCCAATTCAACCTAAAATAAAAAAAACTCCAATTAAAATTCAATA + 44720 CATATTTTTATAAGAAGACTTAAACAGTGCACTACGTACTGAAGAAGTACTATAAAACGG + 44780 TAAAAATAATCAAACTAAAATAGATCCTAGCATAGCTATAACTCCTAACAATTTGT +; G-cob-E7 <== start +; G-cob-I6 <== end + 44836 agtgcacactagactaattctaataatccgtgcctcgaaccggataaatatcttacaaaa + 44896 aatccggctcccaagaacaataaataaaaataataattcttatgcaaatttacttcggtt + 44956 tgcgtataatcatcaaaaaattaactccaccttaacttttaaaaagataattttttaaac + 45016 aatcttaattaaaacttataatttcataaaaaacctgtatattctgctatatcattatac + 45076 caaaatcttttaacaatcaatagactttataattaatacctatgttcccattaaattcct + 45136 tttaattttttttgttagctaataaatttaaaaaacaagctaaatatttcccataacctt + 45196 tacttaactacataaaattaaattttcttcattctaactgaattacggcaaaaaatctac + 45256 aatttcaaaaacatttttaatagttagtagatattatgtatttatacatttctaccaaaa + 45316 atagtccatttcatgcatacctctctaagcttcccactcacttgaaaaattaaactaatt + 45376 ctaataattaaactaataaaaattatcatcaattatctactaattaaaccatatacacaa + 45436 aatttatgtttacccctacattatcttcctattttaatatcaacattgtgtattttattt + 45496 cttccgtatcaaaatcagacaacactc +; G-cob-I6 <== start ;; mfannot: no intron type identified +; G-cob-E6 <== end + 45523 CAGGAATTGAACGCAAAATCGCATAAAAAGGCAAAAA +; G-cob-E6 <== start +; G-cob-I5 <== end + 45560 ctaccagtaagtattattaattttcaactaaaaattcctctggttagaactgtacaagct + 45620 tttcgcaaagcatacagctcttcgtagatttctcctcttaaataaaaaactataatacaa + 45680 aataaacatatcgttttacatgtttttaaaaacaataaatatattcatcaacaacactaa + 45740 agtatccaacgaactctatatttaatgttatactctactgtattaaaagattataaacat + 45800 gcaacctaaattcctttcacaattttttctttacctgataaccactttacaatactaaat + 45860 agcaaatatacaattataataacgtctaattttcaattgattaacaattaattttctaat + 45920 
atttaaataaaattaaaacaaaatactgacatatattcattacttaatttatttatataa + 45980 agcaaata +; G-cob-I5-orf353 <== end + 45988 ttaattatatttcctcctaactttaatccttagtttacctaaacaattaacataataaat + 46048 caatacgcaaccgcttcacgtctcaattaaaaaattataattaaattgcacaaaataaaa + 46108 atttacttctttactaaaatttgttaaactaacatattttgttctaaaacataaacctaa + 46168 caatttatagaaataaactaaacccattaaaccgtaatcaattaaacaaaaatcatcaaa + 46228 atttgaatatacattttgcacaaaaaaatttctatttatagatataacttttatgttatt + 46288 aaatcaattttttatacttctatttaaaactaaaaagaaattacaatataaaattaagaa + 46348 agcaccgtctgtactaaaaactatatataaattattaacaccaaatatccaataaaagca + 46408 taaacttttaattcaacaataaaaacagataaactcaaacaaccaactaaaaaaaataat + 46468 cctaacattttttcgcgttaaaaattctgaagtaaatttaaaaaatctagctaaatgtga + 46528 actttttattttcaaaatatttcaaaaaaatttacaaattagcaaattaatatagacact + 46588 aatccctcttaacccataattatcaatcaataaaacactgactaaattttcctctgaaaa + 46648 aaaaaaaatcttaagtttccttaacaaatacgtgcaagaagacacaaaaaatttattact + 46708 tacaaatttatcaaaaaaattgtagtaataaataatttgatctataacatagaggcgatt + 46768 tctaatatcaccgaaaaaatctctactcttatctggtcataaacaattttgcaaattaaa + 46828 acaaataataaattttcatctaagctcaacaaaacgaataactgctaaacgtaacagtat + 46888 actccacattgatcatcgaccacaatacaaatcgcgcaaaataaagtaaattaatttaat + 46948 atacaatctatcaaaaaacacctttaaaaattttctactaaagaaaacaacattattttt + 47008 tacacaagtaaatcgttttctatctcgcaaattattttccat +; G-cob-I5-orf353 <== start + 47050 acctttttatttttaaaataaaaattataaaataattagacccacttttaaatacttgtt + 47110 ctaaaatacatattatcactatatctattaaaaattttaatacttacgtatatttatact + 47170 tacatattcatttcgatattttatttatttatatctattaaaaatatacgattaattttt + 47230 gtttcataaaaaatattaaaactaaaaaattttgatatccaattttaaaacattatcatt + 47290 aatttagtgataataaaaattttttattaggaatttaatttatttaaaatttaaaatcga + 47350 taaacaaccctaaaacaaataaaaattttaaattctgaaccttcagctatactgctatca + 47410 tatttataaaagtacgcttatgtaactgtaactaaaaattactaaatcaagtccacaaac + 47470 acaaatttaagcactccattc +; G-cob-I5 <== start /group=II ;; mfannot: splice boundaries uncertain +; G-cob-E5 <== end + 47491 
ATACCACTCAGGAACTATATGCGTCGGCGTAACCATCGCATTAGCTT +; G-cob-E5 <== start +; G-cob-I4 <== end + 47538 cattttactagacttgttttaaaatagtctgcaaataaaactgtaaaaacttattaacta + 47598 agaatacagctctaaaaaaatacaaataaaggttaaaaaataaaacttttaattgacatt + 47658 taacccatgatatgataatcaactctcaaattcatatttcaagaaaattttttttagaaa + 47718 ataaaaaataatccagaatgtatatacgaaataaaataacctccctgtctgtatcttccg + 47778 ctttttcataaattttgaaaaagctctcttctatccttaattctaaaaattgatttaact + 47838 ttacaattaaatcccatttgaaaacccgttgttatattaaaatcttttaaactatagcta + 47898 catacacaattataactctaaaacaatattattttctgattaacactaataaatttctaa + 47958 aaatttcatacaaacatactgcatatgaaattaatcaaagctaattaaagctattttttt + 48018 ttcataattttaaaagattatttctatactttaattctctaaatttaataaaaaatatcg + 48078 ttctaccaatttacccccccctattaataattttttttaagtatatccaggttactaagt + 48138 caatagaatatttc +; G-cob-I4 <== start ;; mfannot: no intron type identified +; G-cob-E4 <== end + 48152 CTATGTAATTATCAGAATGCCCTAACACATTCGGAATAAAAAATACAAGTAACGACGCAC + 48212 CGATAAACAATAAAATCAAACTATAAATATCCTTAATATAAGAATACGGATAAAACGGTA + 48272 AATTTTCAACCCTTCAATCTACTCCCAAAGGACTACTAGAGCCTACTAAATGTAAAAGAT + 48332 ATAGATGCACTAATGCTATCGCAGCAATAATAAATGGAATAAGATAGTGTATAGCAAAAA + 48392 ATCTATTTAGAGTCGCA +; G-cob-E4 <== start +; G-cob-I3 <== end + 48409 aacgagtaaagatttatattaaatcctcctcaataaacagtacatggtaattattcacca + 48469 tactgctttaaaataaaaatacattagatattattattcgtacctttaataaactataac + 48529 caattaatctatgacaataaacttgacaaattttaataacaccatctataaaaaataata + 48589 ttctaaattatgtagagttattgataaattcttctatctatatttttactcgtttatttt + 48649 tatactttacgggagtaataccaataacgcaacgcttaatttaaataaaatctttgaatt + 48709 tatactttatagattcacttattcatccatatcataataaatcatgaaaattttttacaa + 48769 aatttaactcataaaagcaacttacttttaaaactagtatttaaggatttgtatatcact + 48829 aatagtatatcattaaaaaatatagaacttatattaaaattttaccaaatttatcgattc + 48889 ttccaatttatatgacagataatgtagtctaattttggatactaaattttaataactatt + 48949 tttttcaattataaaacttaaatttaattcattttttcttttatactaacacaaaacaaa + 49009 ttctattttatcaatccacaattttaaatttaattatttatcctaaaaattatactattc + 49069 
aactttaaccttttcctattgattaaatcaatgctttaagttaactcaaaactgcatata + 49129 tcaaacagtatttcacaccaattcaataaaaaataaatattcccacac +; G-cob-I3 <== start /group=II +; G-cob-E3 <== end + 49177 TTATCAACACTAAAACCACCTCAAAGCCGATAAACTATTGAATTACCTATACCGGGTATC + 49237 GCAGACGCTAAATTCGTTATAACGGTTGCTCCTCAAAATGACATCTGACCCCAAGGAAGA + 49297 ACATATCCTAAAAAAGCTGCAGCCATTGTTAATAAAAAAATGATAACCCCTGAACATCAA + 49357 AGCCATTGCTTTGGATAGGAATAAGAACCATAATATAACCCTTTACCAATATGAATATAT + 49417 AACATAATAAAAAAAATTGACGCACCATTCGCATGAATGTACCGCAACAACCATCCATAA + 49477 TTAA +; G-cob-E3 <== start +; G-cob-I2 <== end + 49481 gacaagtcaataatctactattgctctcgaagctgtacatacacatttacgcgtatacag + 49541 ctttcataagcgttaaataaaattcctaaaataacaaaaat +; G-cob-I2-orf750 <== end + 49582 ttacaaacaatttatactatataaaatatacctaaaacttaatttccaaacaaagaatcc + 49642 aaatttaaaaaatttcctatcaagattttgtatataacaagttgacgggaattgaataac + 49702 tacattaccatcttcattatacacacaaggatcaagcccataatatttacgaacccaatt + 49762 acaattctgacgatgcttaaccgcaagagtcaatacgcaacttttacgtaaaaaaccaac + 49822 tatacgttttacttctagaaaattatcaacacaccgatatttactcattaaacaataagt + 49882 caaaattaaaaatcgcttcaaaatctctgaatctggcattaaaatgtaatagcgattgaa + 49942 tactcctttacaagtaattggatgaacaaattccattttcttaagtttatctaaaatatc + 50002 atctactggagctaataaaactatgcgtttcgcagaaaattgcgaagtcagcgcagtatt + 50062 atttattttatttctgaatcttcaaatactcaaatgtttttgttttttaatagttggcgt + 50122 caacttttgtatcatatggttttttacttcaagctctaacgaaattattttagcaataaa + 50182 atcaaactttgattgctgtgcctgactaattatgtgcctaatgcctgtgtttaaatttcg + 50242 cttaaaaaaaagtgaaataaatttagaattttctacaccacgccgcaatttagttagcat + 50302 taataaaacattttttgcctcttcacgaataaaatattgtttccagtttactaaatttag + 50362 attcataggcgaaagtttttccccctcacctctaaaaacagataaataatttcctaccaa + 50422 gcgtataaaaaatttctttacaatagtatttaccgtaatcaaccgatgtctcacatttga + 50482 agagctaagccctaattgtaaaatattgcgccataaaatttttatcaataaacgttgaat + 50542 actgcttattacattatgagttaaatcaaaatctttaaaaatcccattagagtctatttg + 50602 agccctaacaggtggcaaagcggaagaaggcccaatcgcacatacttttttagcgtcttc + 50662 
tgctttaaccatacaaattttataccctaaaaattcaataaaccctttactacaacatgc + 50722 tagtttatttactcttacaattatgcataaatcactttttaaaaagtgatttatgcagtt + 50782 ctgaataaacataatgaactctttagatccaacactgccaatcaaaatattatctaaaca + 50842 ccgaacatactgaaaataattatgcgtcctccgccgtgttgcacaaaaataaaacataaa + 50902 ataggattttaaaaaccttctagaaattttatgaatttttttgaaaaacctatgtttgtt + 50962 attattagctattagttttatttgatacacattcaataatttaaatttttttaatttaat + 51022 tcttccataaaactgtgtaatcgccttaacaaatgaatctaaatgagacaaataaaaatt + 51082 aagcaaaaaaatcaatattaaactattagaacacatcaaaaaactttcattttgcttcaa + 51142 acaacaagaaacctcactctttactattttttctatttctcttcatatacgataatccaa + 51202 tatatacatttttagcagatttgctaaatgacctaaattaacactattaattataaggtg + 51262 tgcattgatatttaaaaatcaacttgtatgcaaacttcatccttttacgcatcgtaaaat + 51322 aatttgaggcgataaactaattcaattagataataaattaaatcctaacatcgaatttaa + 51382 taacttagaaacgcgatatgaatctcaatctaatgcatctcgattatttttttttaatct + 51442 aaaaaaattaaaaggaaaaattaacttttccatacctaatttattaaaaatagttacaat + 51502 ccctaataaaatagctacctcaattattttaacttttcaatcaaaaccttgatattgttt + 51562 atacgaatttactaccttccattgcttttttaagtagctatagtttccaattaatagagc + 51622 cttgctcgtttttgcaaaccatacaacgggtattcgatctaaagaaagtttttgataacc + 51682 aaaattacctcctgcattcaacaacactaaccgcaactgacatcaggctgtaactaagtt + 51742 ctcactagaaattatatattcatataaactattattattaatttttttttcgcttatggc + 51802 cgactcatttcctttaaaaacactacaaaccat +; G-cob-I2-orf750 <== start + 51835 aaagttttcacaagcctcccccgtaataccaaaaaaaaatacaattatctcaaatctttt + 51895 tagtccttacttgtgtacatatttctgaattcaacgccaataactgatttattactaaag + 51955 tgatatgataataaattctcttaaaatattcaaaaaatttaatcctaatacaattaatca + 52015 tctttacgcataggtcctatttaaaactatactaattgaaattatttctctttgacacct + 52075 cagtactccaacttcaaacttaaccactaaatcatacaactatcacagtaataaaacatt + 52135 atttctcttcgagaaatttttaacaaataatttttttaaataaattcattattcctcgga + 52195 tataatgctgaaaaccaaacgatacccacac +; G-cob-I2 <== start /group=II(derived) +; G-cob-E2 <== end + 52226 CATCACGCATAATATGCTCAACACTACTAAACGCCAATGCTATGTTTGGCGTATAATGCA + 52286 
TTGTTAAAAAAAGCCCCGACAATAATTGTATTACTAAACACATCCCAGCTAATGAC +; G-cob-E2 <== start +; G-cob-I1 <== end + 52342 cgtataagatagaatttataagttccctttaaagaactacacatttaatttaaactattt + 52402 gtagctcaatttattaatttaaataaaatttattaattttttattgtaattcttttgcta + 52462 tatttcttcaaacctacactaaaaaattattatactaaatactataaaattaataaataa + 52522 caccgaatttaatttataattaacttaatttaaaatctatattatatagattttaccatg + 52582 atataataatgtagcacacgttaatttataattttttcgggaatcctttcacaattcaat + 52642 ttaatgtacattaattaaattataaagtaatttttaaatactctcaatcaaaaatatatt + 52702 tgaaaacaaatttatatatgcttagaaaatataaattctaaaaaattaacgttttatgat + 52762 taattatccaaaaaataatattattttaaaattattcattctaagtaatatttgtataaa + 52822 ttaaacctgatccaaacttttaaaccaaaaaataggattatacattcaaacatatattgc + 52882 aattacaggttaattcacatacatatttaaatctcaataaaaatttctaattgaacttta + 52942 ttaca +; G-cob-I1 <== start ;; mfannot: no intron type identified +; G-cob-E1 <== end + 52947 CCAAAACTTCATAAATATGAAATATTTCCTACAACAGGATAATCAACAATATGATTGTTA + 53007 ATTCAACTTCATGCTTTCAT +; G-cob-E1 <== start +; G-cob <== start ;; mfannot: alternative ATG start pos 53041 + 53027 ATGTAAATACCGCATAAAACCGAATTAATATACATTTAACAATATACCTAAAAAAGTACT + 53087 CAATATGCAATACAGCGTTAATCATAATTCATTAAATAAATATAGTTGAACAAGAAATTT + 53147 TATAATTGAGAACCAACTATCGGTAAAAAATATACAAACAATTAATTATAGTTTTATATT + 53207 TAATCTAATAATAAAAATTTAATTTTTAAGTCATAAAAAACCCATCCCTATACATTAATT + 53267 TTAAAACGAAAATCTACCTACATTTAAAAAAATTAATAGTAAACATAATTTCATTAAAAA + 53327 TTTTTGTATAAAAAACAAAAAATTTAACAACCAATAAATAATTATCTGCAAATTACCAAA + 53387 AAACCTAACTAAATTATAGCAAAAAATTTGATTAACCCTAACTAAATATTTTTACCCAAT + 53447 TCAAAAAAACCTTTTAATAATACTTTTAATATAATGGTTTACTTATATAAAATTCATATT + 53507 AATATTGACCCACCCTGATCCTCCTCTACAATATAGACAATGTCTATATATTTATTATAA + 53567 TTTTATAAAAACCAAGATATATTTTATTTTTAATTTTATCAATTAATGTTTATTTAAAAA + 53627 ATCTACTCTAATAATATTTTTATTAGCATTAAATAATATAAAATAAATTAACTTACTAAT + 53687 AATTCACATAAACATCAAAAAACTTGTTTCTTTTAGACGCACAATCTACAAAATTTAATT + 53747 AAAAAATATGCATAAACACAAAAATTTTAATATATAAAATTAAATTCTATTTTTACTTAT + 53807 
GATAATTTTTTCTAGACTACAGCCAGCTTACGTAGATAAACTCATACATAATCTAAACTA + 53867 ATAATAATTTTCCTTTCATACATATCCTTAAACAACAATTATACACCAGTAATAATAAAA + 53927 AATAACAAAAAAAAGCTAACTACTACAATAAACATAAACATATATACTTAGTTAATCTCG + 53987 TAAACGAATAAAAAAATTCAAAAAATCTATTAAATAAAGTAATTAATTATTGACTTTTCT + 54047 CTTAATCTAGTTAACATCACATAAAAACACAGCCAAGGTAAGTATACAATAAATATAAAA + 54107 ATTTACAATAATTTTTAATATTGCTACACAAAATAAACTCTTTTTGACTATTCTTAATAT + 54167 CTCTAGTTACCTGACTAAACTAACTTCTCCTTAATACCTAACTTCAAACTAAATTTATCA + 54227 AACCCCCACACCTATAATCTTCATCATTGCTATAAAACAAAAAAAAATAAACACTAACCA + 54287 AAATTTAAAACTATTTTACAACCAAAAACCTAAAAAGCTAACATATAAAAATTTATATTG + 54347 AAAATGCTTCTAATATATTATTACATTCTTAATAAAAATATTGTTTTTATTAAATTATCT + 54407 ATTATATTTTTACTTGTTTTTTCAATTTATAGTAATTTTTTTAACTTAAAAATAAAAACA + 54467 CCATCTGCCTTTATTAAACATAAAATTCCCTTTAACATCTTAAGTAAAACACATTTACGT + 54527 ACCTAGTACATAAATTTAAACACTTATGTACAAATTCTATATCAATAAACTCTAATATAA + 54587 ATAAATAATTCTTTTAGATATTTCCTAAAACAATAATAACACTATTTTTTTACAACCCAC + 54647 CTAAACCTATTTTAAAATTATGAAAATACGCACTTTAAAATTAATAAACGATTATTTATA + 54707 TATTATAGTAACCATAAAAATACTTACACATATTACAGTATTCAAAATATTTTTATGTTT + 54767 ACATAAAAATATTTTGAATACTGTAAGCATATATATATCACTAACATATTAACATTCTAC + 54827 ACAATTTTTAAACATATATACGTATTTACTCCAAAATCTAAACGGCAATAAATATTACTT + 54887 CTATATATCTACAAATATAGTACAAGTAAACATATATAAAAAAAGCATAAAAACATGCAT + 54947 TCAAAAAAG +; G-rpl5 <== end + 54956 TTACGAAACTGAAACTCGACAAGGAATTTTATAACTGATTAACAAAGAATGAAAAGCCTG + 55016 GTACACAGTTCCAACCGTTCCGTATACGTTTATATTGTAAACGAATGAATCTTCAAATTT + 55076 TAATACTTGCGAAAGAAAATCATCATCTTGTATTCTTTGAACAATTTTAAGCAAAAAATT + 55136 TGGTTTTAAACTATTCAATTTAATAGGATAAAACTGTTGACTCAAAGGAAGTTGCCTTGC + 55196 AAAAAAAAACTCCGATATTCAAATACTATGACGCAACGTTAGCCAAAGCCCACTCAACTT + 55256 CTTTTTCTTCAAGCCACGAATACTATGAGTACGTATTAAAATTTGTGGTTTTTGTCCGGT + 55316 TGTTAAGTATAGCAAAACTAATAACCGGTAAAAATTATTAGTTACCTTAAGATCTGATAA + 55376 AAATTTTGAATAAATTACAATAGAATCCAATTTTGGGCAATTATAAATATTACTTAATAA + 55436 AAATTTATCAAATAAAAAAATTTGAGTAAATATAGATTTATATAATAAAATTTTAGACTC + 55496 AATAGGTCGCAT +; G-rpl5 <== 
start + 55508 ATAACTAAAATATTTATA +; G-rpl14 <== end + 55526 CTATACTAACTTACTAACAACTGAGGCTAATCTCATAAACAATCCACAGCGAATTTCTTT + 55586 TAACGCAGGTCCAAATACACGCGTACCTAAAAGTTTTTTTGTTTCCGATAAAACAATACC + 55646 TCGTGTCTCATCAAAACGTATACGAATACCATTTTTTCTACTGATATTTCTCTTAACAGT + 55706 TACGATTAAAGCCAAACATCTCTGTTTTTTTTGAACTTTACGACCTACTCGATATCTAAA + 55766 TATTGATCCTAATACCAATTCTCCAACCTTACTATAATTTTGTATCAAAGAATACCCAAA + 55826 TAAATGAAATATTCTAATCAATTTAGCTCCCGAATTATCAACGATTTTTAACTTAGTTTG + 55886 TTTTCTAATCAT +; G-rpl14 <== start + 55898 TTATCAAATAAATACACTTATATTATAAAGCACCCTTCTAAACACCACCAATCTAACAGA + 55958 ACATGATTAAAGAATATTTAACCAGTCAATTCCAACAATAATCCTACTTAACAGCACAAA + 56018 CCTAATCTTTATAATTTTATTAATAAAAATTATATTTTAGTAGCTAAACCATTAAAATCA + 56078 TCCTCACAATTCAACTAATAGTTTTAAAAGCTAAAGAACATGCCGAATACCTAACGCAAG + 56138 GAGATACTTTTTATAAAATTTTATTTACTT +; G-atp8 <== end + 56168 TTATGTAAAAAAAATTTCAGACACACATGCCTGTCTTAATAGCACAACACTATAATTCTT + 56228 GCTATACTTCTGCTCAATAAAATCCCACTGCTTTTGCTTATAATGTTTTCAGTTCAATTC + 56288 TTTATTAAATAATAAAGAATTACCGGTTATATTTAAATTTTTAAAATGTACTAAAAAATT + 56348 ATAAATAAAATTATTTACAAAAACACTGCGTTCATTTAAAACACCACAACCAATAGAATT + 56408 ATAAATCTCACGAAGTTTAAATAACTTACTAAAATCTAACAAATAATACTTTCATAAAAT + 56468 TAAAAAAAAAAAATAATAAAACAAAATTGTTCAAAAAACTTGAGAAAATACGGTTACTTT + 56528 ATCTAATTGTGGCAT +; G-atp8 <== start + 56543 CAATTTACATTAACAAAAATACAACTAGTTTAAGGCAAAGTTTTCTAGTATACGTCACAA + 56603 CCATAATTTTTCAAATACAAAAACATAAATAGAATTTTTTAAAAAATAATGAACCATACA + 56663 TCATCTGATACCCCTAAACCTTTATCTTTTTTAATTTATAATAGTTTTATTTAGCAAAAC + 56723 TAATATATTGAAAATATTTTGCTAACAGCCACGAAATAGTGCATTTAATTTAATCATAAA + 56783 AATACCTTCCTAATATTTTAGAAACAACCTTAACTAGTAAAATTCTATGAATATTTTTAG + 56843 TATTAATTTTAATGGCTGTTCAAAATAACCAACAATATAATCAAAATCATATCTAACAAA + 56903 TAAAAATATTTAAATTTTAATTTGGAATTTGCACTTTAAACTGAAATCAATCTGTACTTC + 56963 ACTTTTATACAATATTTTCTCGTAGTACATAAAACTCCCACACTACTAACCGTTAGCTAT + 57023 AGCATTTAATTACAATCTATAATTTACTCCTATACCGTAGTAACTCTTTCTAATATTAAA + 57083 AAATCAAACTATACCCCAACTTTTTTATTAATGTTTAAATAATCTTTTTTAAAGTAAAAC + 57143 
AGTAAAGGCTTTGATAAGTTTATACAATCTTAAAGCTAATTTAAATTATTCAATTTCTAT + 57203 ATAGTATTAAATTAAAAATACTTTTCATAAAATGAATACGAAAATCGTTGCCATAATATG + 57263 TACACGAAAATTATTTATCACAAATTAATAAACCG +; G-orf241 <== end + 57298 TTAGATGTTATTTCTACTAGTAGTAAAAGGCGGTATTAATCCTAAACCATGAAAATTAAT + 57358 CAATTTCAATCGAATTTGTAATAAATTAAAAATTTGTAAAAAATTAACCGTTTTCTGATA + 57418 CTCAGACATCGCTAAAAAAAATATTTTACCAAAATTAACGACCCGAAAATAATCATTTCG + 57478 TAAAGTAAATTTTTTCAAAAAACTTCACTTATAACGATAAAAAAGTTTTTTTACAAAAAA + 57538 TATTACAGGCCTATCTAGAAAACTAAACCAAGAAATTACTTTTGTTAGATATTCTAACAT + 57598 GTAAAATGCCAAAGTAGCATTAATAACCAATGCTAAAATGTTTTCCCTCAAATTAGTAAA + 57658 TACTACATTAAATAAATCAGCCTCAACATCTTCATCCTCTATTTCATTTTTATTTTCCTC + 57718 TAATGGGCGAATAAGTGTTTCTTTTCAAGTACTATTTAAATACATTGATAAAGGTAAAAC + 57778 CCCAACAACACGAACATAAGATGAATAAATTTTAGGAAAAACTACTATATTCAATATACA + 57838 ATAACGCGTAAATAAAAACTTCAAAAAAGAAAAAATATATTTAGTTTTATCAACAAAAAC + 57898 ATCAAAAGTACATATACTTAATTTGCAAGATTCGTATTCCTCTTGATATATCAATTTTCT + 57958 TGATTCAAATTGAAGTTTTCGTTTCTTAATTTTCTTATGCCCTCCAAATCAACTTCGCTT + 58018 TTTCAT +; G-orf241 <== start +; G-rpl16 <== end + 58024 TTATTTACAAATACAATAAACCTGAATTGGTAGTTTTCGTGCAATACTTTTCAACAGTAA + 58084 ACGAGCCTGATTTGATGGCAATCCAGATACTTCACATAGCACAAAACCAGCCTTAACTTT + 58144 ACATACCCAATCGTCTATGTACCCCTTACCTTTTCCCATACGTACCTCAAGCGGCTTAGC + 58204 TGTTATAGCTTGGTGAGGAAATACACGTATTCAATACTGACCAATTCGCTTAGTACTCTT + 58264 AGACAAATTTAACTTAACCATTTCTAACTGCTTAGATGTAATATAACCATTCTTTTTAGC + 58324 TTTTAAACCAAAATTGCCAAAATCCAAATTTAAAAAACGTGTTGCTAAATTTTTAATCTT + 58384 CTTTTTCTGAAATTTTATAAACTTAGTTTTTTTAGGAATAATTCCAACCAT +; G-rpl16 <== start + 58435 TTTCTCAACAAA +; G-orf327 <== end +; G-rps3 <== end + 58447 TTAATATATTCAAATTTTAACTCCTATAATTCCTGCCCGTGTAATAGCTTCAGCAAATCC + 58507 ATATCCCAGTATTAAAGGACTATTTTTAGAAGCAACAGAACCAATTTGTATATGTTTAGT + 58567 ACGAGCACGGCTAAAACCATTTAGTTTACCTGCTAATAAAATCTTTATCCCTTTAAACCT + 58627 AAAATATTTTCAAATCTCCTTCAATACTTTTGACAAAAATGATAAAAAAGATAAATGTTT + 58687 ATATATTTTAGACAAAATTGGAGCAATATAATTTGCAATTAATTTCGCATCCGGAATTTT + 58747 
ATATTTTAGCGCATGCACTAAAAATACTAACGTATTCCGATATATTCCAACATCTTTATT + 58807 AAAACGTAAACGGAAGCGCTTGAAACTATCTGAAACAACTCGTAATAACGTTGACAAATT + 58867 TTTTTTTCATTCTTTATAACCTACAACACATATATTTACGAATTTGTAAAATATCTGCCT + 58927 CTTTAGCAAAAACTCTAAAACGTATAAAAACATTATACTACGTAATTGCACTCGGCATTT + 58987 ATGAATTGGCTTCATAACAGTTCCTAGTGTAGCTAAAATCTTTTTTCTATCCTTACGTTT + 59047 TTTAGTTTTCTTACTTTTATATTGAAAATTTTTCTTGCGCTTCTCTTCTTCCCTTATTGG + 59107 ATAAAAAAATAAGAAATTTACGTATAATTTCCCTAAAACTCAAAATAAACGCACCGTACC + 59167 CACTACTCTACGTTTTCGAGCTTTTGTTCAACGTGAAAGAAAAAAATTAACATACTTACG + 59227 AACAAAAATCTCTTCATAAATATGCCTACAATACTCTAAAGATTTCTTAACAAATCAATT + 59287 TGATTTGAATAAAAATTTTTGATAAATAACTTTATGT +; G-rps19 <== end + 59324 TTACCGTTTCGCTTTATATACAT +; G-rps3 <== start + 59347 GCAATTTTCGAGTAAACACAAAACATCCAAATTTATACCCAACCATTCCTGGAGAAATAC + 59407 ACAAATCAAAAAATCTACATCCAT +; G-orf327 <== start + 59431 TATAAATTTTAATCCTAACCTGAATAAAATCTGGCAAAATCATACTATCTTTACGTTTTA + 59491 AAAAAATAATTTTATTACTTTTGTTCTCACCATAAAAACTTGAATAAACTTTTTGCGTTA + 59551 TAAAAGGCCCTTTTCAAATTGCTCTCAT +; G-rps19 <== start + 59579 ATTTATATATTATTTATTACCTAAGCAAGCATTTTACAAAATGCGATACAGCATTTCCAA + 59639 AAATACATATTAAAATTTCAATCATAAACCAAAAAAAGGGAGGGGGGGGGTAAGGCGTTT + 59699 CAACAATTCCAGCCTTCGCAGAACAAAAACGCATCACATTTATCTGCGAAGCACTTTTAT + 59759 GGAACAGTGAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCGCTTAGGCAAT + 59819 TCTTTTAGAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAAA + 59879 TTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATTT + 59939 TAAGCGTAAACCAAGGGGGGGGTAAGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAA + 59999 ACGCATCACATTTATCTGCGAAGCACTTTTATGGAACAGTGAATAAATCACAGCAAAACG + 60059 CATGGATTTAGGTGTAAAATCGCTTAGGCAATTCTTTTAGAATACAACCGCGTAAAATTT + 60119 CCTTAGCGATTCTGTGCGACGATGCATAAAAATTGGAGGGAATTTTGCACCTACGCCAGA + 60179 GCAAGCATTTCCAGAAATGCATATTAGAATTTTAAGCGTAAACCAAGGGGGGGGGGGGTA + 60239 AGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAAACGCATCACACTTATCTGCGAAGA + 60299 ACTTTTATGGAACAGTGAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCGCT + 60359 
TAGGCAATTCTTTTAGAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATG + 60419 CATAAAAATTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATAT + 60479 TAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGGGGTAAGGCGTTTCAACAATTCCAGC + 60539 CTTCGCAGAACAAAAACGCATCACATTTATCTGCGAAGCACTTTTATGGAACAGTGAATA + 60599 AATCACAGCAAAACGCATAGGTTTGGGTGTAAAATCGCTTAGGCAATTCTTTTAGAATAC + 60659 AACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAAATTGGAGGGAATTT + 60719 TGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATTTTAAGCGTAAATCA + 60779 AGGAGGGGGGGTAAGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAAACGCATCACAC + 60839 TTATCTGCGAAGAACTTTTATGGAACAGTGAATAAATCACAGCAAAACGCATAGATTTAG + 60899 GTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCGTAAAATTTCCTTAGCGATT + 60959 CTGTGCGACGATGCATAAAAATTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTC + 61019 CAGAAATGCATATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGTAAGGCGTTTCAA + 61079 CAATTCCAGCCTTCGCAGAACAAAAACGCATCACACTTATCTGCGAAGAACTTTTATGGA + 61139 ACAGTGAATAAATCACAGCAAAACGCATAGATTTAGGTGTAAAATCGCTTAGGCAATTCT + 61199 TTTAGAACACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAAATTG + 61259 GAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATTTTAA + 61319 GCGTAAATCAAGGAGGGGGGTACGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAAAC + 61379 GCATCACACTTATCTGCGAAGCATTTTTATGGAACAATAAATAAATCACAGCAAAACGCA + 61439 TGGATTTAGGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCGTAAAATTTCC + 61499 TTAGCGATTCTGTGCGACAATGCATAAAAATTAGAGGGAACTTTGCACCTACGCCAGAGC + 61559 AAGCATTTCCAGAAATACATATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGGGGT + 61619 AAACGTTTCGATAACTTCAGCCACCACAGAACAAAAACGCATCAAATTTATCTGCGAAGC + 61679 ACTTTTACGGAACAATAAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCGCT + 61739 TAGGCAATTACTTTAAAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACAATG + 61799 CATAAAAATTAGAGGGAACTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATACATAT + 61859 TAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGTAAACGTTTCGATAACTTCAGCCACCA + 61919 CAGAACAAAAACGCATCAAATTTATCTGCGAAGCACTTTTACGGAACAATAAATAAATCA + 61979 CAGCAAAACGCATAGATTTAGGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCG + 62039 CGTAAAATTTCCTTAGCGATTCTGTGCGACAATGCATAAAAATTAGAGGGAACTTTGCAC + 
62099 CTACGCCAGAGCAAGCATTTCCAGAAATACATATTAGAATTTTAAGCGTAAATCAAGGAG + 62159 GGGGGTGGGGTAAACGTTTCGATAACTTCAGCCACCACAGAACAAAAACGCATCAAATTT + 62219 ATCTGCGAAGCACTTTTACGGAACAATAAGTAAATCACAGCAAAACGCATAGATTTAGGT + 62279 GTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCATAAAATTTCCTTAGCAATGAC + 62339 GCATAAAAATTAGAGGGAACTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATACATA + 62399 TTAGAATT +; G-orf784 <== end + 62407 TTAAGCGTAAATCAAGGAGGGGGGGGGTGGGGTAAACGTTTCGATAACTTCAGCCACCAC + 62467 AGAACAAAAGCACACTT +; G-orf736 <== end + 62484 TTAGGGGCAGCAA +; G-orf767 ==> start + 62497 ATGCCAGA +; G-orf761 ==> start + 62505 ATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGT + 62565 TATGCAAATCAAAA +; G-orf735 ==> start + 62579 ATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGT + 62639 TGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCC + 62699 CCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTA + 62759 CAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAG + 62819 GGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCAT + 62879 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 62939 TGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAG + 62999 AATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAG + 63059 TTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGA + 63119 GAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCT + 63179 CGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAA + 63239 ATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGT + 63299 TGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCC + 63359 CCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTA + 63419 CAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAG + 63479 GGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCAT + 63539 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 63599 TGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAG + 63659 
AATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAG + 63719 TTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGA + 63779 GAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCT + 63839 CGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAA + 63899 ATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAAT + 63959 TGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCC + 64019 CCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTA + 64079 CAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAG + 64139 GGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCAT + 64199 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 64259 TGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAG + 64319 AATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAG + 64379 TTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGA + 64439 GAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCT + 64499 CGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAA + 64559 ATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGT + 64619 TGCCACCCTCCCCAGTGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCC + 64679 CCCCTCGGCACGGCAT +; G-orf736 <== start + 64695 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 64755 TGCCCAT +; G-orf784 <== start + 64762 TTTGGCGAGAAAATTTGTACGTTAA +; G-orf735 ==> end + 64787 CTAG +; G-orf761 ==> end + 64791 TTTATGGTAA +; G-orf767 ==> end + 64801 TATATATAGTATAAACATTAATAATATTTATAATATATGTATACATTATACTTAATATAT + 64861 ATAGTATAAACATTAATAATATTTATAACATATGTATACATTATACTTAATATATATAGT + 64921 ATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATTATACTT + 64981 AATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTT + 65041 AACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATAT + 65101 ATGTATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAG + 65161 ACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATACATT + 65221 ATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATACA + 
65281 TATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTA + 65341 TAATATATGTATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATA + 65401 GTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATA + 65461 AACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATACAT + 65521 TATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATAC + 65581 ATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATG + 65641 TATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACA + 65701 TTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAAT + 65761 AATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATAT + 65821 TTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATA + 65881 ATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATA + 65941 TGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTAT + 66001 ACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATT + 66061 ATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACT + 66121 TAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATA + 66181 TATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATAT + 66241 AGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTAT + 66301 AGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACA + 66361 TTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAAT + 66421 AATATTTATAATATATGTATAATGTATACATATGTATACATTATACTTAATATATATAGT + 66481 ATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATA + 66541 CATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTA + 66601 TACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATA + 66661 TTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTAT + 66721 AATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATAT + 66781 ATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTA + 66841 TACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACAT + 66901 TATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATAC + 66961 TTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAAT 
+ 67021 ATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATA + 67081 TAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTA + 67141 TAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGAC + 67201 ATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAA + 67261 TAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATA + 67321 TTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTAT + 67381 AATATATGTATAATGTATCATA +; G-orf1511 <== end + 67403 TTACCATAAA +; G-orf1486 <== end + 67413 CTAG +; G-orf1472 <== end + 67417 TTAACGTACAAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCAC + 67477 ATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGCGGCGCGCGC + 67537 GAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTT + 67597 CTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATA + 67657 ACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACAT + 67717 TCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCCAAAATGGGC + 67777 ACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATA + 67837 TATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCC + 67897 CTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTG + 67957 TAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGG + 68017 GGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAA + 68077 CTCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATT + 68137 TTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGA + 68197 GCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCT + 68257 CGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAAC + 68317 TTTTTCGAGTATAT +; G-orf589 ==> start + 68331 ATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 68391 TGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGT + 68451 AGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATAT +; G-orf699 ==> start + 68495 ATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 68555 TGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGT + 68615 
AGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGG + 68675 GGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAA + 68735 CTCCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATT + 68795 TTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGA + 68855 GCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCT + 68915 TGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAAC + 68975 TTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTC + 69035 TGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCAC + 69095 GTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATA + 69155 TGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 69215 TGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGT + 69275 AGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGG + 69335 GGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAA + 69395 CTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATT + 69455 TTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGA + 69515 GCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCT + 69575 CGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAAC + 69635 TTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTC + 69695 TGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTCGCCAAAATGGGCAC + 69755 GTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATA + 69815 TGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 69875 TGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGT + 69935 AGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATAT +; G-orf370 ==> start + 69979 ATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 70039 TGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGT + 70099 AG +; G-orf589 ==> end + 70101 CACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGG + 70161 GCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATT + 70221 CCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTT + 70281 
TGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGC + 70341 TTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCG + 70401 CCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTT + 70461 TTTCGAGTATATATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGG + 70521 CATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTC + 70581 TGAAAGATCTGTAG +; G-orf699 ==> end + 70595 CACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGG + 70655 GCGGCGCGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGC + 70715 AACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACA + 70775 TTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGGGCGCGCGCG + 70835 AGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTC + 70895 TTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAA + 70955 CTTTTTCGAGTATATATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTC + 71015 TGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCAC + 71075 GTCTGAAAGATCTGTAG +; G-orf370 ==> end + 71092 CACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGG + 71152 GGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAAC + 71212 TCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTT + 71272 TTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGC + 71332 TTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTCG + 71392 CCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTT + 71452 TTTCGAGTATATATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGG + 71512 CATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCCAAAATGGGCACGTC + 71572 TGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGC + 71632 CGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGG + 71692 GAGGGTGGCAACTCCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCA + 71752 CTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGC + 71812 GGCGCGCGCGAGCTTTTCATACAT +; G-orf1472 <== start + 71836 TCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCAT +; G-orf1486 <== start ;; mfannot: GTG upstream: 71924 + 71874 
GAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTGATT + 71934 TGCAT +; G-orf1511 <== start + 71939 AACTTTTTCGAGTATACGTTATTATAAGTTATATTTAGAAATGATATAAATTTTCTAGCG + 71999 GTGGTTAATAACGACCAACATTATATACATTTTTATTTTTGTTAATAGCAGTTAAGTATT + 72059 GGTTAGAAAATAATATAATTTTTAATTTTCTTTAA +; G-trnW(uca)_1 ==> start + 72094 AGGGAGATAGTTTAACGGTAAAATATCGATCT!TCA!ACATCGAGGTTATAGGTTCAAAT + 72152 CCTTTTCTCCCTG +; G-trnW(uca)_1 ==> end + 72165 AGATTTTTTAAGGT +; G-rps13_1 ==> start + 72179 ATGAAAACATCAATTCAATTTTTTAATTTACAGTTTTTGATTGAAAAAAAATTATTAATT + 72239 TCGTTAACGCAAATTTTTGGCATTGGTTTTTACTCTGCTATAGTAATTTGCAAAAAATTT + 72299 GGTTTTAATAAAAATACATATATTAAGAGTGTGGATGTAAGGATTGTAAATGCAATGCGT + 72359 AACTTTATTTTGGATAAATTTGTTGTTCAAGAACAACTGAAAGAGCAGATTCAGGTATCT + 72419 ATAGTAGAGTTGGACACTATAAAGAGTATTAGAGGGTTTCGGCATAAATTGTGTTTACCT + 72479 GTTCATGGACAGCGAACTAAAACTAATCGGCGTACTCAACGTAAATTTAAAAGAATGCAG + 72539 AGTAAATTATGGGAAGAGGATTCAACACATATTCGTTAA +; G-rps13_1 ==> end + 72578 AACATAAATTTCAGTTTAGAAAATTACGTAGAACCCTTTTATCCTTTCGAAAGAGATCTT + 72638 GTATTCTAAATATTAAAATTACATTGAATAA +;; G-rps11 ==> start ;; First ATG found at 72867 HMMmatch = 60,139 + 72669 CATATATTTAACTTTATCTGATTGATTTGGTCAAATTATTATGGTGAAATCTGGTGGGTT + 72729 ATTAAAATTGCCGGTTCCGGTAGAAATACGAATTATGCCTTAGAGCTTTTAATATTAGAT + 72789 GCTATTAAGCAATTAACTTTGTTAAATACAAAACATATTGTTTTAAAGTTTGATCATCGT + 72849 GTTTTAAGGAAAAAGAAAATGATTTTAAAGTTATTAAAAAAATTTAATATTAAAATTTTT + 72909 CTTATACGATTAATTATGTGTAAAGTTCATAATGGAATTACATTAGCTAAAAAACGGCGG + 72969 GTTTAA +;; G-rps11 ==> end + 72975 GTTATC +; G-rps14_1 ==> start + 72981 ATGTTGCGTAAGGTTATTTTTGAGTCAAATACCAGATATACATTTAAGTATTTTGAGATT + 73041 AAACAAAGAATTATAAAATCGTTATCAAAAAATTTATACTTGCCTATATTAGTTCGACGT + 73101 AAATTGTTGTGGCAATTAGATAAATTATCTTTATTATCATCTTTAATTTATGTAAAAAAT + 73161 CGATGTGTTGTTTCTGGTCGTGCTAAATCGATTTATAAATTTTTTAATTTATCTAGAATT + 73221 GTTATAAAAAAATTTTTTAGATTAGGTTATATACCTGGTTTAAATAGATCAAGTTGGTAA +; G-rps14_1 ==> end + 73281 TTTAGTAATATAAAATAAAAGTTTATTG +; G-rps8_1 ==> start + 73309 
ATGGTTAAATTAGGACAATTTATTTCAATTTTAAATTTTAATATTAAAGCAGGAAAGTCT + 73369 TTTTTTGTAATAGTTAAAACAAGGATAATTTTGGATATTGTAAAAATCTTGATTGAGCAA + 73429 AATTACATTCTTGGTTATACGGATTTAAAAGAAAATGGTGATAAAATTATTGTGTTTTTT + 73489 AAGTTAGATTTTGCGAAAAGTAATAGCCTTTTACTTAAGGGATGTAAATTTGCATTATAT + 73549 AAAAATAGATTTACAAGTATTGGTGCCAATAATATAGTGAATAACTCGTCGTTGGTACTT + 73609 GTGTCTACTGTGAAGGGCGTTATGACTCAGTTGGAGGCTAAAAAACTTCGACTTGGGTAT + 73669 TATCTTGTGTTATATAATATAAAATTGTATAAAAAAATA +; G-rpl6_1 ==> start + 73708 ATGAGAGCTAAATTTATTTATCAAATTTTTAATAGGTTGTTTATCTATATATTTCAACAC + 73768 AATAAATTACTGTATATTCGAGGCCCTCTGGGTTTACTACGCTATAATGTTCCCAGTGGC + 73828 ATTGATATTTGTAAATATCGGTCAATGGTGTATATTTCTGGACAAAAAGCTGCCCACCCT + 73888 TTAGTTGCAATGTCACATAGAATAGTTTGCCAGAAAATGAAAGGGCTTGAGGTTGGTTTT + 73948 TCTGAAATTATGATAATTGCTGGTATGGGTTGGCGCGTTGATAAAGAAGACGTTTTATTA + 74008 AAATTTACAATTGGTTATAGTCATATTGTACATTATCTGATTCCGAATGATATTGAAATT + 74068 GTTTTACTTAGTAAAAATCTTTTTAAGATTTTTGGTTCTGATTTGAGTCGAATTCAGTGC + 74128 ATTGCGTCCGAATTGTGCAAACTGCGTTCATCTGATGTGTATAAAGGTAAAGGAATTCGT + 74188 CGTCAAGCTTTTAAAGTAGTTTTAAAATCAAGTACTAAATCGAAAGTTTAA +; G-rps8_1 ==> end +; G-rpl6_1 ==> end + 74239 TTTATGAAGAAAGTAAGCAGTGTTTTTATATTTTATTGTTTTTTAAATT +; G-rps12_1 ==> start + 74288 ATGGTTACAATTAATCAATTAATTCGATTAAGTCATCCTACTAAAAATAGGAAAAATACG + 74348 GTGCCCGCTTTAGACAGTAGTCCATACAAGAAGGGTGTTTGTTTAAGAGTATTTACGATG + 74408 ACTCCTAAAAAACCAAATTCCGCATTACGAAAAGTTGCCCGCATTAGATTATCAAATGGA + 74468 TATAAAATAACGGCGCATATCCCTGGTGAAGGTCACAATTTACAAGAATATTCGATTGTA + 74528 TTAGTACGTGGTGGGCGTGCTCGCGATTTGCCTAGTGTTCGATATAAAGTTGTTAGAGGT + 74588 AAATACGAT +; G-orf106 <== end + 74597 TTAGAACCTGTACGTAATAGGAGAACTCGGCGATCTAAATATGGTATTAAAAAAATATAA +; G-rps12_1 ==> end + 74657 AAATTGATTGATTGCGTCTGAGAGTATACATAAGTATTTGTTGTGGGTTAATGTTGAATG + 74717 GAAAAGTGTCTCAATTAGAAAAAATTGTTTTTTTTGTTTTCGAGACTTAAAATATAAGTT + 74777 TAATATGGATTCGTTGTTCTCTGTTTTTATATGTTGTAGACGAGATAATGCCTTATATAG + 74837 AGCTTCGTACGTTAAGGTTAGGGAGTGTTTTTTATCGAATACCAAAGCCTCTTCGAAAAG + 74897 TAAGCAGTTAAATTGTGGCAT +; G-orf106 
<== start + 74918 TAAGCTGTTAGCCAAAACTGTCTAATTTAAACTTGTGTGTACGCAATGTAGCGGCGCTAT + 74978 AAAAAATACAACAGGAAATTTTAGCTGTTCTTCAAAAGAAAAGTTTACTTTTTAAGCAAA + 75038 ATAGAAATCTGTATCAAGTTGCGTCAACAACAGATCGTTTGCACATTACCGGTGGGATTA + 75098 GTTTTTACGAAATAGCGTGTCATATATGTGTAGTACAAT +; G-trnP(ugg)_1 ==> start + 75137 CGGAATATAGGCATAATGTAATGTATCTGATT!TGG!GATCAGATGAGTATAGGTTCGAG + 75195 TCCTATTATTCCGA +; G-trnP(ugg)_1 ==> end + 75209 AGTAAGGTATTTATTATAATTAGAAGTGTATATGAAGCGAATTAAATATTTTAAATTTAA + 75269 GTTTAGGGATATTTCAAAGGAAATTATTTAAGAAAGTCATATTTTAGATTGTTAAAAACA + 75329 AAAGCATATTTTAAGATTTTATTGGTGGATTAAAACAACGACAATTAGCACGGATTTACA + 75389 AAATTATTTATTCTAAACGGTTGTTTTTAACTTTTCTTACGAAATTAGAATATCGTATGA + 75449 ATTTATCTTGAAAGCCGGGTTTGTTTTAACCGGAAAACAGGCTAGGCAATTAATTCGCAT + 75509 AAGCATGTTATTGTGAATGGACAGCGGACTCAATTTTGCAATTTGCATATAAAAACATTT + 75569 GATATTATATCTCTAGAATCAGTAGTATTTTCAAAGTATAAACGCAAACTAGTATCAAGT + 75629 TTTTTTAAAACTCCAGGTTTTTTTGGTTATTTACGGCGACGTGGTATAAAAAAGAAACTT + 75689 ACAGTTCAACGTATGTTTATTTATGCTAAATTTCAATTTTTTTGCGAAACTAATTATAAA + 75749 ATCTACGATGTTTTTTGTGCGAAAGCTTAATTTGCATAAGATTTCTTCGTCTCAAGTTCT + 75809 TTTAATGTATGGGTGGTGACGAATACGTTTTTTATTTTAAAAAACGTTTTGTGATTAGTT + 75869 AAATTTTTATTATAATTATTTTTAGGTTTATAATGAGTTTAATTTTAGGTGATGTGTGTT + 75929 TAACAATAGTGTGCTTAGTTTAATTATTTTTTTACCGTTGTTTAGTAGTTTTTGTTCTGG + 75989 ATTGTTTTTGTTGGATTGGAGCTAAAGGTGTTGCTTTTATAACTTTTTTATCTCTACTAG + 76049 GGTCATTAATTTTAACTTGTAATTATTTAAGTTTTATAAGTTTTTATTTGGTTTCAAATT + 76109 ATGTATCCGTATTATCTTGGATGAAATTAGGTTCATTTTATGTGACATGGTCATTTTGTT + 76169 TTGATAGTTTATCTCGTTAATGCGGTTTTTTAGTTACTGTGTTAGTTAGTTTAGTTTATC + 76229 TATATGTGCGTCCTAGGTTATTTTTATTTTATAAATTTATTAAAGTAAAGTTGGAAATTT + 76289 AAGTGAGTTGTAATGGGCGAATAAGTGTTTCTTTCAAGTACTATTAAAATACATTGATAA + 76349 AGGTAAAACCCCAACAACACGAACATAAGATGAATAAATTTTTAGGAAAAACTACTATAT + 76409 TCAATATACAATAACGCGTAAATAAAAACTTCAAAAAAGAAAAAATATATTTAGTTTTAT + 76469 CAACAAAAACATCAAAAGTACATATACTTAATTTGCAAGATTCGTATTCCTCTTGATATA + 76529 TCAATTTTCTTGATTCAAATTGAAGTTTTCGTTTCTTAATTTTCTTATGCCCTCCAAATC + 
76589 AACTTCGCTTTTTCAT +;; G-rpl16 <== end + 76605 TTATTTACA +;; G-rpl16 <== end + 76614 AATACAATAAACCTGAATTGGTAGTTTTCGTGCAATACTTTTCAACAGTAAACGAGCCTG + 76674 ATTTGATGGCAATCCAGATACTTCACATAGCACAAAACCAGCCTTAACTTTACATACCCA + 76734 TCGTC +;; G-rpl16 <== end + 76739 TATACCCCTTACCTTTCCC +;; G-rpl16 <== start ;; 86,134 + 76758 ATACGTACCTCAAGCGGCTTAGCTGTTATAGCTTGGTGAGGAAATACACGTATTCAATAC + 76818 TGACCAATTCGCTTAGTACTCTTAGACAAATTTAACTTAACCATTTCTAACTGCTTAGAT + 76878 GTAATATAACCATTCTTTTTAGCTTTAAAACCAAAATTGCCAAAATCCAAATTTAAAAAA + 76938 CGTGTTGCTAAATTTTTAATCTTCTTTTTCTGAAATTTTATAAACTTAGTTTTTTTAGGA + 76998 AT +;; G-rpl16 <== start ;; 4,91 + 77000 AATTCCAACCAT +;; G-rpl16 <== start + 77012 TTTCTCAACAAATTAATATATTCAAATTTTAACTCCTATAATTCCTGCCCGTTAATAGCT + 77072 TCAGCAAATCCATATCCCAGTATTAAAGGACTATTTTTAGAAGCAACAACCAATTTGTAT + 77132 ATGTTTAGTACGAGCACGGCTAAAAACCATTTAGTTTACCTGCTAATAAAATCTTTATCC + 77192 CTTTAACCTAAAATATTTTCAAATCTCCTTCAATACTTTTGACAAAAATGATAAAAAAGA + 77252 TAAATGTTTATATATTTTAGACAAAATTGGAGCAATATAATTTGCAATTAATTTCGCATC + 77312 CGGAATTATATTTTAGCGCATGCACTAAAAATACTAACGTATTCCGATATATCCAACATC + 77372 TTTATTAAAACGTAAACGGAAGCGCTTGAAACTATCTGAAACAACTCGTAATAACGTTGA + 77432 CAAATTTTTTTTTCATTCTTTATAACCTACAACACATATATTTACGAATTTGTAAAATAT + 77492 CTGCCTCTTTAGCAAAAACTCTAAAACGTATAAAAACATTATACTACGTAATTGCACTCG + 77552 GCATTTATGAATTGGCTTCATAACAGTTCCTAGTGTAGCTAAAATCTTTTTTCTATCCTT + 77612 ACGTTTTTTAGTTTTCTTACTTTTATATTGAAAATTTTTCTTGCGCTTCTCTTCTTCCCT + 77672 TATTGGATAAAAAAATAAGAAATTTACGTATAATTTCCCTAAAACTCAAAATAAACGCAC + 77732 CGTACCCACTACTCTACGTTTTCGAGCTTTTGTTCAACGTGAAAAAAAAAATTAACATAC + 77792 TTACGAACAAAAAATCTCTTCATAAATATGCCTACAATACTCTAAAGATTTCTTAACAAA + 77852 TCAATTTGATTTGAATAAAAATTTTTGATAAATAACTTTATGTTTACCGTTTCGCTTTAT + 77912 ATACATGCAATTTTCGAGTAAACACAAAACATCCAAATTTATACCCAACCATTCCTGGAG + 77972 AAATACACAAATCAAAAAAATCTACATCCATTATAAATTTTAATCCTAACCTGAATAAAA + 78032 TCTGGCAAAATCATACTATCTTTACGTTTTAAAAAAAATAATTTTATTACTTTTGTTCTC + 78092 ACCATAAAAACTTGAATAAACTTTTTGCGTTATAAAAGGCCCTTTTCAAATTGCTCTCAT + 78152 
ATTTATATATTATTTATTACCTAAGCAAGCATTTTACAAAATGCGATACAGCATTTCCAA + 78212 AAATACATATTAAAATTTCAATCATAAACCAAAAAAAGGGAGGGGGGGGGTAAGGCGTTT + 78272 CAACAATTCCAGCCTTCGCAGAACAAAAAACGCATCACATTTATCTGCGAAGCACTTTTA + 78332 TGGAACAGTGAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCGCTTAGGCAA + 78392 TTCTTTTAGAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAA + 78452 ATTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATT + 78512 TTAAGCGTAAACCAAGGGGGGGGTAAGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAA + 78572 AACGCATCACATTTATCTGCGAAGCACTTTTATGGAACAGTGAATAAATCACAGCAAAAC + 78632 GCATGGATTTAGGTGTAAAATCGCTTAGGCAATTCTTTTAGAATACAACCGCGTAAAATT + 78692 TCCTTAGCGATTCTGTGCGACGATGCATAAAAATTGGAGGGAATTTTGCACCTACGCCAG + 78752 AGCAAGCATTTCCAGAAATGCATATTAGAATTTTAAGCGTAAACCAAGGGGGGGGGGGTA + 78812 AGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAAAACGCATCACACTTATCTGCGAAG + 78872 AACTTTTATGGAACAGTGAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCGC + 78932 TTAGGCAATTCTTTTAGAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGAT + 78992 GCATAAAAATTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATA + 79052 TTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGGGGTAAGGCGTTTCAACAATTCCAG + 79112 CCTTCGCAGAACAAAAACGCATCACATTTATCTGCGAAGCACTTTTATGGAACAGTGAAT + 79172 AAATCACAGCAAAACGCATAGGTTTGGGTGTAAAATCGCTTAGGCAATTCTTTTAGAATA + 79232 CAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAAATTGGAGGGAATT + 79292 TTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATTTTAAGCGTAAATC + 79352 AAGGAGGGGGGGGTAAGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAAACGCATCAC + 79412 ACTTATCTGCGAAGAACTTTTATGGAACAGTGAATAAATCACAGCAAAACGCATAGATTT + 79472 AGGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCGTAAAATTTCCTTAGCGA + 79532 TTCTGTGCGACGATGCATAAAAATTGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATT + 79592 TCCAGAAATGCATATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGTAAGGCGTTTC + 79652 AACAATTCCAGCCTTCGCAGAACAAAAACGCATCACACTTATCTGCGAAGAACTTTTATG + 79712 GAACAGTGAATAAATCACAGCAAAACGCATAGATTTAGGTGTAAAATCGCTTAGGCAATT + 79772 CTTTTAGAACACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACGATGCATAAAAAT + 79832 TGGAGGGAATTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATGCATATTAGAATTTT + 
79892 AAGCGTAAATCAAGGAGGGGGGTACGGCGTTTCAACAATTCCAGCCTTCGCAGAACAAAA + 79952 ACGCATCACACTTATCTGCGAAGCATTTTTATGGAACAATAAATAAATCACAGCAAAACG + 80012 CATGGATTTAGGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCGTAAAATTT + 80072 CCTTAGCGATTCTGTGCGACAATGCATAAAAATTAGAGGGAACTTTGCACCTACGCCAGA + 80132 GCAAGCATTTCCAGAAATACATATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGGG + 80192 GTAAACGTTTCGATAACTTCAGCCACCACAGAACAAAAACGCATCAAATTTATCTGCGAA + 80252 GCACTTTTACGGAACAATAAATAAATCACAGCAAAACGCATGGATTTAGGTGTAAAATCG + 80312 CTTAGGCAATTACTTTAAAATACAACCGCGTAAAATTTCCTTAGCGATTCTGTGCGACAA + 80372 TGCATAAAAATTAGAGGGAACTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATACAT + 80432 ATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGTAAACGTTTCGATAACTTCAGCCAC + 80492 CACAGAACAAAAACGCATCAAATTTATCTGCGAAGCACTTTTACGGAACAATAAATAAAT + 80552 CACAGCAAAACGCATAGATTTAGGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAAC + 80612 CGCGTAAAATTTCCTTAGCGATTCTGTGCGACAATGCATAAAAATTAGAGGGAACTTTGC + 80672 ACCTACGCCAGAGCAAGCATTTCCAGAAATACATATTAGAATTTTAAGCGTAAATCAAGG + 80732 AGGGGGGGTGGGGTAAACGTTTCGATAACTTCAGCCACCACAGAACAAAAACGCATCAAA + 80792 TTTATCTGCGAAGCACTTTTACGGAACAATAAGTAAATCACAGCAAAACGCATAGATTTA + 80852 GGTGTAAAATCGCTTAGGCAATTACTTTAAAATACAACCGCATAAAATTTCCTTAGCAAT + 80912 GACGCATAAAAATTAGAGGGAACTTTGCACCTACGCCAGAGCAAGCATTTCCAGAAATAC + 80972 ATATTAGAATTTTAAGCGTAAATCAAGGAGGGGGGGGGTGGGGTAAACGTTTCGATAACT + 81032 TCAGCCACCACAGAACAAAAGCACACTTTTAGGGGCAGCAA +; G-orf766 ==> start + 81073 ATGCCAGA +; G-orf760 ==> start + 81081 ATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGT + 81141 TATGCAAATCAAAA +; G-orf734 ==> start + 81155 ATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGT + 81215 TGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCC + 81275 CCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTAC + 81335 AGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGG + 81395 GCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATA + 81455 TATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGT + 81515 
GCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGA + 81575 ATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGT + 81635 TATGCAAATCAAAAATGTGACAAGTG +; G-orf424 <== end + 81661 CTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCC + 81721 CAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCTCGGCACGGC + 81781 ATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGA + 81841 CGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCC + 81901 AGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAA + 81961 AGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGC + 82021 GAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAG + 82081 CTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAA + 82141 AAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGA + 82201 GTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCC + 82261 CCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTG +; G-orf315 <== end + 82320 CTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCC + 82380 CAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCTCGGCACGGC + 82440 ATATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGA + 82500 CGTGCCCATTTTGGCGAGAAAATTCATGGAATTGCCACCCTCCCCAGGGCAGCAAATGCC + 82560 AGAATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAA + 82620 AGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGC + 82680 GAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAG + 82740 CTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAGTTATGCAAATCAA + 82800 AAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGAGAAAATTCATGGA + 82860 GTTGCCACCCTCCCCAGGGCAGCAAATGCCAGAATGTATGAAAAGCTCGCGCGCGCCGCC + 82920 CCCCTCGGCACGGCAT +; G-orf424 <== start + 82936 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 82996 TGCCCATTTTGGCGAGAAAATTCATGGAGTTGCCACCCTCCCCAGGGCAGCAAATGCCAG + 83056 AATGTATGAAAAGCTCGCGCGCGCCGCCCCCCCTCGGCACGGCATATATACTCGAAAAAG + 83116 TTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACGTGCCCATTTTGGCGA + 83176 
GAAAATTCATGGAGTTGCCACCCTCCCCAGTGCAGCAAATGCCAGAATGTATGAAAAGCT + 83236 CGCGCGCGCCGCCCCCCCCCTCGGCACGGCAT +; G-orf315 <== start + 83268 ATATACTCGAAAAAGTTATGCAAATCAAAAATGTGACAAGTGCTACAGATCTTTCAGACG + 83328 TGCCCATTTTGGCGAGAAAATTTGTACGTTAA +; G-orf734 ==> end + 83360 CTAG +; G-orf760 ==> end + 83364 TTTATGGTAA +; G-orf766 ==> end + 83374 TATATATAGTATAAACATTAATAATATTTATAATATATGTATACATTATACTTAATATAT + 83434 ATAGTATAAACATTAATAATATTTATAACATATGTATACATTATACTTAATATATATAGT + 83494 ATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATTATACTT + 83554 AATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTT + 83614 AACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATAT + 83674 ATGTATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAG + 83734 ACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATACATT + 83794 ATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATACA + 83854 TATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTA + 83914 TAATATATGTATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATA + 83974 GTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATA + 84034 AACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATACAT + 84094 TATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTATAC + 84154 ATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATG + 84214 TATAATGTATACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACA + 84274 TTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAAT + 84334 AATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATAT + 84394 TTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATA + 84454 ATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATA + 84514 TGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTAT + 84574 ACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATT + 84634 ATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACT + 84694 TAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATA + 84754 TATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATAT + 84814 
AGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTAT + 84874 AGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACA + 84934 TTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAAT + 84994 AATATTTATAATATATGTATAATGTATACATATGTATACATTATACTTAATATATATAGT + 85054 ATAGACATTAATAATATTTATAATATATGTATAATGTATACATATGTTAACATATGTATA + 85114 CATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATAATGTA + 85174 TACATATGTTAACATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATA + 85234 TTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTAT + 85294 AATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATAT + 85354 ATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTA + 85414 TACATTATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACAT + 85474 TATACTTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATAC + 85534 TTAATATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAAT + 85594 ATATATAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATA + 85654 TAGTATAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTA + 85714 TAGACATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGAC + 85774 ATTAATAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAA + 85834 TAATATTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATA + 85894 TTTATAATATATGTATACATTATACTTAATATATATAGTATAGACATTAATAATATTTAT + 85954 AATATATGTATAATGTATCATA +; G-orf1493 <== end + 85976 TTACCATAAA +; G-orf1477 <== end + 85986 CTAG +; G-orf1510 <== end + 85990 TTAACGTACAAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCAC + 86050 ATTTTTGATTTGCATAACTTTTTCGAGTATAT +; G-orf1086 ==> start + 86082 ATGCCGTGCCGAGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCC + 86142 TGGGGAGGGTGGCAATTCC +; G-orf1225 ==> start + 86161 ATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTG + 86221 ATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTT + 86281 TTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCC + 86341 AAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTT + 86401 
TCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGC + 86461 ATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCCAAAATGGGCACGTCT + 86521 GAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCC + 86581 GTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGG + 86641 GAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCA + 86701 CTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGC + 86761 GGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCC + 86821 ATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTG + 86881 ATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTT + 86941 TTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTTGCC + 87001 AAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTT + 87061 TCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGC + 87121 ATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCT + 87181 GAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCC + 87241 GTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGG + 87301 GAGGGTGGCAACTCCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCA + 87361 CTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGC + 87421 GGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCC + 87481 ATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTG + 87541 ATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTT + 87601 TTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCC + 87661 AAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTT + 87721 TCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGC + 87781 ATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCT + 87841 GAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCC + 87901 GTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGG + 87961 GAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCA + 88021 CTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGC + 88081 GGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCC + 
88141 GTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTG + 88201 ATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTT + 88261 TTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTCGCC + 88321 AAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTT + 88381 TCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGC + 88441 ATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCT + 88501 GAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCC + 88561 GTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGG + 88621 GAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCA + 88681 CTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGC + 88741 GGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCC + 88801 ATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTG + 88861 ATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTT + 88921 TTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCGCC + 88981 AAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTT + 89041 TCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTGGC + 89101 ATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACGTCT + 89161 GAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATAT +; G-orf451 ==> start + 89216 ATGCCGTGCCGAGGGGGGGCGGCGCGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGC + 89276 TGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGA + 89336 TCTGTAG +; G-orf1086 ==> end + 89343 CACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGG + 89403 GCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACT + 89463 CCATGAATTTTCTTGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTT + 89523 TGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGC + 89583 TTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCG + 89643 CCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTT + 89703 TTTCGAGTATATATGCCGTGCCGAGGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCT + 89763 GGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCGTGAATTTTCTCGCCAAAATGGGCACG + 89823 
TCTGAAAGATCTGTAG +; G-orf1225 ==> end + 89839 CACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGG + 89899 GCGGCGCGCGCGAGCTTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACT + 89959 CCATGAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTT + 90019 TGATTTGCATAACTTTTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGC + 90079 TTTTCATACATTCTGGCATTTGCTGCCCTGGGGAGGGTGGCAATTCCATGAATTTTCTCG + 90139 CCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTT + 90199 TTTCGAGTATATATGCCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACATTCTG + 90259 GCATTTGCTGCCCTGGGGAGGGTGGCAACTCCATGAATTTTCTCGCCAAAATGGGCACGT + 90319 CTGAAAGATCTGTAGCACTTGTCACATTTTTGATTTGCATAACTTTTTCGAGTATATATG + 90379 CCGTGCCGAGGGGGGGCGGCGCGCGCGAGCTTTTCATACAT +; G-orf1477 <== start + 90420 TCTGGCATTTGCTGCCCTGGGGAGGGTGGCAACTCCAT +; G-orf1493 <== start ;; mfannot: GTG upstream: 90508 + 90458 GAATTTTCTCGCCAAAATGGGCACGTCTGAAAGATCTGTAGCACTTGTCACATTTTGATT + 90518 TGCAT +; G-orf1510 <== start + 90523 AACTTTTTCGAGTATACGTTATTATAAGTTATATTTAGAAATGATATAA +; G-orf451 ==> end + 90572 ATTTTCTAGCGGTGGTTAATAACGACCAACATTATATACATTTTTATTTTTGTTAATAGC + 90632 AGTTAAGTATTGGTTAGAAAATAATATAATTTTTAATTTTCTTTAA +; G-trnW(uca)_2 ==> start + 90678 AGGGAGATAGTTTAACGGTAAAATATCGATCT!TCA!ACATCGAGGTTATAGGTTCAAAT + 90736 CCTTTTCTCCCTG +; G-trnW(uca)_2 ==> end + 90749 AGATTTTTTAAGGT +; G-rps13_2 ==> start + 90763 ATGAAAACATCAATTCAATTTTTTAATTTACAGTTTTTGATTGAAAAAAAATTATTAATT + 90823 TCGTTAACGCAAATTTTTGGCATTGGTTTTTACTCTGCTATAGTAATTTGCAAAAAATTT + 90883 GGTTTTAATAAAAATACATATATTAAGAGTGTGGATGTAAGGATTGTAAATGCAATGCGT + 90943 AACTTTATTTTGGATAAATTTGTTGTTCAAGAACAACTGAAAGAGCAGATTCAGGTATCT + 91003 ATAGTAGAGTTGGACACTATAAAGAGTATTAGAGGGTTTCGGCATAAATTGTGTTTACCT + 91063 GTTCATGGACAGCGAACTAAAACTAATCGGCGTACTCAACGTAAATTTAAAAGAATGCAG + 91123 AGTAAATT +; G-rps11 ==> start + 91131 ATGGGAAGAGGATTCAACACATATTCGTTAA +; G-rps13_2 ==> end + 91162 AACATAAATTTCAGTTTAGAAAATTACGTAGAACCCTTTTATCCTTTCGAAAGAGATCTT + 91222 GTATTCTAAATATTAAAATTACATTGAATAACATATATTTAACTTTATCTGATTGATTTG + 91282 
GTCAAATTATTATGGTGAAATCTGGTGGGTTATTAAAATTGCCGGGTTCCGGTAGAAATA + 91342 CGAATTATGCCTTAGAGCTTTTAATATTAGATGCTATTAAGCAATTAACTTTGTTAAATA + 91402 CAAAACATATTGTTTTAAAGTTTGATCATCGTGTTTTAAGGAAAAAGAAAATGATTTTAA + 91462 AGTTATTAAAAAAATTTAATATTAAAATTTTTCTTATACGATTAATTATGTGTAAAGTTC + 91522 ATAATGGAATTACATTAGCTAAAAAACGGCGGGTTTAA +; G-rps11 ==> end + 91560 GTTATC +; G-rps14_2 ==> start + 91566 ATGTTGCGTAAGGTTATTTTTGAGTCAAATACCAGATATACATTTAAGTATTTTGAGATT + 91626 AAACAAAGAATTATAAAATCGTTATCAAAAAATTTATACTTGCCTATATTAGTTCGACGT + 91686 AAATTGTTGTGGCAATTAGATAAATTATCTTTATTATCATCTTTAATTTATGTAAAAAAT + 91746 CGATGTGTTGTTTCTGGTCGTGCTAAATCGATTTATAAATTTTTTAATTTATCTAGAATT + 91806 GTTATAAAAAAATTTTTTAGATTAGGTTATATACCTGGTTTAAATAGATCAAGTTGGTAA +; G-rps14_2 ==> end + 91866 TTTAGTAATATAAAATAAAAGTTTATTG +; G-rps8_2 ==> start + 91894 ATGGTTAAATTAGGACAATTTATTTCAATTTTAAATTTTAATATTAAAGCAGGAAAGTCT + 91954 TTTTTTGTAATAGTTAAAACAAGGATAATTTTGGATATTGTAAAAATCTTGATTGAGCAA + 92014 AATTACATTCTTGGTTATACGGATTTAAAAGAAAATGGTGATAAAATTATTGTGTTTTTT + 92074 AAGTTAGATTTTGCGAAAAGTAATAGCCTTTTACTTAAGGGATGTAAATTTGCATTATAT + 92134 AAAAATAGATTTACAAGTATTGGTGCCAATAATATAGTGAATAACTCGTCGTTGGTACTT + 92194 GTGTCTACTGTGAAGGGCGTTATGACTCAGTTGGAGGCTAAAAAACTTCGACTTGGGGGT + 92254 ATTATCTTGTGTTATATAATATAA +; G-rps8_2 ==> end + 92278 AATTGTATAAAAAAATA +; G-rpl6_2 ==> start + 92295 ATGAGAGCTAAATTTATTTATCAAATTTTTAATAGGTTGTTTATCTATATATTTCAACAC + 92355 AATAAATTACTGTATATTCGAGGCCCTCTGGGTTTACTACGCTATAATGTTCCCAGTGGC + 92415 ATTGATATTTGTAAATATCGGTCAATGGTGTATATTTCTGGACAAAAAGCTGCCCACCCT + 92475 TTAGTTGCAATGTCACATAGAATAGTTTGCCAGAAAATGAAAGGGCTTGAGGTTGGTTTT + 92535 TCTGAAATTATGATAATTGCTGGTATGGGTTGGCGCGTTGATAAAGAAGACGTTTTATTA + 92595 AAATTTACAATTGGTTATAGTCATATTGTACATTATCTGATTCCGAATGATATTGAAATT + 92655 GTTTTACTTAGTAAAAATCTTTTTAAGATTTTTGGTTCTGATTTGAGTCGAATTCAGTGC + 92715 ATTGCGTCCGAATTGTGCAAACTGCGTTCATCTGATGTGTATAAAGGTAAAGGAATTCGT + 92775 CGTCAAGCTTTTAAAGTAGTTTTAAAATCAAGTACTAAATCGAAAGTTTAA +; G-rpl6_2 ==> end + 92826 
TTTATGAAGAAAGTAAGCAGTGTTTTTATATTTTATTGTTTTTTAAATT +; G-rps12_2 ==> start + 92875 ATGGTTACAATTAATCAATTAATTCGATTAAGTCATCCTACTAAAAATAGGAAAAATACG + 92935 GTGCCCGCTTTAGACAGTAGTCCATACAAGAAGGGTGTTTGTTTAAGAGTATTTACGATG + 92995 ACTCCTAAAAAACCAAATTCCGCATTACGAAAAGTTGCCCGCATTAGATTATCAAATGGA + 93055 TATAAAATAACGGCGCATATCCCTGGTGAAGGTCACAATTTACAAGAATATTCGATTGTA + 93115 TTAGTACGTGGTGGGCGTGCTCGCGATTTGCCTAGTGTTCGATATAAAGTTGTTAGAGGT + 93175 AAATACGATTTAGAACCTGTACGTAATAGGAGAACTCGGCGATCTAAAT +; G-rps7 ==> start + 93224 ATGGTATTAAAAAAATATAA +; G-rps12_2 ==> end + 93244 AAATTGATTGATTGCGTCTGAGAGTATACATAAGTTTATTTGTGGGTTAATGTTGAATGG + 93304 AAAAGTGTCTCAATTAGAAAAAATTGTTTTTTTTTGTTTTCGAGACTTAAAATATAAGTT + 93364 TAATATGGATTCGTTGTCTCTGTTTTTATATGTTGTAGACGAGATAATGCCTTATATAGA + 93424 GCTTCGTACGTTAAGGTTAGGGAGTGTTTTTTATCGAATACCAAAGCCTCTTTCGGAAAG + 93484 TAAGCAGTTAAATTGTGGCATTAAGCTGTTAGCCAAAACTGTTAAAATTACTTGTGTACG + 93544 CAATGTAGCGGCTGCTATAAAAATACAACAGGAAATTTTAGCTGTTCTTCAAAAGAAAAG + 93604 TTTACTTTTTAAGCAAAATAGAAATCTGTATCAAGTTGCGTCCAACAACAGATCGTTTGC + 93664 ACATTACCGGTGGGATTAG +; G-rps7 ==> end + 93683 TTTTTGACGAATAGCGTGTCATATTGTGTAGTACAAT +; G-trnP(ugg)_2 ==> start + 93720 CGGAATATAGCATAATGGTAATGTATCTGATT!TGG!GATCAGATGAGTATAGGTTCGAG + 93778 TCCTATTATTCCGA +; G-trnP(ugg)_2 ==> end + 93792 AGTAAGGTATTTATTATAATTAGAAGTGTAT +; G-rps4 ==> start + 93823 ATGAAGCGAATTAAATATTTTAAATTTAAGTTTAGGGATATTTCAAAGGAAATTTATTTA + 93883 AGAAAGTCATATTTTAGATTGTTAAAAACAAAGCATATTTTAAGATTTTTTATTGGTGGA + 93943 TTAAAACAACGACAATTAGCACGGATTTACAAAATTATTTATTCTAAACGGTTGTTTTTA + 94003 ACTTTTCTTACGAAATTAGAATATCGTATTGAATTTATCTTGATAAAAGCCGGGTTTGTT + 94063 TTAACCGGAAAACAGGCTAGGCAATTAATTTCGCATAAGCATGTTATTGTGAATGGACAG + 94123 CGGACTCAATTTTGCAATTTGCATATAAAAACATTTGATATTATATCTCTAGAATCAGTA + 94183 GTATTTTCAAAGTATAAACGCAAACTAGTATCAAGTTTTTTTAAAACTCCAGGTTTTTTT + 94243 GGTTATTTACGGCGACGTGGTATAAAAAAGAAACTTACAGTTCAACGTATGTTTATTTAT + 94303 GCTAAATTTCAATTTTTTTGCGAAACTAATTATAAAATCTTTACGATGGTTTTTGTGCGA + 94363 
AAGCTTAATTTGCATAAGATTTCTTCGTCTCAAGTTCTTTTAATGTATGGGTGGTGACGA + 94423 ATACGTTTTTTATTTTAA +; G-rps4 ==> end + 94441 AAAACGTTTTGTGATTAGTTAAATTTTTATTATAATTATTTTTAGGTTTATAATGAGTTT + 94501 AATTTTAGGTGATGTGTGTTTAACAATAGTGTGCTTAGTTTAATTATTTTTTTACCGTTG + 94561 TTTAGTAGTTTTTGTTCTGGATTGTTTGGTTGTTGGATTGGAGCTAAAGGTGTTGCTTTT + 94621 ATAACTTTTTTATCTCTACTAGGGTCATTAATTTTAACTTGTAATTATTTAAGTTTTATA + 94681 AGTTTTTATTTGGTTTCAAATTATGTATCCGTATTATCTTGGATGAAATTAGGTTCATTT + 94741 TATGTGACATGGTCATTTTGTTTTGATAGTTTATCGTCGTTAATGGCGGTTTTAGTTACT + 94801 GTTGTTAGTTGTTTAGTTTATCTATATGTGCGTCCTAGGTTATTTTTATTTTATAAAATT + 94861 TATAAAGTAAAGTTGGAAATTTAAGTGAGTTGAAAAAGAGGATTCATAAAAAGTACATTA + 94921 AAGGATTTTTCATTATTCTTTATGAAGCAGGTAATGCTCAACTAGTTAAGTTTGAAAAGT + 94981 AGAATTAGCAATTACAAATATGTATTTAACGCGTATGCTATGGGATTTATTTAATTTTTC + 95041 TACTATTTGTGTATTTGTATAAGTACACAAATAATTTAGCTTTAGTGTAAAATATATCTA + 95101 CTTAATTAAATGTGTGAGGTGAGTGTTAGTAATGGGTAAGCTACGTTAATACTGGTTTTT + 95161 TCGTAGATATAGGATAATATTAATCAATTACATAAATTTAATTGTAAAATTTTTTTTAAT + 95221 AAAATTTATGAACTGAGCAAATGTAGTATAGAAG +; G-orf465 ==> start + 95255 ATGAAAAAATTTAATTTTATGCCATCAGTTTCTGTTTTTTTTCAATTTAGTTCGAATTTT + 95315 TTATTTATCTTTAATTTTTGTTTTTCTTGGGAAGTTGTGAATTGGAATTCGATTAATAGG + 95375 CATCTGTATAAATATCAAAGAGTAATATTTATTAACGTTAAGACTAGGTGGGGTTTGTCT + 95435 ATATGTGATCATTGGGAATTAAAATTAGATGTGTTTTGGTTTCAAATTAAATGTTTTGGT + 95495 TCATTTAGTTTTAATTTGGTTTGTATTAGAATTTATTTCTTAGATTTGATCTTCAAATTT + 95555 TTTTCTTATAGTAAAATTGGTTGATTTTTAGATCGAGGTATACGTTTAAGCTGTTTCGGA + 95615 GTAGAAGGTTATATTCGTAATTATATTTTATTAGTAAATTTTGATTTGAATAATATTCAA + 95675 GAAAAGCATGATTTTTTATGATTATCTAAACGATTAATTAGTTTAGTATGAGAGCCTGAG + 95735 TGTGTGGCAAGATATTTGTTTAACTTTTGTGGTTTTGTAGGAACTCGTAATGTGTATTTT + 95795 ATTTTGCGAATAGTTTACCAAACTGCTTTATGCGGAATTAAATTTGTTTTTACAATTAAT + 95855 TTGTTAAAATTATTTAAATTTATAACAGTCAAACATTTATTTTTTTGATTGGGTTTTCCT + 95915 GTTTATATTTGTAAATGATATGAATACGATAAAAAAATAATATTAAGCAATCTTTCTGAT + 95975 TTTCTTAGACAGGATACAGAAAAACACTTGTTATTTATATTAATTAATAAATTTTTTTGT + 96035 
GAATTAGAAAACCACTTAAGGAGATTTTATGTTTTTTTATTAGGTAATATTTTGCCATTT + 96095 TTTTCTGAAAATTTTATAGCGAACTTGGTAGTTTTATGTTATTTAGGAGATCTAATAATT + 96155 ATACATAGGGATAATTTAATCGTTGATTTATTAAGGTTAGAGTTTTTTTACAAATTAACT + 96215 ACGCTTGGAGTCGATGTAGTAGACGAACAAACGTTTGGTTTATCAAATTGTATTGATACA + 96275 ACTAAAGGGTTTAATTTTATAGGTTTTTATATTCGATTTAATAATGCGTTTTTATTTGGT + 96335 GTATACCAAACTAAAAATTGTATAGTTTTGCAACCTTCTGTTGGTTCTATTAAAGGGCTT + 96395 TTAACCGGTATTCAACGTTTTTTGAAAAATAATAATTGTGAACAAGTAGTATTAACTAAT + 96455 ATATTTTATATTATGCGAAGATGGTTTTGTTATTATTTTCCATTCATTAGGTTATGCCGT + 96515 AGATTAATTGTTTCGTTAATTTTTTGTTTACGTCTTAAATTATTTTATTGGTTGTTTAGA + 96575 AAATATGGTCGAATGGGTAAAAAGTACATCTATAAGCAATATATAAAATTTTTATTTAGC + 96635 CAATTTAAATTTTGGTAA +; G-orf465 ==> end + 96653 AAAAATAAAAATTATTTTTTTTTTTGCTATTGCAGTTTAGTATAAATTTTATTTTTAAAA + 96713 AT +;; mfannot: /group=II + 96715 gagctgtaagatgaaaaattatcgtgtacagttcagaagtaggg +;; mfannot: + 96759 ATTTGTTTATTAAATTAAATCTACTATAACTCATTGGCATACATGTTAGAAGATCCTCAT + 96819 ATAGTTAGGTTTTCATGTTATATTTCGTTATGTGTGGCTTGTTAGGTGATAGGTATAGAT + 96879 AATTTTGGAACTATACATTAATTTATTAAATTTAAAAATTATAGTCATAGCTGATTTTAT + 96939 AAGCTTAGATTGAGCATAATTGTAGAATTTGTTAAAATCTATTCTATAATACATATATAG + 96999 ATATAATGCATAATTACATATTTTCTTCGTTACCGTACGGTATGGGTGGTAAATTTTTTA + 97059 TAAATAGCGATAAAAAAAATCGGTCAATAGGACTTATGTTAAATTTGTAGTTATATGGGT + 97119 AGGTTGTTTGGTTAAGTGTAAATAAATGTATGGAACAAGTTTTTATGGCTAAGTTTTAGT + 97179 GTATAAGAAGGATAGGATGAAAAATTAAGTTTGATATTTTTCCATAAATTCAAAAGTATA + 97239 AATTTTATGTTATTATGCTAATTATGAATTTACTGGTAAG +;; mfannot: /group=II + 97279 aagccgtatgattttgaaaatcatgtacggttttgaattagagg +;; mfannot: + 97323 TTTAATTGATCGACTAAAACTTACATTTTTCAGTGCGGCACGTAAAAATTTTGTTATTAA + 97383 AATTTAAAAGAAATTAATACGAATTGTAATTGTGTTTTGATTTTTGTGTAAGTTCTACAG + 97443 TTATCGGATTTTATTATTTATTAATTTTTTAATTTTAAGCTGATTATTTGTATCTTAATG + 97503 GTTGTAATTTAAATATATAGGGATACTGGTTTTAAATGGTATACTTAAAGTATTTAATCT + 97563 ATTAAGTAGTTTCTAGTTTATTATAAATAAACTTTAGATAGTTTTGCTAAATTTTATTTT + 97623 
GAAAAGTAAAGTTTAGTTTTAACTATTTATATAAAATGCTAATCCTATTTTATATTTTTG + 97683 CGTAATGCGCGAAACGTGTTATGGTGTAAGTAATAAAAAATTTTTTCATCTAGGTTAAAT + 97743 AAGATATGGTATATATCTTTTATATAGGATAAACTTAGTTTTATTTATTTTTAAATTTAA + 97803 TAAGCTTTAATACAGTTAAGAATTTAGAAAAT +;; mfannot: /group=II + 97835 gagccgtatgctaacaaattagc +; G-nad5 ==> start +; G-nad5-E1 ==> start + 97858 atgtacggttttgagtcag +;; mfannot: + 97877 AAATTTCAGCCAATTTTATTGAAGTTTTATGA +;; mfannot: G-nad5 ==> start Def by similarity + 97909 ATACTGTTAGTGTTAGTATCATCCGAAAATTTTGTTCAATTGTTTTTTGGATGGGAAGGT + 97969 GTAGGATTATGTTCTTACTTATTAATAAATTTTTGATATATTCGATTACAAGCTAATAAA + 98029 GCGGCTATTCAAGCTTTAATGGTTAATAAAATAGGTGATATTGGGGTATTATTAGGTATT + 98089 TGTTCTATTTTTTCATTGTATCGTTCAGTTGAATTTAGTATTATTTTTGCGTTAACTTCT + 98149 TATATGCAAGGTGAATCATTTATTTTATCAATTTTTAATGTTAATGGTTTATTAATGATT + 98209 GGTTTATTTTTGTTTGTTGGAGTTGTTGGAAAATCCGCGCAATTAGGGTTACATACATGA + 98269 TTACCATCTGCAATGGAGGGACCTACTCCAGTGTCTGCATTAATACATGCTGCAACAATG + 98329 GT +; G-nad5-E1 ==> end +; G-nad5-I1 ==> start /group=II(derived) + 98331 gtatgaaatgctaacaattccagtttaggttattttaaattgtatacatttgttaatggc + 98391 aatttttatttttttgcgatgattttggataagtatttgtcgaatttaagattaattttc + 98451 ttgaataacatttttttagttggataaggttatacgaattttttaatgataacggtgttt + 98511 ataatttgttattgtatttgtgagatacataaggtgttggcgattcatgattttttttga + 98571 taaattaaagcctaataaaatataatatgctgaaatttaattatgcggtcatgatcttta + 98631 tattgttaatgtgatacagaatatccagtgaaatatattaatagtctgtgcctagataga + 98691 tctaaacgttatttttgtgtaatgtaggttaggaaatatgttagtggaataattgtgaaa + 98751 ttaattcagaaataatatttaatttaaataattaagtattattgaatatttttgtaaatt + 98811 cataggaaatatttaaaaattgcttgttaattgagtatgtttaaaattttgatagtttta + 98871 gctgatattatttaatagagtattattaattggtattttaatggattttctgaaatttta + 98931 aagctgaatgcaaagtaatttgctcgttcagtttaatgagggtttaaccgtaaagtctta + 98991 aactacctttat +; G-nad5-I1 ==> end +; G-nad5-E2 ==> start + 99003 AACTGCTGGTATATTTCTTATTATTAGGTGTTCAGAATTTTTTGAATATGTGGATTTTAT + 99063 TTTAGTTTGTCTAGTGTTATTAGGGGCGTTAACTGCTTTTTTTGCAGCAACTGTTGGTTT + 99123 
ATTTCAAAATGATCTTAAACGTGTTATTGCGTACTCTACGTGTTCACAACTGGGTTATAT + 99183 GGCGTTTTCTTGTGGATTATCTGCTTATTCTGTAGCGTTTTTTCATTTAGTCAATCATGG + 99243 GT +; G-nad5-E2 ==> end +; G-nad5-I2 ==> start ;; mfannot: no intron type identified + 99245 ttgggcgaagttctagtttaattaaaatataattttttataaatttgatgcaaaaaaata + 99305 ttaggataatttaaaacttagaagttgagtgaaattacgaatatatagttagtagataat + 99365 aaaatttctatatggttatgaacccatattctatagagtatatttaataatttttttatt + 99425 tgatgcataatgataataatacaagagctaatttttcttttaaatattaaactgaagatt + 99485 tatttattaatttataattttattaaaagtagtttataataaaattggaataaaaagatt + 99545 tctatttgagagtaatgcatagaatggtttaaatgatacttttttagaattaaagttgaa + 99605 aaaaatttgatttattttatcgtaatggagttataagcatacgggtacgtgttttaaatt + 99665 atgatttaacaaataaatgaaaaatacttgtggaagcttagtgtattgagatatactagt + 99725 tgagtttcaaagggggatttaataagtaaggattccatccaa +; G-nad5-I2 ==> end +; G-nad5-E3 ==> start + 99767 ATTTTAAGGCTTTGCTATTTTTAAGCGCAGGGTCAGTGATTCATGGTTTTTCTGATGAGC + 99827 AGGATTTACGCCGTATGGGTGGATTAGGTAAGGTTTATCCTTTAACGTATTGTAGTATAT + 99887 TAATTGGATCGTTTGCTTTAATGGGTTTTCCATTTTTATCTGGTTTTTATTCTAAAGATT + 99947 TAATTTTAGAAATTACTTTTATTCAACATACTGTTGCTAGTTTTTTTGTTTATTGTTTAG +100007 GAGTGTTTTCCGCATTTTTTACTGCATTTTATTCTTTTCGTGTTATTTATTTAACTTTTA +100067 TTGTTCCAACAAATAGTACCCGGCAATTTATATTACGTATTCATGAATCTCCGCTATTAA +100127 TTATAATACCTTTATGTATTTTGAGTATGGGAAGTGTATTTAGTGGTTTTTTACTTAAAG +100187 ATATGTTTATAGGTTTAGGTTCAGTATTTCTAGGAAATTCTATTTTTAGAATGGCCGGTA +100247 GATTTGATTTAATAGAAGCAGAAATTTTACCTGTAGAAGTTAAATTGGTACCTTTAATTG +100307 TTAGTTTAGGTGGAGTTTTAGCTGTTATATGTATAAATTATGTCTATAGGCAAACTGCAT +100367 TTTACTTAAAGATTAGTAATAAGTACCTTATGAAGTATTATTCATTTTTTAATCAAAAAT +100427 GGTACATTGATGGTATATATAATGTTTATTGTATAAAGCATTTTTTTAACTTTGGGTATT +100487 TGGTGCCTTTTCAGATGCTAGATAAAGGCTTTATTGAGTTAGTTGGACCATTTGGTGTAT +100547 CTTCTAAATTTAATATAATTTCAAGAAAAATAAGTGAATTTCAAACTGGATTAATATATC +100607 ATTATACATTTGTTATTTCAGTTGGTGTACTTGTTTATATCAATATATTATCAATTTTTA +100667 ATGCAGTTTCAGTATTTATTGAATTAGAAGGTATACTGGTATATATTTTTATTTCATATA +100727 TTATACTGTTATAA +; 
G-nad5-E3 ==> end +; G-nad5 ==> end +100741 ATTTAGTAGTGAATG +;; G-nad6 ==> start +100756 ATGTTAGTTTTTTTT +;; G-nad6 ==> start ;; 4,70 +100771 CAATTTTTCTTTTATTTGTTTTCGAGCGTTGCTAGTATTTCAGCGGTGATGGTAATCCTA +100831 AGTACTAACGCAATCTATTCAGGTTTATTTTTGATTTGAGTTTTTTTTAACTCAGCTTTG +100891 TTGTTATTACTTTTAGATTTGGAGTATTTAGCTATAATTTTTATTATAGTTTATGTTGGT +100951 GCAGTTATGGTTCTTTTTTTA +;; G-nad6 ==> end +100972 TTGTGCGGATACGAGACATTATGGAAAAATTGTTATAAAGTGTATTATGAAAAATTGACT +101032 AAAATTTAATATTTTAAGGAAAAGCGTTAGATGTCAAATATTCTAATTAAAATCAAGATC +101092 TAAATTTATATGCAATGTACTTTTAAATTTGAATTGTAAAAGTTTTTATATTTTGTTTTG +101152 TTTGATATGTTTTAATGTTTGACTAAAGTAATGTTGGGTAAGCATGAAAAGTTTTGTGTT +101212 ATGTTAAAATTATAGGTTTAGTCTTGCATTTAATATGAAAAAATGTAGTATAGCTAAGAA +101272 CTACTTATATTGAACAGATTAGAAAAATTTAAAAGGGATTTTATTTAGGAATAAGTTTTT +101332 ATAAATGTTTAATGTTATCGTGTTGATTTGGGAAAAATTTTTTTCAAAGAAGAAGGTAAA +101392 ATCATAATTTTTGTAAAAAAGAAT +; G-orf688 ==> start +101416 ATGTTAGTTAATTACAGATACTTTAAAAAGTGAATTAAGATTTTTAGAAATGAAATTTCT +101476 TTAAATTTTTTATGAAAATTTTTAGGGTTTGTACCTCTGAATATATTAAAAACTCAGATA +101536 CAGTTTACAAATTATATCAGTTATGACTCTATTGATGCTAAAGCTGATTTAGTTATAGCA +101596 TTGAGTTGTCTTAAGAAAATTAATGGGAAGTTTAGAAAATTATTTATTCGTTTTATTATT +101656 GACCCTGAGCTACTGTGGTTAGCCTATATTAATTTAGTGATAGTTGGAGTGAAATGAATT +101716 TTTAGAAAAACGCGCAAGTTTTTACTATATAGTTTAAGTTGTAAATTTTATTATTTTGAT +101776 AACTTAAGATTTCTTTTAAGAAAATTAAATATTTATAATGAGAAATTATATTCTGATGAT +101836 AGGCTTCAAATTACATTGATACAAGAAAGCATTCGGCTATTATTTACAGTTATAATTGGA +101896 GATTACGTATATTATTTTGGAGGTAATACTTTATTTAAAGTTAAAGATGTAGAAGGTATT +101956 GGATTTGTATTTGAATATATTCGACGAAATTGTGGTTCTATGCGTTGATTTATTGAGTTT +102016 GTTTTAAAGAAAAAAAAAATTACCCTGGATATTTTAGTTTTTTTGCAACGTTTACTTAGT +102076 CTCTATGTAGATGATAGTCAATTTGTAGGTTTTTTATTTAAATTGCTTAAAAACAATATA +102136 CAAACTGTTAAACTAATTGATAGCTATCAAGTGTTAAAGATCGATTTGTTAGATTGGTTA +102196 TTATCGAAATTTTATTTTTTAACTTTAGATAATTTTGTTGAGAAATTATTTGTAAAATGC +102256 ACTAATACTAATTTTTGTATTTTGAATAGTGCACCGCAATACAATTTAATATTTAAATTT +102316 
CCTTTTTTGAATGTAAAAGTTTTTCCAAAATGTAGTAATGGTTGTGTATTATCTTTAAAA +102376 TATATTCGGTATGGATCTAATTTTTTAATAGGTGTTAGTGATACTTCTAAAAATATTGTG +102436 GATTGAATTAGTAATATAATACTTAATTATATAAATTCTTTTTTGTATCTTGACCAAATT +102496 TTAGTTGTAAAAAAAGTTATAATCAATAATTCTCTAATGATTAGACTTTTTGGGATGCGT +102556 TTTGAAAAATGTCGATATAAAGATTTTGTTAAGAAAGTTAAAATGTATTCTTTGAAAAAT +102616 AAAATGAATTTAATTTTTTCTAAAATACATTATTTGCGTTATAATCTTGATATAGGGGAT +102676 AATAAATTTAGATGTGAGCAACATAATATATTTTGTTGAGAGTATAAACAAATAGCATCA +102736 TATATGTTTCCTGTAAGCATAAGAAAAAAGATTTCTTTTTTTTGTATATATGTGTATTTA +102796 CGTGATAAAATAAATGATTTTAGTGTAATATTTTGAAATATTAGTGCGTTAAATTACTTT +102856 GTAATTAATTATGTACCAAGGAAATTGCAAATTCCGTATCGTGTTTTGCTTAGACTGATT +102916 AAAAATATAACCTGCTCTAGGTATACTGGATTAATAATTAATTACTGTGTTATGTTAAGT +102976 TATTTATTATGGTTGCAAGAATATAGTAGCGGCGTCTTATCAAGAAATTTAATGCTATAT +103036 TATTATCCTAATTTTGAAAATCAGTTTATTGTATCTAAAAAGTTTACTGTTCGGTTACTT +103096 GTAGATACTGCTTTAGTATCGCGTTGGTTATATAGTGTTGGTATTATTAATAAATTTGGT +103156 TGCCCGCTAGTTAAACGTAAATTGATTTTGTTAGAAGATTTTATCATTGTATTGTATTAT +103216 CGGAAGTTAGCTTTTAAACTAATAAGATATTATTTATATGCTAATGATTGAGTTAAATTA +103276 TATAGTATTTTATTTAAGTTAAAAATTTCGTTGATGAAGACTTTAGGGGTTAAATATAAA +103336 TTGAATATGAATGTAATTAAGCAAATTTATGGAGATTCGATCTATTGTTCATCTTTAGAT +103396 GGGAAGTTTATATCTTATTTTTTTAAAACAGATTTATATTTATATAAGCGCAAATTTTTG +103456 ATAAATTTTTTCAGATGAAAACAGTAG +; G-orf688 ==> end +103483 AATATAATGTATAAAATGGTAGTTTAAGAGAGTTAGAGAGAGAGTTAAATTTTTTGTTAG +103543 TAATTTTCTGAGGAAGTAATATATTG +;; mfannot: /group=II +103569 gagctgt +; G-orf132 ==> start +103576 atgataaaaaattatcatgtacagttttggatgggag +;; mfannot: +103613 TTAATTGCTTACCTAAAATC +;; G-nad6 ==> start ;; 72,199 +103633 ATTGTAATGATGTTAGACGTTAAATATCAATCTATTAATCTTGAAATGGGTTATTATCAT +103693 ATTATTGGAGGAATTGTGTTATTATGTTTAATGGTAAAATTTGTAAATATTTTAGTAAAT +103753 GAATTAATTTTCGAACATGGTTATTTGATGGGGATAAGTGTAGATTATCTAAATTGGTTT +103813 GATTTAATTGTAGAAGTTGTAAATATTCGTAATATTGGGTTGCATTTGTATAATTATTTT +103873 TTTATTCCTTTTATAAGTGCTGGGTTAATTCTTTTAGTAGCTATGATTGGGGCTATAAGT +103933 
TTAGTTTTGCCTTCTGAGACCTCGAAC +;; G-nad6 ==> end +103960 TTGAAAAGTTATTAG +;; G-nad6 ==> end +; G-orf132 ==> end +103975 TTTTAATTATTAAATAACATAGAAATTGGGAGATTATTATCTTTATTTGTAGAGAGTACT +104035 GGTACTTATAGTATTTTAAAC +; G-trnR(ucu) ==> start +104056 GCATCTTTAGCTTAATTGGAAAAGCATTGATTT!TCT!AAATCAATAAATATAGGTTCGA +104114 GTCCTATAAGATGTA +; G-trnR(ucu) ==> end +104129 GTGCAATTTAATTTCAGAATGTAT +; G-nad3 <== end +104153 TTATCATTCTAATGCACCGCGCCCTCATTCGTAGTAAAAACCAATAGTTAATAAGAATAA +104213 AAATAAGATCAT +; G-nad3 <== start +104225 ATGATAGAATACAGAATAAGAGTTTAAAGTAAGTGCCCATGGAAATAGAAATATGATTTC +104285 TAGATCAAATATTATGAATAAGATAGCTATGAGATAAAATTGTACAGAAAATGTATGCCT +104345 TGCATCTCCAAAAGAGTG +;; mfannot: G-nad3 <== start Def by similarity +104363 GATAGGTTTTATAAATTTATAATTCTATTT +;; mfannot: +104393 accttcctagaaacttaacaagttatttttaaacattaagctt +;; mfannot: /group=II(derived) +104436 TGCATAGAGAAATTTCAAATTGAGGTGTAGAGATTTTAAAATATTAAGGTAGATTTTAAT +104496 TTTTAGTAAATTTTTGAAATTTAGTATGTTTTTATTTATTTTAATATAAACAATATTTTG +104556 TGTACGTTAAAAGTATAAAAAATCCATGAATTCATATAACCACTCATATGAATTTGCGAT +104616 TCTATCTTCTTAGAAAAAGATGATTTAAATTTTTATTTTTACTTTTAAAAAAATTAATTT +104676 ATAGTTTTGTTAATTATTGGTGTTATTTATTTGACTAAAATAAATAAATAAAAATATTGT +104736 TTTATTACATATTATATTTTGGTGTATTAGATACATGTTGTATATTTTAGAGGTTATTGT +104796 AGAAATTGATGATTAGTTTTTTTTAAATTATGCTATAAAAAGATAGAAAACTGCGACACG +104856 GTCAAAACCACATTCGTATGCTGAGTATTTGTCGAAATTACTTTTACGTTCACCAGCTTG +104916 AATAGTTAAAAATATAAGAAGTATAGTTATTAATGAATTTATAATTATAAATAATAAAAG +104976 TGAGTAATATTCAATAAAATTTGTAGGAATTAAAAAATAAGACATGATTTTAAC +; G-atp6 <== end +; G-atp6-E2 <== end +105030 TTAATGCGATAAATTGATTGCGTCGTTTAAATATAAGCATATTAGTATTGTAAACACGTA +105090 AGCTTGTAAAAGTGCTATTCCTAATTCTAGGACAGTTACAGCAATTATTATTAATAGTGG +105150 GAACATAGCTAAAATATATCATAATCCTCCAAAATTTATCATAGATCATGTAAAGCCAGC +105210 TATAATTTTTAACAAGGTATGGCCTGACATTATATTGGCAAAAAGTCGAATTGAAAGACT +105270 AAATACTCTGGTGATGTAGGATATTAATTCAATTGGAACAAGAAGGGGTATAAGTAATGC +105330 ACTGATACCATTTGGTAGAAAGAATCCAAAAAACTGGAATTTATGTCGTGAAAAAGCTAT 
+105390 TAAATTTATTCCAATAAAAAATGTAAGTGCTAAACTAAACGTTACAGAGATGTGGCTTGT +105450 AATAGTGAAAG +; G-atp6-E2 <== start +; G-atp6-I1 <== end +105461 agtaagaatttttttatcttctcgcagaactgtacgtgagtattattactcatacagctc +105521 tttttagattttatataattatcttataacagtaacgtgagaatactaagttcaatgttt +105581 ttgaaaaattaactttttaatttttgttaagcttttgcttaatagagagtacttctttat +105641 tattatttgtgtttatttttatcttgaaatatttga +; G-atp6-I1-orf499 <== end +105677 ttatcttttgcatttaaaagctaatgtacaatataatgatttttttagatgataatcaat +105737 aagtttttgaatttcttgtatgttatctgtatatttatagtaaaccgataagtttgatgc +105797 ttgtaacgcatatcatcatattattttactaattggtccatatgatatgactatacaatt +105857 ttgtggttgtccatttgttgaaagtatacctttatttttccaattttgttttattttttt +105917 aataggcgcatataattttaaatagctttgttgtttttgtatggctttggttattgaatt +105977 ttcatagatttttgatagtcaatttttttttattaaaatgcttttatatgttttaatttg +106037 ttcttctaaaatatgtgttgtagctaagatttttttgggaagtattttgttgatcgctat +106097 tagcgagtttaatattgttttggagaaaattggttttaatgaattgagattaagggtttt +106157 aattagtgcagttaggtactcttgtgtatgcagaaaattgagattttttgaatttttaga +106217 tatagatttatataaactaaatttcaaatttttcttggttttttttaaattttgatgctc +106277 cgtattattttttattcactttcaatattttgttaatcttgctatatattctgttggatt +106337 attgtttatatgatttttttttttatgtatgttaactccaagaaaattgacataatgttt +106397 gtttgcttgggagcatattgggttgattatcgaaattaagggtaatttattttttgaaaa +106457 tatttttattttattatatataaattttactaagtttaatgatccataaaatccgagtag +106517 taattcgttgttatatctatgaaattgtattttaaaagcgctatgtatattatagcgatt +106577 tatatttgataaaaattggtctaaagtaagtaagtatgtattactaagaattgaaaaaag +106637 attgcttactttgaaaactgaattgctgattgtatctgcgtattctttgattaagatatt +106697 tataaattgtttatccatgattattttttttagcggtttaaaaagttttataaaatttat +106757 gctagtaatatttactgttatatttaactttaaaaatcattttatattgtttcatgagtt +106817 ttttatattgaaaagtatatcatgtggtccaaaatttgtttgtattccaaatgagttttt +106877 gtggaattttggttctaaaatagctaataaaattatttgtaaagctttatgtttcttaaa +106937 aagaaatggtggagtatttttttgtcacatttctatgaggattatcaatctaatgaaaat +106997 tttatttaattttttgtattttccttttttatagagcttagttattcatttatttacaat 
+107057 tatgttaatagtttttgttttagtattgaaattgtaatgaattattttttttggacaaaa +107117 tttaattaattttattatctggttcaaattggggtatatatattttataagatgtttcat +; G-atp6-I1-orf499 <== start +107177 ttttgtttctaaagttttctacttatatattttataattgtgtagaatctcaatgcaacc +107237 tttcgatttttttgaaaaatctatagaaaggcgctatagtagtttctcatagtttatcat +107297 aaaatagtttgtattttttaaaaaaggttatgattttatcataaaataaattagaatttt +107357 atagtattattagatttttttaaaaaaattgctgtgtcaaaaatgcttctgtttttctag +107417 caattgtaattatcgatttatagttgtgttagaattaactttgtaaggtgaataaaatgt +107477 tatttaattatgcagagtttatggtaggtttgtttaatcaaccttttagatattctaaac +107537 ctgtttctttaatatctcttgtatttaagatagtttggttagtaataacgttaacatgtt +107597 aacaggcagtagccattataatttgtgagaatcatactccgaataagc +; G-atp6-I1 <== start /group=II ;; mfannot: splice boundaries uncertain +; G-atp6-E1 <== end +107645 TATAGGGTATCATCCCTAATAGGTTAGTTAATGCTATAGAAGAGAATAGAGAAAAGATTA +107705 AAGGAAAATATTGTAGGCCTGCTTGTCCTATGTTTTTTTCAATAAGATTTCGGTTAAAAA +107765 TTAATAATTCTTCGAGAATGGATTGTCAATAAGATGGAATAATTTTATTATTGTAGGTAA +107825 TAATGTGAAAGGTTAGGAGTATACTTGCAAAAGTGATTATTGCAAATATAGTAGAATTTG +107885 TAATAGTTAATTGTTGGTTAAAAATCTTAAGATTTGGAAGTAATGCAGTTATTTCGAATT +107945 GTTCTAAGGGTGAATGGTGTAGCAT +; G-atp6-E1 <== start +; G-atp6 <== start +107970 AATATTTATATTTTTTAAGAGGTTGTTGGTGATACTTAAAGTAATTTTATGATGTATACG +108030 TTATATAGTGAATTAAATATATTTTTTTC +; G-rps10 <== end +108059 CTAAATTTTTGTTAAGATTTTTTGTTTAAATTGTATAGTAAGTGAATGCGGTAGATTTTT +108119 TGAAGAAAAATTTAATATTTGCATTAGTTTTTGCATGTTATATGATGATATATTAGATAT +108179 GTATAGTGTGTATTTATAGGTTCGTATTTCTAATTGGGTACGTGCTGTTTTATGGACATG +108239 TGGTGATTTAAGTATAGTAAATTTTTTGATATATAAAGGTAAATAGGTTTTTGTTATTTG +108299 TAAATTTAAAATATTATTTTTTTTTAAAAGAAAAGCAAGTAAAGAAAAAAAATGTTTTAT +108359 TGTGTTTGTATTTAAGCTGTTAGCAATAAATTGAATTGTGGTTTTAAATTTCAT +; G-rps10 <== start +108413 TAGTATTA +; G-nad2 <== end +108421 TTACAGTAAAATACTTAATTCGTGAGTTGGAAGTAATAAAATGTTTGTTGTTGTCAGAAA +108481 TAAACTGATTAATAGTGATCCAGAAAATATTATTAATATTGCAATAGTACTATCTAAAGT +108541 
ATATTTTTTATAATTTAAATTTATAAATTTTTCGAAATTAATTATTTTGATTAATCGTAT +108601 ATAATAATAAGTACTAGTTGTACTTGTTAATATACCTAATGCAACTAGAATATAAGAATG +108661 TAAATCAATTAAGGAAAGAAAAATTTGAAACTTGATAAAGAATCCTCAAAGGGGTGGGAT +108721 TCCAGCAATTGAAAATAGTAGTAGAATAAATGTGAATTTATACAATGGATGAATTGTGTT +108781 TGGATTCAATAAGTCTGTTAAGTAGATTAAATTTTTATTTGTGTTTTTTTTAATAAGTAT +108841 AAGAAGCCCAAAAAAACAGAATATTGTAGTTACATAGATTAAAAGATAAAGAAAAAAAGA +108901 ATGCAAACCGAGCATAGTTCCAGTTGAAAGTCCCATTAACATATAACCTATATGGCTTAT +108961 AGAACTATAGGCTAAGAAGCGTTTTAATTTTTTTTGATATAATGTTGCGAAACTTGCAAA +109021 AATTATGGTTAAAATGGAAGATAATACAAATATTGGGTGTCATAGATTATGGAATTGTAA +109081 AAATACTGAGAATATGAGTCTGATAAAAATGCTAAAGATAGCTATTTTTGGAATTGTCGA +109141 GAAAAAAATTGTTATAGATAATGGTGCGCCTTCGTAAACGTCTGGACTTCAAATATGGAA +109201 AGGTACCGCAGTTAATTTAAGCAAAAAACTACAAAGCAGAAGAATGAATCCTAATTTTAA +109261 AAGAATTGGGGTGTTTTGAATAATAATTGTTAACTGATATAAATTGCTAAAATTAGTTGA +109321 ACCTGTAAGACCATATATTAAAGAAATCCCAAAAAGCAGAAAACTAGATGAAAGGGCGCC +109381 TACTATAAAATATTTTAGACCTCCTTCTAAGGAAAATCTAGAAGTTTTTTTAGAAGCTGT +109441 TAATATATAAAAACAGAGGCTTTGTAATTCTAAACTTAGGTAAAGAATCACGAAATCATT +109501 AGCAGAAGTTAATATAAGCATTGCGCTAAGGGAAAGCATCTTTAAGATGATTGATTCAAA +109561 GTCAGTTGGGAAATTTTGTTTTTCGGAATATTTGATTAAGCTTAAGAAAAAAATTGTGAA +109621 TAAAATTATTAACAATTTTATGTTAGTTGTATAATTATTTATAACTAAATTTTCATAACA +109681 AATTGTTTTTGTTATATTTAGGTTATTAAGCATTAGAATAAATCCTAGTATTGTTGTAGT +109741 TATTACTAAAGTAATAAGATTTTTTAGTATAGATTCAGCATTTATTTTATGCATAGTAAT +109801 TTGAAATGTGCTTCAGATTAGAAGAAAAATAGTTATGCTTCCGATAAAAATTTCTGGTAA +109861 AAGAAAATATAGATCTGTATTGAGTTGAGTCAT +; G-nad2 <== start +109894 ATAAGAAAGTTTAATTTATTTAGATTTTGGGTAGATAGTTATTTAATGGGGTATTTATAA +109954 TAAATGGAGGTGGTAAAGTATTGTAATGGTAAATACATATAAATTTA +; G-nad7 ==> start +; G-nad7-E1 ==> start +110001 ATGGAAAAACGATTAATTAAAAGTTTTACAATGAACTTTGGGCCACAGCATCCAGCTGCG +110061 CAT +; G-nad7-E1 ==> end +; G-nad7-I1 ==> start /group=II(derived) ;; mfannot: splice boundaries uncertain +110064 ttacggtagtaaggttggttaattttattgtttatttttgttagcattgtgtaatgaatt 
+110124 tttttaagataatgttttttaaaaaattttatatgagaatcaatttataatatatataca +110184 tgcattctataactttaatttttgattaatttttaatctgatgggtatgtttaagaaaaa +110244 ttttatatgtttttagaaattagagattaatgcgtgttttaattattattagtaaatata +110304 atacctagggaaatctgtgatattttattcgataaagaagaaattttaagtaatctatac +110364 ggaaatttttcagtttttaatattgttgaagaaaattatttagatattaagtttttacga +110424 cgtagttagcttgttattatataaattttttgatctcttgcagaaaatttgaaaaagtag +110484 atgattgaaaatgttttaattttttgaagtaaaatttagattaacaagataattcaattt +110544 agcttagagaatggcaattattaaatcggtattaatttaaagatatttttttattttttc +110604 gattttaaaatttttctctttaggttgagccgtatattcatggaaattatatttacggtt +110664 ctttgaaaagggatttctctctatttcagt +; G-nad7-I1 ==> end +; G-nad7-E2 ==> start +110694 GGAGTGTTGCGATTAGTTTTAGAATTAGATGGTGAGATTGTAAAGCGAGCTGACCCGCAT +110754 ATTGGATTATTACACCGGGGTACTGAAAAATTAATAGAGCATAAGCTGTATATACAAGCA +110814 TTGCCATACTTTGATAGATTGGAT +; G-nad7-E2 ==> end +; G-nad7-I2 ==> start /group=II(derived) +110838 gtataagggctttaactaaataatataattgtattgagtcaggatttatagttaaattta +110898 ttttcgttttttttatatagtataaaaattttatcttaaatgcaacgcactaaatattta +110958 atttttgcttgtataaagaaccttggtatgattaaataattttataaacaattaagttgg +111018 ctaatatgttatattttttacattgcagattaaattagtataataatttttatgtaataa +111078 gtattttataggatctatatgcattaatcaagttaacttactaataatttttgaaaatga +111138 tatgagtccagtattaaatgaaattatggttaaattataaaataggtatagtattcttgt +111198 ttataagtggaaagttaatatattaacaagttaaagtttatttataaccttaattggttt +111258 tatgtaaaggatttttgcagtttatagtgtggtatgtaaaaagatgtgatttactttatt +111318 aagtaaataagtttacttgttgtaatataaataatatatggtgaaaactagtaaatttaa +111378 taattatcaaagagccaagtattttgtaaaaaatttgtttggttcggaatgggctaaaaa +111438 attatgctatacatctaattttgatatataggaaattcaaacagctaccattac +; G-nad7-I2 ==> end +; G-nad7-E3 ==> start +111492 TATGTTTCTATGATGGCACAAGAACATGCATTTTCATTAGCTATTGAAAAATTGTTAGGT +111552 TGTATGATACCACGCCGTGCCCAGTATATTCGTGTTATTTTTTTAGAAATTACAAGAATT +111612 TTAAAT +; G-nad7-E3 ==> end +; G-nad7-I3 ==> start /group=II +111618 
gagtaggaagctgttagattagtattttattggctagttattgtttaagtaattaggctt +111678 tttaatgtatttaaacttaagcctaaactatcgtaactttgtatttcttttgaaagagaa +111738 atttaagtatagtactcctaattagatagtattttgttccaaaacaaataaaaggaaaga +111798 ataacttttaagttaatatgtttttattttttgacatattttttattgtgggttgggcgg +111858 acagtaaaatttaattaattcatgttatatttgtaagtaaacttaaaataggtataggtt +111918 taatattgaaaagaaaagtttaagttttgattttattataaatgaaagaggtatttggta +111978 caatttaataaaatacagtaaatatacaatagtgtttttattaccatttttgttagaatt +112038 tagattaaaaatatataaaatcgttttacttaatagtggtataaattcagaaattttcac +112098 aaatctttattatatggggatagcgaaattagtgaacaattttttct +; G-nad7-I3-orf505 ==> start +112145 atgtccaaaaatagtttcttgagattgcgtttaaggcatagagagttagttactgattgt +112205 aattttttttgtcaattatttttggaagcacggcaattgcttagtagtgatttttttcct +112265 acagaaaagtatagttttagattaaaattaattagagaagtttttaagttgcaaaaaaga +112325 attgctcagttgggaaatataggaaatacgaatggagcattatttttaataaataaatat +112385 gtatctcatttatgtgtacgacttttcgtcataggcgcgctaaaaggtagcataagtgtt +112445 aattttttatttcattttaaaattttagaagtatttgattgcttttacatattaaaatat +112505 ggttggtttttaattggacttaaatcattatataatgtgaaaaaaatttattttaaaaaa +112565 gggaatgatagcgtatatagcgttttacttagctctgtttttgacaaaattgcacagcga +112625 caaattttgattttattagacccattagttaatgcaatttcaaaatttaatcgatatggt +112685 ttaagtcgtgagcggttttatagacagttggtaaatcactcggattttttttttttaaat +112745 aaaaattttataaaattgagattattaaaatttgaagttagtaattgatttagtaaaata +112805 tcacatgtatatttgtataattatttaccttggccgcgtggatataaatatttactagag +112865 cggtgattgaacccaagcttggcggtaaagataagaaaaaataattgtagtaagatttta +112925 acacaaggtataatgcaggatttgatacttggtcctattatatttaattttattttaaat +112985 agtttttttaaatttttattgtttaaattgaattatagaacaatttttgtgaaacattta +113045 cgtgtgtttattataggaagtattattggggttattactgtttctaattcagttttgagc +113105 cgtttttattcaaatattattaattatttaaattttagaaaaattgtaaattatgatttt +113165 ttgaagatgtgttatgttgatttttttcatgtaagaaaatttaattttttaggttgacag +113225 gcgttttttactcgtagagtttgaataagcgtagcattgagcaaaaattatttgggtttt +113285 
agatatcctttgaagagaagtgttactggtttacttgaatggcgaccttgtttttatcat +113345 cgtttgggatttaagaaggcaattaagtgggaaatttttaaatctccttataatatacga +113405 attattatatttgttaaagtatatttaattatgcggaattttgtttggtattatttatat +113465 atagataattttattatatactttatattgttgtgttgttttgtatctaggtgcttgagg +113525 agaaatttaaaatataagatgtattgtggtaagagacgattattttctaatttgatttga +113585 aaagtgtttttcggattagatatggtctttttttataaaatttgaggatactgggtgaga +113645 atatttttaaaatattaa +; G-nad7-I3-orf505 ==> end +113663 tttggttaataaataagatttttagttaagattagtattagaattttaacatttagtttt +113723 tgatttttattttaaattgtatgtattttaaaaaatttaaaaatacaaattaaaagaaga +113783 tattaataaaattaaggggaaataatatatttaagctttagttattaagttaactttggt +113843 ttagatttaaggcgctatataaaggttttattgtttaaaattgttatatttatatatttt +113903 tagagatacatattttgaaaaattcggttcatgatgagcctaatacggggcaactcgttt +113963 gtttggttctgagaagaggaaatttagtacttacttctat +; G-nad7-I3 ==> end +; G-nad7-E4 ==> start +114003 CATTTAATGGCATTAACTACTCACGCAATGGATGTGGGGGCGGTAACTCCTTTTTTATGA +114063 GGATTTG +; G-nad7-E4 ==> end +; G-nad7-I4 ==> start /group=II(derived) +114070 gtgtaatttgttttttctatttatataatagttttaatttagcaaaaaatatttaaaaaa +114130 ttgattttaggattttgtgaatgcagagattagtacgaattttctaatatttatggctta +114190 tgatacaaatatgtatcgaaattcaaattatttttttgtggttgttatactataaaatat +114250 agtatttaattttacacaggctgttaaaattaataatgaatgtgttcacttagattaaat +114310 tagtttatgattagggcataaaaaattacttaaaaattttaaattagcggtctaagctaa +114370 cgtgattaagtatagaaatttgtgaattaaaaattactattaataaggtatttaggtacg +114430 aatagcgtgtttttttaattagattaggtattataatgtaatattga +; G-nad7-I4-orf511 ==> start +114477 atggagttaatatttcaattatatgaagtaaaatttgaaattaataaaattaataaaata +114537 ataggtaattattttagatatttacgatgacctattggattaggtgtaggttgtttgatt +114597 agaaagatagccatttttattcaatatttgattcgtgtgggttttattgtaaatgatgta +114657 ttttgtgttaaattgcagagagaatttttttgctcgataatcaatcgacttcttgttgta +114717 gattacatatcgcattttgtttacagagcattgaagaaaatttttcaatttttaatttga +114777 gagtgaagatttgaaataattaataaatttaagcaaaataatttatataactatttaaat +114837 
aggaacatttcttcaattttgttttatgaagaggcagcacaagttcttattttaaatact +114897 acagtatcttgcctagaggtatttactgtatttggattattcattttccaacatattcga +114957 agagtttgtgtaacaatgaattatttaggtcgttttttggttcctaagctgaacactgtt +115017 aactactcaattattaaattaaaaattaagtgtattctgaagacatttttttgtaaacag +115077 ctaaaagggcctgctttaatacttattaaatatttcaaatttattccaaatttatggaag +115137 agccaaatattttttaaacagtgagagtgttacaattgagtgtctgcattcagtttatca +115197 agtatgtggaatcagtttttatttgatttggtgttagcagatttcgaatttgtaattaaa +115257 gaaagaatttttaatttttattatagaaaattttttgggagctatcagaattttaaaagc +115317 aaatttcaatgcggagtaggtttattttttgttaaatgtgtagatgaaatattgatatgt +115377 tgtgaaaattatgaagagaaagattggataattggggtattatttgaaattttagtagat +115437 aaacagttaattatagattttttaagttctaaaattttactaggtcagcgcaatactaat +115497 tttctttatttaggttttgagattagaaatcatgacgtaagaagtagaaataagtacatt +115557 agactatgttgtgtaaaattttgaggtgatttagtgattcttccatgccaacgttcggta +115617 ttgggattgaaatcagagttaagaggggtgttatctaacgttaacgcctcggtttcttct +115677 ataatttaccgcttgaaccgtattgtttatcaatgaggtatgtattattcatttagtatt +115737 tcaagtgttttatgtctcttattggacagttttattcattttagggtttggagattttta +115797 aaacagaaattttctaaaataggtaaaacatatttagcagagcgatttttttttaccggg +115857 aatctgaaatatcaaacaaattttaagaaatggcattttcatgatgttttatctgaatca +115917 acgcagaatttattgtttaataataaaatttggtttatctgattagtgtctttaaggcaa +115977 ttttgttctataaaaagattttactatattcaataa +; G-nad7-I4-orf511 ==> end +116013 atattagagttatttgttggaataaaatttttctattgattttacatattgtaatatgta +116073 aaaaaagatataatacttaatttgtagcagcaacgcgcataagctgaatactataagaat +116133 agtttgttcagttttatagggaaaagtttttgcaacaacgatttatcctaat +; G-nad7-I4 ==> end +; G-nad7-E5 ==> start +116185 AAGAGCGGGAAAAGTTATTAGAATTTTATGAGCGTGTTTCGGGAGCACGGATGCATGCTA +116245 ATTATATACGGCCTGGTGGTGTAAATAGGGATTTACCATTAGGATTTTTAGAAGATTTAT +116305 ATACATTTATTGTACAGTTTGGTTCACGAATTGATGAAATTGAAGAATTGTTAACGTATA +116365 ATCGTATTTGAAAGCAGCGATTAGTTGATATTGGTATTGTATCTAAAGAATTGGCTTTAG +116425 ATTGAGGATTTACTGGGGTTTTATTACGAGGATCTGGTGTAGTTTGGGATTTAAGAAAAA +116485 
CGCAACCTTATGAAATTTATAATGAATTATTTTTTGATATTCCAGTTGGTAAAAATGGTG +116545 ATTGTTATGATGT +; G-nad7-E5 ==> end +; G-nad7-I5 ==> start ;; mfannot: no intron type identified +116558 gtggtgtaagaaacgtattatattagagtaaataggttaattaatgtttaaagtgtctgt +116618 aaaatgtttagcaactataatattaaaatattagtgttataaacaatcaattcgtaagat +116678 tgaaattgatattttttgatttaaaatatatataataaaaaattattaaattgatcatat +116738 ttaataaatgtattgtattactgcattagtttattaataagaaatttattaaagtgttaa +116798 ataaatttaattagataagtagtttagtgtaatcgttatggcatacatagtttacaaaat +116858 ttttattattacgttatttttgtaaaagaaaagagaattaattgattattcaaagtattt +116918 agggagatttatattaaagaccttttgattgtaaataggattttgtcttaagattaattt +116978 agttaaaaaaattatcctagtcgattgtagaatattttaatattaatattaaataagatt +117038 aattttggtagtataaattgggtttttatttattttttttacgtaatttgtatttctagt +117098 aaatactaagaataaaatggttagttgtatttatttttaattattaaggataatttgcca +117158 ttagttagaatattttgctaatagtttgttacgtagtttacaaattgtaaatgccttgta +117218 aatttattgatagtttagcttaatttgaacgcttttaaaaattttgctgagcacagttat +117278 tatattatattattttcgtaaagcttaatatattgggaaatatacgttaagtttgaattt +117338 gaaatcaatttaaagattttaaaataccg +; G-nad7-I5 ==> end +; G-nad7-E6 ==> start +117367 TTATCTAGTTCGTATTGGTGAAATGCGTCAAAGTTTAAACATATTAAATCAATGTATTAA +117427 TGAAATTCCGACTGGTTCAGTTAAATCAGATGATAAAAAATTGATGTCTCCAACAAGAAG +117487 TGAAATAAAACAATCAATGGAAGCTTTGATACATCATTTTAAATTATATAGTAGAGGGTT +117547 TGATGTTCCGCAGGGTGAAACATATGTTGGGGTTGAAGCGCCTAAGGGTGAATTT +; G-nad7-E6 ==> end +; G-nad7-I6 ==> start /group=II(derived) ;; mfannot: splice boundaries uncertain +117602 ttaagttaatatataattttacatatatttttgcattactatgatttagttaataatata +117662 ttattataggtttttatttttagtagcttattttccaattaagaataagtaaaaatcaag +117722 ttattgaacaatattaaatttgaggttatagtttatagtgtatttttatttaggcaatat +117782 atattgttgttaataagaaaaaagtttttacaaaattttgtaaataatgtataggtcaaa +117842 gtgatctaatttttgttcgagaaaaaattatatatctatttttttaattaaattcgtgga +117902 gtttacgtagtttaaaaagttgttgtatatttattgggcaaaaagtacatattaaatagt +117962 
tagctatataaataaaatgtaaattttatttaaaagcaaaacgtatatatgtaaaaaatt +118022 atgtgatcttattagtataataatagaatattggaaattaaaggattttgttaaaaattt +118082 tccagttataagaaataacttaaaatgtaagttagattttaaatttaattaaaattttta +118142 ttaattattttagatgagaagaatggttaaaaaagttagataaacattattgtttcattt +118202 ctaattgtagttaaacaattataattatatttttttaattttaatttagttgttaaatta +118262 tgtataataagaaatggaattaagctgtatgtattgtgaattacatgtacagttttataa +118322 agggttttctactttttagtatagaaattctattttacgt +; G-nad7-I6 ==> end +; G-nad7-E7 ==> start +118362 GGAGTTTATTTAGTAGCTGATGGTTCAAATAAACCTTATAGATGCAAGATAAAAGCGCCA +118422 GGATTTGCGCATCTTCAAGGGTTAAATTTTATGGCTAAAGGACATATGATTGCGGACGTT +118482 GTTACTATTATTGGTACACAAGATATAGTTTTTTTATGATTTAAGTTTAGCGTTAATTTA +118542 TTTTATTTTTATTAA +; G-nad7-E7 ==> end +; G-nad7 ==> end +118557 AAAAATATTTAAAATTTTAATGAAAATATAATTGTTTAATATAGTGCATTGGAAGTATAT +118617 TTAAGTAAAATTTCTGAAAAAATTTATATTTAATCGTAGATGTTTTCAAAATTTATTATA +118677 GGATTTTAAGAAACAAATTTTATATTAATTTATTTTGTATGTAATAAAGTAAGAAGGTAC +118737 TAGTGTAAAATGCTGAATCGTAATGTGGGTAGGTATGGTATAGACTGTTTAACGTAAAAA +118797 TGTAAATTTTTTAAAAAAATTTTCTACCCATAAATTCTTATGATATAGAAATATTTTTAG +118857 AGTTTTTGTGTAATATGTAGAAAATATATAATTAAAATAGCAATAAAAAATGAAATGGAG +118917 TTTATTTCTATATTGTAATAGGCTACATTTTTTAAGTTTATGAATCGTACAGTAGATTAG +118977 CACTAAAAAAATTAGTTTTTTAAATTTACTATTAAATTAGGTAATGGTTATGATGAATTG +119037 GCTAAGATATTATATTAAAATAAATATTGATCAAAAGCTAGTTAAAGTAACCATTTAATA +119097 TTTTTTTGTTTATTTTTAGAATTTACTGCATCTATATGTATGTATTT +;; mfannot: /group=II +119144 aagctgtatactaatctgattggtatgtgcagttttttaggagga +;; mfannot: +119189 TTTAGAAGATTCTATCTTTTGTGGTGAAGTAGATAGGTAGGTTTTGAAAAAATTTATGTT +119249 GTAGTATTAATATATTAGTTTATACAAATTTCAGTTATTTTTTGGATTGTTATTAGTTAT +; G-nad4 ==> start +; G-nad4-E1 ==> start +119309 ATGATTATTTTACCGATTTTAGTTTTGATTTGTGGAATTGTATGTATTAGTTTAATTTCT +119369 TCAGTGAGGTATATATATATTAAAAAGTTAGCTTTGTTTATTACAATTGCTGTGTTTTAT +119429 TTATCGCTATTGTTTTGAATATTTTATGTTAAGCAAAGTTTATTTTTTCAATTCATGTTT +119489 TATAGAGAATGGTTAGTGTTCATGAATATTGATATTATATTTGGTTTAGATGGTATATCA 
+119549 ATATTTTTTATTATTTTAACAACTTTTTTATTTCCTATATGTGTATTGTCAAGTTGAAAA +119609 ATAATTTTAGTAAATGTAAAGGAATTTTTTCTTTTACTTTTATTTTTAGAAAGTTTTTTA +119669 TTATTTATTTTTTCTACATTAGATTTAATATTATTTTATATTTTCTTCGAGAGTGTGCTG +119729 ATTCCTATG +; G-nad4-E1 ==> end +; G-nad4-I1 ==> start /group=II(derived) ;; mfannot: splice boundaries uncertain +119738 gtggtattataactttcttttctgaatatatttagaaaaaaataaatttcttattgatat +119798 tgttttttagctaatttgattagataaaatttaagtgattttgaagaatgcatttaagat +119858 ttttattaattactaaagcaataatttgtaataattgtaagtaatatttttataaaatac +119918 ttaagcaaaatttctgattagaagttgatataggattttaatgagagtaaatttaaagaa +119978 ttttcaatatagaaatattagaagggtgacttataaattactattaatttcaataatttt +120038 tagttgttggaaaagtcattgtttaataatgaatttataaaaatttaaattgattgggaa +120098 ttgtgtgggaattaatattaaaggaaaaaaattaaagtttttattagagctatataatag +120158 gaaactattttgtatggtttagaaacagaatttcttgtaataaactttattatttttaag +120218 tagtttagtgtaaaattttaattctagtttgt +; G-nad4-I1 ==> end +; G-nad4-E2 ==> start +120250 TTTTTAATTATAGGAATTTGAGGTTCGCGTGAGTGTAAAATTAAAGCTTCTTATTACTTT +120310 TTTATGTATACATTACTTGGATCATTGGTTGCTCTTATTGGTATATTAATAATTTTTTTT +120370 GAAACAGGCACTACGAATTTTTTTATTTTGTTAACTCATAAATTCAGCTTTGAACGGCAA +120430 TTGTTACTATGAATTATGTTGTTTATTTCATTTGCAGTTAAATTTCCAATAGTTCCTTTT +120490 CATATTTGGTTGCCAGAAGCTCATGTGGAAGCGCCTACGGTGGGATCTATTATTTTAGCG +120550 GGGGTTTTATTAAAATTAGGTATTTATGGTATGCTACGTTTTTCAATTTCTTTATTCCCT +120610 CAAGCCAGTAGTTATTTTACACCTTTTGTATATACAATTTGTATTATTTCCATCCTTTAT +120670 AGTTCGTTAACAACAATTCGTCAAGTGGATTTAAAACGTATAATAGCGTATTCTTCAGTT +120730 TCTCATATGAATTTTGGTTTGTTAGGTTTATTTTCTGGTACTTTACATGGTATTATTGGT +120790 GGTTTAGTTTTATCTATAAGTCACGGTTTTGTGACAAGTGGGTTATTTATTTGCATTGGT +120850 GTATTATATGATCGTTATCATACTCGTCTGCTAAAGTATTATAGCGGTATTGTGTTAGTA +120910 ATGCCTGTTTTTTCTGTTTTATTTTTATTTTTTTCGTTAAGTAATTTAGGTATGCCGGGT +120970 ACAAGTAGTTTTGTAGGTGAATTACTTATTTTGATAGGTACATTTAGCCAAAATAGTATT +121030 TCAGCTATTTTTGGATCGAGTGGTATTTTGCTTGGTACTTTATATTCAATTTGGTTATAT +121090 AATAGAGTGTGCTTTGGTAATTTGAAAATACAGGATAATGTATTAGTATATCTAGATATA 
+121150 TCAAAACGTGAATGTTTTTGTATTTTTCCATTAGTAGGTTTAGTGTTATTGTTAGGTTTA +121210 AATTCTAATTTATTTTTAGATTACTTACAGAGTGCAGGTTATATGTTACTTTTTGAATAG +; G-nad4-E2 ==> end +; G-nad4 ==> end +121270 TTTATTTTTTACTAGCAGTAAAAAAATTATATTTTTAATTATTATTACATTTATTAAAAT +121330 AAAAGTTATATGTTTAATATTAGAAAAATTTCGTTTTACGTAGTTTTTTATATCAGTTTA +121390 TGATAAAAATAATTTTGTCATAATAATACTTTATATTTTAGACTTGACGTTTTTTTTGTG +121450 TTTCTACATGTAATTTACATGTAAATTAAAGTGTGGTATTTTTTATAAAAAAGATGTATT +121510 TTTATATAAAATAGTCTTTAAAGTTGTTTATAAGTGTTATG +; G-atp9 ==> start ;; mfannot: alternative ATG start pos 121548 +121551 ATGATTTTAGAAAGTGCAAAAGTTATTGGTGCTGGGTTAGCAACGATTGGATTAGCTGGT +121611 GTAGGTTTGGGTATTGGGACAGTATTTGCGGCATTAATTACAGGAGTAGCTCGTAATCCA +121671 TCTTTAGTAAATCAGTTATTTACGTATGCGATGTTAGGGTTTGCTTTAACAGAAGCAATA +121731 GCTTTGTTTGTTTTAATGATTGCTTTTTTATTGCTTTTTGCTTTTTAG +; G-atp9 ==> end +121779 TGTTTACGGCAAAAATATATTAAAGATTAATTTTTTATAATTCTATACAGAAACTGAAGG +121839 ATCTGTTTTTTGTATAGAAGATAGAGGATTGGGTACTTGAAATATTTA +; G-trnD(guc) ==> start +121887 GGATTAGTAGCTTAATCGGGAAAGCTCCAAATT!GTC!ATTTTGGTAGATGTAGGTTCAA +121945 GTCCTATCTAATTCG +; G-trnD(guc) ==> end +121960 TT +; G-trnC(gca) ==> start +121962 GATTGGATAACATAACGGTAATGTGTTGAATT!GCA!AATTCATTTTATAGCGGTTCGAT +122020 TCCGCTTCCAATCT +; G-trnC(gca) ==> end +122034 TGTTAATGTGATTGGTT +; G-trnH(gug) ==> start +122051 GGCGGATATAGCTCAATGGTAGAGTATTAGTTT!GTG!GAGCTGATTGTTATGAGTTCAA +122109 ATCTCATTATCCGCC +; G-trnH(gug) ==> end +122124 TATTTATTAAGATTTTTT +; G-trnV(uac) ==> start +122142 TGGTAGTTAGCTCAAGTGGTAGAGCATCTCTTT!TAC!ACGGAGGGGGTTGTTGGTTCAA +122200 ATCCGATACTATCAA +; G-trnV(uac) ==> end +122215 AAAATTTAAGTTTTTTACG +; G-rnpB ==> start ;; mfannot: Approximate position +122234 AAGGAAAATCCTAATGTATTGTTATTTATACTGTAGTCAGTAAATGTAAATTTAAGAAGA +122294 CTTATTAGAAATAATTAAAATTTATTTTATGTTTTAAATTTATGTAATAAGAATATGCGT +122354 AAGCTTATCTATTTGTTGTTAAGTCTGCTGAAACAATAGAATATTTATTAATCATTAATT +122414 TAAGGTTTGGTTTTTTTACAGAATTAGGTTTAT +; G-rnpB ==> end +122447 
AAAAATTTTAGTTTACTTAATATAGATAATAAAAGTTGTATTTGGGTGTTTGAAATAGTT +122507 AAATGAAAATTTATTATATTTGGATTTGGAGTTTCTT +;; rns ==> ;; mfannot: start of 5' +122544 TATAAGAAGGGTTTGATCCTGGCTCAGAATGAATGCTAGAAGTATACATAACACATGCAA +122604 GTTG +;; +122608 GACGAGTAATTATTTTACAAGTAGCGAACGGGTGCGTAATGTGTAAGAATTTGCCTTCTA +122668 ATTTGGGATAACCGGGTAATGCTGGCTAATACCAAATAATTTTTTTAAAAAGATTGAATC +122728 GTTAGGAGATAAGCTTACATAGGATTAGGTAGTTGGTAGAGTAATGGTTTACCAAGCCAA +122788 TGATCCTTAGCTAGTCTGGGAGGATGAATAGCCACATTGAAACTGAGACAAGGTTCAAAC +122848 TTTTACGGAGGGCAGCAGTGTGGAATATCGGACGTGCGGTTCATCTAATAATTTTATTTC +122908 GTAGTAAAATTACTTTGAAGTAAAAAAAAATTGTATATGTGTTACCTTTTTTAGGATGTA +122968 TTGATTTGTACAAACATCAGTACAATTAAATATGAGATTTATATAGAGAAATTTTAATAT +123028 AAATTGGTATTGTGTATAATAAAGGTTAAAAAATTATAATTAATAGTAATAAAATATATT +123088 TTATGTGGAATAGAGTTGTTGATAGCAAATCATTGAATGGGAAATACCATAATTTCATAT +123148 GCAAACTTAAATATGATTAGTAAAAGAGAATGTAATATAAAAATTAATGATAGATATATT +123208 TGTATGAAGTATGTAATGTAGGATTGTCGCAAGTTCTACATTTTTTTAAGCGTTAAGATA +123268 CTTCTGGTAGATAAATGAGTTCCAATTAATAAGTAAACACAAAAAGATGAATATTTTGTT +123328 TAATTCATATATATTAAAAGTTTAGTATGAATTTTAATAAAAAGGATATTCAATATATAG +123388 TTGATGAAATTAGTTTTCATTATAAAAATATATAAGTGAACATTGTAGTTTATTTAAGTA +123448 TTCAAAAATTAAAAAAAAGTACTTTAACTTATTTTTATTTAAAGTTTAGATAAAATATAA +123508 ATAATTTTGGTAAGAAGAACTTTAATATAAAATGCAATATTTTTGAATACGTAAATTTAA +123568 TTACTGAG +;; mfannot: /group=II +123576 tagctgtataaaaggttacttttatgtacagttccgtaggggaagg +;; mfannot: +123622 TTAAATTTACAAATATAACTTTACTCTCTGACAATGAGCGCAAGCTTGATCCAGTAATAC +123682 TTTATGTGTGATGTGAAGAGTAGGAGACTATTTGTAAAGCACTATCGGTAAAAACGAAAT +123742 TGACTATATTTACATAAGAAG +;; rns ==> ;; mfannot: corr to pos 485-571 of R.americana +123763 CTCCGGCAAATTTCGTGCCAGCCGCCGCGGTAATACGAAAGGAGCAGGTGTTATTCAGAT +123823 TAACTGGGCGTAAAGGGCATGTAGACGG +;; +123851 TTCATTATGTGTACTATGAGTTACAAAGTATAATTTTGGAAAGTAGTATACACAGCAGAA +123911 CTTGAGTTGGGTATAGGGTAGCAGAATCTTTAATGTAAAGGTGAAATTTGGTGAAATTAA +123971 AGAGAATACCAAGGCGAAAGCAGTTACCTATGACGAAAC +;; rns ==> ;; mfannot: corr to pos 
735-810 of R.americana +124010 TGACGTTGAGGTGCGAAGGCATGGGTAGCAAATAGGATTAGAGACCCTAGTAGTCCATGC +124070 AGTAAACGATGAATATT +;; +124087 AAATTTTGAAATAATGATTTTCAAAGTTAAAGCTAACGCGT +;; rns ==> ;; mfannot: corr to pos 850-965 of R.americana +124128 CAAATATTCCGCCTGGGGAGTACAATCGCAAGATTGAAACTTAAAGGAATTGACGGGGAT +124188 CTAAACAAGCGGTGGAACATGTGGTTTAATCCGATGTGCGTTTCGGTAAGAGTGAG +;; +124244 TAGGAGGCTCATTGTCTTATTCAGTTTTTATAGTTAAGCTGATTTTTTGGTATATATATA +124304 TAGATGTAAATTCTGTATCTTTTATGTATAGATAGTTCACCAGAAGCTAAATTTCGGTTT +124364 AGTTATCCCACCACAAGAGAATAAGTAGGTAGTATTTGTGTGGTAAGCAGGGACTTTAAT +124424 ATTTAATGTATGCGGTATATTCAAAATACAGTAAAGTGTGAACATAGATTATTAGGAGAG +124484 AAAATACGTACTATTATTGTAATAGTGAAATTCGTCATAAAAACTTTATTTAGAGATTTT +124544 TTAAATCGAAAAGTTTAAAGATTTGTATTGATTATTGTGTAAAGTTGGATTACGCAACAT +124604 GTATAATAACTTTTGGCGTTTATAATAGCTCCAACAATAATTTTAATTGGAATGTATACC +124664 AAAACGAAATTTTTTCTTCATAGTCTATTCTTTAATTTCTATGGTATTCTACAAGGGGTA +124724 TGAAGAATTTAGGATAATATAGAATATTAATAAAACCTGTTGTTTTGGAAAAATTAGGTA +124784 AAATTAAATTATATTTACAAAAATTTTTTATTTTCTGATTTTAGAATTT +; G-orf148 ==> start +124833 ATGTTTTTGTTAAG +;; mfannot: /group=II +124847 tagctgtatgaattggaaaattcatgtatggtttcgaataggcgg +;; mfannot: +124892 TCTAAGTTTTTTAGAATATAGTATCGACCTTACCACTACGCGTAAAATCTTACCAGTTTT +124952 TGAATATTTTA +;; rns ==> ;; mfannot: corr to pos 1014-1044 of R.americana +124963 TACAGGTGTTGCATGGCTGTCGTCAGTTCGTGTTGTGAAG +;; +125003 TGTTTGGTTTAGTCCCTATAACGAACGCAATCCCTATCTCTTATTGCTAAAATACTTCTG +125063 CAAAAGTATTAAGAACTTAGGAGAATCGCTAATAACAAATAAGCTGAAAGTGGGG +;; rns ==> ;; mfannot: corr to pos 1190-1252 of R.americana +125118 GTGACGCCAAGTCGTCATGGCCCTTATAGACTGGGCTACACACGTGTTACAATAATTATT +125178 ACA +;; +125181 ATGAGAAGCAATAATGTAAGTTGGAGCAAAACTCTAAAGGTAATTTTAGTT +;; rns ==> ;; mfannot: corr to pos 1305-1411 of R.americana +125232 CAGATTATTCTCTGTAACTCGAGAATATGAAGTTGAAATCGCAAGTAA +; G-orf148 ==> end +125280 TCGCAGATTAGTATGCTGCGGTGAATATGTTCTTAGATCTTGTACACACCGCCCGTCAC +;; +125339 
ACCCTGGGAATCGGTTTTATTGTAAACAGATTGTATAACTTAAAGGAGATTGTAAAATAA +125399 ATTTAGGAGTTCGTCTGTTAGATTAGAAT +;; +125428 CGGTGATTGGGGTGAAGTCGTAACAAGGTAGTTGTAGGGGAACCTGCAGCTGGAAGTAAG +125488 ATATA +;; rns ==> ;; mfannot: end of 3' +125493 AATAACACTCATTTATTATTTTGTATGTATTTTATCG +; G-rrn5 ==> start ;; mfannot: complete +125530 GATATTCTAATAATATATATTGATACTGGATCCCATTTCGAATTCCGGAGTAAAACATAT +125590 ATATTTCATATATAGCATAAATGTTGTGAAACGTGATTATGGTATT +; G-rrn5 ==> end +125636 TAATGTAG +; G-trnF(gaa) ==> start +125644 GTTTAGATAGCTCAGCGGTAGAGTAAAACACT!GAA!ACTGTTTGTGTCGCTGGTTCAAA +125702 TCCAGTTCTAAACA +; G-trnF(gaa) ==> end +125716 AAAATTGCTATAAAAAAG +; G-trnK(uuu) ==> start +125734 GAATGTGTAGCTCAAGTGGTAGAGCAGTAGGCT!TTT!AACTTAATGGTTCCGAGTTCAA +125792 GTCTCGGTACATTCA +; G-trnK(uuu) ==> end +125807 ATTGTATAGGGTTTTAACTCAAATATTTTTTGTGATATGAAACGGCAAAATAAATTTAAC +125867 AGTTTGAAGTTTATGTATGTATACACTAATGGTTCTATTTTAATTTCTAAAGATTTTTGT +125927 AAATATAATTTTTTATTAGGTGTGGATATTTTTAACTCAAAGCATTGGTTACGTGTAAGA +125987 TCAATATTTTTCGAAGGTAAATCAGTGATAAAATTTAAATCAAAATTTTCAAAAATTGGG +126047 AATATCTAAATTATATAGTAATAAATTCATATAAAATAGAAATATC +; G-trnT(ugu) ==> start +126093 GTATCGTTAGCTTAATTGGTAGAGCATTGATTT!TGT!AGTTCAGAGGTTGTGGGTCCGA +126151 GTCCCATGCGATACA +; G-trnT(ugu) ==> end +126166 ATTTTTTGGGTTAT +; G-trnM(cau)_1 ==> start +126180 TGTAGTATTGAGTAATTGGTAACTCACTAGATT!CAT!GCTCTAGGAATATTGGTTCAAG +126238 TCCAATTACTACAA +; G-trnM(cau)_1 ==> end +126252 ATTTAGACTAGAATTGAAGAAGAGAGTAATAA +; G-trnM(cau)_2 ==> start +126284 GGGTTTATAGCTTAATGGTTAAAGCAGACTACT!CAT!AATGGTTTTATTGTAGGTTCGA +126342 ATCCTACTAGACCCA +; G-trnM(cau)_2 ==> end +126357 TATATGG +; G-trnA(ugc) ==> start +126364 GGGGATGTAGCTTAATGGAAAAGTTCATACTT!TGC!AAGTATGCAGATATCGGTTCGAA +126422 TCCGGTTGTCTCCA +; G-trnA(ugc) ==> end +126436 AAGTATTTAGAGTGAGT +; G-trnR(ucg) ==> start +126453 GCGTCTATAGCTTAATTGGAAAAGTACCGAACT!TCG!GATTCGTGTTATGAGAGTTCAA +126511 ATCTTTCTAGACGTA +; G-trnR(ucg) ==> end +126526 TA +; G-trnI(gau) ==> start +126528 
AGGCTTATAACTCAATTGGTAGAGTACGCAAGT!GAT!ATTTGTGGAGTTGGTGGTTCAA +126586 GTCCACTTAGGCCTA +; G-trnI(gau) ==> end +126601 ACATTTTTTAATAAAGATTTATCGTATG +; G-trnL(uag) ==> start +126629 GCCTTTGTGGCGGAATTGGTAGACGCGCTAAACT!TAG!AATTTAGTTTTTTCGGATGTA +126687 AGAGTTCGAGTCTCTTCAAAGGTA +; G-trnL(uag) ==> end +126711 TAGAAATTGAAAA +; G-trnN(guu) ==> start +126724 TTCCATCTAGCTTAATAGGTAAAGCAATTCACT!GTT!AATGAATGGAGTATAGGTTCGA +126782 GTCCTATGATGGAAG +; G-trnN(guu) ==> end +; G-trnY(gua) ==> start +126797 GAAGGAGTGGCTGAGTGGTTTAAGGCGGTAAACT!GTA!ACTTTACTAATGTTATCATTA +126855 TCATAGGTTCGAATCCTATCTCCCTCA +; G-trnY(gua) ==> end +126882 AAAGATATTAATGAAGTTAAAAGAA +; G-trnE(uuc) ==> start +126907 GTTCCTTTCGTCTAGTGATTAGGACATTGCCTT!TTC!AGGGTGAGAACGTGGGTTTAAT +126965 TCCCACAAGGAATA +; G-trnE(uuc) ==> end +126979 ATGTATTGTTATGAATATATTAT +; G-trnQ(uug) ==> start +127002 TGGGATATAGCCAAATGGTAAGGCATTGGTTT!TTG!ACATCATGAGTATAGGTTCGATT +127060 CCTATTATCCCAA +; G-trnQ(uug) ==> end +127073 AGTTATTCATTTGAAAATCGTATA +; G-trnG(ucc) ==> start +127097 GCGAATATAAATTAATGGTAAATTATTTGTCT!TCC!AAACAGATTTTGAGAGTTCGAGT +127155 CTCTCTATTCGCA +; G-trnG(ucc) ==> end +127168 AT +;; rnl ==> ;; mfannot: 5' +/- 50 nt +127170 AATATATAACTTAATATTTGCATGTAAAGTATATTTAATGAATACCTTGGTATAACAAAT +127230 GGTAAGGACGTTTTGAAATGCGAAAAGTCGTGGTGTTAAGTAGAAGATTGTTAAACGCGA +127290 ATTTCCTTGCGAAGAAATTTATTCTTATAAGAATTATGAAAAAGAATTTAGGGAATTGAA +127350 ACATCTTAGTACCTAGAGAAAAGAAATCAATCGAGATTCCGAAAGTAGTGGTGAGCGATT +127410 TCGGATATAGGTTAATTAAATTAGTTTTTATACACTAGGAAATATCTTGAAAGGTATACC +127470 GTAGAAAGTTGTAGTCTTGTTATTTGGTGTATAGAGATTTATATATTTAAAATATTTAAA +127530 ACGATTTTCGTGTAGAATTGTTTGAAAATGGGAGGCCCACCTTCCAAACCTAAATATTTG +127590 TTATAACCGATAGTGTAT +;; +127608 AAGTACCGTGAGGGAAAGGTGAAAGAAAACCCATTAGGGAGTGAAAAGAAGTTGAAATTA +127668 AATATAAAGAAATAATTTAATAATGATTTTATTTTATAATTATTATAAATGTACCTTTTG +127728 TATAAGTGTTACAAATAAAGTTATTGGGAGAAAGAAGTAGTCGCTGCATTAGATAGGAAA +127788 TAGAAAAAAAACGTTCATATTGCATTGTATTAATAAATAGTAAATAAAAGAATAAGTTAT +127848 
TAATTAAGAATAGGCTTCCATAGATAAAAGGTTTTAACTACTGAAGTATTAAAATTCTTA +127908 TACGGTTAAATTAAATTGTTAAAAAATTGGGAGTAAACTTTGATTCTAATTTACTAAAAA +127968 CCTCTTAATAAATATTTGGGTTTATTCATAATTATATACCTAATTATGAATTAATGAAGA +128028 GTATTTAGATAATTTTTTAAATAAATACCAAATTAAAGTTTAATTATTTATATTAATTAA +128088 TTTGAAATTGGCG +;; mfannot: /group=II +128101 gagcttcatgttatgaaatagcatgtgtagttttaggtggg +;; mfannot: +128142 GAAAATTTTAATTTTCTATCATAATTGGGTCAGCAAGTTAATAAGGATAGTTTGCTTAAC +128202 TTTGGTGATAAAAGGGAGGCGTAGCGAAAGCGAGTTTTAAAAAAGCGAAAATTGGATCTT +128262 TCTTATTAAACCCGAAGCCAAGTGATCTAACCATGATCAAGTTGATATTACTGTGATAGG +128322 TAATTGAGGACTGAACCCGTATATGTGGCAAAATATTGGGATGAATTGTGGTTTGGAGTG +128382 AAAGGCTAATCAAACTTGGCAATAGCTGGTTTTCTGCGAAATCTATTTGTGTGCTTAGTG +128442 CGAATACGCTTATAATGTAAAAGAATTGTAATAATAATATTAATGTAAAAAATATAGATA +128502 AATTTAATTTATTAAATTTTATATAATATGAATTTCGTCATATTTTTGGTTTTAAACTAG +128562 AAAAAATATATGAGAGTAAATTCTAGTATAAAAATAATGAATTTTTTAATTGACTTTAAA +128622 GTTTTTAAACAGAATATTTATTTTACAAATTGTTAAAAATTATTTGTGAATAATATAAAA +128682 TAAACTATGTTTGTTAATTGTTACCGTAATTCGCGATTTTACAAAGTTAAGAAGTTTATA +128742 GTATAAAAAAAATATATTTGTTTGTATAAGATAGATATGGAAAATTTAAATTAGTTTTTT +128802 CGGTAAAAAACATAGATTCATTTAATAATAAACATAAGAGATATATAAACTAAGAGTGTT +128862 TTTAAAATAAATTTGAAAGAAATTTATAGAAAAATTACACAACGGATCAAATTCATATTT +128922 TTTCTTTAAAGATTTATAATTAATTTGCGTTTTAAATATAGCATTAAATAATGTATATAC +128982 TATGTATGGTTTTTGTTATGTAAAAATATTTTGAAATAAGGGAGGTTTTTACAAATTGCG +129042 TAATTAAAAATATAATCATACTATACGTTTGTTAATTTTATATTTAAAGTGAGAAGCTAG +129102 GTAATAATAAATTATTATGTGTAGTTTTGAAGTAAAGTTTTCTTAATAGAAATCGATTAT +129162 AACAGGTAGAGTGTTATATAGTTTATTTTACGGGTAGAGCTCTAGTTATTTGATGGGAGT +129222 GTAGCAGCTTTACTGAGAATAATTAAACTTCGAATAGTAAATTTTAAGTTATAATAAACA +129282 GACTTTTGGCGATAAGGTCGAAGGTCAAGAGGGAAACAGCCCAGATTACATGATAAGGTC +129342 TTAAAATAATTTTTTGAGTGAAAAAGGAAAATTTAGTACTTAAACAATTAAGAGGTAGGC +129402 TTGGAAGCAGCCATTCTTTAAAGAAATCGTATTAGATCATTAGTTATTCTAGTTTAAATT +129462 TTTCTAAAATGTATAGAGGCTAAAAAATTTACCGAAGCAGTAAATAAGAAATAATTTCTT +129522 
ATGGTAGCAGAACGTTCCGTAGTTTTTTGAAGGAAAATTGTGAAATTTTTTGCAGAAATC +129582 GGAAGTGAGGATGCTGATATGAGTAACGAAAAATATAGTAATAATCTATATCGCTGTAAG +129642 TTTAAGGTTTTCAAAGTATGGGTTAACTACTTTGAGTAACACAGTATCTAAGATAAAAAA +129702 AGGGTGAAGACTTAAGTTGATGAAGAAAGAAGTTTATATTCTTCAGTAATTTTAGAAAAT +129762 TAATAGTTATTGTGCGAATTTGGTTTAATTATCTTATCAAGTTTCTCATAAGCTATTCGA +129822 GAAAAATTCTAAATATTAAAACTGTATTTAAACCGACACTGGTGAACTGGTACGATTATG +129882 TACTAAAGCGATTGAAAGAATAGTATTGAAGGAACTCGGCAAAATTGTTCTGTGACTTCG +129942 GTATAAAGAACACCAATCATATTTATATAGGTTTATATTTTGGTTGGTAGCAGAAATAGG +130002 GGGTAGCGACTGTTTAATAAAAAGTATGATTTGTTATTATGATTCTGTTTACTAATGGTT +130062 AATTAACATTTTTTTCTTAATACTGTAATAAAAAAATTATGTTAAATTTAGAATTTACAC +130122 CATTAACTTGCGATGCAGGCATTTTATATAAGAATAATTTAAATATATGTATATTTTAGG +130182 CGTCTAGAAGGCAGCGTATTTTATGAAAAAAATAGAAATATTAGGTTATATAAATTAAGA +130242 TGAGAAAATAGCGTAATTATTCTTTTTAGTAATTACGTAAGAATTGTATAATTATTTTTA +130302 CAATTTTTGTAAGGCGTAGAGAATAATTTTAACTACAAAATGAGATGCATTATTCATAAT +130362 AAATAAAGTAGTTTTTTAATATATTGTAAGTAGTTGAACAAAGTTGTTTTAGAATTTTGT +130422 TATATATATAGTTAAAAAATTAAAGAAAATATAAAAATACGATTAAATTTTTAATGTAAT +130482 TTATATAATTAGTGTTGATTAAATACTCCCCTAGTATTATAATCTTTTTAGATATACAAT +130542 AGGGAGTTATTAACATTT +;; mfannot: /group=II(derived) +130560 gagctgtatataatgaaaattatatgtacagtttttatagggggaa +;; mfannot: +130606 AATTTGAAAAAATTTACCTATCTAAATCACAGGACTCTGCTAAATTGTAAAATGATGTAT +130666 AGGGTCTGACACCTGCCCAGTGCTGTAAAGTTAAAAATTAGTTGTTTATGCTTCTAATTT +130726 AATCTCCAGTAAACGGCGGCTGTAACTCTGACGGTCCGTGTGTTTCCGTAATTAAAATAT +130786 AGTTTAATTAAAATTATATTGAATAAGAATTTATAGTGTGGATTTAGAAAAATTATTTTG +130846 GTATATAAAATGCAAGAAAATTATTAATTTTGATCAAAGATGGTTTTATACTTAAGTAAT +130906 TTATAAATTAGAAAAAAATAGATCATTGTTTGAAAATAAAGAACGACTCCAGTTAAGTTT +130966 CGAATAACAGAGAGTTATACTTTAAAAAATTTATAAATATAGAAAATATAGGGTTTGAAA +131026 GTTTTTTATTAAAAATTGTGATGTTTTTAAATTGGACTAATTTAAATATGTTTTATAAAG +131086 ACAATTCGGAATAAAAGTCGAATCTATTTTTTGTCTAAACACTGTAAAAACGGAAATAAC +131146 ATTTATATTTATTTATTTTTTAATATAAGTATATTATCAATTGAAAGGTAATAAAGATTA +131206 
AAATAGTAAGTATAAACGGAAAGATACTATAAAAATGCTTTTAATTTTTTAAACATGTTC +131266 ATAGTATTAACTTAATAAAAATTTGAATGTTTTTAGAATGGTTAATAGAAGTTGTATGGT +131326 AATAATTACCAGGTACAATTTTAATTAGCAAATTATTATTAATGATTTGACTATAATTAA +131386 GGTAGCGAAATTCCTTGTCTAGTAATTTTAGACCTGCATGAATGGTGTAACGACTTCCCT +131446 ACTGTCTCCAATACTATTTCAGTGAAATTAGAATATCCGTGAAGATACGGATTATTATAT +131506 GATTAGACGGAAAGACCCTATGCACCTTTACTAGATTTTTATATTGTTACAAAGACTAAA +131566 TTGTGTAGAATAGGTGGGATGTTTTTGATCTTTTTTAAAAAGGAAAACGTAAGTGAAATA +131626 CCACTCGTTTTAGTTCTTTGAACTTACTTATTTTCAATAAGGATAGTGTATATTTGCTAG +131686 TTTGGCTGGGGCGGCCGCTTCCTAAAGAGTAACGGAGGTGTACAAAGGTAAATTTGATTT +131746 AATGTTTATTAAATTTTAAGTGTAATGGCAAAATTTGCTTGACTGCGAGACTAACAAGTC +131806 AAGCAGGGACGTAAGTCGGTCATAATGATCCGGTAATTCTGCGTGGTAAGGTTATCGCTC +131866 AACGGATAAAAGGTACGCTAGGGATAACAGGCTTATGACCCTCGAGAGTTCTTATCGGCG +131926 GGGTCGTTTGGCACCTCGATGTCGAGTGTAATTCGCTAATTATCATATATAGGAAATAAT +131986 AATATTATTTTTTATATTGATGTAATTTTTGTTATGTTTAATGTATTTATTTATTAAATT +132046 AATTTTTGGTAAACTTTTAGATTCTATCAAATAATTTTTTCCAAGCAATACATTATAATT +132106 TACTTAGAGTTGAGTTAGGTCTTATTTATGAAGAATATTTCGTTATGAGGTGTATATAAT +132166 CCGAAAGGGTAGTATGAATTTTTTTATATACATAAACTGCTATTATATTGGCGTTAATAG +132226 GTTTATAAATTATAATTGGATCGGAATAGAGTAAACAAAACTAAGTATTATAATAGCAAA +132286 AGAGGTGAATAGACGTTGAATTATATGTTAAAATGTAATTCGCGAAAATGGATTCGATAA +132346 TATATGTTTCTATTTATGGAAATAGAAGTTACTAGTAATATCGAAAGAAAATTGAAAATT +132406 TTTTTGTTTTACGAAGCATAAAGTTTTGGATATTGATTTA +;; mfannot: /group=II(derived) +132446 aagctatatagtaagaaattactacgtatagtttggcagtagcagta +;; mfannot: +132493 TGATGTTTATATGATATTGACT +;; +132515 ATAACCTTTTCACATCCTGGAGCTGAAGAAGGTTCCAAGGGTTCGGTTGTTCGCCGATTA +132575 AAGTGGAACATGAGTTGGGTTTAGAACGTCGTGAGACAGTTTGGTCCCTATCTGTCATAT +132635 ACGTTTTAAAACTGAAAAAATTTGTATCTAGTACGAGAGGATCGATATGAATTGGCCGCT +132695 GGTAAATCAATTATTTTGATATAAAGTATCGTTGAGACGCTACGCCAATTATATATAACT +132755 GCTGAAGGCATATCAAGCAGGAAGATGATTTTAAGAAGAGTTTTAATTAGTTGTTGAAAC +132815 AGTTAGTTGGTTATAGATAATGACTTTGATAGGCTACTAGATGTACATAGTGTAAATTAT +132875 
TCAGTCTGGAGTACTAAATAACTAAT +;; rnl ==> ;; mfannot: 3' -20/+180 +132901 ATATAATTTATATATACAATTAT +; G-trnS(gcu) ==> start +132924 GGAAAGGTGACTGAGGGGTTGAAGGTGATGGTTT!GCT!AAATCATTATATAAAGTTTTA +132982 TATCGTGGGTTCGAATCCCATTCTTTCCA +; G-trnS(gcu) ==> end +133011 ATTTAAAATATA +; G-trnL(uaa) ==> start +133023 GCTTACTTGGTGGAATTGGTAGACACGATTGACT!TAA!AATCAATTCTTTAAGAGGTAT +133081 CGGTTCAATTCCGATAGTAAGTA +; G-trnL(uaa) ==> end +133104 AATTAATTTTAAAATATAAACAAAGGA +; G-trnS(uga) ==> start +133131 GGGCGTATGGCTGAGTGGTTTAAAGCGTTAGTCT!TGA!ACACTAATATGTAAAATTTTT +133189 ATATCGTGGGTTCGAATCCTGCTACGTCTA +; G-trnS(uga) ==> end +133219 AGGGT