From f7acecc47868b1639585e03ba8cddc8a92436181 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 30 Jan 2024 17:01:56 +0100 Subject: [PATCH] merge --- src/busco/config.vsh.yaml | 231 ------------------------------ src/busco/help.txt | 60 -------- src/busco/script.sh | 70 --------- src/busco/test.sh | 29 ---- src/busco/test_data/protein.fasta | 64 --------- src/busco/test_data/script.sh | 9 -- 6 files changed, 463 deletions(-) delete mode 100644 src/busco/config.vsh.yaml delete mode 100644 src/busco/help.txt delete mode 100644 src/busco/script.sh delete mode 100644 src/busco/test.sh delete mode 100644 src/busco/test_data/protein.fasta delete mode 100644 src/busco/test_data/script.sh diff --git a/src/busco/config.vsh.yaml b/src/busco/config.vsh.yaml deleted file mode 100644 index f03ad7e8..00000000 --- a/src/busco/config.vsh.yaml +++ /dev/null @@ -1,231 +0,0 @@ -functionality: - name: busco - description: Assessment of genome assembly and annotation completeness with single copy orthologs - info: - keywords: [Genome assembly, quality control] - homepage: https://busco.ezlab.org/ - documentation: https://busco.ezlab.org/busco_userguide.html - repository: https://gitlab.com/ezlab/busco - reference: "10.1007/978-1-4939-9173-0_14" - licence: MIT - argument_groups: - - name: Inputs - arguments: - - name: --input - alternatives: ["-i"] - type: file - description: | - Input fasta file or directory containing input fasta files to analyse. Fasta files can either be a nucleotide or protein fasta file, depending on the BUSCO mode. - required: true - example: file.fasta - - name: --mode - alternatives: ["-m"] - type: string - choices: ["protein", "genome", "transcriptome"] - required: true - description: | - Busco assesment mode - example: protein - - name: --lineage_dataset - alternatives: ["-l"] - type: string - required: false - description: | - Specify a BUSCO lineage dataset that is most closely related to the assembly or gene set being assessed. - The full list of available datasets can be viewed using "busco --list-datasets". - When unsure, the "--auto_lineage" flag can be set to automatically find the optimal lineage path. - Requested datasets will automatically be downloaded if not already present in the download folder. - example: stramenopiles_odb10 - - - name: Outputs - arguments: - - name: --short_summary_json - required: false - direction: output - type: file - example: short_summary.json - description: | - Output file for short summary in JSON format. - - name: --short_summary_txt - required: false - direction: output - type: file - example: short_summary.txt - description: | - Output file for short summary in TXT format. - - name: --full_table - required: false - direction: output - type: file - example: full_table.tsv - description: | - Full table output in TSV format. - - name: --missing_list - required: false - direction: output - type: file - example: missing_list.tsv - description: | - Missing list output in TSV format. - - name: --output_dir - required: false - direction: output - type: file - example: output_dir/ - description: | - The full output directory, if so desired. - - - name: Resource and Run Settings - arguments: - - name: --force - type: boolean_true - description: | - Force rewriting of existing files. Must be used when output files with the provided name already exist. - - name: --offline - type: boolean_true - description: | - In offline mode BUSCO will not attempt to download files. Ensure all required dataset files are already downloaded and available. - - name: --opt_out_run_stats - type: boolean_true - description: | - Opt out of data collection (from v5.6.0). Collected data is used to improve BUSCO. - All collected data is anonymised and includes the pipelines used, the datasets selected, options used and runtime statistics. - - name: --quiet - alternatives: ["-q"] - type: boolean_true - description: | - Disable the info logs, displays only errors. - - name: --restart - alternatives: ["-r"] - type: boolean_true - description: | - Continue a run that had already partially completed. Restarting skips calls to tools that have completed but performs all pre- and post-processing steps. - - name: --tar - type: boolean_true - description: | - Compress some subdirectories with many files to save space. - - - name: Download Settings - arguments: - - name: --download - type: string - required: false - description: | - Download dataset. Possible values are a specific dataset name, "all", "prokaryota", "eukaryota", or "virus". - - name: --download_base_url - type: string - description: | - Set the url to the remote BUSCO dataset location. - - name: --download_path - type: string - description: | - Specify filepath for storing BUSCO dataset downloads. The default is a busco_downloads subdirectory in the current working directory. - - - name: Lineage Dataset Settings - arguments: - - name: --auto_lineage - type: boolean_true - description: | - Run auto-lineage pipelilne to automatically determine BUSCO lineage dataset that is most closely related to the assembly or gene set being assessed. - - name: --auto_lineage_euk - type: boolean_true - description: | - Run auto-placement just on eukaryota tree to find optimal lineage path. - - name: --auto_lineage_prok - type: boolean_true - description: | - Run auto_lineage just on prokaryota trees to find optimum lineage path. - - name: --datasets_version - type: string - required: false - description: | - Specify the version of BUSCO datasets - example: odb10 - - - name: Augustus Settings - arguments: - - name: --augustus - type: boolean_true - description: | - Use augustus gene predictor for eukaryote runs. - - name: --augustus_parameters - type: string - required: false - description: | - Additional parameters to be passed to Augustus (see Augustus documentation: https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md). - Parameters should be contained within a single string, without whitespace and seperated by commas. - example: "--PARAM1=VALUE1,--PARAM2=VALUE2" - - name: --augustus_species - type: string - required: false - description: | - Specify the augustus species - - name: --long - type: boolean_true - description: | - Optimize Augustus self-training mode. This adds considerably to the run time, but can improve results for some non-model organisms. - - - name: BBTools Settings - arguments: - - name: --contig_break - type: integer - default: 10 - description: | - Number of contiguous Ns to signify a break between contigs in BBTools analysis. - - name: --limit - type: integer - default: 3 - description: | - Number of candidate regions (contig or transcript) from the BLAST output to consider per BUSCO. - This option is only effective in pipelines using BLAST, i.e. the genome pipeline (see --augustus) or the prokaryota transcriptome pipeline. - - name: --scaffold_composition - type: boolean_true - description: | - Writes ACGTN content per scaffold to a file scaffold_composition.txt. - - - name: BLAST Settings - arguments: - - name: --e_value - type: double - default: 0.001 - description: | - E-value cutoff for BLAST searches. - - - name: Protein Gene Prediction settings - arguments: - - name: --miniprot - type: boolean_true - description: | - Use Miniprot gene predictor. - - - name: MetaEuk Settings - arguments: - - name: --metaeuk_parameters - type: string - description: | - Pass additional arguments to Metaeuk for the first run (see Metaeuk documentation https://github.com/soedinglab/metaeuk). - All parameters should be contained within a single string with no white space, with each parameter separated by a comma. - example: "--max-overlap=15,--min-exon-aa=15" - - name: --metaeuk_rerun_parameters - type: string - description: | - Pass additional arguments to Metaeuk for the second run (see Metaeuk documentation https://github.com/soedinglab/metaeuk). - All parameters should be contained within a single string with no white space, with each parameter separated by a comma. - example: "--max-overlap=15,--min-exon-aa=15" - - resources: - - type: bash_script - path: script.sh - test_resources: - - type: bash_script - path: test.sh - - type: file - path: test_data -platforms: - - type: docker - image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 - setup: - - type: docker - run: | - busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_versions.txt - - type: nextflow diff --git a/src/busco/help.txt b/src/busco/help.txt deleted file mode 100644 index 2cacec4d..00000000 --- a/src/busco/help.txt +++ /dev/null @@ -1,60 +0,0 @@ -```bash -busco -h -``` - -Welcome to BUSCO 5.6.1: the Benchmarking Universal Single-Copy Ortholog assessment tool. -For more detailed usage information, please review the README file provided with this distribution and the BUSCO user guide. Visit this page https://gitlab.com/ezlab/busco#how-to-cite-busco to see how to cite BUSCO - -optional arguments: - -i SEQUENCE_FILE, --in SEQUENCE_FILE - Input sequence file in FASTA format. Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set. Also possible to use a path to a directory containing multiple input files. - -o OUTPUT, --out OUTPUT - Give your analysis run a recognisable short name. Output folders and files will be labelled with this name. The path to the output folder is set with --out_path. - -m MODE, --mode MODE Specify which BUSCO analysis mode to run. - There are three valid modes: - - geno or genome, for genome assemblies (DNA) - - tran or transcriptome, for transcriptome assemblies (DNA) - - prot or proteins, for annotated gene sets (protein) - -l LINEAGE, --lineage_dataset LINEAGE - Specify the name of the BUSCO lineage to be used. - --augustus Use augustus gene predictor for eukaryote runs - --augustus_parameters --PARAM1=VALUE1,--PARAM2=VALUE2 - Pass additional arguments to Augustus. All arguments should be contained within a single string with no white space, with each argument separated by a comma. - --augustus_species AUGUSTUS_SPECIES - Specify a species for Augustus training. - --auto-lineage Run auto-lineage to find optimum lineage path - --auto-lineage-euk Run auto-placement just on eukaryote tree to find optimum lineage path - --auto-lineage-prok Run auto-lineage just on non-eukaryote trees to find optimum lineage path - -c N, --cpu N Specify the number (N=integer) of threads/cores to use. - --config CONFIG_FILE Provide a config file - --contig_break n Number of contiguous Ns to signify a break between contigs. Default is n=10. - --datasets_version DATASETS_VERSION - Specify the version of BUSCO datasets, e.g. odb10 - --download [dataset [dataset ...]] - Download dataset. Possible values are a specific dataset name, "all", "prokaryota", "eukaryota", or "virus". If used together with other command line arguments, make sure to place this last. - --download_base_url DOWNLOAD_BASE_URL - Set the url to the remote BUSCO dataset location - --download_path DOWNLOAD_PATH - Specify local filepath for storing BUSCO dataset downloads - -e N, --evalue N E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03 (Default: 1e-03) - -f, --force Force rewriting of existing files. Must be used when output files with the provided name already exist. - -h, --help Show this help message and exit - --limit N How many candidate regions (contig or transcript) to consider per BUSCO (default: 3) - --list-datasets Print the list of available BUSCO datasets - --long Optimization Augustus self-training mode (Default: Off); adds considerably to the run time, but can improve results for some non-model organisms - --metaeuk_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2" - Pass additional arguments to Metaeuk for the first run. All arguments should be contained within a single string with no white space, with each argument separated by a comma. - --metaeuk_rerun_parameters "--PARAM1=VALUE1,--PARAM2=VALUE2" - Pass additional arguments to Metaeuk for the second run. All arguments should be contained within a single string with no white space, with each argument separated by a comma. - --miniprot Use miniprot gene predictor - --skip_bbtools Skip BBTools for assembly statistics - --offline To indicate that BUSCO cannot attempt to download files - --opt-out-run-stats Opt out of data collection. Information on the data collected is available in the user guide. - --out_path OUTPUT_PATH - Optional location for results folder, excluding results folder name. Default is current working directory. - -q, --quiet Disable the info logs, displays only errors - -r, --restart Continue a run that had already partially completed. - --scaffold_composition - Writes ACGTN content per scaffold to a file scaffold_composition.txt - --tar Compress some subdirectories with many files to save space - -v, --version Show this version and exit \ No newline at end of file diff --git a/src/busco/script.sh b/src/busco/script.sh deleted file mode 100644 index da74dd26..00000000 --- a/src/busco/script.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -## VIASH START -## VIASH END - - -[[ "$par_tar" == "false" ]] && unset par_tar -[[ "$par_force" == "false" ]] && unset par_force -[[ "$par_offline" == "false" ]] && unset par_offline -[[ "$par_opt_out_run_stats" == "false" ]] && unset par_opt_out_run_stats -[[ "$par_quiet" == "false" ]] && unset par_quiet -[[ "$par_restart" == "false" ]] && unset par_restart -[[ "$par_auto_lineage" == "false" ]] && unset par_auto_lineage -[[ "$par_auto_lineage_euk" == "false" ]] && unset par_auto_lineage_euk -[[ "$par_auto_lineage_prok" == "false" ]] && unset par_auto_lineage_prok -[[ "$par_augustus" == "false" ]] && unset par_augustus -[[ "$par_long" == "false" ]] && unset par_long -[[ "$par_scaffold_composition" == "false" ]] && unset par_scaffold_composition -[[ "$par_miniprot" == "false" ]] && unset par_miniprot - -tmp_dir=$(mktemp -d -p "$meta_temp_dir" busco_XXXXXXXXX) - -busco \ - --in "$par_input" \ - --mode "$par_mode" \ - --out "" \ - --out_dir "$tmp_dir" \ - ${meta_cpus:+--cpu "${meta_cpus}"} \ - ${par_lineage_dataset:+--lineage_dataset "$par_lineage_dataset"} \ - ${par_augustus:+--augustus} \ - ${par_augustus_parameters:+--augustus_parameters "$par_augustus_parameters"} \ - ${par_augustus_species:+--augustus_species "$par_augustus_species"} \ - ${par_auto_lineage:+--auto-lineage} \ - ${par_auto_lineage_euk:+--auto-lineage-euk} \ - ${par_auto_lineage_prok:+--auto-lineage-prok} \ - ${par_contig_break:+--contig_break $par_contig_break} \ - ${par_datasets_version:+--datasets_version "$par_datasets_version"} \ - ${par_e_value:+--evalue "$par_e_value"} \ - ${par_force:+--force} \ - ${par_limit:+--limit "$par_limit"} \ - ${par_long:+--long} \ - ${par_metaeuk_parameters:+--metaeuk_parameters "$par_metaeuk_parameters"} \ - ${par_metaeuk_rerun_parameters:+--metaeuk_rerun_parameters "$par_metaeuk_rerun_parameters"} \ - ${par_miniprot:+--miniprot} \ - ${par_offline:+--offline} \ - ${par_opt_out_run_stats:+--opt-out-run-stats} \ - ${par_quiet:+--quiet} \ - ${par_restart:+--restart} \ - ${par_scaffold_composition:+--scaffold_composition} \ - ${par_tar:+--tar} \ - ${par_download_base_url:+--download_base_url "$par_download_base_url"} \ - ${par_download_path:+--download_path "$par_download_path"} \ - ${par_download:+--download "$par_download"} - -if [[ -n "$par_short_summary_json" ]]; then - cp "$tmp_dir/run_*/short_summary.json" "$par_short_summary_json" -fi -if [[ -n "$par_short_summary_txt" ]]; then - cp "$tmp_dir/run_*/short_summary.txt" "$par_short_summary_txt" -fi -if [[ -n "$par_full_table" ]]; then - cp "$tmp_dir/run_*/full_table.tsv" "$par_full_table" -fi -if [[ -n "$par_missing_busco_list" ]]; then - cp "$tmp_dir/run_*/missing_busco_list.txt" "$par_missing_busco_list" -fi -if [[ -n "$par_output" ]]; then - cp -r "$tmp_dir" "$par_output" -fi - diff --git a/src/busco/test.sh b/src/busco/test.sh deleted file mode 100644 index 064897e7..00000000 --- a/src/busco/test.sh +++ /dev/null @@ -1,29 +0,0 @@ -test_dir="$meta_resources_dir/test_data" - -echo "> Running busco" -echo "$(busco --version 2>&1 | sed -n 's/BUSCO \([0-9.]*\)/\1/p')" - -"$meta_executable" \ - --input $test_dir/protein.fasta \ - --mode protein \ - --lineage_dataset stramenopiles_odb10 \ - --output_dir output - -echo ">> Checking output" -[ ! -f "output/short_summary.specific.stramenopiles_odb10.protein.fasta.json" ] && echo "specific_short_summary.json does not exist" && exit 1 -[ ! -f "output/short_summary.specific.stramenopiles_odb10.protein.fasta.txt" ] && echo "specific_short_summary.txt does not exist" && exit 1 -[ ! -f "output/run_stramenopiles_odb10/full_table.tsv" ] && echo "full_table.tsv does not exist" && exit 1 -[ ! -f "output/run_stramenopiles_odb10/missing_busco_list.tsv" ] && echo "missing_busco_list.tsv does not exist" && exit 1 -[ ! -f "output/run_stramenopiles_odb10/short_summary.json" ] && echo "short_summary.json does not exist" && exit 1 -[ ! -f "output/run_stramenopiles_odb10/short_summary.txt" ] && echo "short_summary.txt does not exist" && exit 1 - -echo ">> Checking if output is empty" -[ ! -s "output/short_summary.specific.stramenopiles_odb10.protein.fasta.json" ] && echo "specific_short_summary.json is empty" && exit 1 -[ ! -s "output/short_summary.specific.stramenopiles_odb10.protein.fasta.txt" ] && echo "specific_short_summary.txt is empty" && exit 1 -[ ! -s "output/run_stramenopiles_odb10/full_table.tsv" ] && echo "full_table.tsv is empty" && exit 1 -[ ! -s "output/run_stramenopiles_odb10/missing_busco_list.tsv" ] && echo "missing_busco_list.tsv is empty" && exit 1 -[ ! -s "output/run_stramenopiles_odb10/short_summary.json" ] && echo "short_summary.json is empty" && exit 1 -[ ! -s "output/run_stramenopiles_odb10/short_summary.txt" ] && echo "short_summary.txt is empty" && exit 1 - - -rm -r output/ diff --git a/src/busco/test_data/protein.fasta b/src/busco/test_data/protein.fasta deleted file mode 100644 index 3224f32e..00000000 --- a/src/busco/test_data/protein.fasta +++ /dev/null @@ -1,64 +0,0 @@ ->341721at2759_1001832_1:000010 -MASRPVKKRKLTPPGDDEASSRKSGGKIQKAFLKNAANWDLEQDYETRARKGKKKEKESTRLPLKLPGGRVQHVSAPDNDFQAIESDEDWLDGAEDVSEDEESKDKKAPEEPEKPEHEQILEAKEELAKIALMLNESPDENTGAFKALAKIGQSRIITIKKLALATQLTVYKDVIPGYRIRPVAEDGPEEKLSKDVRKLRTYETCLISGYQAYVKELTKHAKTGHANGLASVAITCACNLLTAVPHFNFRSDLVKILVGKLSTRRVDDDFNKCLQALETLFEEDEEGRPSMEAVSLLSKMMKAREYQVNESVVNLFLHLRLLSDFSGKGSKDSVDRMDDGPSKKPKSKREFRTKRERKQIKEQKALQKDMAQADALVQHEERDRMEGETLKLVFGTYFRVLKMRVPHLMGAVLEGLSKYAHLINQNFFGDLLEALKDLIRHSDASEKDDAEEKEDEEADDDAPVRNPSREALLCTTTAFALLAGQDAHNARADLHLDLSFFTTHLYQSLFPLSLHPDLELGARSLHLPDPDKPSQNRKSNSSNKVNLQTTTVLLIRCLTAVLLPPWNVRSVPPVRLAAFAKQLMTAALHVPEKSAQALLALLADVAGTHGRRIAALWNTEERKGDGAFNPLAESAEASNPFAATVWEGEILRRHYCPAVRRGVGIVEKSLSLAER ->296129at2759_1069680_1:000010 -MMKKKQIDSRIPTLIKNGVQEKKRTLFVIVGDRGRDQIVNLHWLLSQTRIASRPSVLWMYKKDLLGFTSHRKKREAKIKKEIKKGIRDPNEATTPFELFISVTNIRYTYYKESEKILGQTFGMLVLQDFEAITPNLLARTIETVEGGGIIVILFKTMENLKQLYTMTMDIHSRYRTEAHQDVVARFNGRFILSLGHCSSCLFVDDELNVLPISEAKKVKPLPKPQLEEPKKELEELKQKYEDKQLLRSLIDVAKTVDQARALITFVEAISEKTLRSTVALTAARGRGKSAALGLAISAAVAYGYSNIFITSPNPENLKTLFEFTFKGFNSLKYEEHIDYDIIQSLNPSFNKSIVRVNIFRNHRQTIQYIHPSDAYVLGQAELLVIDEAAAIPMPLVKKLLGPYLTFMASTVNGYEGTGRSLSLKLIQQLREQSRGFAHENTKSGNSEKSMINRSEKLNKESGINSIGGRKLREITLEEPIRYSYGDPVEEWLNKLLCLDINISLKQFLEQGCPHPSQCELYYVNRDTLFSYHPVSESFLQMMMSLYVASHYKNSPNDLQLMADAPAHQLFVLLPPVKEDDNKLPEPLCVIQVALEGEISRESVVNNLTRGYRTGGDLIPWVITEQFQDDKFASLSGARIVRIATNPEYIRMGYGSHALKLLENFYEGKYLNLSEETISESNENIKIINNNLESSLLTDDIKIKDLKIMPPLLLKLSEKKPGLIHYLGVSYGLTPQLYKFWKRAEFIPVYLRQTPNDLTGEHTCLMLKLLQDKSETWLNEFSNDFRKRFLSLLSFSFRSFPTILCLNIIESINNDLIQKDNVHVITKSEIDINLSPFDLKRLESYANNMLDYHTIIDMLPYIADLYFKGRFGKDLKMTGVQSAILLALGLQKRLLEDIEKELNLPSNQVLAMLVKILRKLSSFFKDIYYKAIDNTLPIERKNLKNQLQTHADENDNFRGFIPLKATLKEELDHLSSEMEDSIKEKQRELINSLDLQKYIIKGQEEDWDKAEQHIKNGIYSGKSSVVSIQSHSLKREHESLTDIPHIKKKHQKKHKRKV ->1217666at2759_1073089_1:000010 -MPINQPSNQIKFTNVSVVRLKKGKKRFELACYKNKLLEYRSGAEKDLDNVLQVPTIFLSVSKAQTAPSAELTKAFGANIPADEIRQEILRKGEVQVGERERKEISERVEKELLDIVSGRLVDPTTKRVYTPGMISKALDQLSSASGQMQQTQGEGSGATDEKGAAQPRKPMWTGVAPNKSAKSQALDAMKALIAWQPIPVMRARMRLRVTCPVSILKHSVKAPSGGGASKEKEAPSGNSKSNKGKKGPKSRAARQQDSDAEDGKSDAEAAPKTPSNVKDKILGYIESIESQEVIGGDEWEVVGFAEPGAYKGLNEFVGNETRGRGRVEVLDMTVTHEE ->513979at2759_1159556_1:000010 -MAVVDIQARFSPHHPLEPDLLYEIQSILRLHGLSVDDLFFKWDAYCIRMDLDAQAALSLANVRSLKQSIQDDLEKSHRSTTQVRSERKVAAAPKAVSGGDVYGMLDGLVPSTPAAGGKRSRGVAAGGGGSGLKKKMDSLKMNSSPAGMKEQLSAFNGLPATSFAERANAGDVVEILNAQLPPCEAPLAPFPEPRIKLTAASDQKKMAYKPLAVKLSEASEVLDDRIDEFAALVQDYHGLEDSAFGSAASQGTTEVVAVGRIASDAMEGKLNAAALVLETSRRTGMGLRVPLKMHKVPSWSFFPGQVVALRGTNATGGEFVVEQVLDVPLLPSAASTPSALEAHRARMSGVPPGGGAAAATTDSDAAAPAPAPAPLTILYAAGPYTADDNLDYEPLHALCSQAADALADALVLAGPFLDIDHPLVAAGDFDLPPEDEAALDPDTATMSAVFRHLVAPALNRACAANPHLTVVLVPSVRDVLARHVSWPQDAIARKELGLAKAARIVSNPMTLSMNEVVVGVSSQDVLHELRNEECSRACPPGDLMGRLCRYLVEQRHYFPLFPPTDRARLPRTGTQSGLATGAVLDPSYLRLGEMVNVRPDVMVVPSSLPPFAKASSVVESVLAINPGPLSKRKGAGTFARMTLHAPPVGGGSEMTSHRVFDRARVEIVRI ->543764at2759_1165861_1:000010 -MALGRAARPVGWTDCCAAVEKKPNYKSGMTQPARTITAGDNLLLKLPSGQTRTIKNVTSDSSISLGKFGKFQTNELIDQPFGLTFDILEDGKLVRNEQINLALELNPMLDELNSFESIKGMANGISNVEDIEATNEMIKESDGAQKLTNVEIEELKKSGLSGREIILRQIQQHSAFELKSEFSKAKYIKRKEKKFLKMFTCIDPTIHNMSQYLFENHNFAIKGLRPDTLSQMLSLSNVRPGWKGIVVDDIGGLLVAAVLIRMGGEGTIFVLNNADSPPDLHLLELFNLPKSVLGPLKSLNWAQTEADWTTSDIEELLLLHRDPPQPLPILDSTLPDPQLKQLSQRTKKQPNNRSKSMRKFERVQELLSMRQEFLDTQFEGLLTCSEYEPESIVTKLVNKLSGSSTIVIYSCHLRPLSDLQTLLKKSSMPSTSSSSLGGSSSLVEQNELTKRMKENKTEFIQITISEPWLRAYQVLVGRTHPEMAGTHHGGFVFSAIKVFNSCS ->1558822at2759_1266660_1:000010 -MSIAEILPLEIIDKTVGQPVLVMLTSHREFSGTLVGYDDFVNVVLEEVVEYDHDQEIKRHAGKMLLSGNNIAMLVPGGKRVQ ->1287094at2759_1291522_0:000010 -MGNILVKKNRVTITEADRAILTLRTQRRKMEEHRRRVEALMERETTVARTLVAKQQRPAALLALKKKRLHETQLEGLDNCLLTLEETLTQVESAQRTARLMAALKQGADVLSALQRAMPLESVEQLMEQGAESREYEMRLQALLGESLGEDQSAAAERELDEMEAQLIEEDVLDLPKVPSHAVARPASARAIGQAASERQLEPEIAA ->83779at2759_1296121_1:000010 -MCGLTLTIRPLSLSLSSPSVSDCSSSDSTEDADLALLDSFRSTNAQRGPDSQRTFKHTVTLDDDDNGVTTTTTTKSTTKSKVEICLTATVLGLRGDLTAQPLVGNRGVLGWNGQVFEGIDIGTEENDTRKIFERLEKGERVEDVLSGVEGPFAFIYLDLENDILHYQLDPLSRRSLLIHPAEVAVDSNPSVTRHFILSSSRSTLAREHGVDMRALLGGEGGTIDLRRIKVVQNQGFLTMDMSDALKHRHTLSPDQDASCSSSSGSWTKVAPINTALPPDNLPLDNPKIKEEVPKFIEQLKESVKRRVENIPNPEKGCSRVAVLFSGGIDCTFLAYLIHLCLPPEDPIDLINVAFSPAPKLSSLSSNGADKGKGKSPALPAAPTYDVPDRLSGRDALVELKQVCPDREWRFVEIDVPYDEARAHRQNVLDLMYPSSTEMDHSLALPLYFASRGYGSVRKEGSNHSEPYRVKAKVYISGLGADEQLGGYARHRHAYQREGWQGLISETQMDIARLPTRNLSRDDRMLSSHARDARYPYLSLSFISYLSSLPVHLKCDPRLGEGQGDKILLRKAVESVGLVRASGRVKRAMQFGTRSSKLGGRGSGVKGPKAGERQVE ->1057950at2759_1314783_1:000010 -MSSRQATHADSWYVGDGRRLDSELSKNLAAVEGDANYSPPIKGCKAVIAPHAGYSYSGRAAAWAYKSIDTTGIKRIFILGPSHHVYLDGCALSKCEKYETPLGELPIDLDTVKELRATGEFQDMDIQTDEDEHSIEMHLPYVRKVFEGLDIAIVPILIGAINLNKENKFGTVLAPYLAKDDTFFVISSDFCHWGTRFQYTFYYPRPPPTSTPAIRLSKADPNPSTLATHPIHASISAIDHEAMDLMTMPPQTAQQAHIDFAEYLRTTKNTICGRHPIGVLLGALAVLQSQGRVPHLKFVRYEQSSQCQTVRDSSVSYASAYITV ->453044at2759_1330018_1:000010 -MPAAPQDPFFKSIGSAAADTEALREQPDEQDEQETDLEPIDEDRPLQEVESLCMSCGEQGVTRMLLTSIPYFREVIVMSFRCEHCGNQNNEIQSASTIREHGAMYTVKILNQGDLNRQLVKSEAATVTIPEFELTIPPLRGQLTTVEGTLRDTIQDLAADQPLRRIQDPPTFDKIEALLAKLKEVVPDDEDEAAPTMKERHPEDPVRPFTVILDDPTGNSFIEFSGSMSDPKWSLREYARSMDQNITLGLSQPEDEEKEKVTQKGGPFTEEDEDGLPAEEVFIFPGICSSCGHPVDTRMKKVNIPYFKDIIIMSTNCSACGYRDNEVKSGGAISDKGKRITLKVEDAEDLSRDILKSETCGLEIPEIDLALHAGTLGGRFTTVEGILTQVYDELSEKVFRGDSVGSANSKDNQEFETFLGSMKEVMTAARPFTLILDDPLANSYLQNLYAPDPDPNMEIVTYDRTFDQNEDLGLNDMKVEGYEAPS ->1323575at2759_1392248_1:000010 -MSQPQPPPLRYIRYEPSREDEYVAAMRQLISKDLSEPYSIYVYRYFLYQWGDLCFMTVDDSRPEDPIVGVVVSKLEPHRGGPMRGYIAMLAVREEYRGRGIATKLVRMAIDAMIARDADEIALETEITNTAAMKLYERLGFLRSKRLHRYYLNGNSAYRLVLYLKEGVGNMRTSFDPYAAPAEARPEMSGAAAVPAAPAPPPLLQGNGR ->160593at2759_139723_0:000010 -MADAELAKALKDLPNRVLNVPVEERPELFQNVIAVLPNPGINATIVRGICKVIGTTLTKYKDPESQTLVKELLVAVLKQHPDLTYEHFNAVLKALLAKDLAGAPPIKAAQASALALGWANLIALHADHETAVGKKEFPKLLEVQAGLYQLSLTSGIQKISDKAYSFLRDFFASDESLAQRYFDKLLAMEPSSGVIVMLCTIVRYLHQEQGTVELLDQHKPKLLDHLVKGLITVKTKPHASDIVACSILLKAITKDELRTIIVPALQRSMLRSAEVILRAVGAIVNEIELDVSDYALDLGKPLVQNLASKEETVRQEAVESLKQVALKCGTPNAIETLLKEVFAVLNGSGGKITVAELRINLLQGAGNLSYNKIPSQKIQTILPAACDHFTKVIEAEIQEKVVCHALEMFGLWTVNHRGEIPAKIVQLFKKGLDAKAQTIRTSYLQWFLSCLHDGKLPNGIDFTTTLSKIVERAAQSPTQTPVVSEGVGAACILLLTNPSVSEKLKDFWNIVLDTNKSPFLSERFLSTTNAETRCYVMVICEQLLIKHRNELKGSSTTDPLIRAATVCVMSAQAKVRRYCLPLVTKIVNSEDGVSLAKFLLAELTRYVECTKILSEGEPAEEGIAPAQALVDAVCTVCNVEKVANPDAQSLALSALLCSHHPAAVSVRGDLWESILERYGLYGKQFIALNTAQIEEVFFNSYKATAMYENTLATLSRISPELILSVLVKNVTDQLNNSRMSNVTDEEYFTYLTPDGELYDKSVIPNTDEQVQTAHLKRENKAYSYKEQLEELQLRRELEEKRRKEGKWKPPQLTPKQKEVIDKQREKENAIKARLQALHDTITTLISQIEGAAKGTPKQLPLFFPALLPAILRVFSSPLAAPAMVKLYYRLKDICFGEERVELGRDIAIATIRLSKPHCDLEESWCTANLVELVSDILVALYDETIDMYNVHREEEASKRYLLDAPAFSYTFEFLKRALTLPEAKKDESLLINGVQIIAYHAQLKGDTVDGKDLGDVYHPLYMPRLEMIRLLLRLIQQHRGRVQTQAVAALLDVAESCSGREYTTRAEQREIEALLVALQEELDAVRDVALRALAIMIDVLPSIADDYEFGLRLTRRLWVAKHDLSADIKQLATGIWQDGAYEVPIVMADELMKDIIHPELCVQKAAAAALVSILVEDSSTIDGVVEQLLEIYREKVVMIPAKLDQFDREVEPAIDPWGPRRGVAITLGSISPFLTPELVKSVIQFMVRSGLRDRQEIVHKEMLAASLAIVEHHGKDSVTYLLPTFEYFLDKAPSKGAYDNIRQAVVILMGSLARHLDREDERIQPIIDRLLAALETPSQQVQEAVANCIPHLIPSVKDKAPEIVKKLLQQLVKSEKYGVRRGAAYGIAGVVKGLGILSLKQLDIMSKLTHYIQDKKNYKSREGALFAFEMLCSTLGRLFEPYIVHVLPHLLQCFGDSSVYVRQAADECAKTVMAKLSAHGVKLVLPSLLNALDEDSWRTKTASVELLGSMAFCAPKQLSSCLPSIVPKLMEVLGDSHIKVQEAGANALRVIGSVIKNPEIQAIVPVLLTALEDPSSKTSACLQSLLETKFVHFIDAPSLALIMPVVQRAFMDRSTETRKMAAQIIGNMYSLTDQKDLTPYLPNIIPGLKTSLLDPVPEVRGVSARALGAMVRGMGESSFEDLLPWLMQTLTSESSSVDRSGAAQGLSEVVGGLGVEKLHKLMPEIIATAERTDIAPHVKDGYIMMFIYMPSAFPNDFTPYIGQIINPILKALADENEYVRDTALKAGQRIVNLYAESAITLLLPELEKGLFDDNWRIRYSSVQLLGDLLYKISGVSGKMTTQTASEDDNFGTEQSHKAIIRSLGADRRNRVLAGLYMGRSDVSLMVRQAALHVWKVVVTNTPRTLREILPTLFSLLLGCLASTSYDKRQVAARTLGDLVRKLGERVLPEIIPILERGLSSDQADQRQGVCIGLSEIMASTSRDMVLTFVNSLVPTVRKALADPLPEVRHAAAKTFDSLHTTVGARALEDILPSMLESLADPDPDVAEWTLDGLRQVMAIKSRVVLPYLIPQLTAKPVNTKALSILASVAGEALTKYLPKILPALLAALAAAQGTPEEVQELEYCQAVILSVSDEVGIRTIMDTVMESTKSEIPETRRAAATLLCAFCTHSPGDYSQYVPQLLRGLLWLLSDGDREVLQRSWDALNAVTKTLDSAQQIAHVTDVRQAVKFASSDLPKGGELPGFCLPKGITPLLPVFREAILNGLPEEKENAAQGLGEVIKLTSPASLQPSVVHITGPLIRILGDRFNAGVKAAVLETLAILLHKVGIMLKQFLPQLQTTFLKALHDPSRTVRIKAGHALAELIVIHTRPDPLFVEMHNGIKSADDSAVRETMLQALRGIVTPAGDKMTEPLRKQIYATLAGMLAHPEDVSRAAAAGCFGALCRWLTPEQVDDALTSHMLNEDYGDDATLRHGRTAALFVALKEHPGGIVTTKYEPKICKVITGALVSDKISVAMNGVRAGGYLLQYGMTDGTAKLSTAVIGPFVKSMNHSSNEVKQLLAKTCTYLARVVPAERIAPEYLKLAIPMLVNGTKEKNGYVRSNSEIALVHVLRLRDGEEFHQRCITLLEPGARESLSEVVSKVLRKVAMQAVGKEEELDDTILT ->1346432at2759_1447883_1:000010 -MSSMRNAVQRRVHRERAQPANREKWGILEKHKDYSLRARDYSVKKAKLQRLREKADTRNPDEFAFGMMSGKSRTQGKHGARDTESAALSLETVKLLKTQDAGYLRVVGERIRRQMMAVDEEVRVQEGISGVSANGAAAGGGGGGGRKVVFVDSVEEQRERALEDEGKSDDDEEQGDFDEVDEEEQRQQKTQPKSKKQLEAEKLAQKEMLKARKLKIKAAEARSKKLQALTDQHKNIVAAEQELDWQRGKMENSVGGVNKHGLRWKVRERKR ->761109at2759_198730_1:000010 -MAMTFTEDSIKELRLRLEDAVVKCSERCLYQSAKWAAEMLNSLVSTDGNDTDAESPMETDLQPTVNPFSLQSDPTEATLELQEAHKYLLAKSYFDTREYDRCAAVFLPPTIPPVPLSTVSPNVKSRASLTPQKGKRKSFIRPGLKSGQALPRNPYPNLSQKSLFLALYAKYLAGEKRRDEETEMVLGPADGGMTVNRELPDLARGLEGWFEERRERGLQDQGQGWLEYLYAVILIKGKNEEEAKKWLVRSVHLFPFHWGAWQELNDLLPSVDDLKQVAETLPQNIMSFIFQVHCSQELYQATDETHQTLNGLESIFPTSAFLKTERALLYYHSRDFEDASAIFADILIDSPHRLDSLDHYSNILYVMGARPQLAFVAQLATATDKFRPETCCVVGNYYSLKSEHEKAVMYFRRALTLDRNFLSAWTLMGHEYIEMKNTHAAIESYRRAVDVNRKDYRAWYGLGQAYEVLDMCFYALYYYQRTAALKPYDPKMWQAVGTCYAKMNQIPQSIKAMKRALVAGAYYEQRADAATADHPAAGRKILDPDLLHQIALLYEKMNNEDEAAAYMELTLQQESGEIERTETDSDDDDGDDNSDDGTTQRRSRRQRRRQKSRDDDNEIEAVGGTGVTATTSKARLWLARWALKHGDLNRADQLAGELCQDGVEVEEAKALMRDVRARREGGGG ->1617752at2759_2004952_1:000010 -MPSSFVTPGQQRYLRACMVCSIVMTYSRFRDEGCPNCDEFLHLAGSQDQIESCTSQVFEGLITLANPAKSWIAKWQRLDGYVGGVYAIKVSGQLPDEIRTTLEDEYRIQYIPRDGTQTEADA ->1588798at2759_215358_0:000010 -MTLPPTQQEPHTPEAFSLFVSFNHREPQNDDVMADLGIKAGDKVMMVWTQPSAPEGLKQHAEELAAIVGADGKVSVENLERLLLSSHSASSFDCVLSCLLADSSPVHTSETLEELARVLKPGGKLVLDEAVTGAETSQVRTAEKLISALKLSGFMSVTEVSKAELTAEALSALRTATGYQGNTLSRVRVSASKPNFEVGSSSQIKLSFGKKTPKPAEKPALDPNTVKMWTLSANDMGDDDVDLVDSDALLDEEDLKKPDPASLKVSCRDSGKKKACKNCSCGLAEELEQESTGKQKTNLPKSACGSCYLGDAFRCASCPYAGMPAFKPGEKIVLDKKTLTDA ->1275837at2759_28005_1:000010 -MSSRDKASPSSPKETKGEHHLNEESDNDNNERRDEQQVTASAYLPSASRVDVHPLVLLSLVDHFARMNTKVRQKKRVVGLLLGRYKTDAAGTQVLDINNSFAVPFDEDPHNSDVWFFDTNYAEEMFVMHRRVHPKTKIVGWYASGPTVQQNDMLLHLLVADRFCANPVYCVVNTDPSHKGVPVLAYTTVQGREGARSLEFRNIPTHVGAEEAEEIGVEHLLRDLTDSTVTTLSSQLEERERSLEHMARVLVQIEEYLSDVASGALPASEDVLEALQELISLQPETYLKKKSLELNRFTNDRTIATFLGSIARCIGGLHEVILNRRVLARELKEIKARRAEAEEQRMDNEKNKIAEASPERKQ ->1264469at2759_29058_0:000010 -MRPPLAIVRTYCTTAAPKSSNFIDEMKRNFIATNTFQKTLLSCGSAAISLLNPHRGDMIACLGEVTGESAIKYMRQKMTETEEGTEILKEKPRINSGTVSFDKLSQMPDNTLGRVYADFMTENNITADSRLPVQFIEDPELAYVMQRYREVHDLVHATLFMRTSMLGEVTVKWVEGIQTRLPMCISGGIWGAARLKPKHRQMYLKYYLPWAIKTGNNAKFMQGIYFEKRWDQDIDDFHKEMNIVRLVKK ->673132at2759_326594_0:000010 -MTLLTVFKQFKKFQDAGKSVARSLSIKDDQESKKTCLYDLHIENNGKMVNFSGWLLPIQYRDSITASHQHTRTHASLFDVGHMLQSHVSGCDSGEFLESLTTADLQNLAQGGAALTVFTNKSGGILDDLIITKDRNDRFFVVSNAGRRNEDIELMLGRQAEMKSQGKNVTIEFLDPLEQGLIALQGPSAATTLQTLVKIDLTKLKFMNSVETKINQKSVRISRCGYTGEDGFEISVNGKDARTISEMILEVPDIKLAGLGARDSLRLEAGFCLYGHDINESITPVEASLQWLIAKRRREAANFPGAEFILEQIKNGPKKKRVGLILGQGPPARENATILTSAGERVGIVTSGGPSPTLGKPIAMGYVPLEHVHTGTPVLTEIRGKTYKALITKMPFVKPHYYSDKR ->887370at2759_331117_1:000010 -MVVRSFLPLLSLLIALATFTSAASDYHEALVLQPLPQSSLLASFNFRGNTSQEAFDQRHFRYFPRALGQILQHTHTKELHIRFTTGRWDAESWGTRPWNGTKEGNTGVELWAWIDAPDSESAFARWISLTQSLSGLFCASLNFIDSTRTTRPVVSFEPIGDHSPSSDLHLLHGTLPGEVVCTENLTPFLKLLPCKGKAGVSSLLDGHKLFDASWQSMSVDVRPVCPQGGECLMQIEQTVDIVLDIERSKRPRDNPIPRPVPNDQLNCDNSKPYHSDDTCYPLERGSGKGWSLNEIFGRTLNGVCSLDEGQRPGEEAICLRVPHEQGVYTTSGVEETKRPDGYTRCFTLQPSGTFDLVIPEQSHTSLAPRDEPVLSAERTIVGHGQERGGMRIIFDNPSDAHPVDFIYFETLPWFLRPYVHTLRATITGRDGATRSVPVSHIVKETFYRPAIDRERGTQLELALSVPAASIVTLTYDFEKAILRYTEYPPDANRGFNVAPAVIKLSSANGNTIAHDTPIYMRTTSLLLPLPTPDFSMPYNVIILTSTVIALAFGSIFNLLVRRFVAADQAAALTAQTLKGRLLGKIVALRDRISGKRSKVE ->166920at2759_38123_0:000010 -MAFLDFVFPLSKDELLERSDSQYYVRDQVTTSELPEKLKGCFESLHDDGPLFILENFDTLYGLLAHFKSVDFNQLHKVYTKLLIKSITEFIPILENYFSKETPDDELQNKYLNVIKMTVYILTEFIISFESRLQKEYQKVVIDVRARKVKVRAAIKHKEKYNWDWDFHLSNGLNSIHQLLKAKINKLWDPPVVEEEFVNTIANCCYKIIEDPCIASVKHKELRIFIFQVIGYLIKKYNHGISCTVKIVQLLKNCDHLVSPLAQAVTMFIRNHGCKSLVREIVREISEMDDGNEAAGQGQDNSKMVAAFLNEIAAEGPEYVIPAMDELLLNLEKESYMMRNCTLTILTELLLQVYKKENLSSEAKDQRDEYLNSLMEHIYDVHTFVRTKVLQLFQKLVIEKALPLAFTLQLVDRAIGRLMDKSSNVVKYAVQLLRTMIVSNPFAAKLGVEELKKKLAEAKATLTELEKNLPETSAQLSLVDEWNNIHYPVLLKIIREILEDGMYGCFLFYFL ->1275837at2759_402676_1:000010 -MESMNDMFKKINAREKLVGWYHTGPQLRSSDLEINNLFKKYIPNPVLVIIDVQSKAVGLPTSAYFAVDEIKDDGTKSSLTFVHLPSSIEAEEAEEIGVEHLLRDTRDITAGTLATRVTEQVQSLRALEQRLDEIAVYLRKVVDGQLPINHTILGELQGVFNLLPNIFKTSNENDPLGLENGDERSFNINSNDQLMTVYLSSIVRSVIALHDLLDSLAASKAAEQEQDKLDLKQESTDSEKRATTAAVDEDPFMPN ->1284731at2759_42254_0:000010 -MAEAGAVAAEYPSGGRARAARTLLDQVVLPGEELLLPEQEDADGPGGAGERPLQARDPYLKWGVRRACCEIPYVPVRGDHVIGIVTAKSGDTFKVDVGGSEPASLSYLAFEGATKRNRPNVQVGDLIYGQFVVANKDMEPEMVCIDGCGRANGMGVIGQDGLLFKVTLGLIRKLLAPDCEIIQELGKLYPLEIVFGMNGRIWVKAKTIQQTLILANILEACEHMTTDQRKQIFSRLAES ->1228942at2759_45354_1:000010 -MNHDPFQWGRPRDEIYGHYDHKIAQASTSEFPSMHTQQPIITGTSVLGLKFDTGVVIAADHMGSYGSLLRFNNLERLICVGSETIVGVSGDISDFQHIERLLHELETEEEVYDTDGGHNLRAPNIHEYLSRVLYNRRLKMDPLWNAILVAGFNDDRTPFIRYVDLLGVTYGALALATGFGAHLAIPLLRKLVPYDLDYVKVKEADAREAVVNAMRVLYYRDARASDKYTLAVLSFKDGKVDVHFDQELKVTNQSWKFAEKVIGYGSKQQ ->759498at2759_502779_1:000010 -MDGSRGSRKRKAVTRDLGEEPGVVSGNELHLDSADGSLADHSEDLDGSSDSEIELADDLNSDDDEEEEEEEEEDEDEINSDEVPSDIEPKVVGKKSGPGGEVDIIVRGDDTASDDDDDDDDDFESDDRPNYRVVKDANGNERYVYDEINPDDNSDYSETDENANTIGNIPLSFYDQYPHIGYNINGKKIMRPAKGQALDALLDSIELPKGFTGLTDPATGKPLELTQDELELLRKVQMNEITEEGYDPYQPTIEYFTSKLEVMPLSAAPEPKRRFVPSKHEAKRVMKLVKAIREGRILPYKQPAEEDEAEEGVQTYDIWANETPRADHPMHIPAPKLPPPGYEESYHPPPEYLPDEKEKSAWLNTDPEDRETEYLPTDHDALRKVPGYESFVKEKFERCLDLYLAPRVRRSKLNIDPESLLPKLPSPEELKPFPSTCATLFRGHQGRVRTLAIDPTGVWLASGGDDGTVRVWDILTGRQFWSVALSGDDAINVVRWRPGKDAVVLAAAAGDSIFLMVPPVLDPEMEKASFEVVDAGWGYAKTSPSTFTSTDSTKTSPVQWTRPSSSLLDSGVQAVISLGYVAKSLSWHRRGDYFVTVCPGTSTPVSLAIAIHTLSKHLTQQPFRRRLKGGGPPQTAHFHPSKPILFVANQRTIRAYDLSRQTLVKILQPGARWISSFDIHPTSSSTSGGDNLIVGSYDRRLLWHDVDLSPRPYKTLRYHQKAIRAVRYHANYPLFADASDDGSLQIFHGSVTGDLLSNASIVPLKVLRGHKVTGELGVLDLDWHPKEAWCVSAGADGTCRLWM ->375960at2759_51337_0:000010 -MFFREHIFNIIGAFDIPRFVYNSERKKFLPLLMTNHPAPNLLGTAKDKAELYRERYTLLHQRTHRHELFTPPVIGSYPNESGSKFQLKTIETLLGSTTKIGDVIVLGMITQLKEGKFFLEDPTRTVQLDLSQAQFHSGLYTEACFVLAEGKAYYGSINFFGGPSNTSVKTSTKLKQLEEENKDAMFVFVSDVWLDRAEVLEKLHIMFSGYSPAPPSCFILCGNFSSAPYGKNQIQALKDSLKTLADIICEYPNIHQSSRFVFVPGPKDPGFGSILPRPPLAESITSEFRQKIPFSVFTTNPCRIQYCTEEIIIFREDIVNKMCRNCVRFPSSNLDIPNHFVKTILSQGHLTPLPLYVCPVYWARFPSSNLDIPNHGSFPRSGFSFKVFYPSSKTVEDSKLQGF ->919955at2759_5643_1:000010 -MAAPMAVDKAKAPKIDVDEFLTLAISETPAELHPFFESFRSLYSRKLWHQLTNKLFEFFDHPLSKPYRVDVFNKFVRDFGLRLNQLRLVEMGVKVSKEIDNPVTHLQFLTDLLERVNIEKSPEAHVLLLSSLAHAKLLYGDHEGTKNDIDAAWKVLDELSSVDPSVNAAYYGVAADYYKSKAEYAPYYKNSLLYLACIDPAKDLTAEERLLRAHDLGIAAFLGDTIYNFGELPILQENYPFLRQKICLMALIESVFKRGSYDRTMSFQTIAEETHLPLDEVEHLVMKALSLKLIKGSLDQVDQKAQITWVQPRVLSREQIGQLAQRLAAWNSKLHQVEERIAPEVLVNS ->817008at2759_5849_1:000010 -MDKLKTIYIDSALSIIKGALCVILQIPTGRTTESIKKKQNNVGIITVKSIFKEPTISQYNDIKQLIKTKIEENCPFYNYQINRTIAEKIYGDTIYDNYGLSKEINEVNLIILEEWNINCNRNRVLKHSGLIKNIEINKFKYLNNKESLEVHFLVNPKYTFEELNTIYKNEEELNNFLLSPIIKVTNKKIYEIEDKKSEFSYLYEEDILPKNKVLPPSGIENVNYESSKVVTPWDVNIGEEGINYNKLIKEFGCSKISDEHIRKIEKLTNRKAHHFIRRGIFFSHRDLDFLLNYYEQNGYFYIYTGRGPSSLSMHLGHLIPFYFCKYLQDAFNVPLIIQLSDDEKFLFNQNYSLDDINRFTKENVKDIIAVGFNPELTFIFKNTEYANHLYPTVLAIHKKTTLNQSMNVFGFNNSDNIGKISYPSFQIAPCFSQCFPNFLKKNIPCLVPQGIDQDPYFRLSRDIAVKLALYKPVVIHSVFMPGLQGVNTKMSSTKKKDNKNMDSKQDINNSVIFLTDSPEQIKNKINKYAFSGGGATIAEHKEKGADLEKDISYQYLRYFLVDDEKLNEIGEKYKKGEMLSGEIKKILIDILTDLVQKHQEKRNSLTDEDILYFFNDNKSSLKKFKDM ->1426075at2759_61621_0:000010 -MTASQPNPQLPQSLPALKTSGTCARLPSTGRKLHLRIARAHPRVSRELFRRSGCGCGAGLSSAETDIAFLFSASGYRSHILKTMSGSFYFVIVGHHDNPVLKWSFXPAGKAESKDDHRHLNQFIAHAALDLVDENMWLSNNMYLKTVDKFNEWFVSAFVTAGHMRFIMLHDIRQEDGIKNFFTDVYDLYIKFSMNPFYEPNSPIRSSAFDRKVQFLGKKHLLS ->655400at2759_688394_1:000010 -MAASRSPRLSSLLLRTTPLSRPTWQRTLSTRGFATAISNKLDNVYDMVIVGGGIAGTALACSLATNPSMKDYRIALIEAMDLSNTNNWAPATGRYSNRVVSLTPASMQFFEKIGVADELYRDRIQPYNCMKVSDGVTNASIEFDTNLLSSSTNPDDLPIAYMIENVHLQHSILKTLQTSKGKGATVDILQKARVASIRMQEQDAKETKDTLDLSDWPIIEMENGQSLQARLLVGADGVNSPVRSFAKIESLGWDYNMHGVVATFKTDPSRKNDTAYQRFLPTGPIAMLPLGDGHASMVWSMPPDMAHKVKKIPAQAFCTLVNSAFRLSMEDLDYLRSKIDPTTFEPLCDFDSEYNWRQGVAKHGLGDMEMMERELAFPPIVESVDETSRASFPLRMRNSQQYFADRVVLVGDAAHTVHPLAGQGLNQGILDVACLSDILQRGASEGQDIGNLHLLREYASVRYLRNLLMISACDKLHRLYSTDFAPITWIRSLGLSSVNQLDFVKAEIMKYAMGIEQ ->946128at2759_765440_1:000010 -MPTTVCTAKASYKKTPGQLELTETHLQWFADGKKAPSVRVLYAEAASLFCSKEGAAQIRLKLGLVGDDTGHNFTFTSPQSVAYKERETFKKELTNIISRNRSVPNVTTPRPPLNTSISSTTPAISNAPTPRSVVPPSRASTSRAPSVSSDGRTPIVPGSDPTSDFRLRKQVLVSNPELGALHRDLVMSGQITEAEFWEGREHLLLAQTATESQKRGRPGQLVDPRPETVEGGEVKIVITPQLVHDIFEEYPVVAKAYNDNVPNKLSEAEFWKRYFQSKLFNAHRASIRSSAAQHVVKDDKIFDKYLEKDDDELEPRRQRDEGINLFVNLGATREDHGETGNEQDITMQAGRQRGALPLIRKFNEHSERLLNSALGDEPTAKRRRIDAGKEDAYSQIDLDDLHDPEASAGIILEMQDRQRYFEGQMASAASAEAAAGKNLDIRAILGETKVNLHDWETNLAQLKINKKSGDAALLSMTENVSARLEIKMKKNDIPPELFSQMTTCQTAANEFLRQFWLSMYPPAADHQVLAPATPAQKAAKAAKMIGYLGKTHEKVDALIRTAQVEAVDAAKVEIVRAVCFVYIITVNFNANLQAMKPILDAVDRALAFYRSRKPPK ->1287401at2759_870435_1:000010 -MSSSIVGSLTRGCRTPSVNINPHPFFRCRTSLYHGIGKPPSWLHSRTQLWRTIGTSSSKHTPPSSASVSARRPTAIPSYNASREQMYKTRNRNLLMYTSAVVILGVGITYAAVPLYRMFCSATGFAGTPSVVSTSSGRFDPSRLTPDTDARRIRVHFNADRAEALPWKFFPQQKYVEVLPGESSLAFYKARNESKKDIIGIATYNVTPDRVAPYFSKVECFCFEEQKLLAGEEVDMPLLFFIDKDILDDPSCRGVNDVVLSYTFFKARRNAQGHLEPDAEEDVVQRSLGFEGYEHSPRAETKKVEGSKANS diff --git a/src/busco/test_data/script.sh b/src/busco/test_data/script.sh deleted file mode 100644 index 2b983f0d..00000000 --- a/src/busco/test_data/script.sh +++ /dev/null @@ -1,9 +0,0 @@ -# busco test data - -# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/busco/test - -if [ ! -d /tmp/snakemake-wrappers ]; then - git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers -fi - -cp -r /tmp/snakemake-wrappers/bio/busco/test/protein.fasta src/busco/test_data \ No newline at end of file