Skip to content

Commit

Permalink
Merge pull request #39 from PeptoneLtd/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
CFisicaro authored Nov 15, 2022
2 parents abca53f + 22a4b5f commit 1caa427
Show file tree
Hide file tree
Showing 55 changed files with 9,253 additions and 24 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Network Trash Folder
Temporary Items
.apdisk

# Jupyter chekcpoints & pycache
# Jupyter checkpoints & pycache
.ipynb_checkpoints
__pycache__
*.pyc
Expand All @@ -44,6 +44,12 @@ __pycache__
# End of https://www.toptal.com/developers/gitignore/api/macos
*.html

# pytorch files
*.pt

# onnx files
*.onnx

.adopt-venv
.vscode
build/
18 changes: 18 additions & 0 deletions Dockerfiles/Dockerfile_adopt_mac_m1
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
FROM sonoisa/deep-learning-coding:pytorch1.12.0_tensorflow2.9.1
RUN pip install \
pandas \
fair-esm \
biopython \
bertviz \
skl2onnx \
onnxruntime \
spacy \
plotly \
wandb \
jupyterlab
RUN git clone --recurse-submodules https://github.com/PeptoneLtd/ADOPT.git \
&& cd ADOPT \
&& python setup.py install \
&& cd ../
RUN pip install tables

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ MODEL_TYPE = "esm-1b"
STRATEGY = "train_on_cleared_1325_test_on_117_residue_split"

# Extract residue level representations
multi_head = MultiHead(MODEL_TYPE, SEQUENCE, PROTID)
representation, tokens = multi_head.get_representation()
multi_head = MultiHead(MODEL_TYPE)
representation, tokens = multi_head.get_representation(SEQUENCE, PROTID)

# Predict the Z score related to each residue in the sequence specified above
z_score_pred = ZScorePred(STRATEGY, MODEL_TYPE)
Expand Down
2 changes: 1 addition & 1 deletion adopt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# LICENSE file in the root directory of this source tree.

"@generated"
from . import constants, utils
from . import constants, utils, inference, embedding
from .data import CheZod
from .inference import ZScorePred
from .training import DisorderPred
Expand Down
3 changes: 3 additions & 0 deletions adopt/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import argparse
import subprocess
from pathlib import Path
import time

from adopt import constants, utils

Expand Down Expand Up @@ -66,3 +67,5 @@ def get_representations(fasta_file, repr_dir, msa):
parser = create_parser()
args = parser.parse_args()
get_representations(args.fasta_path, args.repr_dir, args.msa)
print("--- %s seconds ---" % (time.time() - start_time))

3 changes: 3 additions & 0 deletions adopt/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys
import subprocess
from pathlib import Path
import time

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -164,6 +165,7 @@ def main(args):
)
sys.exit(2)

start_time = time.time()

if __name__ == "__main__":
parser = create_parser()
Expand All @@ -173,3 +175,4 @@ def main(args):
z_score_pred.get_z_score_from_fasta(
args.fasta_path, args.repr_dir, args.pred_z_scores_path
)
print("--- %s seconds ---" % (time.time() - start_time))
41 changes: 27 additions & 14 deletions adopt/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,34 @@ def get_attention(self, sequence, brmid):
return attention, tokens

def get_representation(self, sequence, brmid):
results = self.get_results(sequence, brmid)

if self.model_type in constants.model_types:
representation = results[0]["representations"][33]
elif self.model_type == 'esm-msa':
representation = results[0]["representations"][12][0]
elif self.model_type == "combined":
representation = torch.cat(
(results[0]["representations"][33], results[1]["representations"][33]), -1)

max_len = 1000
if len(sequence) < max_len:
print("Extracting representations.")
else:
print("The model types are:")
print(*constants.msa_model_types, sep="\n")
sys.exit(2)
print("Protein sequence longer than context window!")
print("Applying sliding windows.")

sequence_chunks = [sequence[i:i + max_len] for i in range(0, len(sequence), max_len)]

chunks_representation = []
for seq in sequence_chunks:
results = self.get_results(seq, brmid)
if self.model_type in constants.model_types:
representation = results[0]["representations"][33]
elif self.model_type == 'esm-msa':
representation = results[0]["representations"][12][0]
elif self.model_type == "combined":
representation = torch.cat(
(results[0]["representations"][33], results[1]["representations"][33]), -1)
else:
print("The model types are:")
print(*constants.msa_model_types, sep="\n")
sys.exit(2)
# remove first and last token (<cls> and <sep>)
representation = representation[:, 1:-1, :]
chunks_representation.append(representation)

representation = torch.cat(chunks_representation, 1)
tokens = list(sequence)
# remove first and last token (<cls> and <sep>)
representation = representation[:, 1:-1, :]
return representation, tokens
234 changes: 234 additions & 0 deletions datasets/chezod_117_all_rand_1.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
>4307
GQGGGTHNQWNKPSKPKTNMKHMANAAAAGAVVGGLGGYMLGSAMSRPMMHFGNDWEDRYYRENMNRYPNQVYYRPVDQYNNQNNFVHDCVNITIKQHTVTTTTKGENFTETDIKIMERVVEQMCTTQYQKESQAYYDGRRS
>15768
MSTNPKPQRKTLRNTNRRPQDVKFPGGGQIVGGVYLLPRRGPRLGVRATRKTSERSQPRGRRQPIPKARRPEGRTWAQPGYPHHHHHH
>15397
GTQNRPLLRNSLDDKVGPPSNLEGQSDERALLDQLHTLLSNTDATGLEEIDRALGIPELVNQGQALEPKQD
>15123
GESGERWARQVSGGHFVTLHGAERLEEETNDEDVSDIERRIAMRLAERRYEDSATHGDE
>6047
MKLAPYILELLTSVNRTGGTADLLVPLLRELAKGRPVSRTTLAGILDWPAERVAAVLEQATSTEYDKDGNIIGYGLTLRETSYVFEIDDRRLYAWCALDTLIFPALIGRTARVSSHCAATGAPVSLTVSPSEIQAVEPAGMAVSLVLPQEAADVRQSFCCHVHFFASVPTAEDWASKHQGLEGLAIVSVHEAFGLGQEFNRHLLQTMSSRTP
>17290
MGSSHHHHHHSSGLVPRGSHMYRKDKRRQEPLRQPSPQRGAWAPELGAAPEEELAALQLGPTHHECEASPPHDTLRLTALPDYYLTLRRSPDDIPLMTPNTITMIPNSLVGLQTLHPYNTFAAGFNSTGLPHSHSTTRV
>16876
MAEEIKNVPEQEVPKVATEESSAEVTDRGLFDFLGKKKDETKPEETPIASEFEQKVHISEPEPEVKHESLLEKLHRSDSSSSSSSEEEGSDGEKRKKYKEKKKPTTEVEVKEEEKKGFMEKLKEKLPGHKKPEDGSAVAAAPVVVPPPVEEAHPVEKKGILEKIKEKLPGYHPKTTVEEEKKDKE
>15736
MGSSHHHHHHSSGLVPRGSHMDIDATCTPRRASSNQRGLNQIWNVKKQSVYLMNLRKSGTLGHPGSLDETTYERLAEETLDSLAEFFECLADKPYTFEDYDVSFGSGVLTVKLGGDLGTYVINKQTPNKQIWLSSPSSGPKRYDWTGKNWVYSHDGVSLHELLAAELTKALKTKLDLSSLAYSGKDA
>25118
LADGSSDAAREPRPAPAPIRRRSSNYRAYATEPHAKKKSKISASRKLQLKTLLLQIAKQELEREAEERRGFKGRHHHHHH
>15274
GSHMSSSSESTGTPSNPDLDAGVSEHSGDWLDQDSVSDQFSVEFEVESLDSEDYSLSEEGQELSDEDDEVYQVTVYQAGESDTDSFERDPEISLADYWK
>4950
GKKWSKSSVVGWPAVRERMRRAEPAADGVGAASRDLEKHGAITSSNTAANNAACAWX
>16505
MKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGTWGQPHGGGWGQPHGGGWGQPHGGGWGQPHGGGIGQGGGTHNQWNKPSKPK
>11454
GILTNSQGQTPQRVSGLQGSDALNIQQNQTSGGSLQAGQQKEGEQNQQTQQQQILIQPQLVQGGQALQALQAAPLSGQTFTTQAISQETLQNLQLQAVPNSGPIIIRTPTVGPFGQVSWQTLQLQNLQVQNPQAQTITLAPMQGVSLGQTPGGTLEDYKDDDDKGSHHHHHH
>15131
ASQKRPSQRSKYLATASTMDHARHGFLPRHRDTGILDSIGRFFSGDRGAPKRGSGKDSHTRTTHYGSLPQKSQHGRTQDENPVVHFFKNIVTPRTNPPSQGKGRGLSLSRFSWGAEGQRPGFGYGGRASDYKSAHKGFKGAYDAQGTLSKIFKLGGRDSRSGSPMARRLEHHHHHH
>6078
MNTDQQKVSEIFQSSKEKLQGDAKVVSDAFKKMASQDKDGKTTDADESECHNYQEQYNKLKGAGHKKELEHHHHHH
>4287
GSHMSYNNPYQLETPFEESYELDEGSSAIGAEGHDFVGFMNKISQINRDLDKYDHTINQVDSLHKRLLTEVNEEQASHLRHSLDNFVAQATDLQFKLKNEIKSAQRDGIHDTNKQAQAENSRQRFLKLIQDYRIVDSNYKEENKEQAKRQYMIIQPEATEDEMEAAISDVGGQQIFSQALLNANRRGEAKTALAEVQARHQELLKLEKSMAELTQLFNDMEELVIEQQENVDVIDKNVEDAQLDVEQGVGHTDKAVKSARKARKNKIR
>15086
MPTKTYSEEFKRDAVALYENSDGASLQQIANDLGINRVTLKNWIIKYGSNHNVQGTCPSAAVSEAEQIRQLKKENALQRARTRHPAESCLEHHHHHH
>16912
SIKQYSQEELKEMALVEIAHELFEEHKKPVPFQELLNEIASLLGVKKEELGDRIAQFYTDLNIDGRFLALSDQTWGLRSWYPYDQLDEETQPTVKAKKKKAKKAVEEDLDLDEFEEIDEDDLDLDEVEEELDLEADDFDEEDLDEDDDDLEIEEDIIDEDDEDYDDEEEEIK
>18177
MQKLSARLRALRQRQLDRAAAAVEPDVVVKRQEALAAARLKMNEELNAQVEKHKEKLKQLEEEKRRQKIEMWDSMQEGKSYKGNAKKPQEEDSPGPSTSSVLKRKSDRKPLRGGGYNPLSGEGGGACSWRPGRRGPSSGGCG
>18198
GAMGASGGEDDELKGLLGLPEEETEFDNLTEFNTAHNKRISTLTIEEGNLDIQRPKRKRKNSRVTFSEDDEIINPEDVDPSVGRFRNM
>15179
GHMPIKNILKNKTSTTSSMVASAEQPRGNVDEELSKKSQKWDEMNILATYHPADKDYGLMKIDEPSTPYHSMMGDDEDACSDTEATEAMAPDILARKLAAAEGLEPKYRIQEQESSGEEDSDLSPEEREKKRQFEMKRKLHYNEGLNIKLARQLISKDL
>18248
ASSHSLLFSSSFLSKPSSFTSSLRRFVYLPTRQFWPRQRHGFSTVFAVATERAISSSGP
>7279
MSESSSKSSQPLASKQEKDGTEKRGRGRPRKQPPVSPGTALVGSQKEPSEVPTPKRPRGRPKGSKNKGAAKTRKTTTTPGVKPRGRPKKLEKEEEEGISQESSEEEQ
>18446
MGEKTVKLYEDTHFKGYSVELPVGDYNLSSLISRGALNDDLSSARVPSGLRLEVFQHNNFKGVRDFYASDAAELSRDNDASSVRVSKMETTN
>19650
SLLDKDDSKAGMEEDHTYEGLDIDQTATYEDIVTLRTGEVKWSVGEHPGTE
>19224
GHMASMEMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYRDVSPFDHSRIKLHQEDNDYINASLIKMEEAQRSYILTQGPLPNTCGHFWEMVWEQKSRGVVMLNRVMEKGSLKCAQYWPQKEEKEMIFEDTNLKLTLISEDIKSYYTVRQLELENLTTQETREILHFHYTTWPDFGVPESPASFLNFLFKVRESGSLSPEHGPVVVHCSAGIGRSGTFCLADTCLLLMDKRKDPSSVDIKKVLLEMRKFRMGLIQTADQLRFSYLAVIEGAKFIMGDSSVQDQWKELSHEDLEPPPEHIPPPPRPPKRILEPHNGKCRECFPNHQWVKEETQEDKDCPIKEEKGSPLNAAPYGIESMSQDTEVRSRVVGGSLRGAQAASPAKGEPS
>5204
SKKIKDPDAAKPEDWDERAKIDDPTDSKPEDWDKPEHIPDPDAKKPEDWDEEMDGEWEPPVIQNPEAKGEWKPR
>4686
SKSMQVPFSRCCFSFAEQEIPLRAILCYRNTSSICSNEGLIFKLKKGKEACALDTVGWVQRHRKMLRHCPSKRK
>10010
EAEAYVTQTMKGLDIQKVAGTWYSLAMAASDISLLDCQSAPLRVYVEELKPTPEGDLEILLQKWENDECAQKKIIAEKTKIPAVFKIDALNENKVLVLDTDYKKYLLFCMENSAEPEQSLVCQCLVRTPECDDEALEKFDKALKALPMHIRLSFNPTQLEEQCHI
>19672
WVTVQRTEAAERCGLHGSYVLRVEAERLTLLTVGAQSQILEPLLSWPYTLLRRYGRDGVMFSFEAGRRCPSGPGTFTFQTAQGNDIFQAVETAIHRQKAQG
>18890
SLQDGVRQSRDSDKQTLLPNDQLYQPLKDREDDQYSHLQGNQLRRN
>5545
SYYHHHHHHDYDIPTTENLYFQGAMGSEPCRRHLDSVLQQLQTEVYRGAQTLYVPNCDHRGFYRKRQCRSSQGQRRGPCWCVDRMGKSLPGSPDGNGSSSCPTGSSG
>17483
MSDAGVKGFGEKASEALKPDSQKSYAEQGKEYITDKADKVAGKVQPEDNKGVFQGVHDSAEKGKDNAEGQGESLADQARDYMGAAKSKLNDAVEYVSGRVHGEEDPTKK
>11019
GSFTMPGLVDSNPAPPESQEKKPLKPCCACPETKKARDACIIEKGEEHCGHLIEAHKECMRALGKKI
>15563
MASNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGERGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGSAWSHPQFEK
>18578
GSKRRELDDPGAYNYPFTWNTPSAPPGYNIAVKPDQIQYTELSNAKIAYKQNKANIAQEQQYGSHEEHLPADLETLQREIRMAQERLDLAIQAYHHWNNPHGPREKKAKVGSKSGSNKSSISSKSGDGKTSVWI
>17836
MAYSEKVIDHYENPRNVGSFDNKDENVGSGMVGAPACGDVMKLQIKVNDEGIIEDARFKTYGCGSAIASSSLVTEWVKGKSLDEAQAIKNTDIAEELELPPVKIHCSILAEDAIKAAIADYKSKREAK
>5076
MKKVIATKVLGTVKWQNVRNGYGFINRNDTKEDVFVHQTAIKKNNPRKYLRSVGDGETVEFDVVEGEKGAEAANVTGPG
>5736
MHHHHHHLEMSFLKSFPPPGSAEGLRQQQPETEAVLNGKGLGTGTLYIAESRLSWLDVSGLGFSLEYPTISLHAVSRDLNAYPREHLYVMVNAKFGEESKESVAEEEDSDDDVEPIAEFRFVPSDKSALEAMFTAMCECQALHPDPEDEDSDDYDGEEYDVEAHEQGQGDIPTFYTYEEGLSHLTAEGQATLERLEGMLSQSVSSQYNMAGVRTEDSTRDYEDGMEVDTTPTVAGQFEDADVDH
>18631
MAAPGERGRFHGGNLFFLPGGARSEMMDDLATDARGRGAGRRDAAASASTLAQAPTSDSPVAEDASRRRPCRACVDFKTW
>4286
GSPEFMSSSTPFDPYALSEHDEERPQNVQSKSRTAELQAEIDDFVGIMRDNINKVAERGERLTSIEDKADNLAVSAQGFKRGANRVRKAMWYKDLKMK
>19332
MVRTKADSVPGTYRKVVAARAPRKVLGSSTSATNSTSVSSRKHENKYAGGNPVCVRPTPKWQKGIGEFFRLSPKDSEKENQIPEEAGSSGLGKAKRKACPLQPDHTNDEKE
>15180
GHMDEEDGEPPYEPESGCVEIPGLSEEEDPAPSRKIHFSQAPIQVFSTYSNEDYDRRNEDVDPMAASAEYELEKRVERLEL
>2956
MKPVTLIDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELIYIPNRVAQQL
>18927
ENREIQPPFKPKVSGKGAENFDKFFTRGQPVLTPPDQLVIANIDQSDFEGFWYVNPQFVHPILQSAVX
>18867
SLRKRWQNEKLGLDAGDEYEDENLYEGLNLDDCSMYEDISRGLQGTYQWVGSLNIGDVQLEKP
>4034
AMPKRLCLVCGDIASGYHYGVASCEACKAFFKRTIQGNIEYSCPATNECEITKRRRKSCQACRFMKALKVGMLKEGVRLDRVRGGRQKYKRRLDSENS
>19482
NPLEAVVFEERDGNAVLNLLFSLRGTKPSSLSRAVKVFETFEAKIHHLETRPAQRPLAGSPHLEYFVFFEVPSGDLAALLSSVRRVSDDVRSA
>25093
MIHSSVKRWGNSPAVRIPATLMQALNLNIDDEVKIDLVDGKLIIEPVRKEPVFTLAELVNDITPENLHENILWGEPKDKEVW
>15430
MNLEPPKAEIRSATRVMGGPVTPRKGPPKFKQRQTRKFKSKPPKKGVQGFGDDIPGMEGLGTDITVIAPWEAFNHLELHELAQYGII
>25327
GSHMADENANRPVNPGEDPNEAFRSTPFEATTSALGDCRECIAYRTGATTGKGSRRHDDTCSREIKGSSATPGGSEKAGTGRQ
>17048
ASTASPAKAESSGVIDLFGDAFGSGASETQPAPQRVSSSSASADLLAGFGGSFMAPST
>16670
MAWGAWGRLACYLEFLKKEELKEFQLLLANKAHSRSSSGETPAQPEKTSGMEVASYLVAQYGEQRAWDLALHTWEQMGLRSLCAQAQEGAGHSLEHH
>15162
NDDKLYRADSRPPDEIKQSGGLMPRGQSDGFDRGTQMNINLYDHARGTQTGFVRHDDGYVSTSISLRSAHLVGQTILSGHSTYYIYVIATAPNMFNVNDVLGAYSPHPDEQDVSALGGIPYSQIVGWYRVHFGVLDEQLHRNRGYRDRYYSNLDIAPAADGYGLAGF
>15336
GAAESAERRNSILTETLHRFSLEGDAPVSWTETKKQSFKQTGEFGEKRKNSILNPINSIRKFSIVQKTPLQMNGIEEDSDEPLERRLSLVPDSEQGEAILPRISVISTGPTLQARRRQSVLNLMTHSVNQGQNIHRKTTASTRKVSLAPQANLTELDIYSRRLSQETGLEISEEINEEDLKECLFDDME
>5022
AFPSPAADYVEQRIDLNQLLIQHPFATYFVKASGDSMIDGGISDGDLLIVDSAITASHGDIVIAAVDGEFTVKKLQLRPTVQLIPMNSAYSPITISSEDTLDVFGVVIHVVKAMR
>15855
GPHMTLEEFSAGEQKTERMDKVGDALEEVLSKALSQRTITVGVYEAAKLLNVDPDNVVLCLLAADEDDDRDVALQIHFTLIQAFCCENDINILRVSNPGRLAELLLLETDAGPAASEGAEQPPDLHCVLVTNPHSSQWKDPALSQLICYCRESRYMDQWVPVINLPER
>16450
MAYQQDPCANPTRQTGKTGGQTDQYGNPVHQTEALGAYGAGTGTGMHGGEHQQQPHQQPGVLNRSGSSSSEDDGQGGRRKKGMKEKIKERIPGMGRKDEQKQTSATSTPGQGQQKKGMMEKIKEKLPGAH
>15766
AVPVESKPDKPSGKSGMDAALDDLIDTLGGPEETEEENTTYSGPEVSDPMSSTYIEELGKREVTIPPKYRELLAKKEGITGPPADSSKPIGPDDAIDALSSDFTCGSPTAAGKKTEKEESTEVLKAQSAGTVRSAAPPQEK
>6431
EAESLTSAQKAKAEERKRRKMSRGLPDKTEEEKSVMAKKLEQKPKGEGIPTTAKLKVDEFESNVNEVKDPYPSADFPGDDEEDEPEIPVSPSPRPLAELQLKEKAVPIPEASSFFIFSPTNKVRVL
>17926
MGSSHHHHHHSSGLVPRGSHMAAPEGGISDVVEKLIKEAQETCAGDPVSGECVAAWDEVEELSAAASHARDKKKADGSDPLEEYCKDNPETNECRTYDN
>4280
ASASPKQRRSIIRDREPMYDDPTLPEGWTRKLKQRKSGRSAGKYDVYLINPQGKAFRSKVELIAYFEKVGDTSLDPNDFDFTVTGRGSGSGC
>6139
DEAQFKECYDTCHKECSDKGNGFTFCEMKCDTDYSVKDVKEKLENYKPKN
>11338
GSSGSSGMTDYGEEQRNELEALESIYPDSFTVLSENPPSFTITVTSEAGENDETVQTTLKFTYSEKYPDEAPLYEIFSQENLEDNDVSDILKLLALQAEENLGMVMIWTLVTAVQEKLNEIVDQIKTR
>18889
SLKNRKAKAKPVTRLAGAGGRQRGQNKERPPPVPNPDYEPIRKGQRDLYSGLNQRRI
>19485
MGLSFPSPGESAPPTPDLEEKRAKLAEAAERRQKEAASRGILDVQSVQEKRKKKEKIEKQIATSGPPPEEGLRWTVSLEHHHHHH
>15174
GXSFCKADEKXCEYHADCCNCCLSGICLXSTNWILPGCSTSSFFKI
>6974
MQPASAKWYDRRDYVFIEFCVEDSKDVNVNFEKSKLTFSCLGGSDNFKHLNEIDLFHCIDPNDSKHKRTDRSILCCLRKGESGQSWPRLTKERAKLNWLSVDFVNWKDWEDDSDEDMSNFDRFSEMMNNMGGDEDVDLPEVDGADDDSQDSDDEKMPDLE
>19796
GRAMGLPGERGLRGEPGSVPNVDRLLETAGIKASALREIVETWDTSSGSFLPVPERRRGPKGDSGEQGPPGKE
>5956
MADKRAHHNALERKRRDHIKDSFHSLRDSVPSLQGEKASRAQILDKATEYIQYMRRKVHTLQQDIDDLKRQNALLEQQVNALEGSGC
>7292
GAMGSEPQDGNYFEHKHNIRPKPFVIPGRSSGCSTPSGIDCGSGRSTPSSVSTVSTICPGDLKVAAKLAPNIPLEMELPGVKIVHAQFNTPMQLYSDDNIMETCQGQVSTALGETPLMSEPTASVPPESDVYRMLHDNRNEPTQPRQSGSFRVLQGMVDDGSDDRPAGTRS
>19037
LLQEALSAYFDSMKIPSGQPETADVSREQVDKELDRASNSLISGLSQDEEDPPLPPTPMNSLVDECPLDQGLPKLSAEAVFEKCSQISLSNSTTASLSKK
>1128
VVYTDCTESGQNLCLCEGSNVCGQGNKCIPGSDGEKNQCVTGEGTPKPQSHNDGDFEEIPEEYLQ
>4272
MSATAATAPPAAPAGEGGPPAPPPNLTSNRRLQQTQAQVVEVVDIMRVNVDKVLERDQKLSELDDRADALQAGASQFETSAAKLKRKYWWKNLKMM
>15136
MSQKQEEENPAEETGEEKQDTQEKEGILPERAEEAKLKAKYPSLGQKPGGSDFLMKRLQKGQKYFDSGDYNMAKAKMKNKQLPSAYPDKNLVTGDHIPTPQDLPQRKSSLVTSKLAGGQVE
>11388
GSSGSSGEAQTRVKLNFLDQIAKYWELQGSTLKIPHVERKILDLFQLNKLVAEEGGFAVVCKDRMWTKIATKMGFAPGKAVGSHIRGHYERILNPYNLFLSGDSLRCLQKPNLTSDTKDKEY
>6968
MDVFMKGLSKAKEGVVAAAEKTKQGVAEAAGKTKEGVLYVGSKTKEGVVHGVATVAEKTKEQVTNVGGAVVTGVTAVAQKTVEGAGSIAAATGFVKKDQHGKNEEGAPQEGILEDMPVDPDNEAYEMPSEEGYQDYEPEA
>6498
MASVLEELQKDLEEVKVLLEKSTRFRLRDTLTSEKSKIETELKNKMQQKSQKKPELDNEKPAAVVAPLTTGYTVKIS
>6212
NKELDPVQKLFVDKIREYRTKRQTSGGPVDAGPEYQQRLDRELFKLKQMYGKADMNTFPNFTFEDPKFEVVEKPQS
>19521
GHSMSQSNRELVVDFLSYKLSQKGYSWSQFSDVEENRTEAPEGTESEMETPSAIEGNPSWHLADSPAVNGATGHSSSLDAREVIPMAAVKQALREAGDEFELRYRRAFSDLTSQLHITPGTAYQSFEQVVNELFRDGVNWGRIVAFFSFGGALCVESVDKEMQVLVSRIAAWMATYLNDHLEPWIQENGGWDTFVELYGNNAAAESRKGQER
>6970
MQAIKCVVVGDGAVGKTCLLISYTTNAFPGEYIPTVFDNYSANVMVDGKPVNLGLWDTAGQEDYDRLRPLSYPQTDVFLICFSLVSPASFENVRAKWYPEVRHHCPNTPIILVGTKLDLRDDKDTIEKLKEKKLTPITYPQGLAMAKEIGAVKYLECSALHQRGLKTVFDEAIRAVLSPPPVK
>5313
GPLGSPSKDCGSPKYAYFNGCSSPTAPLSPMSPPGYKLVTGDRNNSSCRNYNKQASEQNWANYSAEQNRMGQAGSTISNSHAQPFDFPDDNQNAKKVAAGHELQPLAIVDQRTSSRASSRASSRPRPDDLEI
>6846
HLMYTLGPDGKRIYTLKKCTESGEITKSAHPARFSPDDKYSRQRVTLKKRFGLVPGQHHHHHH
>19135
MADERKDEGKAPHWTSASLTEAAAHPHSPEMKDQGGSGEGLSRSANGFPYREEEEGAFGEHGSQGTYSDTKENGINGELTSADRETAEEVSARIVQVVTAEAVAVLKGEQEKEAQHKDQPAALPLAAEETVNLPPSPPPSPASEQTAALEEATSGESAQAPSAFKQAKDKVTDGITKSPEKRSSLPRPSSILPPRRGVSGDREENSFSLNSSISSARRTTRSEPIRRAGKSGTSTPTTPGSTAITPGTPPSYSSRTPGTPGTPSYPRTPGTPKSGILVPSEKKVAIIREPPKSPATPKQLRLINQPLPDLKNVKSKIGSTDNIKYQPKGGQVQIVTKKIDLSHVTSKCGSLKNIRHRPGGGRVKIESVKLDFKEKAQAKVGSLDNAHHVPGGGNVKIDSQKLNFREHAKARVDHGAEIITQSPSRSSVASPRRLSNVSSSGSINLLESPQLATLAEDVTAALAKQGL
>4716
GPLGSPEFSMPHSSPQNRPNYNVSFSSMPGGQNERGKAAANLEGKQKAADFEDLLSGQGFNAHKDKKGPRTIAEMRKEEMAKEMDPEKLKIVEWIEGKERNIRALLSTMHTVLWAGETKWKPVGMADLVTPEQVKKVYRKAVLVVHPDKATGQPYEQYAKMIFMELNDAWSEFENQGQKPLY
>6521
GSHMNNTSSSPQPKKKPLDGEHFTLQIRGRERFEMFRELNEALELKDAQAGKEPG
>26549
GHMASGSLRGGEPEPDVTVLTSMLTDPSHITAETAKRRLARGSPPSLASSSASQLSAPSLKATCTTHHDSPDADLIEANLLWRQEMGGNITRVESENKVVILDSFEPLHADGDEREISVAAEILRKSRKFPSALPIWARPDYNPPLLESWKDPDYVPPVVHGCPLPPTKAPPIPPPRRKRTVVLTESNVSSALAELATKTFGSSGSSAVDSGTATALPDQASDDGDKGSDVESYSSMPHLEGEPGDPDLSDGSWSTVSEEASEDVVCC
>11526
MNIVPQDTFKSQVSTDQDKSVLSSAVPSLPDTLRQQEGGAVPLSTQLNDRHPLESTLKNWETTQRQRQMEQYRQIFGIAEPMKRTMEMEIVNRTDFNPLSTNGSIHRDILLNKECSIDWEDVYPGTGLQASTMVGDDVHSKIEKQLGY
>19364
MDGSHHHHHHGSATFPPATSAPQQPPGPEDEDSSLDESDLYSLAHSYLGGGGRKGRTKREAAANTNRPSPGGHERKLVTKLQNSERKKRGARR
>15719
AKTKGNKVNVGVKYAEKQERRFEPEKLREGRNIIGFQMGINKFASQQGMTAYGTRRHLYDPKLGTDQPLDQATISLQMGTNKGASQAGMTAPGTKRQI
>4985
ATPAPAAEMPGITIISASQTGNARRVAEALRDDLLAAKLNVKLVNAGDYKFKQIASEKLLIVVTSTQGEGEPPEEAVALHKFLFSKKAPKLENTAFAVFSLGDTSYEFFCQSGKDFDSKLAELGGERLLDRVDADVEYQAAASEWRKRVVDALKSRAPVAAPSQSVA
>15506
GSNGADNAHNNAFGGGKNPGIGNTSGAGSNGSASSNRGNSNDWSWSNKPHKNDGFHSDGSYHITFHGDNNSKPKPGGNSGNRGNNGDGAS
>6112
GSHMEHPKPSACRNLFGPLDHEELTRDLEKHCRDMEEASQRKWNFDFQNHKPLEGKYEWQEVEKGSLPEFYYRPPRPPKGACKVPAQE
>15398
PNRSISPSALQDLLRTLKSPSSPQQQQQVLNILKSNPQLMAAFIKQRHAKYVANQPGMQ
>19357
MEANIRGQGGLAYPGVRTHGTLESVNGPKAGSRGLTSLADTFEHMIEELLDEDQKVRPNEENNKDADLYTGRVMLSSQVPLEPPLLFLLEEY
>15141
MKAKRSHQAIIMSTSLRVSPSIHGYHFDTASRKKAVGNIFENTDQESLERLFRNSGDKKAEERAKIIFAIDQDVEEKTHALMALKKRTKDKLFQFLKLRKYSIKVH
>15711
ATKAVAVLKGDGPVQGIINFEQKESNGPVKVWGSIKGLTEGLHGFHVHEEEDNTAGCTSAGPHFNPLSRKHGGPKDEERHVGDLGNVTADKDGVADVSIEDSVISLSEDHAIIGRTLVVHEKADDLGKGGNEESTKTGNAGSRLACGVIGIAQ
>16296
PGPEEQEEEPQPRKPGTRRERTLGAPASSERSAAGGRGPRGHKRKLNEEDAASESSREKSNEDEGSSSEADEMAKALEAELNDLM
>17760
MEEPQSDPSVEPPLSQETFSDLWKDLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPL
>19507
MAEMKTDAATLAQEMGNFERISGDLKTQIDQVESTAGSLQGQWRGAAGTAAQAAVVRFQEAANKQKQELDEISTNIRQAGVQYSRADEEQQQALSSQMGFKLAAALEHHHHHH
>4922
IDVLLGADDGSLAFVPSEFSISPGEKIVFKNNAGFPHNIVFDEDSIPSGVDASKIDMSEEDLLNAKGETFEVALSNKGEYSFYCSPHQGAGMVGKVTVN
>17325
GPMELSVSSDSDSSQAGSNAGNQPMLWPAWVYCTRYSDRPSSGPRSRKPKKKNPNKEDKRPRTAFTAEQLQRLKAEFQTNRYLTNQRRQSLAQELGLNESQIKIWFQNKRAKIKKAT
>15672
PALTPSLGAKPKPQVLSDSGGPLIDLLTEDPPPYRDPRPPPSDRDGNGGEATPAGEAPDPSPMASRLRGRREPPVADSTTSQAFPLIAGGNGQLQYWPFSSSDLYNWKNNNPSFSEDPGKLTALIESVLITHQPTWDDCQQLLGTLLTGEEKQRVLLEARKAVRGDDGRPTQLPNEVDAAFPLERPDWDYTTQAGRNHLVHYRQLLLAGLQNAGRSHHHHHHHH
>15176
GHMDPKGRKKIQFSVPAPPSQLDPRQVEMIRRRRPTPALLFRVSEHSSPEEESSPHQRTSGEGHHPKSKRPNPSAYTPPSLKAVQRIAEFHLQTISNLSENQASEEEDELGELRELGYPQ
>17205
MTETDKKQEQENHAECEDKPKPCCVCKFEKEERDTCILFNGQDSEKCKEFIEKYKECMKGYGFEVPSAN
>4914
LGASWHRPDKCCLGYQKRPLGQVLLSSWYPTSQLCSKPGVIFLTKRGRQVCADKSKDWVKKLMQQLPVTAR
>hmbdi
MKHDHHIGHTHSGKGHACHHEHNSPKTQQASSKMEGPIVYTCPMHPEIRQSAPGHCPLCGMALEPETVTVSEVVSPEYLDMRRR
>15097
PRLFECSNKTGRFLATEIVDFTQDDLDENDVYLLDTWDQIFFWIGKGANESEKEAAAETAQEYLRSHPGSRDLDTPIIVVKQGFEPPTFTGWFMAWDPLCWSDRKSYDELKAELGDNASIGQLVSGLTSKNEVFTATTTLVPTKLETFPLDVLVNTAAEDLPRGVDPSRKENHLSDEDFKAVFGMTRSAVANLPLWKQQNLKKEKGLF
>19478
GPDSMDCQAVLTTLLSRTDLDPRVLSNTGWGQTQIKQDTVWDIEEVPRPEGKSDKGTEGWESAATQTKNSGGWGDAPSQSNQMKSGWGEL
>6580
LKTFSSKSEYQLVVNAVRKHQESGFYWSAVTGGEANLLLSAEPAGTFLIRDSSDQRHFFTLSVKTQSGTKNLRIQCEGGSFSLQSDPRSTQPVPRFDCVLKLVHHYMPPPGTPSFSLPPTEPSSEVPEQPPAQALPGSTPKRAYYIYSGGEKIPLVLSRPLSSN
>16160
GHMKMADAKQKRNEQLKRWIGSETDLEPPVVKRKKTKVKFDDGAVFLAACSSGDTEEVLRLLERGADINYANVDGLTALHQASIDDNVDMVKFLVEAGAN
>5070
ALFSGDIANLTAAEITQGFKDVPSFVHEGGDVPLVELLVSAGISPSKRQAREDIQNGAIYVNGERLQDVGAILTAEHRLEGRFTVIRRGKKKYYLIRYALEHHHHHH
>15409
GSLRVKFSRSADAPAYQQGQNQLLNELNLGRREEYDVLDKRRGRDPEMGGKPRRKNPQEGLYNELQKDKMAEAYSEIGMKGERRRGKGHDGLYQGLSTATKDTYDALHMQALPPR
>19171
MHHHHHHSSGLVPRGSEFDEWTPGTAVITSPVLVPGCPSKAVDPGLPSVKQEPPDPEEDKEENKDDSASKLAPEEEAGGAGTPVITEIFSLGGTRFRDTAVWLPRSKDLKKPGARKQ
>15441
SHHHHHHSMANSAGSSNTQTPDASLQATYNGQSGGKKQPLNLSVYNVQATNIPPKETLVYTKQTQTTSTGGGNGD
>19114
MPLGSPEFMSSSAGSGHQPSQSRAIPTRTVAISDAAQLPHDYCTTPGGTLFSTTPGGTRIIYDRKFLLDRRNSPMAQTPPCHLPNIPGVTSPGTLIEDSKVEVNNLDNLDNHDRKHAVGDDAQFEMHI
>7358
GNHSGKRELSANKASKDGEIHRGEAGKKRSVGKLSQTASEDSDVFGEADAIQNNGTSAEDTAVTDSKHTADPKNNWQGAHPADPGNRPHLIRLFSRDAPGREDNTFKDRPSESDELQTIQEDPTAASGGLDVMASQKRPSQRSKYLATASTMDHARHGFLPRHRDTGILDSIGRFFSGDRGAPKRGSGKVSLEHHHHHH
Loading

0 comments on commit 1caa427

Please sign in to comment.