diff --git a/examples/enzeptional/README.md b/examples/enzeptional/README.md index ea561b270..2bdc60de4 100644 --- a/examples/enzeptional/README.md +++ b/examples/enzeptional/README.md @@ -1,97 +1,22 @@ -# Enzyme Optimization Experiment +# Enzyme Optimization in Biocatalytic Reactions -## Description -This script performs an optimization experiment for enzyme sequences using different mutation strategies. +This repository provides an example of how to run the framework for the optimization of enzymes within the context of biocatalytic reactions. -## Import modules -```python -import logging -import pandas as pd -from gt4sd.frameworks.enzeptional.processing import HFandTAPEModelUtility -from gt4sd.frameworks.enzeptional.core import SequenceMutator, EnzymeOptimizer -from gt4sd.configuration import sync_algorithm_with_s3 -from gt4sd.configuration import GT4SDConfiguration -configuration = GT4SDConfiguration.get_instance() -``` +## Prerequisites -## Load datasets and scorers -```python -sync_algorithm_with_s3("proteins/enzeptional/scorers", module="properties") -``` -Feasibility scorer path -```python -scorer_path = f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/feasibility/model.pkl" -``` -## Set embedding model/tokenizer paths -```python -language_model_path = "facebook/esm2_t33_650M_UR50D" -tokenizer_path = "facebook/esm2_t33_650M_UR50D" -unmasking_model_path = "facebook/esm2_t33_650M_UR50D" -chem_model_path = "seyonec/ChemBERTa-zinc-base-v1" -chem_tokenizer_path = "seyonec/ChemBERTa-zinc-base-v1" -``` -## Load protein embedding model -```python -protein_model = HFandTAPEModelUtility( - embedding_model_path=language_model_path, tokenizer_path=tokenizer_path - ) -``` -## Create mutation config -```python -mutation_config = { - "type": "language-modeling", - "embedding_model_path": language_model_path, - "tokenizer_path": tokenizer_path, - "unmasking_model_path": unmasking_model_path, - } -``` -## Set key parameters -```python 
-intervals = [(5, 10), (20, 25)] -batch_size = 5 -top_k = 3 -substrate_smiles = "NC1=CC=C(N)C=C1" -product_smiles = "CNC1=CC=C(NC(=O)C2=CC=C(C=C2)C(C)=O)C=C1" +Before initiating the enzyme optimization process, execute the following command in your terminal to activate the environment: -sample_sequence = "MSKLLMIGTGPVAIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS" +```console +conda activate gt4sd ``` -## Load mutator -```python -mutator = SequenceMutator(sequence=sample_sequence, mutation_config=mutation_config) -``` -## Set Optimizer -```python -optimizer = EnzymeOptimizer( - sequence=sample_sequence, - protein_model=protein_model, - substrate_smiles=substrate_smiles, - product_smiles=product_smiles, - chem_model_path=chem_model_path, - chem_tokenizer_path=chem_tokenizer_path, - scorer_filepath=scorer_path, - mutator=mutator, - intervals=intervals, - batch_size=batch_size, - top_k=top_k, - selection_ratio=0.25, - perform_crossover=True, - crossover_type="single_point", - concat_order=["substrate", "sequence", "product"], -) -``` -## Define optmization parameters -```python -num_iterations = 3 -num_sequences = 5 -num_mutations = 5 -time_budget = 3600 -``` -## Optimize -```python -optimized_sequences, iteration_info = optimizer.optimize( - num_iterations=num_iterations, - num_sequences=num_sequences, - num_mutations=num_mutations, - time_budget=time_budget, -) + +## Citation + +```bibtex +@inproceedings{teukam2023enzyme, + title={Enzyme optimization via a generative language modeling-based evolutionary algorithm}, + author={Teukam, Yves Gaetan Nana and Grisoni, Francesca and Manica, Matteo and Zipoli, Federico and Laino, Teodoro}, + booktitle={American Chemical Society (ACS) Spring Meeting}, + year={2023} +} ``` \ No newline at end of file diff --git a/examples/enzeptional/data.csv b/examples/enzeptional/data.csv new file mode 100644 index 000000000..66f082648 --- /dev/null +++ b/examples/enzeptional/data.csv @@ -0,0 +1,106 @@ 
+substrates,products,sequences,intervals +NCC(=O)O,CC(=O)C(C(=O)[O-])N,MRGEFYQQLTNDLETARAEGLFKEERIITSAQQADITVADGSHVINFCANNYLGLANHPDLIAAAKAGMDSHGFGMASVRFICGTQDSHKELEQKLAAFLGMEDAILYSSCFDANGGLFETLLGAEDAIISDALNHASIIDGVRLCKAKRYRYANNDMQELEARLKEAREAGARHVLIATDGVFSMDGVIANLKGVCDLADKYDALVMVDDSHAVGFVGENGRGSHEYCDVMGRVDIITGTLGKALGGASGGYTAARKEVVEWLRQRSRPYLFSNSLAPAIVAASIKVLEMVEAGSELRDRLWANARQFREQMSAAGFTLAGADHAIIPVMLGDAVVAQKFARELQKEGIYVTGFFYPVVPKGQARIRTQMSAAHTPEQITRAVEAFTRIGKQLGVIA,"[(50, 52), (77, 80), (92, 92), (96, 96), (107, 107), (109, 110), (113, 116), (136, 139), (141, 141), (180, 184), (187, 188), (209, 209), (214, 215), (217, 218), (223, 226), (238, 240), (245, 253), (269, 273), (276, 277), (281, 281), (285, 285), (325, 325), (327, 327), (368, 368), (370, 370)]" +CC(=CCC/C(=C/CC/C(=C/COP(=O)(O)OP(=O)(O)O)/C)/C)C,CC(=CCC[C@@]1([C@H]2CC[C@H](C2)C1=C)C)C,MDSSTATAMTAPFIDPTDHVNLKTDTDASENRRMGNYKPSIWNYDFLQSLATHHNIVEERHLKLAEKLKGQVKFMFGAPMEPLAKLELVDVVQRLGLNHLFETEIKEALFSIYKDGSNGWWFGHLHATSLRFRLLRQCGLFIPQDVFKTFQNKTGEFDMKLCDNVKGLLSLYEASYLGWKGENILDEAKAFTTKCLKSAWENISEKWLAKRVKHALALPLHWRVPRIEARWFIEAYEQEANMNPTLLKLAKLDFNMVQSIHQKEIGELARWWVTTGLDKLAFARNNLLQSYMWSCAIASDPKFKLARETIVEIGSVLTVVDDGYDVYGSIDELDLYTSSVERWSCVEIDKLPNTLKLIFMSMFNKTNEVGLRVQHERGYNSIPTFIKAWVEQCKSYQKEARWFHGGHTPPLEEYSLNGLVSIGFPLLLITGYVAIAENEAALDKVHPLPDLLHYSSLLSRLINDIGTSPDEMARGDNLKSIHCYMNETGASEEVAREHIKGVIEENWKILNQCCFDQSQFQEPFITFNLNSVRGSHFFYEFGDGFGVTDSWTKVDMKSVLIDPIPLGEE,"[(44, 44), (281, 283), (285, 286), (317, 320), (322, 324), (326, 327), (396, 396), (399, 399), (414, 414), (418, 419), (421, 422), (456, 459), (461, 462), (464, 466), (468, 468), (477, 477), (479, 479), (481, 481), (496, 496), (539, 539), (546, 546), (548, 548)]" 
+CC(=CCOP(=O)(O)OP(=O)(O)O)C,O=P([O-])([O-])OP(=O)([O-])[O-],MTADELVFFVNGKKVVEKNADPETTLLVYLRRKLGLCGTKLGCGEGGCGACTVMISKYDRLQNKIVHFSVNACLAPICSLHHVAVTTVEGIGNTQKLHPVQERIARSHGSQCGFCTPGIVMSMYTLLRNQPEPTVEEIENAFQGNLCRCTGYRPILQGFRTFAKDGGCCGGSGNNPNCCMNQTKDQTVSLSPSLFNPEDFKPLDPTQEPIFPPELLRLKDTPQKKLRFEGERVTWIQASTMEELLDLKAQHPDAKLVVGNTEIGIEMKFKNMLFPLIVCPAWIPELNSVVHGPEGISFGASCPLSLVESVLAEEIAKLPEQKTEVFRGVMEQLRWFAGKQVKSVASIGGNIITASPISDLNPVFMASGAKLTLVSRGTRRTVRMDHTFFPGYRKTLLRPEEILLSIEIPYSKEGEFFSAFKQASRREDDIAKVTSGMRVLFKPGTIEVQELSLCFGGMADRTISALKTTPKQLSKSWNEELLQSVCAGLAEELQLAPDAPGGMVEFRRTLTLSFFFKFYLTVLQKLGRADLEDMCGKLDPTFASATLLFQKDPPANVQLFQEVPKDQSEEDMVGRPLPHLAANMQASGEAVYCDDIPRYENELSLRLVTSTRAHAKITSIDTSEAKKVPGFVCFLTAEDVPNSNATGLFNDETVFAKDEVTCVGHIIGAVVADTPEHAQRAARGVKITYEDLPAIITIQDAINNNSFYGSEIKIEKGDLKKGFSEADNVVSGELYIGGQEHFYLETNCTIAVPKGEAGEMELFVSTQNTMKTQSFVAKMLGVPDNRIVVRVKRMGGGFGGKETRSTVVSTALALAAHKTGRPVRCMLDRDEDMLITGGRHPFLAKYKVGFMKTGTVVALEVAHFSNGGNTEDLSRSIMERALFHMDNAYKIPNIRGTGRICKTNLPSNTAFRGFGGPQGMLIAEYWMSEVAITCGLPAEEVRRKNMYKEGDLTHFNQKLEGFTLPRCWDECIASSQYLARKREVEKFNRENCWKKRGLCIIPTKFGISFTLPFLNQGGALVHVYTDGSVLLTHGGTEMGQGLHTKMVQVASRALKIPTSKIHISETSTNTVPNTSPTAASASADLNGQGVYEACQTILKRLEPFKKKKPTGPWEAWVMDAYTSAVSLSATGFYKTPNLGYSFETNSGNPFHYFSYGVACSEVEIDCLTGDHKNLRTDIVMDVGSSLNPAIDIGQVEGAFVQGLGLFTMEELHYSPEGSLHTRGPSTYKIPAFGSIPIEFRVSLLRDCPNKRAIYASKAVGEPPLFLASSIFFAIKDAIRAARAQHGDNAKQLFQLDSPATPEKIRNACVDQFTTLCVTGVPENCKSWSVRI,"[(25, 25), (27, 27), (39, 42), (44, 47), (49, 50), (52, 53), (70, 72), (74, 76), (110, 111), (113, 114), (116, 119), (143, 146), (148, 148), (150, 152), (155, 155), (235, 235), (244, 244), (248, 248), (254, 255), (264, 270), (274, 274), (277, 281), (286, 286), (298, 304), (307, 307), (332, 335), (337, 338), (341, 342), (344, 345), (351, 355), (357, 358), (360, 364), (371, 373), (393, 393), (397, 397), (401, 402), (404, 406), (408, 408), (419, 420), (422, 423), (429, 431), (433, 434), (514, 514), (585, 585), (592, 592), (741, 744), (765, 766), (768, 769), (793, 797), (799, 802), (829, 829), (839, 
839), (910, 911), (913, 914), (917, 917), (1007, 1009), (1036, 1036), (1038, 1039), (1075, 1078), (1080, 1081), (1193, 1194), (1197, 1198), (1201, 1201), (1224, 1224), (1229, 1230), (1260, 1261), (1264, 1264)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,"[(76, 77), (84, 85), (107, 110), (115, 115), (122, 123), (126, 126), (148, 152), (167, 167), (169, 171), (205, 205), (224, 224), (226, 227), (229, 230), (290, 290), (293, 294), (297, 300), (302, 302), (329, 331), (337, 340), (351, 351), (355, 355), (371, 371), (375, 375), (408, 410), (416, 417), (419, 420), (423, 424), (438, 438), (440, 442), (445, 445)]" +NCCCC[C@H](N)C(=O)O,CC(C)(N)CO,MSHEELNDQLRVRREKLKKIEELGVDPFGKRFERTHKAEELFELYGDLSKEELEEQQIEVAVAGRIMTKRGMGKAGFAHIQDVTGQIQIYVRQDDVGEQQYELFKISDLGDIVGVRGTMFKTKVGELSIKVSSYEFLTKALRPLPEKYHGLKDIEQRYRQRYLDLIMNPESKKTFITRSLIIQSMRRYLDSHGYLEVETPMMHAVAGGAAARPFITHHNALDMTLYMRIAIELHLKRLIVGGLEKVYEIGRVFRNEGISTRHNPEFTMLELYEAYADFRDIMKLTENLIAHIATEVLGTTKIQYGEHLVDLTPEWRRLHMVDAIKEYVGVDFWRQMSDEEARELAKEHGVEVAPHMTFGHIVNEFFEQKVEDKLIQPTFIYGHPVEISPLAKKNPDDPRFTDRFELFIVGREHANAFTELNDPIDQRQRFEEQLKEREQGNDEAHEMDEDFLEALEYGMPPTGGLGIGVDRLVMLLTNSPSIRDVLLFPQMRHK,"[(284, 284), (288, 288), (314, 314), (376, 378), (403, 404), (406, 408), (410, 411), (413, 414), (465, 467), (470, 471), (474, 474)]" 
+N,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MAKYTREDIEKLVKEENVKYIRLQFTDILGTIKNVEIPVSQLGKALDNKVMFDGSSIEGFVRIEESDMYLYPDLNTFVIFPWTAEKGKVARFICDIYNPDGTPFEGDPRNNLKRILKEMEDLGFSDFNLGPEPEFFLFKLDEKGEPTLELNDKGGYFDLAPTDLGENCRRDIVLELEEMGFEIEASHHEVAPGQHEIDFKYAGAVRSCDDIQTFKLVVKTIARKHGLHATFMPKPLFGVNGSGMHCNLSLFKNGVNAFFDENADLQLSETAKHFIAGIVKHATSFTAVTNPTVNSYKRLVPGYEAPCYVAWSAQNRSPLIRIPASRGISTRVEVRSVDPAANPYLALSVLLAAGLDGIKNKLEAPAPIDRNIYVMSKEERMENGIVDLPATLAEALEEFKSNEVMVKALGEHLFEHFIEAKEIEWDMFRTQVHPWEREQYMSQY,"[(126, 131), (133, 133), (135, 136), (154, 157), (169, 169), (182, 183), (185, 188), (190, 191), (194, 195), (197, 201), (214, 214), (230, 231), (233, 235), (238, 239), (242, 244), (246, 248), (250, 251), (256, 258), (290, 290), (294, 297), (299, 300), (302, 303), (305, 306), (311, 315), (317, 320), (322, 324), (328, 328), (330, 332), (334, 334), (336, 339), (373, 373)]" +Oc1ccccc1,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)O)OP(=O)(O)O)O)N,MELIQDTSRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKAPGIPSGMETLKDTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVHPEPGTWDSFLEKFMVGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDFVVQHTSFKEMKKNPMTNYTTVPQEFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAEKMAGCSLSFRSEL,"[(43, 43), (45, 47), (54, 57), (60, 60), (75, 75), (106, 106), (108, 108), (128, 129), (131, 132), (134, 137), (139, 142), (145, 145), (172, 172), (191, 192), (194, 197), (200, 200), (204, 204), (223, 226), (233, 235), (238, 240), (248, 248), (250, 250), (253, 254), (260, 261), (263, 263), (266, 266), (291, 291)]" +C1=C2C(=NC=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO)O)O,O=P([O-])([O-])O[C@H]1O[C@H](CO)[C@@H](O)[C@H]1O,MANGYTYEDYQDTAKWLLSHTEQRPQVAVICGSGLGGLVNKLTQAQTFDYSEIPNFPESTVPGHAGRLVFGILNGRACVMMQGRFHMYEGYPFWKVTFPVRVFRLLGVETLVVTNAAGGLNPNFEVGDIMLIRDHINLPGFSGENPLRGPNEERFGVRFPAMSDAYDRDMRQKAHSTWKQMGEQRELQEGTYVMLGGPNFETVAECRLLRNLGADAVGMSTVPEVIVARHCGLRVFGFSLITNKVIMDYESQGKANHEEVLEAGKQAAQKLEQFVSLLMASIPVSGHTG,"[(30, 32), 
(34, 35), (50, 50), (56, 57), (81, 83), (87, 87), (89, 91), (95, 96), (99, 99), (113, 115), (117, 119), (126, 126), (192, 192), (194, 200), (202, 203), (205, 206), (209, 209), (217, 218), (221, 224), (239, 242), (244, 245), (255, 256), (258, 262), (271, 271)]" +O,CCC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O,MAEIRKLKNYINGEWVESKTDQYEDVVNPATKEVLCQVPISTKEDIDYAAQTAAEAFKTWSKVAVPRRARILFNFQQLLSQHKEELAHLITIENGKNTKEALGEVGRGIENVEFAAGAPSLMMGDSLASIATDVEAANYRYPIGVVGGIAPFNFPMMVPCWMFPMAIALGNTFILKPSERTPLLTEKLVELFEKAGLPKGVFNVVYGAHDVVNGILEHPEIKAISFVGSKPVGEYVYKKGSENLKRVQSLTGAKNHTIVLNDANLEDTVTNIVGAAFGSAGERCMACAVVTVEEGIADEFMAKLQEKVADIKIGNGLDDGVFLGPVIREDNKKRTLSYIEKGLEEGARLVCDGRENVSDDGYFVGPTIFDNVTTEMTIWKDEIFAPVLSVIRVKNLKEAIEIANKSEFANGACLFTSNSNAIRYFRENIDAGMLGINLGVPAPMAFFPFSGWKSSFFGTLHANGKDSVDFYTRKKVVTARYPAPDFN,"[(24, 24), (26, 26), (38, 38), (93, 93), (148, 152), (174, 175), (181, 182), (185, 185), (204, 208), (211, 212), (215, 215), (252, 254), (283, 284), (327, 328), (331, 331), (334, 334), (338, 338), (380, 381), (383, 384), (408, 408)]" 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,CC(C)(COP(=O)(O)OP(=O)(O)OCC1C(C(C(O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)C(C(=O)NCCC(=O)NCCSC(=O)CCCl)O,MPSTLTINGKAPIVAYAELIAARIVNALAPNSIAIKLVDDKKAPAAKLDDATEDVFNKITSKFAAIFDNGDKEQVAKWVNLAQKELVIKNFAKLSQSLETLDSQLNLRTFILGGLKYSAADVACWGALRSNGMCGSIIKNKVDVNVSRWYTLLEMDPIFGEAHDFLSKSLLELKKSANVGKKKETHKANFEIDLPDAKMGEVVTRFPPEPSGYLHIGHAKAALLNQYFAQAYKGKLIIRFDDTNPSKEKEEFQDSILEDLDLLGIKGDRITYSSDYFQEMYDYCVQMIKDGKAYCDDTPTEKMREERMDGVASARRDRSVEENLRIFTEEMKNGTEEGLKNCVRAKIDYKALNKTLRDPVIYRCNLTPHHRTGSTWKIYPTYDFCVPIVDAIEGVTHALRTIEYRDRNAQYDWMLQALRLRKVHIWDFARINFVRTLLSKRKLQWMVDKDLVGNWDDPRFPTVRGVRRRGMTVEGLRNFVLSQGPSRNVINLEWNLIWAFNKKVIDPIAPRHTAIVNPVKIHLEGSEAPQEPKIEMKPKHKKNPAVGEKKVIYYKDIVVDKDDADVINVDEEVTLMDWGNVIITKKNDDGSMVAKLNLEGDFKKTKHKLTWLADTKDVVPVDLVDFDHLITKDRLEEDESFEDFLTPQTEFHTDAIADLNVKDMKIGDIIQFERKGYYRLDALPKDGKPYVFFTIPDGKSVNKYGAKK,"[(87, 87), (132, 134)]" +C[C@]12CCCC(C1CCC34C2C[C@@H]5C(C3)C5(C4)C)(C)C,O,MKNRIPVVLLACGSFNPITNMHLRLFEVARDHLHQTGRYQVIEGIISPVNDSYGKKDLVASHHRVAMARLALQTSDWIRVDPWESEQAQWMETVKVLRHHHRELLRSSAQMDGPDPSKTPSASAALPELKLLCGADVLKTFQTPNLWKDTHIQEIVEKFGLVCVSRSGHDPERYISDSPILQQFQHNIHLAREPVLNEISATYVRKALGQGQSVKYLLPEAVITYIRDQGLYINDGSWKGKGKTG,"[(11, 13), (16, 21), (23, 26), (46, 50), (52, 55), (57, 58), (64, 64), (67, 67), (88, 89), (91, 92), (94, 97), (131, 133), (135, 135), (137, 138), (140, 145), (148, 149), (152, 152), (155, 155), (163, 165), (167, 169), (174, 174), (177, 180), (195, 196), (198, 201), (206, 209), (212, 212), (214, 214), (217, 218), (232, 232)]" +N#CC(O)c1ccccc1,N,MAPKAVLVGLPGSGKSTIGRRLAKALGVGLLDTDVAIEQRTGRSIADIFATDGEQEFRRIEEDVVRAALADHDGVLSLGGGAVTSPGVRAALAGHTVVYLEISAAEGVRRTGGNTVRPLLAGPDRAEKYRALMAKRAPLYRRVATMRVDTNRRNPGAVVRHILSRLQVPSPSEAAT,"[(7, 11), (18, 22), (30, 30), (77, 78), (80, 80), (100, 100), (102, 102), (107, 107), (110, 111), (115, 116), (118, 120), (122, 124), (127, 127), (129, 129), (148, 152), (154, 155), (157, 158), (161, 161)]" 
+C[C@@H](C(=O)N[C@H](CC(=O)[O-])C(=O)[O-])[NH3+],C[C@@H](C(=O)N[C@@H](CC(=O)O)C(=O)O)N,MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" +C1=CN(C(=O)N=C1N)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O,O=C1CCCN1CCl,MNGDVQSVIRGYLERAQVAKTMSDAGRWNEAGDLLRQLMTDVKSCKISASNRDEHDARNTFLRALEANLKLVQQNVRDEDDLHEAMTRQSGSPEPPADPDVWSKPSPPLPSSSKFGATKKGVGAAGPRPREISKSTSSMSTNPADVKPANPTQGILPQNSAGDSFDASAYDAYIVQAVRGTMATNTENTMSLDDIIGMHDVKQVLHEAVTLPLLVPEFFQGLRSPWKAMVLAGPPGTGKTLIARAIASESSSTFFTVSSTDLSSKWRGDSEKIVRLLFELARFYAPSIIFIDEIDTLGGQRGNSGEHEASRRVKSEFLVQMDGSQNKFDSRRVFVLAATNIPWELDEALRRRFEKRIFIPLPDIDARKKLIEKSMEGTPKSDEINYDDLAARTEGFSGADVVSLCRTAAINVLRRYDTKSLRGGELTAAMESLKAELVRNIDFEAALQAVSPSAGPDTMLKCKEWCDSFGAM,"[(228, 228), (231, 232), (241, 245), (254, 254), (256, 256), (290, 290), (292, 292), (318, 318), (322, 322), (325, 325), (336, 336), (338, 340), (347, 350), (353, 354), (356, 356), (358, 361), (397, 399), (453, 454), (462, 462)]" 
+CC(C)c1ccc(CO)cc1,NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,MDFTSLETTTFEEVVIALGSNVGNRMNNFKEALRLMKDYGISVTRHSCLYETEPVHVTDQPRFLNAAIRGVTKLKPHELLNVLKKIEKEMGREENGLRYGPRPLDLDILFYGKHKIISDKLIIPHERIWERPFVLAPLVDLLGTEDIDNDKIVAYWHSLSMHSGGIFQAWERLGGESLLGKDGIIQRVIPIGDHLWDFSKKTYVMGILNLTPDSFSDGGKFQSVDTAVSRVRSMISEGVDIIDIGAQSTRPMASRISSQEEIDRLIPVLKVVRGMAEMKGKLISVDTFNSEVALEAIRNGADILNDVSGGSLDENMHKVVADSDVPYMIMHMRGDPCTMQNKENLEYNEICKDVATELYERVREAELSGIPAWRIMIDPGIGFSKGIDHNLDIVMELPKIREEMAKKSIGLSHAPILIGPSRKRFLGDICGRPEASERDAATVACVTAGILKGANIIRVHNVRDNVDAARLCDAMMTKRFKNVD,"[(204, 208), (210, 212), (230, 230), (243, 246), (284, 285), (287, 288), (303, 304), (306, 307), (328, 331), (358, 358), (376, 377), (379, 383), (417, 422), (424, 427), (438, 439), (442, 442), (446, 446), (456, 457), (461, 462), (465, 465)]" +C([C@H]([C@@H]([C@H]([C@H](C(=O)O)O)O)O)O)O,O,MTLPKIKQVRAWFTGGATAEKGAGGGDYHDQGANHWIDDHIATPMSKYRDYEQSRQSFGINVLGTLVVEVEAENGQTGFAVSTAGEMGCFIVEKHLNRFIEGKCVSDIKLIHDQMLSATLYYSGSGGLVMNTISCVDLALWDLFGKVVGLPVYKLLGGAVRDEIQFYATGARPDLAKEMGFIGGKMPTHWGPHDGDAGIRKDAAMVADMREKCGEDFWLMLDCWMSQDVNYATKLAHACAPYNLKWIEECLPPQQYESYRELKRNAPVGMMVTSGEHHGTLQSFRTLSETGIDIMQPDVGWCGGLTTLVEIAAIAKSRGQLVVPHGSSVYSHHAVITFTNTPFSEFLMTSPDCSTMRPQFDPILLNEPVPVNGRIHKSVLDKPGFGVELNRDCNLKRPYSH,"[(171, 171), (186, 188), (190, 190), (210, 210), (214, 214), (220, 221), (223, 225), (243, 244), (246, 247), (249, 251), (263, 263), (266, 267), (270, 270), (274, 275), (277, 278), (296, 298), (347, 347)]" 
+O,O=P([O-])([O-])OP(=O)([O-])[O-],MTTNYIFVTGGVVSSLGKGIAAASLAAILEARGLNVTIMKLDPYINVDPGTMSPIQHGEVFVTEDGAETDLDLGHYERFIRTKMSRRNNFTTGRIYSDVLRKERRGDYLGATVQVIPHITNAIKERVLEGGEGHDVVLVEIGGTVGDIESLPFLEAIRQMAVEIGREHTLFMHLTLVPYMAASGEVKTKPTQHSVKELLSIGIQPDILICRSDRAVPANERAKIALFCNVPEKAVISLKDVDSIYKIPGLLKSQGLDDYICKRFSLNCPEANLSEWEQVIFEEANPVSEVTIGMVGKYIELPDAYKSVIEALKHGGLKNRVSVNIKLIDSQDVETRGVEILKGLDAILVPGGFGYRGVEGMITTARFARENNIPYLGICLGMQVALIDYARHVANMENANSTEFVPDCKYPVVALITEWRDENGNVEVRSEKSDLGGTMRLGAQQCQLVDDSLVRQLYNAPTIVERHRHRYEVNNMLLKQIEDAGLRVAGRSGDDQLVEIIEVPNHPWFVACQFHPEFTSTPRDGHPLFAGFVKAASEFQKRQAK,"[(8, 13), (21, 25), (39, 41), (43, 43), (70, 71), (73, 76), (79, 79), (92, 92), (138, 139), (141, 146), (150, 153), (174, 178), (180, 180), (183, 183), (185, 186), (193, 196), (208, 208), (210, 215), (218, 222), (224, 229), (237, 238), (242, 244), (246, 247), (250, 250), (297, 298), (349, 351), (353, 355), (358, 358), (361, 361), (375, 375), (377, 379), (384, 387), (401, 402), (404, 405), (413, 413), (415, 415), (438, 440), (468, 469), (471, 473), (498, 501), (511, 513)]" +CSCCC(=O)/C(=C/O)/O,O=C[O-],MVQAWYMDESTADPRKPHRAQPDRPVSLEQLRTLGVLYWKLDADKYENDPELEKIRKMRNYSWMDIITICKDTLPNYEEKIKMFFEEHLHLDEEIRYILEGSGYFDVRDKEDKWIRISMEKGDMITLPAGIYHRFTLDEKNYVKAMRLFVGEPVWTPYNRPADHFDARVQYMSFLEGTA,"[(83, 84), (86, 87), (89, 89), (91, 93), (95, 96), (105, 107), (114, 114), (125, 127), (129, 132), (134, 135), (155, 160), (167, 167)]" +CCCCCCCCCCCCCC(=O)O,O,MNAKPGFTDYIVKDIALADFGRKEISLAETEMPGLMATREEYGPKQPLKGARIAGSLHMTIQTAVLIETLAALGADIRWVSCNIYSTQDHAAAAIAAAGIPVFAVKGETLTEYWDYTAKLFDWHGGGTPNMILDDGGDATMLVHAGYRAEQGDTAFLDKPGSEEEEIFYALVKRLLKEKPKGWFAEIAKNIKGVSEETTTGVHRLYEMANKGTLLFPAINVNDSVTKSKFDNLYGCRESLVDGIRRGTDVMLSGKVAMVAGFGDVGKGSAASLRQAGCRVMVSEVDPICALQAAMEGYEVVTMEDAAPRADIFVTATGNKDIITIEHMRAMKDRAIVCNIGHFDNEIQIASLRNLKWTNIKPQVDEIEFPDKHRIIMLSEGRLVNLGNAMGHPSFVMSASFTNQTLAQIELFANNKDSKYAKKVYVLPKTLDEKVARLHLAKIGVKLTELRKDQADYIGVKQEGPYKSDHYRY,"[(58, 58), (82, 83), (135, 136), (196, 197), (201, 204), (222, 222), (227, 228), (230, 231), 
(233, 236), (239, 240), (243, 243), (261, 264), (266, 270), (282, 283), (285, 286), (289, 290), (300, 301), (314, 318), (320, 322), (338, 339), (343, 346), (383, 384), (386, 392), (429, 429), (433, 433), (460, 460), (465, 466), (468, 470), (472, 473)]" +[C@@H]([C@@H]([C@H](C(=O)O)O)O)([C@@H](C(=O)O)O)O,C([C@@H]([C@H](C(=O)[O-])O)O)C(=O)C(=O)[O-],MALSANSDAVTYAKAANTRTAAETGDRIEWVKLSLAFLPLATPVSDAKVLTGRQKPLTEVAIIIAEIRSRDGFEGVGFSYSKRAGGQGIYAHAKEIADNLLGEDPNDIDKIYTKLLWAGASVGRSGMAVQAISPIDIALWDMKAKRAGLPLAKLLGAHRDSVQCYNTSGGFLHTPLDQVLKNVVISRENGIGGIKLKVGQPNCAEDIRRLTAVREALGDEFPLMVDANQQWDRETAIRMGRKMEQFNLIWIEEPLDAYDIEGHAQLAAALDTPIATGEMLTSFREHEQLILGNASDFVQPDAPRVGGISPFLKIMDLAAKHGRKLAPHFAMEVHLHLSAAYPLEPWLEHFEWLNPLFNEQLELRDGRMWISDRHGLGFTLSEQARRWTQLTCEFGKRP,"[(165, 165), (195, 199), (224, 225), (227, 228), (250, 251), (253, 254), (275, 277), (279, 280), (299, 299), (301, 301), (304, 304), (328, 328)]" +S,O=P([O-])([O-])[O-],MALADISGYLDVLDSVRGFSYLENAREVLRSGEARCLGNPRSEPEYVKALYVIGASRIPVGDGCSHTLEELGVFDISVPGEMVFPSPLDFFERGKPTPLVRSRLQLPNGVRVWLKLEWYNPFSLSVKDRPAVEIISRLSRRVEKGSLVADATSSNFGVALSAVARLYGYRARVYLPGAAEEFGKLLPRLLGAQVIVDPEAPSTVHLLPRVMKDSKNEGFVHVNQFYNDANFEAHMRGTAREIFVQSRRGGLALRGVAGSLGTSGHMSAAAFYLQSVDPSIRAVLVQPAQGDSIPGIRRVETGMLWINMLDISYTLAEVTLEEAMEAVVEVARSDGLVIGPSGGAAVKALAKKAAEGDLEPGDYVVVVPDTGFKYLSLVQNALEGAGDSV,"[(123, 123), (125, 127), (152, 154), (156, 160), (182, 182), (224, 225), (230, 231), (234, 234), (238, 238), (259, 260), (266, 269), (285, 285), (293, 297), (303, 306), (339, 340), (342, 345), (366, 366), (368, 369), (373, 374)]" 
+C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)OC[C@@H]2[C@H]([C@@H]([C@H]([C@H](O2)O[C@@H]3[C@H](O[C@@H]([C@@H]([C@H]3O)O)O)CO)O)O)O[C@@H]4[C@@H]([C@H]([C@@H]([C@H](O4)CO)O)O)O)O)O)O)O,O=P(O)(O)O[C@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,MLDIVELSRLQFALTAMYHFLFVPLTLGMAFLLAIMETVYVLSGKQIYKDMTKFWGKLFGINFALGVATGLTMEFQFGTNWSYYSHYVGDIFGAPLAIEGLMAFFLESTFVGLFFFGWDRLGKVQHMCVTWLVALGSNLSALWILVANGWMQNPIASDFNFETMRMEMVSFSELVLNPVAQVKFVHTVASGYVTGAMFILGISAWYMLKGRDFAFAKRSFAIAASFGMAAVLSVIVLGDESGYEMGDVQKTKLAAIEAEWETQPAPAAFTLFGIPDQEEETNKFAIQIPYALGIIATRSVDTPVIGLKELMVQHEERIRNGMKAYSLLEQLRSGSTDQAVRDQFNSMKKDLGYGLLLKRYTPNVADATEAQIQQATKDSIPRVAPLYFAFRIMVACGFLLLAIIALSFWSVIRNRIGEKKWLLRAALYGIPLPWIAVEAGWFVAEYGRQPWAIGEVLPTAVANSSLTAGDLIFSMVLICGLYTLFLVAELFLMFKFARLGPSSLKTGRYHFEQSSTTTQPAR,"[(14, 18), (20, 23), (65, 66), (69, 70), (73, 73), (182, 185), (187, 190), (233, 234), (237, 239), (389, 392), (394, 397), (436, 436), (439, 440)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MKVARFQKIPNGENETMIPVLTSKKASELPVSEVASILQADLQNGLNKCEVSHRRAFHGWNEFDISEDEPLWKKYISQFKNPLIMLLLASAVISVLMHQFDDAVSITVAILIVVTVAFVQEYRSEKSLEELSKLVPPECHCVREGKLEHTLARDLVPGDTVCLSVGDRVPADLRLFEAVDLSIDESSLTGETTPCSKVTAPQPAATNGDLASRSNIAFMGTLVRCGKAKGVVIGTGENSEFGEVFKMMQAEEAPKTPLQKSMDLLGKQLSFYSFGIIGIIMLVGWLLGKDILEMFTISVSLAVAAIPEGLPIVVTVTLALGVMRMVKKRAIVKKLPIVETLGCCNVICSDKTGTLTKNEMTVTHIFTSDGLHAEVTGVGYNQFGEVIVDGDVVHGFYNPAVSRIVEAGCVCNDAVIRNNTLMGKPTEGALIALAMKMGLDGLQQDYIRKAEYPFSSEQKWMAVKCVHRTQQDRPEICFMKGAYEQVIKYCTTYQSKGQTLTLTQQQRDVYQQEKARMGSAGLRVLALASGPELGQLTFLGLVGIIDPPRTGVKEAVTTLIASGVSIKMITGDSQETAVAIASRLGLYSKTSQSVSGEEIDAMDVQQLSQIVPKVAVFYRASPRHKMKIIKSLQKNGSVVAMTGDGVNDAVALKAADIGVAMGQTGTDVCKEAADMILVDDDFQTIMSAIEEGKGIYNNIKNFVRFQLSTSIAALTLISLATLMNFPNPLNAMQILWINIIMDGPPAQSLGVEPVDKDVIRKPPRNWKDSILTKNLILKILVSSIIIVCGTLFVFWRELRDNVITPRDTTMTFTCFVFFDMFNALSSRSQTKSVFEIGLCSNRMFCYAVLGSIMGQLLVIYFPPLQKVFQTESLSILDLLFLLGLTSSVCIVAEIIKKVERSREKIQKHVSSTSSSFLEV,"[(83, 84), (86, 87), (90, 90), (106, 
106), (109, 110), (113, 113), (266, 266), (270, 270), (273, 274), (298, 302), (305, 305), (307, 307), (309, 312), (350, 350), (352, 354), (358, 358), (456, 456), (622, 622), (625, 625), (641, 643), (645, 647), (649, 653), (661, 661), (664, 666), (669, 669), (702, 702), (706, 706), (710, 710), (713, 713), (733, 737), (739, 741), (743, 747)]" +CCC(=O)C(=O)[O-],CCC=O,MRSKRFEALAKRPVNQDGFVKEWIEEGFIAMESPNDPKPSIKIVNGAVTELDGKPVSDFDLIDHFIARYGINLNRAEEVMAMDSVKLANMLCDPNVKRSEIVPLTTAMTPAKIVEVVSHMNVVEMMMAMQKMRARRTPSQQAHVTNVKDNPVQIAADAAEGAWRGFDEQETTVAVARYAPFNAIALLVGSQVGRPGVLTQCSLEEATELKLGMLGHTCYAETISVYGTEPVFTDGDDTPWSKGFLASSYASRGLKMRFTSGSGSEVQMGYAEGKSMLYLEARCIYITKAAGVQGLQNGSVSCIGVPSAVPSGIRAVLAENLICSSLDLECASSNDQTFTHSDMRRTARLLMQFLPGTDFISSGYSAVPNYDNMFAGSNEDAEDFDDYNVIQRDLKVDGGLRPVREEDVIAIRNKAARALQAVFAGMGLPPITDEEVEAATYAHGSKDMPERNIVEDIKFAQEIINKNRNGLEVVKALAQGGFTDVAQDMLNIQKAKLTGDYLHTSAIIVGDGQVLSAVNDVNDYAGPATGYRLQGERWEEIKNIPGALDPNEID,"[(140, 145), (168, 169), (171, 172), (186, 186), (200, 200), (202, 202), (208, 208), (219, 220), (222, 223), (257, 261), (294, 295), (297, 298), (300, 300), (331, 335), (359, 361), (363, 364), (374, 374)]" +CC(=O)N[C@@H](CCC(N)=O)C(=O)O,NC(=O)CC[C@H](N)C(=O)O,MTSKGPEEEHPSVTLFRQYLRIRTVQPKPDYGAAVAFFEETARQLGLGCQKVEVAPGYVVTVLTWPGTNPTLSSILLNSHTDVVPVFKEHWSHDPFEAFKDSEGYIYARGAQDMKCVSIQYLEAVRRLKVEGHRFPRTIHMTFVPDEEVGGHQGMELFVQRPEFHALRAGFALDEGIANPTDAFTVFYSERSPWWVRVTSTGRPGHASRFMEDTAAEKLHKVVNSILAFREKEWQRLQSNPHLKEGSVTSVNLTKLEGGVAYNVIPATMSASFDFRVAPDVDFKAFEEQLQSWCQAAGEGVTLEFAQKWMHPQVTPTDDSNPWWAAFSRVCKDMNLTLEPEIMPAATDNRYIRAVGVPALGFSPMNRTPVLLHDHDERLHEAVFLRGVDIYTRLLPALASVPALPSDS,"[(19, 19), (26, 26), (78, 79), (81, 84), (110, 112), (114, 116), (118, 118), (121, 121), (144, 144), (146, 147), (149, 150), (173, 174), (176, 177), (187, 187)]" 
+[C-]#N,CC(C)(O)C#N,MASLPVSFAKPDKNGVITCKAIMLKEAKLPGMSYADTVQIIDIQVDPPQNVELRVKMLCASVCRTDILTIEGFMAPTQFPKINGHEGVGIIESMGPDTKNFKVGDVIVAPTLGECQVCSSCRSGRTNFCQNYGANESALEPDGTSRFSYIDSDGKKKLLYYKLGCSTWTQYMVVDSNYATKLNEIAPELPPPHGSILSCAFATGYGAVWLDAAVQEGDSVAIFGVGSVGISAVIAAKELKAKQIIVVDRNEYKLKMAMELGATHCINSEKLPEGVTPSQAVRKLTPKEVGVDASIESSGYDVFMNEAMKAAIHGKAKTVITGEGIYENDRIFFDFKDFLFGGNVVGNVTGRVRIHSDFPGLLRKAQEPVIRAGMDKILGYDAATMKCKYEVDIREGTPALLKALEEVENVDCVKLVIKLNDY,"[(61, 62), (64, 67), (83, 84), (86, 87), (109, 109), (113, 114), (116, 117), (119, 120), (122, 124), (126, 128), (130, 133), (162, 162), (167, 168), (195, 198), (200, 204), (349, 349), (353, 353), (414, 414)]" +C[N+](C)(C)CCOC(=O)C1=CC=CC=C1,C[N+](C)(C)CCO,MHSKVTIICIRFLFWFLLLCMLIGKSHTEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNATKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGFQTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWVQKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYEARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDFLTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFFPGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFFYYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYGNPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKVLEMTGNIDEAEWEWKAGFHRWNNYMMDWKNQFNDYTSKKESCVGL,"[(108, 109), (111, 112), (133, 133), (139, 142), (193, 193), (195, 195), (461, 465), (467, 470), (475, 475), (482, 482)]" +O,CC(C)(N)CO,MVCKVCGQKAQVEMRSRGLALCREHYLDWFVKETERAIRRHRMLLPGERVLVAVSGGKDSLALWDVLSRLGYQAVGLHIELGIGEYSKRSLEVTQAFARERGLELLVVDLKEAYGFGVPELARLSGRVACSACGLSKRYIINQVAVEEGFRVVATGHNLDDEAAVLFGNLLNPQEETLSRQGPVLPEKPGLAARVKPFYRFSEREVLSYTLLRGIRYLHEECPNAKGAKSLLYKEALNLVERSMPGAKLRFLDGFLEKIRPRLDVGEEVALRECERCGYPTTGAVCAFCRMWDAVYRRAKKRKLLPEEVSFRPRVKPLRAG,"[(2, 2), (4, 5), (7, 11), (20, 21), (23, 24), (26, 29), (51, 52), (56, 58), (60, 64), (76, 78), (80, 81), (83, 83), (86, 86), (90, 90), (94, 94), (105, 105), (108, 108), (110, 110), (118, 118), (128, 129), (131, 132), (134, 137), (139, 
141), (154, 155), (157, 160), (162, 165), (198, 198), (203, 203), (206, 206), (210, 210), (212, 212), (218, 218), (220, 221), (234, 234), (272, 273), (275, 276), (278, 281), (284, 285), (287, 288), (290, 293), (311, 311), (313, 316)]" +O,Oc1ccccc1,MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" +O,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@@]21C,MSSAEEKLFMKALKEKFEESPEEKYTKFYIFGGWKQSERKKEFKEWADKIVEERGVPHYNPDIGVPLGQRKLMSYQVSGTDVFVEGDDLHFVNNAAMQQMWDDIRRTVIVGMDTAHRVLERRLGKEVTPETINEYMETLNHALPGGAVVQEHMVEIHPGLTWDCYAKIITGDLELADEIDDKFLIDIEKLFPEEQAEQLIKAIGNRTYQVCRMPTIVGHVCDGATMYRWAAMQIAMSFICAYKIAAGEAAVSDFAFASKHAEVINMGEMLPARRARGENEPGGVPFGVLADCVQTMRKYPDDPAKVALEVIAAGAMLYDQIWLGSYMSGGVGFTQYATAVYTDNILDDYVYYGLEYVEDKYGIAEAEPSMDVVKDVATEVTLYGLEQYERYPAAMETHFGGSQRAAVCAAAAGCSTAFATGHAQAGLNGWYLSQILHKEGHGRLGFYGYALQDQCGAANSLSVRSDEGLPLELRGPNYPNYAMNVGHLGEYAGIVQAAHAARGDAFCVHPVIKVAFADENLVFDFTEPRKEFAKGALREFEPAGERDLIVPAE,"[(147, 149), (151, 153), (227, 228), (231, 232), (254, 258), (260, 261), (263, 263), (271, 272), (274, 275), (319, 319), (333, 335), (337, 339), (402, 403), (445, 446), (448, 450), (485, 485)]" 
+NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MDSKYQCVKLNDGHFMPVLGFGTYAPAEVPKSKALEAVKLAIEAGFHHIDSAHVYNNEEQVGLAIRSKIADGSVKREDIFYTSKLWSNSHRPELVRPALERSLKNLQLDYVDLYLIHFPVSVKPGEEVIPKDENGKILFDTVDLCATWEAMEKCKDAGLAKSIGVSNFNHRLLEMILNKPGLKYKPVCNQVECHPYFNQRKLLDFCKSKDIVLVAYSALGSHREEPWVDPNSPVLLEDPVLCALAKKHKRTPALIALRYQLQRGVVVLAKSYNEQRIRQNVQVFEFQLTSEEMKAIDGLNRNVRYLTLDIFAGPPNYPFSDEY,"[(17, 19), (25, 27), (29, 29), (33, 34), (36, 38), (40, 41), (44, 44), (46, 46), (48, 49), (51, 52), (55, 55), (57, 57), (61, 61), (82, 82), (84, 84), (115, 119), (164, 165), (168, 169), (172, 172), (188, 189), (191, 195), (214, 215), (223, 224), (227, 228), (235, 236), (251, 251), (253, 254), (256, 257), (260, 260), (267, 269), (281, 283), (305, 306), (308, 308), (318, 319)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MNFYSAYQHGFVRVAACTHHTTIGDPAANAASVLDMARACHDDGAALAVFPELTLSGYSIEDVLLQDSLLDAVEDALLDLVTESADLLPVLVVGAPLRHRHRIYNTAVVIHRGAVLGVVPKSYLPTYREFYERRQMAPGDGERGTIRIGGADVAFGTDLLFAASDLPGFVLHVEICEDMFVPMPPSAEAALAGATVLANLSGSPITIGRAEDRRLLARSASARCLAAYVYAAAGEGESTTDLAWDGQTMIWENGALLAESERFPKGVRRSVADVDTELLRSERLRMGTFDDNRRHHRELTESFRRIDFALDPPAGDIGLLREVERFPFVPADPQRLQQDCYEAYNIQVSGLEQRLRALDYPKVVIGVSGGLDSTHALIVATHAMDREGRPRSDILAFALPGFATGEHTKNNAIKLARALGVTFSEIDIGDTARLMLHTIGHPYSVGEKVYDVTFENVQAGLRTDYLFRIANQRGGIVLGTGDLSELALGWSTYGVGDQMSHYNVNAGVPKTLIQHLIRWVISAGEFGEKVGEVLQSVLDTEITPELIPTGEEELQSSEAKVGPFALQDFSLFQVLRYGFRPSKIAFLAWHAWNDAERGNWPPGFPKSERPSYSLAEIRHWLQIFVQRFYSFSQFKRSALPNGPKVSHGGALSPRGDWRAPSDMSARIWLDQIDREVPKG,"[(52, 52), (58, 58), (125, 126), (128, 130), (177, 177), (201, 202), (204, 208), (210, 214), (230, 230), (232, 232), (243, 244), (350, 353), (355, 358), (361, 
365), (374, 377), (396, 399), (410, 412), (423, 423), (452, 455), (457, 461), (466, 470), (472, 474), (476, 479), (481, 484), (486, 489), (494, 500), (502, 503), (505, 505), (510, 510), (513, 514), (517, 517), (537, 538), (541, 542), (557, 557), (560, 561), (564, 564), (627, 627), (630, 634), (636, 641), (659, 660), (662, 663)]" +CC(C)C[C@H](N)C(=O)O,CC(C)C[C@H](NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O,MKSSAAKQTVLCLNRYAVVALPLAIASFAAFGASPASTLWAPTDTKAFVTPAQVEARSAAPLLELAAGETAHIVVSLKLRDEAQLKQLAQAVNQPGNAQFGKFLKRRQFLSQFAPTEAQVQAVVAHLRKNGFVNIHVVPNRLLISADGSAGAVKAAFNTPLVRYQLNGKAGYANTAPAQVPQDLGEIVGSVLGLQNVTRAHPMLKVGERSAAKTLAAGTAKGHNPTEFPTIYDASSAPTAANTTVGIITIGGVSQTLQDLQQFTSANGLASVNTQTIQTGSSNGDYSDDQQGQGEWDLDSQSIVGSAGGAVQQLLFYMADQSASGNTGLTQAFNQAVSDNVAKVINVSLGWCEADANADGTLQAEDRIFATAAAQGQTFSVSSGDEGVYECNNRGYPDGSTYSVSWPASSPNVIAVGGTTLYTTSAGAYSNETVWNEGLDSNGKLWATGGGYSVYESKPSWQSVVSGTPGRRLLPDISFDAAQGTGALIYNYGQLQQIGGTSLASPIFVGLWARLQSANSNSLGFPAASFYSAISSTPSLVHDVKSGNNGYGGYGYNAGTGWDYPTGWGSLDIAKLSAYIRSNGFGH,"[(51, 52), (63, 63), (69, 74), (106, 106), (111, 112), (115, 115), (146, 146), (149, 150), (220, 220), (222, 222), (231, 231), (268, 268)]" +C1=CC(=CC=C1[N+](=O)[O-])O[C@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O)O)O,OC[C@H]1O[C@@H](O[C@@H]2[C@@H](CO)O[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@@H](O)[C@@H]1O,MEKDTKQVDIIFRSKLPDIYIPNHLPLHSYCFENISEFSSRPCLINGANKQIYTYADVELNSRKVAAGLHKQGIQPKDTIMILLPNSPEFVFAFIGASYLGAISTMANPLFTPAEVVKQAKASSAKIIVTQACHVNKVKDYAFENDVKIICIDSAPEGCLHFSVLTQANEHDIPEVEIQPDDVVALPYSSGTTGLPKGVMLTHKGLVTSVAQQVDGENPNLYIHSEDVMLCVLPLFHIYSLNSVLLCGLRVGAAILIMQKFDIVSFLELIQRYKVTIGPFVPPIVLAIAKSPMVDDYDLSSVRTVMSGAAPLGKELEDTVRAKFPNAKLGQGYGMTEAGPVLAMCLAFAKEPFEIKSGACGTVVRNAEMKIVDPKTGNSLPRNQSGEICIRGDQIMKGYLNDPEATARTIDKEGWLYTGDIGYIDDDDELFIVDRLKELIKYKGFQVAPAELEALLLNHPNISDAAVVPMKDEQAGEVPVAFVVRSNGSTITEDEVKDFISKQVIFYKRIKRVFFVDAIPKSPSGKILRKDLRAKLAAGLPN,"[(187, 188), (194, 196), (198, 199), (201, 201), (212, 213), (220, 221), (229, 229), (232, 233), (237, 238), (240, 242), (244, 
245), (247, 248), (258, 259), (261, 262), (280, 281), (283, 283), (306, 308), (310, 313), (317, 317), (329, 330), (333, 335), (337, 343), (345, 346), (348, 348), (358, 364), (386, 388), (399, 399), (402, 402), (405, 405), (408, 409), (417, 419), (421, 422), (432, 434), (436, 436), (438, 440), (442, 442), (445, 445), (447, 448), (474, 477), (507, 508), (520, 522), (524, 525), (527, 528), (533, 533)]" +O=C[C@H](O)[C@@H](O)[C@@H](O)CO,O=C(CO)[C@@H](O)[C@@H](O)CO,MEMKKSGLGTTAIHAGTLKNLYGTLAMPIYQTSTFIFDSAEQGGRRFALEEAGYIYTRLGNPTTTVLENKIAALEEGEAGIAMSSGMGAISSTLWTVLKAGDHVVTDKTLYGCTFALMNHGLTRFGVEVTFVDTSNLEEVKNAMKKNTRVVYLETPANPNLKIVDLEALSKIAHTNPNTLVIVDNTFATPYMQKPLKLGVDIVVHSATKYLNGHGDVIAGLVVTRQELADQIRFVGLKDMTGAVLGPQEAYYIIRGLKTFEIRMERHCKNARTIVDFLNKHPKVEKVYYPGLETHPGYEIAKKQMKDFGAMISFELKGGFEAGKTLLNNLKLCSLAVSLGDTETLIQHPASMTHSPYTKEEREVAGITDGLVRLSVGLENVEDIIADLEQGLEKI,"[(30, 30), (32, 35), (54, 55), (59, 61), (84, 85), (88, 92), (111, 111), (113, 114), (117, 117), (184, 184), (186, 186), (204, 205), (210, 213), (215, 215), (217, 221), (237, 239), (242, 242), (244, 245), (247, 247), (253, 253), (338, 339), (341, 341)]" +CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS,MMTTSLIWGIAIAACCCLWLILGIRRRQTGEPPLENGLIPYLGCALQFGANPLEFLRANQRKHGHVFTCKLMGKYVHFITNPLSYHKVLCHGKYFDWKKFHFATSAKAFGHRSIDPMDGNTTENINDTFIKTLQGHALNSLTESMMENLQRIMRPPVSSNSKTAAWVTEGMYSFCYRVMFEAGYLTIFGRDLTRRDTQKAHILNNLDNFKQFDKVFPALVAGLPIHMFRTAHNAREKLAESLRHENLQKRESISELISLRMFLNDTLSTFDDLEKAKTHLVVLWASQANTIPATFWSLFQMIRNPEAMKAATEEVKRTLENAGQKVSLEGNPICLSQAELNDLPVLDSIIKESLRLSSASLNIRTAKEDFTLHLEDGSYNIRKDDIIALYPQLMHLDPEIYPDPLTFKYDRYLDENGKTKTTFYCNGLKLKYYYMPFGSGATICPGRLFAIHEIKQFLILMLSYFELELIEGQAKCPPLDQSRAGLGILPPLNDIEFKYKFKHL,"[(134, 134), (437, 438), (442, 443), (445, 447)]" 
+CC(=O)CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,C[C@](CC(=O)O)(CC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O)O,MARSRGERTPAARRITSRNARFQQWQALLGNRNKRTRAGEFLVMGVRPISLAVEHGWPVRTLLYDGQRELSKWARELLRTVRTEQIAMAPDLLMELGEKNEAPPEVVAVVEMPADDLDRIPVREDFLGVLFDRPTSPGNIGSIIRSADALGAHGLIVAGHAADVYDPKSVRSSTGSLFSLPAVRVPSPGEVMDWVEARRAAGTPIVLVGTDEHGDCDVFDFDFTQPTLLLIGNETAGLSNAWRTLCDYTVSIPMAGSASSLNAANAATAILYEAVRQRISGRTATTP,"[(131, 132), (208, 209), (212, 214), (217, 219), (229, 231), (233, 234), (237, 238), (243, 243), (249, 251), (255, 256), (259, 261), (263, 263), (266, 266), (269, 270)]" +N[C@@H](CC(=O)[O-])C(=O)[O-],O=P([O-])([O-])OP(=O)([O-])[O-],MPSASASRKSQEKPREIMDAAEDYAKERYGISSMIQSQEKPDRVLVRVRDLTIQKADEVVWVRARVHTSRAKGKQCFLVLRQQQFNVQALVAVGDHASKQMVKFAANINKESIVDVEGVVRKVNQKIGSCTQQDVELHVQKIYVISLAEPRLPLQLDDAVRPEAEGEEEGRATVNQDTRLDNRVIDLRTSTSQAVFRLQSGICHLFRETLINKGFVEIQTPKIISAASEGGANVFTVSYFKNNAYLAQSPQLYKQMCICADFEKVFSIGPVFRAEDSNTHRHLTEFVGLDIEMAFNYHYHEVMEEIADTMVQIFKGLQERFQTEIQTVNKQFPCEPFKFLEPTLRLEYCEALAMLREAGVEMGDEDDLSTPNEKLLGHLVKEKYDTDFYILDKYPLAVRPFYTMPDPRNPKQSNSYDMFMRGEEILSGAQRIHDPQLLTERALHHGIDLEKIKAYIDSFRFGAPPHAGGGIGLERVTMLFLGLHNVRQTSMFPRDPKRLTP,"[(199, 199), (202, 203), (251, 251), (254, 255), (272, 272), (284, 289), (291, 292), (373, 373), (399, 399), (402, 404), (416, 419), (422, 423), (425, 426), (428, 430), (432, 433), (466, 471), (476, 480), (486, 486), (489, 490)]" 
+C[C@@H](C(=O)N[C@@H](CCC(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)O)OP(=O)(O)OC[C@H]([C@H]([C@H](CN1C2=CC(=O)C=CC2=CC3=C1NC(=O)NC3=O)O)O)O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MSTLRLLISDSYDPWFNLAVEECIFRQMPATQRVLFLWRNADTVVIGRAQNPWKECNTRRMEEDNVRLARRSSGGGAVFHDLGNTCFTFMAGKPEYDKTISTSIVLNALNALGVSAEASGRNDLVVKTVEGDRKVSGSAYRETKDRGFHHGTLLLNADLSRLANYLNPDKKKLAAKGITSVRSRVTNLTELLPGITHEQVCEAITEAFFAHYGERVEAEIISPNKTPDLPNFAETFARQSSWEWNFGQAPAFSHLLDERFTWGGVELHFDVEKGHITRAQVFTDSLNPAPLEALAGRLQGCLYRADMLQQECEALLVDFPEQEKELRELSAWMAGAVR,"[(21, 21), (39, 45), (47, 47), (49, 49), (54, 54), (65, 65), (69, 70), (72, 75), (80, 81), (83, 83), (85, 85), (123, 125), (132, 133), (135, 136), (140, 140), (147, 147), (151, 153), (157, 157), (160, 161), (164, 165), (179, 179), (185, 185), (187, 187), (243, 243)]" +O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,C1=CC(=C(C=C1[N+](=O)[O-])[N+](=O)[O-])SC[C@@H](C(=O)NCC(=O)O)NC(=O)CC[C@@H](C(=O)O)N,MPNYKLTYFNMRGRAEIIRYIFAYLDIQYEDHRIEQADWPEIKSTLPFGKIPILEVDGLTLHQSLAIARYLTKNTDLAGNTEMEQCHVDAIVDTLDDFMSCFPWAEKKQDVKEQMFNELLTYNAPHLMQDLDTYLGGREWLIGNSVTWADFYWEICSTTLLVFKPDLLDNHPRLVTLRKKVQAIPAVANWIKRRPQTKL,"[(6, 7), (9, 13), (15, 19), (32, 38), (40, 43), (46, 48), (52, 53), (61, 62), (65, 69), (96, 96), (99, 100), (104, 104), (152, 152)]" 
+O,O=P([O-])([O-])[O-],MSSSNVEVFIPVSQGNTNGFPATASNDLKAFTEGAVLSFHNICYRVKLKSGFLPCRKPVEKEILSNINGIMKPGLNAILGPTGGGKSSLLDVLAARKDPSGLSGDVLINGAPRPANFKCNSGYVVQDDVVMGTLTVRENLQFSAALRLATTMTNHEKNERINRVIQELGLDKVADSKVGTQFIRGVSGGERKRTSIGMELITDPSILFLDEPTTGLDSSTANAVLLLLKRMSKQGRTIIFSIHQPRYSIFKLFDSLTLLASGRLMFHGPAQEALGYFESAGYHCEAYNNPADFFLDIINGDSTAVALNREEDFKATEIIEPSKQDKPLIEKLAEIYVNSSFYKETKAELHQLSGGEKKKKITVFKEISYTTSFCHQLRWVSKRSFKNLLGNPQASIAQIIVTVVLGLVIGAIYFGLKNDSTGIQNRAGVLFFLTTNQCFSSVSAVELFVVEKKLFIHEYISGYYRVSSYFLGKLLSDLLPMRMLPSIIFTCIVYFMLGLKPKADAFFVMMFTLMMVAYSASSMALAIAAGQSVVSVATLLMTICFVFMMIFSGLLVNLTTIASWLSWLQYFSIPRYGFTALQHNEFLGQNFCPGLNATGNNPCNYATCTGEEYLVKQGIDLSPWGLWKNHVALACMIVIFLTIAYLKLLFLKKYS,"[(63, 64), (78, 79), (88, 91), (123, 123), (126, 126), (128, 128), (136, 136), (170, 170), (173, 173), (176, 180), (182, 183), (191, 194), (209, 210), (212, 216), (241, 242), (244, 245), (258, 263), (295, 295), (298, 299)]" +Cc1cn([C@H]2C[C@H](O)[C@@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]c1=O,Cc1cn([C@H]2C[C@H](O)[C@@H](COP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]c1=O,MKAFILAAGSGERLEPITHTRPKAFVPILSKPLIEYQIEYLRKCGIRDITVIVSSKNKEYFEKKLKEISIVTQKDDIKGTGAAILSAKFNDEALIIYGDLFFSNEKEICNIITLKENAIIGVKVSNPKDYGVLVLDNQNNLSKIIEKPEIPPSNLINAGIYKLNSDIFTYLDKISISERGELELTDAINLMAKDHRVKVIEYEGYWMDIGKPWNIIDVNKWALDNLVFSQNLGNVEDNVKIKGKVIIEEDAEIKSGTYIEGPVYIGKGSEIGPNSYLRPYTILVEKNKIGASVEVKESVIMEGSKIPHLSYVGDSVIAEDVNFGAGTLIANLRFDEKEVKVNVKGKRISSGRRKLGAFIGGHVRTGINVTILPGVKIGAYARIYPGAVVNRDVGYGEFFKV,"[(5, 7), (14, 15), (18, 18), (23, 23), (37, 37), (52, 57), (71, 72), (74, 75), (77, 78), (81, 84), (86, 86), (95, 96), (98, 100), (119, 122), (129, 130), (132, 133), (144, 145), (147, 148), (155, 156), (158, 161), (181, 185), (206, 206), (208, 212), (343, 344)]" 
+N[C@@H](Cc1ccncc1)C(=O)O,N,MKTLSQAQSKTSSQQFSFTGNSSANVIIGNQKLTINDVARVARNGTLVSLTNNTDILQGIQASCDYINNAVESGEPIYGVTSGFGGMANVAISREQASELQTNLVWFLKTGAGNKLPLADVRAAMLLRANSHMRGASGIRLELIKRMEIFLNAGVTPYVYEFGSIGASGDLVPLSYITGSLIGLDPSFKVDFNGKEMDAPTALRQLNLSPLTLLPKEGLAMMNGTSVMTGIAANCVYDTQILTAIAMGVHALDIQALNGTNQSFHPFIHNSKPHPGQLWAADQMISLLANSQLVRDELDGKHDYRDHELIQDRYSLRCLPQYLGPIVDGISQIAKQIEIEINSVTDNPLIDVDNQASYHGGNFLGQYVGMGMDHLRYYIGLLAKHLDVQIALLASPEFSNGLPPSLLGNRERKVNMGLKGLQICGNSIMPLLTFYGNSIADRFPTHAEQFNQNINSQGYTSATLARRSVDIFQNYVAIALMFGVQAVDLRTYKKTGHYDARACLSPATERLYSAVRHVVGQKPTSDRPYIWNDNEQGLDEHIARISADIAAGGVIVQAVQDILPCLH,"[(128, 128), (171, 171), (174, 174), (216, 216), (218, 222), (224, 225), (263, 263), (295, 295), (310, 310), (312, 316), (318, 321), (345, 346), (348, 349), (358, 361), (363, 363), (405, 405), (416, 418), (420, 423), (446, 447), (449, 450), (452, 453)]" +CSCC[C@H](N)C(=O)O,CS,MSVHKTNDAFKVLMNSAKEPIVEDIPKKYRKQSFRDNLKVYIESPESYKNVIYYDDDVVLVRDMFPKSKMHLLLMTRDPHLTHVHPLEIMMKHRSLVEKLVSYVQGDLSGLIFDEARNCLSQQLTNEALCNYIKVGFHAGPSMNNLHLHIMTLDHVSPSLKNSAHYISFTSPFFVKIDTPTSNLPTRGTLTSLFQEDLKCWRCGETFGRHFTKLKAHLQEEYDDWLDKSVSM,"[(166, 166), (198, 199), (201, 202), (204, 205), (207, 207), (213, 216), (218, 220), (222, 226)]" +CCCCCCCC/C=C\CCCCCCCC(=O)O,CC(C)(N)CO,MRRLSSWRKMATAEKQKHDGRVKIGHYILGDTLGVGTFGKVKVGKHELTGHKVAVKILNRQKIRSLDVVGKIRREIQNLKLFRHPHIIKLYQVISTPSDIFMVMEYVSGGELFDYICKNGRLDEKESRRLFQQILSGVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYAAPEVISGRLYAGPEVDIWSSGVILYALLCGTLPFDDDHVPTLFKKICDGIFYTPQYLNPSVISLLKHMLQVDPMKRAAIKDIREHEWFKQDLPKYLFPEDPSYSSTMIDDEALKEVCEKFECSEEEVLSCLYNRNHQDPLAVAYHLIIDNRRIMNEAKDFYLATSPPDSFLDDHHLTRPHPERVPFLVAETPRARHTLDELNPQKSKHQGVRKAKWHLGIRSQSRPNDIMAEVCRAIKQLDYEWKVVNPYYLRVRRKNPVTSTFSKMSLQLYQVDSRTYLLDFRSIDDEITEAKSGTATPQRSGSISNYRSCQRSDSDAEAQGKPSDVSLTSSVTSLDSSPVDVAPRPGSHTIEFFEMCANLIKILAQ,"[(12, 12), (15, 19), (31, 32), (42, 44), (52, 55), (57, 59), (80, 80), (95, 95)]" 
+[C@H](C(=O)O)(NC(=O)N)O,O=C=O,MESLKRFLCSIALLLISLLLPSSLAQQQQHESIRTMEDFSGYPIHEPGQFGSINLASSLSVDAPGLQNQIDELSSFSDAPSPSVTRVLYTDKDVSARRYVKNLMALAGLTVREDAVGNIFGKWDGLEPNLPAVATGSHIDAIPYSGKYDGVVGVLGAIEAINVLKRSGFKPKRSLEIILFTSEEPTRFGISCLGSRLLAGSKELAEALKTTVVDGQNVSFIEAARSAGYAEDKDDDLSSVFLKKGSYFAFLELHIEQGPILEDEGLDIGVVTAIAAPASLKVEFEGNGGHAGAVLMPYRNDAGLAAAELALAVEKHVLESESIDTVGTVGILELHPGAINSIPSKSHLEIDTRDIDEARRNTVIKKIQESANTIAKKRKVKLSEFKIVNQDPPALSDKLVIKKMAEAATELNLSHKMMISRAYHDSLFMARISPMGMIFIPCYKGYSHKPEEYSSPEDMANGVKVLSLTLAKLSLD,"[(136, 137), (139, 142), (144, 148), (150, 152), (154, 154), (179, 180), (182, 183), (185, 187), (190, 192), (252, 253), (255, 257), (423, 425), (437, 440), (446, 447), (449, 450), (452, 452)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MVKDTYISSASKTPPMERTVRVTGMTCAMCVKSIETAVGSLEGVEEVRVNLATETAFIRFDEKRIDFETIKRVIEDLGYGVVDEQAAVSAEVEHLSRMKRKLYVAAFAGVLLLFLAHFISLPYEDFVQLLIALPAIFYSGSSIFKAAFSALRRRTLNMDVMYSMGVGAAFLASVLSTAGVLPREYSFYETSVLLLAFLLLGRTLEARAKSRTGEAIKKLVGLQAKTAVVIRDGKEIAVPVEEVAVGDIVIVRPGEKIPVDGVVVEGESYVDESMISGEPVPVLKSKGDEVFGATINNTGVLKIRATRVGGETLLAQIVKLVEDAMGSKPPIQRLADKVVAYFIPTVLLVAISAFIYWYFIAHAPLLFAFTTLIAVLVVACPCAFGLATPTALTVGMGKGAELGILIKNADALEVAEKVTAVIFDKTGTLTKGKPEVTDLVPLNGDERELLRLAAIAERRSEHPIAEAIVKKALEHGIELGEPEKVEVIAGEGVVADGILVGNKRLMEDFGVAVSNEVELALEKLEREAKTAVIVARNGRVEGIIAVSDTLKESAKPAVQELKRMGIKVGMITGDNWRSAEAISRELNLDLVIAEVLPHQKSEEVKKLQAKEVVAFVGDGINDAPALAQADLGIAVGSGSDVAVESGDIVLIRDDLRDVVAAIQLSRKTMSKIKQNIFWALIYNVILIPAAAGLLYPIFGVVFRPEFAGLAMAMSSVSVVANSLLLRNYVPPIRRGGDSVEKIVLELSGLSCHHCVARVKKALEEAGAKVEKVDLNEAVVAGNKEDVDKYIKAVEAAGYQAKLRS,"[(223, 223), (240, 240), (258, 260), (266, 267), (269, 269), (272, 274), (278, 278), (281, 289), (291, 293), (297, 298), (309, 309), (312, 312), (314, 315), (318, 318)]" 
+O=C(O)[C@@H](CO)OP(=O)(O)O,O=C(O)[C@H](O)COP(=O)(O)O,MSKKPVALIILDGFALRDETYGNAVAQANKPNFDRYWNEYPHTTLKACGEAVGLPEGQMGNSEVGHLNIGAGRIVYQSLTRINIAIREGEFDRNETFLAAMNHVKQHGTSLHLFGLLSDGGVHSHIHHLYALLRLAAKEGVKRVYIHGFLDGRDVGPQTAPQYIKELQEKIKEYGVGEIATLSGRYYSMDRDKRWDRVEKAYRAMVYGEGPTYRDPLECIEDSYKHGIYDEFVLPSVIVREDGRPVATIQDNDAIIFYNFRPDRAIQISNTFTNEDFREFDRGPKHPKHLFFVCLTHFSETVKGYVAFKPTNLDNTIGEVLSQHGLRQLRIAETEKYPHVTFFMSGGREEKFPGEDRILINSPKVPTYDLKPEMSAYEVTDALLKEIEADKYDAIILNYANPDMVGHSGKLEPTIKAVEAVDECLGKVVDAILAKGGIAIITADHGNADEVLTPDGKPQTAHTTNPVPVIVTKKGIKLRDGGILGDLAPTMLDLLGLPQPKEMTGKSLIVK,"[(10, 11), (13, 15), (47, 47), (59, 61), (63, 67), (69, 69), (122, 122), (153, 153), (231, 231), (261, 261), (336, 336), (339, 339), (343, 343), (398, 402), (404, 406), (408, 409), (442, 443), (446, 447), (459, 461), (463, 464), (467, 467), (484, 484)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,C[C@H](C(=O)OP(=O)([O-])OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O)[NH3+],MKLLEQIEKWAAETPDQTAFVWRDAKITYKQLKEDSDALAHWISSEYPDDRSPIMVYGHMQPEMIINFLGCVKAGHAYIPVDLSIPADRVQRIAENSGAKLLLSATAVTVTDLPVRIVSEDNLKDIFFTHKGNTPNPEHAVKGDENFYIIYTSGSTGNPKGVQITYNCLVSFTKWAVEDFNLQTGQVFLNQAPFSFDLSVMDIYPSLVTGGTLWAIDKDMIARPKDLFASLEQSDIQVWTSTPSFAEMCLMEASFSESMLPNMKTFLFCGEVLPNEVARKLIERFPKATIMNTYGPTEATVAVTGIHVTEEVLDQYKSLPVGYCKSDCRLLIMKEDGTIAPDGEKGEIVIVGPSVSVGYLGSPELTEKAFTMIDGERAYKTGDAGYVENGLLFYNGRLDFQIKLHGYRMELEEIEHHLRACSYVEGAVIVPIKKGEKYDYLLAVVVPGEHSFEKEFKLTSAIKKELNERLPNYMIPRKFMYQSSIPMTPNGKVDRKKLLSEVTA,"[(89, 89), (148, 148), (150, 151), (154, 154), (159, 160), (162, 162), (164, 164), (172, 172), (175, 175), (180, 180), (195, 196), (198, 201), (242, 242), (244, 244), (268, 273), (290, 291), (298, 300), (302, 306), (308, 308), (318, 321), (345, 348), (350, 350), (359, 359), (379, 379), (381, 382), (384, 386), (392, 393), (398, 401), (410, 410), (412, 413), (486, 488), (490, 491), (493, 494)]" 
+Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,O=P([O-])([O-])[O-],MELSEGELSHTSSSSSFVPVDQRQLQDAIQIIDENKHFNTGILDYINKTSPADVGNNYHIISVFGSQSTGKSTLLNRLFNTNFDVMDESNRQQTTKGIWLAYSPVVSTTLGHTTSKSNILVMDVEGTDGRERGEDQDFERKAALFALSTSEVLIINIWETQVGLYQGANMGLLKTVFEVNLSLFGKSKLETHNDHKVLLLIVIRDHVGVTPVESLAKTFTSDLQNMWSSLAKPAELEHLQFADFFDVTFHALNHKVLQPKEFGEGINRLDDRLVVSNELFKPEYHHDVPIDGWTMYAERCWEQIETNKDLDLPTQQILVAQFKCDEIVESVFQEFLAKYQHHFKEVDAAPDFEELGALFADLRQDAFEDYDASASRYNKAVYEQKRKKLRWLINDKLKEVFDVHAKNLCNTLLEKFEKDLVALKGKDFAVNVKTLSTKLVEDVNFQVSLMSLQGDLSLDEIILALTKDIDAIVAKQQVIELNSIVNKSVKKLSASLSKSIQFELGDPNEETWDNVLQQFKGVYEKFGGDFGLGTSSTQNQQAIEKFKFKSWCQFYDVTHKLISREKLLALLQDRFDDKFRYDENGLPKLYLNEQDLEKTFAVAKQHALQVLPILTFAKLADGSEIVPDYDIFDSKLREQFLGGYDDSDDEEDHCFAEIITEQEKSEVLAKFKKEVDAKYIETKRSIVQHITQIPYYIYLIILVLGWNEFMAIIRNPLFFSLSIVLGATVYVLYYLGLLRPALVVAQRTMDEVIVMAKTKLREVLIDDHEVTGRQLNKMAGSKENIELDDM,"[(63, 64), (73, 77), (83, 86), (123, 128), (156, 156), (158, 158), (169, 169), (172, 172), (204, 204)]" +O=C1OC2(c3ccc(O)cc3Oc3cc(O)ccc32)c2ccccc21,C(C(=N)C(=O)O)C(=O)O,MIYIIGSGIAGLSAGVALRRAGKKVTLISKRIDGGSTPIAKGGVAASVGSDDSPELHAQDTIRVGDGLCDVKTVNYVTSEAKNVIETFESWGFEFEEDLRLEGGHTKRRVLHRTDETGREIFNFLLKLAREEGIPIIEDRLVEIRVKDGKVTGFVTEKRGLVEDVDKLVLATGGYSYLYEYSSTQSTNIGDGMAIAFKAGTILADMEFVQFHPTVTSLDGEVFLLTETLRGEGAQIINENGERFLFNYDKRGELAPRDILSRAIYIEMLKGHKVFIDLSKIEDFERKFPVVAKYLARHGHNYKVKIPIFPAAHFVDGGIRVNIRGESNIVNLYAIGEVSDSGLHGANRLASNSLLEGLVFGINLPRYVDSSWEGISTDDGIVHSVRISGNKTLSLKEIRRINWENVGIIRNEEKLVKAINTYSSSTQNEAIISYLTALAAEIRKESRGNHFREDYPYKDPNWEKRIYFKLVV,"[(5, 6), (11, 15), (27, 28), (30, 35), (38, 41), (44, 45), (102, 102), (110, 112), (117, 119), (121, 122), (125, 125), (137, 140), (171, 176), (184, 184), (187, 190), (192, 196), (313, 315), (317, 317), (335, 336), (338, 339), (344, 344), (348, 349), (351, 352), (355, 358), (361, 361), (428, 428)]" 
+O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MNSPGGRGKKKGSGGASNPVPPRPPPPCLAPAPPAAGPAPPPESPHKRNLYYFSYPLFVGFALLRLVAFHLGLLFVWLCQRFSRALMAAKRSSGAAPAPASASAPAPVPGGEAERVRVFHKQAFEYISIALRIDEDEKAGQKEQAVEWYKKGIEELEKGIAVIVTGQGEQCERARRLQAKMMTNLVMAKDRLQLLEKMQPVLPFSKSQTDVYNDSTNLACRNGHLQSESGAVPKRKDPLTHTSNSLPRSKTVMKTGSAGLSGHHRAPSYSGLSMVSGVKQGSGPAPTTHKGTPKTNRTNKPSTPTTATRKKKDLKNFRNVDSNLANLIMNEIVDNGTAVKFDDIAGQDLAKQALQEIVILPSLRPELFTGLRAPARGLLLFGPPGNGKTMLAKAVAAESNATFFNISAASLTSKYVGEGEKLVRALFAVARELQPSIIFIDEVDSLLCERREGEHDASRRLKTEFLIEFDGVQSAGDDRVLVMGATNRPQELDEAVLRRFIKRVYVSLPNEETRLLLLKNLLCKQGSPLTQKELAQLARMTDGYSGSDLTALAKDAALGPIRELKPEQVKNMSASEMRNIRLSDFTESLKKIKRSVSPQTLEAYIRWNKDFGDTTV,"[(114, 115), (118, 119), (121, 122), (124, 124), (150, 150), (153, 153), (155, 155), (157, 157), (189, 189), (192, 192)]" +O=P([O-])([O-])[O-],NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,MALVRALVCCLLTAWHCRSGLGLPVAPAGGRNPPPAIGQFWHVTDLHLDPTYHITDDHTKVCASSKGANASNPGPFGDVLCDSPYQLILSAFDFIKNSGQEASFMIWTGDSPPHVPVPELSTDTVINVITNMTTTIQSLFPNLQVFPALGNHDYWPQDQLPVVTSKVYNAVANLWKPWLDEEAISTLRKGGFYSQKVTTNPNLRIISLNTNLYYGPNIMTLNKTDPANQFEWLESTLNNSQQNKEKVYIIAHVPVGYLPSSQNITAMREYYNEKLIDIFQKYSDVIAGQFYGHTHRDSIMVLSDKKGSPVNSLFVAPAVTPVKSVLEKQTNNPGIRLFQYDPRDYKLLDMLQYYLNLTEANLKGESIWKLEYILTQTYDIEDLQPESLYGLAKQFTILDSKQFIKYYNYFFVSYDSSVTCDKTCKAFQICAIMNLDNISYADCLKQLYIKHNY,"[(43, 44), (46, 46), (48, 49), (82, 83), (88, 88), (108, 109), (111, 114), (148, 150), (152, 154), (157, 160), (208, 208), (210, 211), (214, 214), (250, 251), (253, 254), (266, 266), (291, 292), (294, 294), (296, 297), (316, 320), (322, 322), (331, 331), (410, 410)]" 
+NCCC[C@H](N)C(=O)O,O=P([O-])([O-])[O-],MARTVVLITGCSSGIGLHLAVRLASDPSQSFKVYATLRDLKTQGRLWEAARALACPPGSLETLQLDVRDSKSVAAARERVTEGRVDVLVCNAGLGLLGPLEALGEDAVASVLDVNVVGTVRMLQAFLPDMKRRGSGRVLVTGSVGGLMGLPFNDVYCASKFALEGLCESLAVLLLPFGVHLSLIECGPVHTAFMEKVLGSPEEVLDRTDIHTFHRFYQYLAHSKQVFREAAQNPEEVAEVFLTALRAPKPTLRYFTTERFLPLLRMRLDDPSGSNYVTAMHREVFGDVPAKAEAGAEAGGGAGPGAEDEAGRGAVGDPELGDPPAAPQ,"[(1, 9), (39, 42), (44, 45), (48, 49), (52, 55), (57, 65), (67, 68), (72, 72), (78, 81), (88, 88), (90, 91), (112, 113), (116, 117), (120, 121), (140, 140), (144, 148), (155, 159), (161, 164), (185, 185), (188, 188), (190, 190), (233, 234), (236, 238), (240, 241), (244, 245), (266, 266)]" +O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MFLKVRAEKRLGNFRLNVDFEMGRDYCVLLGPTGAGKSVFLELIAGIVKPDRGEVRLNGADITPLPPERRGIGFVPQDYALFPHLSVYRNIAYGLRNVERVERDRRVREMAEKLGIAHLLDRKPARLSGGERQRVALARALVIQPRLLLLDEPLSAVDLKTKGVLMEELRFVQREFDVPILHVTHDLIEAAMLADEVAVMLNGRIVEKGKLKELFSAKNGEVAEFLSARNLLLKVSKILD,"[(14, 14), (16, 16), (29, 30), (39, 42), (77, 77), (151, 152), (183, 185), (200, 204), (225, 225)]" 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=P([O-])([O-])[O-],MELEEDLKGRADKNFSKMGKKSKKEKKEKKPAVSVLTMFRYAGWLDRLYMLVGTLAAIIHGVALPLMMLIFGDMTDSFASVGNVSKNSTNMSEADKRAMFAKLEEEMTTYAYYYTGIGAGVLIVAYIQVSFWCLAAGRQIHKIRQKFFHAIMNQEIGWFDVHDVGELNTRLTDDVSKINEGIGDKIGMFFQAMATFFGGFIIGFTRGWKLTLVILAISPVLGLSAGIWAKILSSFTDKELHAYAKAGAVAEEVLAAIRTVIAFGGQKKELERYNNNLEEAKRLGIKKAITANISMGAAFLLIYASYALAFWYGTSLVISKEYSIGQVLTVFFSVLIGAFSVGQASPNIEAFANARGAAYEVFKIIDNKPSIDSFSKSGHKPDNIQGNLEFKNIHFSYPSRKEVQILKGLNLKVKSGQTVALVGNSGCGKSTTVQLMQRLYDPLDGMVSIDGQDIRTINVRYLREIIGVVSQEPVLFATTIAENIRYGREDVTMDEIEKAVKEANAYDFIMKLPHQFDTLVGERGAQLSGGQKQRIAIARALVRNPKILLLDEATSALDTESEAVVQAALDKAREGRTTIVIAHRLSTVRNADVIAGFDGGVIVEQGNHDELMREKGIYFKLVMTQTAGNEIELGNEACKSKDEIDNLDMSSKDSGSSLIRRRSTRKSICGPHDQDRKLSTKEALDEDVPPASFWRILKLNSTEWPYFVVGIFCAIINGGLQPAFSVIFSKVVGVFTNGGPPETQRQNSNLFSLLFLILGIISFITFFLQGFTFGKAGEILTKRLRYMVFKSMLRQDVSWFDDPKNTTGALTTRLANDAAQVKGATGSRLAVIFQNIANLGTGIIISLIYGWQLTLLLLAIVPIIAIAGVVEMKMLSGQALKDKKELEGSGKIATEAIENFRTVVSLTREQKFETMYAQSLQIPYRNAMKKAHVFGITFSFTQAMMYFSYAACFRFGAYLVTQQLMTFENVLLVFSAIVFGAMAVGQVSSFAPDYAKATVSASHIIRIIEKTPEIDSYSTQGLKPNMLEGNVQFSGVVFNYPTRPSIPVLQGLSLEVKKGQTLALVGSSGCGKSTVVQLLERFYDPMAGSVFLDGKEIKQLNVQWLRAQLGIVSQEPILFDCSIAENIAYGDNSRVVSYEEIVRAAKEANIHQFIDSLPDKYNTRVGDKGTQLSGGQKQRIAIARALVRQPHILLLDEATSALDTESEKVVQEALDKAREGRTCIVIAHRLSTIQNADLIVVIQNGKVKEHGTHQQLLAQKGIYFSMVSVQAGAKRS,"[(397, 397), (405, 406), (409, 409), (421, 422), (431, 435), (471, 471), (551, 552), (581, 583), (596, 601), (617, 617), (621, 621), (1040, 1040), (1048, 1049), (1052, 1052), (1064, 1065), (1074, 1078), (1196, 1197), (1226, 1228), (1241, 1245), (1262, 1262), (1266, 1266)]" 
+C1=CN(C(=O)NC1=O)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OC3[C@@H]([C@H]([C@H]([C@H](O3)CO)O)O)O)O)O,O=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,MIIDRLLQRSHSHLPILQATFGLERESLRIHQPTQRVAQTPHPKTLGSRNYHPYIQTDYSEPQLELITPIAKDSQEAIRFLKAISDVAGRSINHDEYLWPLSMPPKVREEDIQIAQLEDAFEYDYRKYLEKTYGKLIQSISGIHYNLGLGQELLTSLFELSQADNAIDFQNQLYMKLSQNFLRYRWLLTYLYGASPVAEEDFLDQKLNNPVRSLRNSHLGYVNHKDIRISYTSLKDYVNDLENAVKSGQLIAEKEFYSPVRLRGSKACRNYLEKGITYLEFRTFDLNPFSPIGITQETVDTVHLFLLALLWIDSSSHIDQDIKEANRLNDLIALSHPLEKLPNQAPVSDLVDAMQSVIQHFNLSPYYQDLLESVKRQIQSPELTVAGQLLEMIEGLSLETFGQRQGQIYHDYAWEAPYALKGYETMELSTQLLLFDVIQKGVNFEVLDEQDQFLKLWHNSHIEYVKNGNMTSKDNYIVPLAMANKVVTKKILDEKHFPTPFGDEFTDRKEALNYFSQIQDKPIVVKPKSTNFGLGISIFKTSANLASYEKAIDIAFTEDSAILVEEYIEGTEYRFFVLEGDCIAVLLRVAANVVGDGIHTISQLVKLKNQNPLRGYDHRSPLEVIELGEVEQLMLEQQGYTVNSIPPEGTKIELRRNSNISTGGDSIDVTNTMDPTYKQLAAEMAEAMGAWVCGVDLIIPNATQAYSKDKKNATCIELNFNPLMYMHTYCQEGPGQSITPRILAKLFPEL,"[(482, 482), (499, 508), (511, 515), (575, 576), (584, 588), (630, 630), (655, 657), (659, 659), (662, 662), (669, 669), (673, 673), (692, 695), (697, 700), (702, 702), (714, 716), (718, 718), (720, 723), (727, 727)]" +CC(C)[C@@H](N)C(=O)O,CC(C)[C@H](N)C(=O)O,MGKLDKASKLIDEENKYYARSARINYYNLVIDHAHGATLVDVDGNKYIDLLASASAINVGHTHEKVVKAIADQAQKLIHYTPAYFHHVPGMELSEKLAKIAPGNSPKMVSFGNSGSDANDAIIKFARAYTGRQYIVSYMGSYHGSTYGSQTLSGSSLNMTRKIGPMLPSVVHVPYPDSYRTYPGETEHDVSLRYFNEFKKPFESFLPADETACVLIEPIQGDGGIIKAPEEYMQLVYKFCHEHGILFAIDEVNQGLGRTGKMWAIQQFKDIEPDLMSVGKSLASGMPLSAVIGKKEVMQSLDAPAHLFTTAGNPVCSAASLATLDVIEYEGLVEKSATDGAYAKQRFLEMQQRHPMIGDVRMWGLNGGIELVKDPKTKEPDSDAATKVIYYAFAHGVVIITLAGNILRFQPPLVIPREQLDQALQVLDDAFTAVENGEVTIPKDTGKIGW,"[(54, 55), (79, 79), (81, 83), (113, 114), (117, 120), (140, 141), (143, 146), (149, 149), (153, 155), (159, 159), (215, 218), (221, 222), (248, 249), (254, 255), (258, 258), (262, 265), (276, 281), (289, 289), (307, 308), (310, 311), (366, 366), (408, 408), (410, 410)]" 
+N,N[C@@H](Cc1ccccc1)C(=O)O,MDKLRVAVVGYGNVGRYALEAVQAAPDMELVGVVRRKVLAATPPELTGVRVVTDISQLEGVQGALLCVPTRSVPEYAEAMLRRGIHTVDSYDIHGDLADLRRRLDPVAREHGAAAVISAGWDPGTDSIIRALLEFMAPKGITYTNFGPGMSMGHSVAVKAIPGVRDALSMTIPAGMGVHKRAVYVELEPGADFAEVERAIKTDPYFVRDETRVTQVESVSALMDVGHGVVMERKGVSGATHNQLFRFEMRINNPALTAQVMVAALRAAARQKPGCYTMIEIPVIDYLPGDREAWIRKLV,"[(8, 10), (15, 19), (33, 34), (38, 39), (43, 43), (45, 46), (51, 53), (65, 66), (72, 75), (77, 81), (88, 89), (93, 94), (96, 97), (100, 100), (116, 118), (124, 127), (130, 130), (144, 144), (146, 146), (155, 158), (160, 161), (164, 167), (225, 225), (227, 229), (249, 249), (251, 251), (253, 254), (256, 258), (260, 261), (264, 264), (278, 278)]" +O,O=P([O-])([O-])[O-],MSSLEDIKNETVDLEKIPIEEVFQQLKCSREGLTTQEGEDRIQIFGPNKLEEKKESKLLKFLGFMWNPLSWVMEMAAIMAIALANGDGRPPDWQDFVGIICLLVINSTISFIEENNAGNAAAALMAGLAPKTKVLRDGKWSEQEAAILVPGDIVSIKLGDIIPADARLLEGDPLKVDQSALTGESLPVTKHPGQEVFSGSTCKQGEIEAVVIATGVHTFFGKAAHLVDSTNQVGHFQKVLTAIGNFCICSIAIGMVIEIIVMYPIQRRKYRDGIDNLLVLLIGGIPIAMPTVLSVTMAIGSHRLSQQGAITKRMTAIEEMAGMDVLCSDKTGTLTLNKLSVDKNLVEVFCKGVEKDQVLLFAAMASRVENQDAIDAAMVGMLADPKEARAGIREVHFLPFNPVDKRTALTYIDGSGNWHRVSKGAPEQILELAKASNDLSKKVLSIIDKYAERGLRSLAVARQVVPEKTKESPGAPWEFVGLLPLFDPPRHDSAETIRRALNLGVNVKMITGDQLAIGKETGRRLGMGTNMYPSSALLGTHKDANLASIPVEELIEKADGFAGVFPEHKYEIVKKLQERKHIVGMTGDGVNDAPALKKADIGIAVADATDAARGASDIVLTEPGLSVIISAVLTSRAIFQRMKNYTIYAVSITIRIVFGFMLIALIWEFDFSAFMVLIIAILNDGTIMTISKDRVKPSPTPDSWKLKEIFATGVVLGGYQAIMTVIFFWAAHKTDFFSDTFGVRSIRDNNHELMGAVYLQVSIISQALIFVTRSRSWSFVERPGALLMIAFLIAQLIATLIAVYANWEFAKIRGIGWGWAGVIWLYSIVTYFPLDVFKFAIRYILSGKAWLNLFENKTAFTMKKDYGKEEREAQWALAQRTLHGLQPKEAVNIFPEKGSYRELSEIAEQAKRRAEIARLRELHTLKGHVESVVKLKGLDIETPSHYTV,"[(329, 329), (569, 569), (586, 587), (589, 591), (593, 597), (605, 605), (607, 609), (612, 612)]" 
+[Co+2],[H+],MVKSLQLAHQLKDKKILLIGGGEVGLTRLYKLIPTGCKLTLVSPDLHKSIIPKFGKFIQNEDQPDYREDAKRFINPNWDPTKNEIYEYIRSDFKDEYLDLEDENDAWYIIMTCIPDHPESARIYHLCKERFGKQQLVNVADKPDLCDFYFGANLEIGDRLQILISTNGLSPRFGALVRDEIRNLFTQMGDLALEDAVVKLGELRRGIRLLAPDDKDVKYRMDWARRCTDLFGIQHCHNIDVKRLLDLFKVMFQEQNCSLQFPPRERLLSEYCSS,"[(17, 17), (19, 22), (25, 28), (41, 42), (46, 47), (88, 92), (94, 95), (97, 98), (113, 113), (119, 119), (122, 123), (126, 126), (140, 141)]" +O,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS,MDIYMSRYEEITQQLIFSPKTWLITGVAGFIGSNLLEKLLKLNQVVIGLDNFSTGHQYNLDEVKTLVSTEQWSRFCFIEGDIRDLTTCEQVMKGVDHVLHQAALGSVPRSIVDPITTNATNITGFLNILHAAKNAQVQSFTYAASSSTYGDHPALPKVEENIGNPLSPYAVTKYVNEIYAQVYARTYGFKTIGLRYFNVFGRRQDPNGAYAAVIPKWTAAMLKGDDVYINGDGETSRDFCYIDNVIQMNILSALAKDSAKDNIYNVAVGDRTTLNELSGYIYDELNLIHHIDKLSIKYREFRSGDVRHSQADVTKAIDLLKYRPNIKIREGLRLSMPWYVRFLKG,"[(21, 21), (23, 25), (32, 35), (47, 49), (56, 60), (74, 74), (78, 80), (83, 87), (98, 100), (104, 107), (113, 119), (121, 125), (128, 128), (139, 140), (165, 168), (170, 172), (174, 178), (193, 193), (196, 198), (200, 202), (207, 207), (238, 240), (242, 243), (246, 246), (332, 332), (335, 336), (339, 339)]" +N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,Cl,MASPPCTTEELSPPPGGSLVEYSGGSLRVPDNPVVAFIRGDGVGPEVVESALKVVDAAVKKVYGGSRRIVWWELLAGHLAREKCGELLPKATLEGIRLARVALKGPLETPVGTGYRSLNVAIRQALDLYANIRPVRYYGQPAPHKYADRVDMVIFRENTEDVYAGIEWPHDSPEAARIRRFLAEEFGISIREDAGIGVKPISRFATRRLMERALEWALRNGNTVVTIMHKGNIMKYTEGAFMRWAYEVALEKFREHVVTEQEVQEKYGGVRPEGKILVNDRIADNMLQQIITRPWDYQVIVAPNLNGDYISDAASALVGGIGMAAGMNMGDGIAVAEPVHGTAPKYAGKDLINPSAEILSASLLIGEFMGWREVKSIVEYAIRKAVQSKKVTQDLARHMPGVQPLRTSEYTETLIAYIDEADLNEVLAGKRG,"[(41, 47), (49, 51), (103, 106), (108, 108), (110, 112), (115, 116), (118, 118), (120, 122), (124, 128), (131, 132), (134, 138), (154, 155), (157, 162), (164, 167), (203, 205), (208, 208), (212, 213), (219, 219), (228, 229), (231, 231), (233, 235), (241, 242), (244, 245), (248, 249), (263, 263), (281, 283), 
(285, 287), (290, 293), (300, 304), (306, 307), (309, 313), (315, 315), (325, 333), (335, 336), (338, 339), (347, 352), (354, 356), (359, 360), (363, 363), (367, 367), (371, 371), (388, 389), (392, 393), (395, 396), (398, 402), (405, 411), (413, 413)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,C1=C(N(C=N1)C2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)O)O)O)N,MASQSSVAVISSAAARGESFPDSKKPIGSVRFQQPLRLSFSYCKSGNMSSRICAMAKPNDAETLSSSVDMSLSPRVQSLKPSKTMVITDLAATLVQSGVPVIRLAAGEPDFDTPKVVAEAGINAIREGFTRYTLNAGITELREAICRKLKEENGLSYAPDQILVSNGAKQSLLQAVLAVCSPGDEVIIPAPYWVSYTEQARLADATPVVIPTKISNNFLLDPKDLESKLTEKSRLLILCSPSNPTGSVYPKSLLEEIARIIAKHPRLLVLSDEIYEHIIYAPATHTSFASLPDMYERTLTVNGFSKAFAMTGWRLGYLAGPKHIVAACSKLQGQVSSGASSIAQKAGVAALGLGKAGGETVAEMVKAYRERRDFLVKSLGDIKGVKISEPQGAFYLFIDFSAYYGSEAEGFGLINDSSSLALYFLDKFQVAMVPGDAFGDDSCIRISYATSLDVLQAAVEKIRKALEPLRATVSV,"[(84, 84), (104, 106), (108, 109), (169, 169), (172, 172), (191, 192), (194, 197), (199, 199), (239, 239), (241, 242), (244, 246), (275, 275), (306, 306), (394, 397), (432, 435), (438, 438), (443, 444), (446, 447), (449, 449)]" 
+CC(=CCC/C(=C/CC/C(=C/COP(=O)(O)OP(=O)(O)O)/C)/C)C,O=P([O-])([O-])OP(=O)([O-])[O-],MLEEYRKHVAERAAEGIAPKPLDANQMAALVELLKNPPAGEEEFLLDLLTNRVPPGVDEAAYVKAGFLAAIAKGEAKSPLLTPEKAIELLGTMQGGYNIHPLIDALDDAKLAPIAAKALSHTLLMFDNFYDVEEKAKAGNEYAKQVMQSWADAEWFLNRPALAEKLTVTVFKVTGETNTDDLSPAPDAWSRPDIPLHALAMLKNAREGIEPDQPGVVGPIKQIEALQQKGFPLAYVGDVVGTGSSRKSATNSVLWFMGDDIPHVPNKRGGGLCLGGKIAPIFFNTMEDAGALPIEVDVSNLNMGDVIDVYPYKGEVRNHETGELLATFELKTDVLIDEVRAGGRIPLIIGRGLTTKAREALGLPHSDVFRQAKDVAESDRGFSLAQKMVGRACGVKGIRPGAYCEPKMTSVGSQDTTGPMTRDELKDLACLGFSADLVMQSFCHTAAYPKPVDVNTHHTLPDFIMNRGGVSLRPGDGVIHSWLNRMLLPDTVGTGGDSHTRFPIGISFPAGSGLVAFAAATGVMPLDMPESVLVRFKGKMQPGITLRDLVHAIPLYAIKQGLLTVEKKGKKNIFSGRILEIEGLPDLKVEQAFELTDASAERSAAGCTIKLNKEPIIEYLNSNIVLLKWMIAEGYGDRRTLERRIQGMEKWLANPELLEADADAEYAAVIDIDLADIKEPILCAPNDPDDARPLSAVQGEKIDEVFIGSCMTNIGHFRAAGKLLDAHKGQLPTRLWVAPPTRMDAAQLTEEGYYSVFGKSGARIEIPGCSLCMGNQARVADGATVVSTSTRNFPNRLGTGANVFLASAELAAVAALIGKLPTPEEYQTYVAQVDKTAVDTYRYLNFNQLSQYTEKADGVIFQTAV,"[(443, 444), (478, 478), (480, 481), (499, 499), (593, 593), (597, 597), (706, 706), (708, 709), (711, 712), (740, 740), (767, 768), (770, 771), (773, 774), (789, 790), (796, 797)]" +O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,MSGKPVLHYFNARGRMECIRWLLAAAGVEFEEKLIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRAILNYIATKYDLYGKDMKERALIDMYSEGILDLTEMIIQLVICPPDQREAKTALAKDRTKNRYLPAFEKVLKSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKPAMDAKQIEEARKVFKF,"[(7, 8), (10, 16), (19, 20), (33, 35), (41, 44), (46, 50), (52, 53), (56, 57), (65, 66), (69, 73)]" 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MAKYTREDIEKLVKEENVKYIRLQFTDILGTIKNVEIPVSQLGKALDNKVMFDGSSIEGFVRIEESDMYLYPDLNTFVIFPWTAEKGKVARFICDIYNPDGTPFEGDPRNNLKRILKEMEDLGFSDFNLGPEPEFFLFKLDEKGEPTLELNDKGGYFDLAPTDLGENCRRDIVLELEEMGFEIEASHHEVAPGQHEIDFKYAGAVRSCDDIQTFKLVVKTIARKHGLHATFMPKPLFGVNGSGMHCNLSLFKNGVNAFFDENADLQLSETAKHFIAGIVKHATSFTAVTNPTVNSYKRLVPGYEAPCYVAWSAQNRSPLIRIPASRGISTRVEVRSVDPAANPYLALSVLLAAGLDGIKNKLEAPAPIDRNIYVMSKEERMENGIVDLPATLAEALEEFKSNEVMVKALGEHLFEHFIEAKEIEWDMFRTQVHPWEREQYMSQY,"[(126, 131), (133, 133), (135, 136), (154, 157), (169, 169), (182, 183), (185, 188), (190, 191), (194, 195), (197, 201), (214, 214), (230, 231), (233, 235), (238, 239), (242, 244), (246, 248), (250, 251), (256, 258), (290, 290), (294, 297), (299, 300), (302, 303), (305, 306), (311, 315), (317, 320), (322, 324), (328, 328), (330, 332), (334, 334), (336, 339), (373, 373)]" +N,N[C@@H](Cc1cccc(F)c1)C(=O)O,MENGNGATTNGHVNGNGMDFCMKTEDPLYWGIAAEAMTGSHLDEVKKMVAEYRKPVVKLGGETLTISQVAAISARDGSGVTVELSEAARAGVKASSDWVMDSMNKGTDSYGVTTGFGATSHRRTKQGGALQKELIRFLNAGIFGNGSDNTLPHSATRAAMLVRINTLLQGYSGIRFEILEAITKFLNQNITPCLPLRGTITASGDLVPLSYIAGLLTGRPNSKAVGPTGVILSPEEAFKLAGVEGGFFELQPKEGLALVNGTAVGSGMASMVLFEANILAVLAEVMSAIFAEVMQGKPEFTDHLTHKLKHHPGQIEAAAIMEHILDGSAYVKAAQKLHEMDPLQKPKQDRYALRTSPQWLGPQIEVIRSSTKMIEREINSVNDNPLIDVSRNKAIHGGNFQGTPIGVSMDNTRLAIAAIGKLMFAQFSELVNDFYNNGLPSNLSGGRNPSLDYGFKGAEIAMASYCSELQFLANPVTNHVQSAEQHNQDVNSLGLISSRKTSEAVEILKLMSTTFLVGLCQAIDLRHLEENLKSTVKNTVSSVAKRVLTMGVNGELHPSRFCEKDLLRVVDREYIFAYIDDPCSATYPLMQKLRQTLVEHALKNGDNERNLSTSIFQKIATFEDELKALLPKEVESARAALESGNPAIPNRIEECRSYPLYKFVRKELGTEYLTGEKVTSPGEEFEKVFIAMSKGEIIDPLLECLESWNGAPLPIC,"[(163, 163), (205, 206), (209, 209), (253, 253), (255, 259), (261, 262), (297, 297), (300, 300), (344, 347), (349, 353), (355, 358), (382, 383), (385, 386), (395, 397), (400, 400), (442, 442), (453, 455), (457, 460), (482, 483), (485, 486), (488, 489)]" 
+C(CC(=O)[O-])C(CC(=O)C(=O)[O-])O,O=CCCC(=O)[O-],MENSFKAALKAGRPQIGLWLGLSSSYSAELLAGAGFDWLLIDGEHAPNNVQTVLTQLQAIAPYPSQPVVRPSWNDPVQIKQLLDVGTQTLLVPMVQNADEAREAVRATRYPPAGIRGVGSALARASRWNRIPDYLQKANDQMCVLVQIETREAMKNLPQILDVEGVDGVFIGPADLSADMGYAGNPQHPEVQAAIEQAIVQIRESGKAPGILIANEQLAKRYLELGALFVAVGVDTTLLARAAEALAARFGAQATAVKPGVY,"[(93, 94), (96, 96), (147, 148), (150, 151), (153, 153), (172, 174), (176, 179)]" +O=[N+]([O-])c1ccc(O)cc1,C1=CC(=CC=C1[N+](=O)[O-])O[C@H]2[C@@H]([C@H]([C@@H]([C@H](O2)C(=O)O)O)O)O,MKQSHFFAHLSRLKLINRWPLMRNVRTENVSEHSLQVAMVAHALAAIKNRKFGGNVNAERIALLAMYHDASEVLTGDLPTPVKYFNSQIAQEYKAIEKIAQQKLVDMVPEELRDIFAPLIDEHAYSDEEKSLVKQADALCAYLKCLEELAAGNNEFLLAKTRLEATLEARRSQEMDYFMEIFVPSFHLSLDEISQDSPL,"[(13, 13), (18, 18), (28, 32), (34, 38), (64, 65), (67, 67), (70, 73), (119, 119), (122, 122), (125, 125), (133, 136), (138, 142)]" +C[C@@H](C(=O)N[C@@H](CC(=O)O)C(=O)O)N,C[C@@H](C(=O)N[C@H](CC(=O)[O-])C(=O)[O-])[NH3+],MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,CC(C)(N)CO,MEFSEWYSDILEKAEIYDVRYPIKGCGVYLPYGFKIRRYTFEIIRNLLDESGHDEALFPMLIPEDLLAKEAEHIKGFEDEVYWVTHGGKTQLDVKLALRPTSETPIYYMMKLWVKVHTDLPIKIYQIVNTFRYETKHTRPLIRLREIMTFKEAHTAHSTKEEAENQVKEAISIYKKFFDTLGIPYLISKRPEWDKFPGAEYTMAFDTIFPDGRTMQIATVHNLGQNFSKTFEIIFETPTGDKDYAYQTCYGISDRVIASIIAIHGDEKGLILPPIVAPIQVVIVPLIFKGKEDIVMEKAKEIYEKLKGKFRVHIDDRDIRPGRKFNDWEIKGVPLRIEVGPKDIENKKITLFRRDTMEKFQVDETQLMEVVEKTLNNIMENIKNRAWEKFENFITILEDINPDEIKNILSEKRGVILVPFKEEIYNEELEEKVEATILGETEYKGNKYIAIAKTY,"[(41, 41), (70, 70), (77, 
77), (80, 80), (82, 82), (99, 100), (102, 102), (104, 108), (130, 131), (133, 133), (135, 137), (141, 145), (147, 147), (149, 150), (152, 152), (154, 154), (167, 167), (174, 174), (190, 190), (196, 197), (199, 199), (201, 207), (209, 209), (214, 215), (217, 218), (220, 220), (222, 223), (227, 227), (231, 231), (248, 252), (254, 254), (256, 260), (455, 455)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=P([O-])([O-])[O-],MVRIIVKNVSKVFKKGKVVALDNVNINIENGERFGILGPSGAGKTTFMRIIAGLDVPSTGELYFDDRLVASNGKLIVPPEDRKIGMVFQTWALYPNLTAFENIAFPLTNMKMSKEEIRKRVEEVAKILDIHHVLNHFPRELSGGQQQRVALARALVKDPSLLLLDEPFSNLDARMRDSARALVKEVQSRLGVTLLVVSHDPADIFAIADRVGVLVKGKLVQVGKPEDLYDNPVSIQVASLIGEINELEGKVTNEGVVIGSLRFPVSVSSDRAIIGIRPEDVKLSKDVIKDDSWILVGKGKVKVIGYQGGLFRITITPLDSEEEIFTYSDHPIHSGEEVLVYVRKDKIKVFEKN,"[(11, 11), (13, 13), (20, 21), (36, 39), (47, 50), (55, 55), (87, 88), (90, 92), (164, 165), (167, 170), (197, 200), (214, 214)]" +C1[C@H](C([C@@H](CC1(C(=O)O)O)O)O)O,O=C1C[C@@](O)(C(=O)O)C[C@@H](O)[C@@H]1O,MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE,"[(59, 61), (68, 68), (85, 85), (104, 105), (107, 108), (149, 151), (155, 155), (157, 159), (161, 161), (163, 165), (167, 168), (170, 170), (173, 174), (176, 180), (193, 194), (219, 221), (227, 229), (231, 234), (236, 240), (243, 245), (247, 250), (264, 266)]" 
+C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MARTVVLITGCSSGIGLHLAVRLASDPSQSFKVYATLRDLKTQGRLWEAARALACPPGSLETLQLDVRDSKSVAAARERVTEGRVDVLVCNAGLGLLGPLEALGEDAVASVLDVNVVGTVRMLQAFLPDMKRRGSGRVLVTGSVGGLMGLPFNDVYCASKFALEGLCESLAVLLLPFGVHLSLIECGPVHTAFMEKVLGSPEEVLDRTDIHTFHRFYQYLAHSKQVFREAAQNPEEVAEVFLTALRAPKPTLRYFTTERFLPLLRMRLDDPSGSNYVTAMHREVFGDVPAKAEAGAEAGGGAGPGAEDEAGRGAVGDPELGDPPAAPQ,"[(1, 9), (39, 42), (44, 45), (48, 49), (52, 55), (57, 65), (67, 68), (72, 72), (78, 81), (88, 88), (90, 91), (112, 113), (116, 117), (120, 121), (140, 140), (144, 148), (155, 159), (161, 164), (185, 185), (188, 188), (190, 190), (233, 234), (236, 238), (240, 241), (244, 245), (266, 266)]" +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MSPRVGVTLSGRYRLQRLIATGGMGQVWEAVDNRLGRRVAVKVLKSEFSSDPEFIERFRAEARTTAMLNHPGIASVHDYGESQMNGEGRTAYLVMELVNGEPLNSVLKRTGRLSLRHALDMLEQTGRALQIAHAAGLVHRDVKPGNILITPTGQVKITDFGIAKAVDAAPVTQTGMVMGTAQYIAPEQALGHDASPASDVYSLGVVGYEAVSGKRPFAGDGALTVAMKHIKEPPPPLPPDLPPNVRELIEITLVKNPAMRYRSGGPFADAVAAVRAGRRPPRPSQTPPPGRAAPAAIPSGTTARVAANSAGRTAASRRSRPATGGHRPPRRTFSSGQRALLWAAGVLGALAIIIAVLLVIKAPGDNSPQQAPTPTVTTTGNPPASNTGGTDASPRLNWTERGETRHSGLQSWVVPPTPHSRASLARYEIAQ,"[(3, 3), (17, 18), (28, 29), (40, 41), (43, 45), (48, 48), (54, 54), (58, 58), (91, 95), (97, 97), (159, 159), (162, 163), (166, 166)]" +O,C[C@H](N)C(=O)O,MDIMNEKVKKIIEFMDKNSIDAVLIAKNPNVYYISGASPLAGGYILITGESATLYVPELEYEMAKEESNIPVEKFKKMDEFYKALEGIKSLGIESSLPYGFIEELKKKANIKEFKKVDDVIRDMRIIKSEKEIKIIEKACEIADKAVMAAIEEITEGKKEREVAAKVEYLMKMNGAEKPAFDTIIASGYRSALPHGVASDKRIERGDLVVIDLGALYQHYNSDITRTIVVGSPNEKQKEIYEIVLEAQKKAVESAKPGITAKELDSIARNIIAEYGYGEYFNHSLGHGVGLEVHEWPRVSQYDETVLREGMVITIEPGIYIPKIGGVRIEDTILITKNGSKRLTKTERELI,"[(140, 140), (181, 184), (210, 211), (213, 214), (221, 222), (224, 226), (248, 248), (281, 281), (283, 286), (288, 
289), (293, 295), (297, 299), (312, 315), (317, 318), (320, 320), (328, 329), (331, 332), (344, 344)]" +Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,O=P([O-])([O-])OP(=O)([O-])[O-],MRYDVVIAGAGPTGLMLACELRLAGARTLVLERLAEPVDFSKALGVHARTVELLDMRGLGEGFQAEAPKLRGGNFASLGVPLDFSSFDTRHPYALFVPQVRTEELLTGRALELGAELRRGHAVTALEQDADGVTVSVTGPEGPYEVECAYLVGCDGGGSTVRKLLGIDFPGQDPHMFAVIADARFREELPHGEGMGPMRPYGVMRHDLRAWFAAFPLEPDVYRATVAFFDRPYADRRAPVTEEDVRAALTEVAGSDFGMHDVRWLSRLTDTSRQAERYRDGRVLLAGDACHIHLPAGGQGLNLGFQDAVNLGWKLGATIAGTAPPELLDTYEAERRPIAAGVLRNTRAQAVLIDPDPRYEGLRELMIELLHVPETNRYLAGLISALDVRYPMAGEHPLLGRRVPDLPLVTEDGTRQLSTYFHAARGVLLTLGCDQPLADEAAAWKDRVDLVAAEGVADPGSAVDGLTALLVRPDGYICWTAAPETGTDGLTDALRTWFGPPAM,"[(8, 12), (14, 17), (30, 31), (34, 35), (40, 43), (45, 47), (50, 50), (95, 98), (100, 104), (117, 122), (124, 125), (135, 137), (151, 151), (154, 159), (161, 164), (169, 169), (215, 215), (217, 217), (223, 223), (286, 287), (289, 292), (298, 300), (303, 306), (308, 308), (335, 335)]" +O=O,O=Cc1ccccc1,MSFGALRQLLLIACLALPSLAATNLPTADFDYVVVGAGNAGNVVAARLTEDPDVSVLVLEAGVSDENVLGAEAPLLAPGLVPNSIFDWNYTTTAQAGYNGRSIAYPRGRMLGGSSSVHYMVMMRGSTEDFDRYAAVTGDEGWNWDNIQQFVRKNEMVVPPADNHNTSGEFIPAVHGTNGSVSISLPGFPTPLDDRVLATTQEQSEEFFFNPDMGTGHPLGISWSIASVGNGQRSSSSTAYLRPAQSRPNLSVLINAQVTKLVNSGTTNGLPAFRCVEYAEQEGAPTTTVCAKKEVVLSAGSVGTPILLQLSGIGDENDLSSVGIDTIVNNPSVGRNLSDHLLLPAAFFVNSNQTFDNIFRDSSEFNVDLDQWTNTRTGPLTALIANHLAWLRLPSNSSIFQTFPDPAAGPNSAHWETIFSNQWFHPAIPRPDTGSFMSVTNALISPVARGDIKLATSNPFDKPLINPQYLSTEFDIFTMIQAVKSNLRFLSGQAWADFVIRPFDPRLRDPTDDAAIESYIRDNANTIFHPVGTASMSPRGASWGVVDPDLKVKGVDGLRIVDGSILPFAPNAHTQGPIYLVGKQGADLIKADQ,"[(11, 12), (20, 21), (23, 23), (34, 34), (37, 38), (41, 42), (45, 45), (54, 59), (62, 63), (78, 82), (85, 87), (89, 91), (103, 104), (112, 113), (115, 117), (119, 119), (122, 125), (147, 147), (187, 187), (206, 206), (208, 210), (215, 215), (230, 230), (243, 244), (246, 246), (251, 253), (256, 257), (259, 260), (286, 286), (301, 301), (435, 435), (525, 527), (529, 531), 
(548, 548), (552, 553), (557, 562), (564, 566)]" +C([C@H](C(=O)[O-])O)O,[H+],MTWKNFGFEIFGEKYGQEELEKRIKDEHTPPPDSPVFGGLKLKLKKEKFKTLFTLGTTLKGFRRATHTVGTGGIGEITIVNDPKFPEHEFFTAGRTFPARLRHANLKYPDDAGADARSFSIKFADSDSDGPLDIVMNTGEANIFWNSPSLEDFVPVEEGDAAEEYVYKNPYYYYNLVEALRRAPDTFAHLYYYSQVTMPFKAKDGKVRYCRYRALPGDVDIKEEDESGRLTEEEQRKIWIFSRHENEKRPDDYLRKEYVERLQKGPVNYRLQIQIHEASPDDTATIFHAGILWDKETHPWFDLAKVSIKTPLSPDVLEKTAFNIANQPASLGLLEAKSPEDYNSIGELRVAVYTWVQHLRKLKIGSLVPAGQNAIYNVEVETGDREHAGTDATITIRITGAKGRTDYLKLDKWFHNDFEAGSKEQYTVQGFDVGDIQLIELHSDGGGYWSGDPDWFVNRVIIISSTQDRVYSFPCFRWVIKDMVLFPGEATLPFNEVPAIVSEQRQKELEQRKLTYQWDYVSDDMPGNIKAKTHDDLPRDVQFTDEKSRSYQESRKAALVNLGIGSLFTMFENWDSYDDYHILYRNWILGGTPNMADRWHEDRWFGYQFLNGANPVILTRCDALPSNFPVTNEHVNASLDRGKNLDEEIKDGHIYIVDFKVLVGAKSYGGPVLEDIGYKVPDHLKHDEADIRYCAAPLALFYVNKLGHLMPIAIQINQEPGPENPIWTPHEENEHDWMMAKFWLGVAESNFHQLNTHLLRTHLTTESFALSTWRNLASAHPVFKLLQPHIYGVLAIDTIGRKELIGSGGIVDQSLSLGGGGHVTFMEKCFKEVNLQDYHLPNALKKRGVDDPSKLPGFYYRDDGLALWEAIETFIGEIIAIFYKNDDDVKRDNEIQSWIYDVHKNGWRVNPGHQDHGVPASFESREQLKEVLTSLVFTFSCQHAAVNFSQKDHYGFTPNAPAVLRHPPPKKKGEATLQSILSTLPSKSQAAKAIATVYILTKFSEDERYLGNYSATAWEDKDALDAINRFQDKLEDISKKIKQRNENLEVPYIYLLPERIPNGTAI,"[(382, 383), (385, 386), (388, 388), (392, 394), (414, 414), (418, 418), (420, 422), (424, 424), (446, 446), (449, 451), (453, 453), (455, 456), (478, 480), (539, 539), (543, 543), (545, 545), (551, 551), (603, 603), (752, 756), (758, 761), (763, 766), (793, 793), (796, 797), (859, 859), (937, 942), (944, 946), (948, 951), (999, 1002), (1047, 1050), (1063, 1065)]" +C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OS(=O)(=O)O)OP(=O)(O)O)O)N,C1=CC=C(C=C1)OS(=O)(=O)[O-],MALTSDLGKQIKLKEVEGTLLQPATVDNWSQIQSFEAKPDDLLICTYPKAGTTWIQEIVDMIEQNGDVEKCQRAIIQHRHPFIEWARPPQPSGVEKAKAMPSPRILKTHLSTQLLPPSFWENNCKFLYVARNAKDCMVSYYHFQRMNHMLPDPGTWEEYFETFINGKVVWGSWFDHVKGWWEMKDRHQILFLFYEDIKRDPKHEIRKVMQFMGKKVDETVLDKIVQETSFEKMKENPMTNRSTVSKSILDQSISSFMRKGTVGDWKNHFTVAQNERFDEIYRRKMEGTSINFCMEL,"[(44, 44), (46, 48), (55, 58), (61, 61), (107, 107), (109, 109), (129, 
130), (132, 133), (135, 138), (140, 143), (146, 146), (173, 173), (192, 193), (195, 198), (205, 205), (223, 227), (234, 236), (261, 262), (264, 264), (268, 268), (292, 292)]" +C(=O)(O)[O-],O=P([O-])([O-])[O-],MASSAQDGNNPLFSPYKMGKFNLSHRVVLAPMTRCRALNNIPQAALGEYYEQRATAGGFLITEGTMISPTSAGFPHVPGIFTKEQVREWKKIVDVVHAKGAVIFCQLWHVGRASHEVYQPAGAAPISSTEKPISNRWRILMPDGTHGIYPKPRAIGTYEISQVVEDYRRSALNAIEAGFDGIEIHGAHGYLIDQFLKDGINDRTDEYGGSLANRCKFITQVVQAVVSAIGADRVGVRVSPAIDHLDAMDSNPLSLGLAVVERLNKIQLHSGSKLAYLHVTQPRYVAYGQTEAGRLGSEEEEARLMRTLRNAYQGTFICSGGYTRELGIEAVAQGDADLVSYGRLFISNPDLVMRIKLNAPLNKYNRKTFYTQDPVVGYTDYPFLQGNGSNGPLSRL,"[(29, 30), (34, 35), (49, 50), (61, 63), (65, 66), (74, 74), (77, 78), (104, 105), (107, 108), (183, 188), (235, 236), (238, 239), (278, 280), (283, 284), (319, 320), (322, 323), (326, 326), (340, 341), (344, 347), (364, 364), (366, 366), (369, 370)]" +[H+],NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MAPKRSSDLFSQVVNSGPGSFLARQLGVPQPETLRRYRAGEPPLTGSLLIGGAGRVVEPLRAALEKDYDLVGNNLGGRWADSFGGLVFDATGITEPAGLKGLHEFFTPVLRNLGRCGRVVVVGGTPEAAASTNERIAQRALEGFTRSLGKELRRGATTALVYLSPDAKPAATGLESTMRFLLSAKSAYVDGQVFSVGADDSTPPADWEKPLDGKVAIVTGAARGIGATIAEVFARDGAHVVAIDVESAAENLAETASKVGGTALWLDVTADDAVDKISEHLRDHHGGKADILVNNAGITRDKLLANMDDARWDAVLAVNLLAPLRLTEGLVGNGSIGEGGRVIGLSSIAGIAGNRGQTNYATTKAGMIGITQALAPGLAAKGITINAVAPGFIETQMTAAIPLATREVGRRLNSLLQGGQPVDVAEAIAYFASPASNAVTGNVIRVCGQAMIGA,"[(188, 188), (218, 222), (224, 224), (226, 229), (242, 243), (245, 246), (248, 248), (251, 252), (255, 255), (264, 266), (269, 270), (273, 273), (293, 294), (296, 296), (298, 299), (301, 301), (315, 315), (318, 319), (322, 322), (326, 326), (345, 350), (357, 359), (361, 363), (365, 368), (388, 389), (391, 392), (394, 394), (419, 421), (424, 424), (446, 447)]" 
+O,O=P([O-])([O-])[O-],MQKSPLEKASFISKLFFSWTTPILRKGYRHHLELSDIYQAPSADSADHLSEKLEREWDREQASKKNPQLIHALRRCFFWRFLFYGILLYLGEVTKAVQPVLLGRIIASYDPENKVERSIAIYLGIGLCLLFIVRTLLLHPAIFGLHRIGMQMRTAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFIWIAPLQVTLLMGLLWDLLQFSAFCGLGLLIILVIFQAILGKMMVKYRDQRAAKINERLVITSEIIDNIYSVKAYCWESAMEKMIENLREVELKMTRKAAYMRFFTSSAFFFSGFFVVFLSVLPYTVINGIVLRKIFTTISFCIVLRMSVTRQFPTAVQIWYDSFGMIRKIQDFLQKQEYKVLEYNLMTTGIIMENVTAFWEEGFGELLEKVQQSNGDRKHSSDENNVSFSHLCLVGNPVLKNINLNIEKGEMLAITGSTGSGKTSLLMLILGELEASEGIIKHSGRVSFCSQFSWIMPGTIKENIIFGVSYDEYRYKSVVKACQLQQDITKFAEQDNTVLGEGGVTLSGGQRARISLARAVYKDADLYLLDSPFGYLDVFTEEQVFESCVCKLMANKTRILVTSKMEHLRKADKILILHQGSSYFYGTFSELQSLRPDFSSKLMGYDTFDQFTEERRSSILTETLRRFSVDDSSAPWSKPKQSFRQTGEVGEKRKNSILNSFSSVRKISIVQKTPLCIDGESDDLQEKRLSLVPDSEQGEAALPRSNMIATGPTFPGRRRQSVLDLMTFTPNSGSSNLQRTRTSIRKISLVPQISLNEVDVYSRRLSQDSTLNITEEINEEDLKECFLDDVIKIPPVTTWNTYLRYFTLHKGLLLVLIWCVLVFLVEVAASLFVLWLLKNNPVNSGNNGTKISNSSYVVIITSTSFYYIFYIYVGVADTLLALSLFRGLPLVHTLITASKILHRKMLHSILHAPMSTISKLKAGGILNRFSKDIAILDDFLPLTIFDFIQLVFIVIGAIIVVSALQPYIFLATVPGLVVFILLRAYFLHTAQQLKQLESEGRSPIFTHLVTSLKGLWTLRAFRRQTYFETLFHKALNLHTANWFMYLATLRWFQMRIDMIFVLFFIVVTFISILTTGEGEGTAGIILTLAMNIMSTLQWAVNSSIDTDSLMRSVSRVFKFIDIQTEESMYTQIIKELPREGSSDVLVIKNEHVKKSDIWPSGGEMVVKDLTVKYMDDGNAVLENISFSISPGQRVGLLGRTGSGKSTLLSAFLRMLNIKGDIEIDGVSWNSVTLQEWRKAFGVITQKVFIFSGTFRQNLDPNGKWKDEEIWKVADEVGLKSVIEQFPGQLNFTLVDGGYVLSHGHKQLMCLARSVLSKAKIILLDEPSAHLDPITYQVIRRVLKQAFAGCTVILCEHRIEAMLDCQRFLVIEESNVWQYDSLQALLSEKSIFQQAISSSEKMRFFQGRHSSKHKPRTQITALKEETEEEVQETRL,"[(399, 400), (402, 403), (405, 406), (409, 409), (412, 413), (430, 430), (433, 434), (438, 441), (456, 457), (466, 469), (490, 492), (494, 495), (572, 573), (577, 577), (603, 604), (618, 623), (659, 659), (662, 663), (666, 666)]" 
+O,CC(C)(CO)[C@H](C(=O)NCCC(=O)O)O,MSTLANLTEVLFRLDFDPDTAVYHYRGQTLSRLQCRTYILSQASQLARLLKPGDRVVLALNDSPSLACLFLACIAVGAIPAVINPKSREQALADIAADCQASLVVREADAPSLSGPLAPLTLRAAAGRPLLDDFSLDALVGPADLDWSAFHRQDPAAACFLQYTSGSTGAPKGVMHSLRNTLGFCRAFATELLALQAGDRLYSIPKMFFGYGMGNSLFFPWFSGASALLDDTWPSPERVLENLVAFRPRVLFGVPAIYASLRPQARELLSSVRLAFSAGSPLPRGEFEFWAAHGLEICDGIGATEVGHVFLANRPGQARADSTGLPLPGYECRLVDREGHTIEEAGRQGVLLVRGPGLSPGYWRASEEQQARFAGGWYRTGDLFERDESGAYRHCGREDDLFKVNGRWVVPTQVEQAICRHLPEVSEAVLVPTCRLHDGLRPTLFVTLATPLDDNQILLAQRIDQHLAEQIPSHMLPSQLHVLPALPRNDNGKLARAELRHLADTLYHDNLPEERAC,"[(55, 55), (57, 57), (70, 70), (79, 83), (91, 91), (94, 95), (98, 99), (158, 160), (173, 176), (184, 184), (208, 210), (213, 214), (218, 218), (304, 305), (307, 307), (362, 365), (369, 369), (372, 372), (378, 378)]" +[C@H](C(=O)O)(N)NC(=O)N,N,MRSLYLIVFIVISLVKASKSDDGFCSAPSIVESDEKTNPIYWKATNPTLSPSHLQDLPGFTRSVYKRDHALITPESHVYSPLPDWTNTLGAYLITPATGSHFVMYLAKMKEMSSSGLPPQDIERLIFVVEGAVTLTNTSSSSKKLTVDSYAYLPPNFHHSLDCVESATLVVFERRYEYLGSHTTELIVGSTDKQPLLETPGEVFELRKLLPMSVAYDFNIHTMDFQPGEFLNVKEVHYNQHGLLLLEGQGIYRLGDNWYPVQAGDVIWMAPFVPQWYAALGKTRSRYLLYKDVNRNPL,"[(94, 94), (102, 102), (233, 234), (236, 236), (238, 240), (242, 243), (252, 254), (267, 274), (276, 277), (289, 291)]" +C([C@@H]1[C@H]([C@@H]([C@@H]([C@H](O1)OP(=O)(O)O)O)O)O)O,C([C@@H]1[C@H]([C@@H]([C@@H]([C@H](O1)O)O)O)O)OP(=O)(O)O,MAVTAQAARRKERVLCLFDVDGTLTPARQKIDPEVAAFLQKLRSRVQIGVVGGSDYCKIAEQLGDGDEVIEKFDYVFAENGTVQYKHGRLLSKQTIQNHLGEELLQDLINFCLSYMALLRLPKKRGTFIEFRNGMLNISPIGRSCTLEERIEFSELDKKEKIREKFVEALKTEFAGKGLRFSRGGMISFDVFPEGWDKRYCLDSLDQDSFDTIHFFGNETSPGGNDFEIFADPRTVGHSVVSPQDTVQRCREIFFPETAHEA,"[(17, 18), (20, 20), (22, 27), (29, 30), (51, 52), (54, 54), (58, 58), (62, 62), (79, 79), (96, 96), (126, 128), (130, 131), (133, 134), (136, 142), (144, 149), (151, 154), (157, 157), (163, 163), (180, 185), (187, 187), (189, 189), (191, 192), (198, 199), (202, 202), (213, 213), (215, 217), (219, 220), (225, 229), (231, 231), (233, 234), (236, 241)]" 
+O=C([O-])C(=O)C[C@]1(C(=O)[O-])C=C[C@@H](O)C=C1,O,MFDKHTHTLIAQRLDQAEKQREQIRAISLDYPEITIEDAYAVQREWVRLKIAEGRTLKGHKIGLTSKAMQASSQISEPDYGALLDDMFFHDGSDIPTDRFIVPRIEVELAFVLAKPLRGPNCTLFDVYNATDYVIPALELIDARCHNIDPETQRPRKVFDTISDNAANAGVILGGRPIKPDELDLRWISALMYRNGVIEETGVAAGVLNHPANGVAWLANKLAPYDVQLEAGQIILGGSFTRPVPARKGDTFHVDYGNMGSISCRFV,"[(61, 62), (83, 83), (104, 105), (107, 107), (109, 110), (135, 138), (140, 141), (168, 168), (170, 171), (236, 241), (243, 243), (256, 256)]" +[H+],O,MSKLLMIGTGPVAIQLANICYLKSDYEIDMVGRASTSEKSKRLYQAYKKEKQFEVKIQNEAHQHLEGKFEINRLYKDVKNVKGEYETVVMACTADAYYDTLQQLSLETLQSVKHVILISPTFGSQMIVEQFMSKFSQDIEVISFSTYLGDTRIVDKEAPNHVLTTGVKKKLYMGSTHSNSTMCQRISALAEQLKIQLEVVESPLHAETRNSSLYVHPPLFMNDFSLKAIFEGTDVPVYVYKLFPEGPITMTLIREMRLMWKEMMAILQAFRVPSVNLLQFMVKENYPVRPETLDEGDIEHFEILPDILQEYLLYVRYTAILIDPFSQPDENGHYFDFSAVPFKQVYKNEQDVVQIPRMPSEDYYRTAMIQHIGKMLGIKTPMIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS,"[(7, 8), (13, 17), (30, 32), (34, 36), (41, 44), (74, 74), (76, 78), (91, 91), (96, 98), (100, 103), (118, 118), (147, 147), (151, 153), (162, 162), (349, 350)]" +C([C@H]([C@H]([C@H]([C@@H](C(=O)CO)O)O)O)O)OP(=O)(O)O,C([C@H]([C@@H]1[C@H]([C@@H]([C@@H](C(O1)O)O)O)O)O)OP(=O)(O)O,MENRELTYITNSIAEAQRVMAAMLADERLLATVRKVADACIASIAQGGKVLLAGNGGSAADAQHIAGEFVSRFAFDRPGLPAVALTTDTSILTAIGNDYGYEKLFSRQVQALGNEGDVLIGYSTSGKSPNILAAFREAKAKGMTCVGFTGNRGGEMRELCDLLLEVPSADTPKIQEGHLVLGHIVCGLVEHSIFGKQ,"[(57, 58), (60, 63), (65, 67), (69, 73), (77, 77), (124, 124), (170, 174), (176, 182), (184, 187)]" 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MDSSTATAMTAPFIDPTDHVNLKTDTDASENRRMGNYKPSIWNYDFLQSLATHHNIVEERHLKLAEKLKGQVKFMFGAPMEPLAKLELVDVVQRLGLNHLFETEIKEALFSIYKDGSNGWWFGHLHATSLRFRLLRQCGLFIPQDVFKTFQNKTGEFDMKLCDNVKGLLSLYEASYLGWKGENILDEAKAFTTKCLKSAWENISEKWLAKRVKHALALPLHWRVPRIEARWFIEAYEQEANMNPTLLKLAKLDFNMVQSIHQKEIGELARWWVTTGLDKLAFARNNLLQSYMWSCAIASDPKFKLARETIVEIGSVLTVVDDGYDVYGSIDELDLYTSSVERWSCVEIDKLPNTLKLIFMSMFNKTNEVGLRVQHERGYNSIPTFIKAWVEQCKSYQKEARWFHGGHTPPLEEYSLNGLVSIGFPLLLITGYVAIAENEAALDKVHPLPDLLHYSSLLSRLINDIGTSPDEMARGDNLKSIHCYMNETGASEEVAREHIKGVIEENWKILNQCCFDQSQFQEPFITFNLNSVRGSHFFYEFGDGFGVTDSWTKVDMKSVLIDPIPLGEE,"[(44, 44), (281, 283), (285, 286), (317, 320), (322, 324), (326, 327), (396, 396), (399, 399), (414, 414), (418, 419), (421, 422), (456, 459), (461, 462), (464, 466), (468, 468), (477, 477), (479, 479), (481, 481), (496, 496), (539, 539), (546, 546), (548, 548)]" +CCN(CC)C(=O)/C(C#N)=C/c1cc(O)c(O)c([N+](=O)[O-])c1,O=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,MSVKWTSVILLIQLSFCFSSGNCGKVLVWAAEYSHWMNIKTILDELIQRGHEVTVLASSASILFDPNNSSALKIEIYPTSLTKTELENFIMQQIKRWSDLPKDTFWLYFSQVQEIMSIFGDITRKFCKDVVSNKKFMKKVQESRFDVIFADAIFPCSELLAELFNIPFVYSLSFSPGYTFEKHSGGFIFPPSYVPVVMSELTDQMTFMERVKNMIYVLYFDFWFEIFDMKKWDQFYSEVLGRPTTLSETMGKADVWLIRNSWNFQFPYPLLPNVDFVGGLHCKPAKPLPKEMEDFVQSSGENGVVVFSLGSMVSNMTEERANVIASALAQIPQKVLWRFDGNKPDTLGLNTRLYKWIPQNDLLGHPKTRAFITHGGANGIYEAIYHGIPMVGIPLFADQPDNIAHMKARGAAVRVDFNTMSSTDLLNALKRVINDPSYKENVMKLSRIQHDQPVKPLDRAVFWIEFVMRHKGAKHLRVAAHDLTWFQYHSLDVIGFLLVCVATVIFIVTKCCLFCFWKFARKAKKGKND,"[(308, 310), (359, 359), (363, 363), (371, 372), (380, 383), (391, 397), (399, 403)]" 
+CC(=O)Oc1ccc2ccccc2c1,Oc1ccc2ccccc2c1,MAKLIALTLLGMGLALFRNHQSSYQTRLNALREVQPVELPNCNLVKGIETGSEDLEILPNGLAFISSGLKYPGIKSFNPNSPGKILLMDLNEEDPTVLELGITGSKFDVSSFNPHGISTFTDEDNAMYLLVVNHPDAKSTVELFKFQEEEKSLLHLKTIRHKLLPNLNDIVAVGPEHFYGTNDHYFLDPYLQSWEMYLGLAWSYVVYYSPSEVRVVAEGFDFANGINISPDGKYVYIAELLAHKIHVYEKHANWTLTPLKSLDFNTLVDNISVDPETGDLWVGCHPNGMKIFFYDSENPPASEVLRIQNILTEEPKVTQVYAENGTVLQGSTVASVYKGKLLIGTVFHKALYCEL,"[(51, 52), (55, 56), (64, 66), (69, 69), (85, 85), (115, 116), (118, 119), (129, 132), (166, 167), (170, 171), (179, 183), (222, 223), (225, 226), (238, 238), (240, 240), (267, 268), (271, 272), (281, 285), (331, 333), (345, 346)]" +NC(=O)[C@@H](N)Cc1ccccc1,NC(=O)[C@H](N)Cc1ccccc1,MTKALYDRDGAAIGNLQKLRFFPLAISGGRGARLIEENGRELIDLSGAWGAASLGYGHPAIVAAVSAAAANPAGATILSASNAPAVTLAERLLASFPGEGTHKIWFGHSGSDANEAAYRAIVKATGRSGVIAFAGAYHGCTVGSMAFSGHSVQADAAKADGLILLPYPDPYRPYRNDPTGDAILTLLTEKLAAVPAGSIGAAFIEPIQSDGGLIVPPDGFLRKFADICRAHGILVVCDEVKVGLARSGRLHCFEHEGFVPDILVLGKGLGGGLPLSAVIAPAEILDCASAFAMQTLHGNPISAAAGLAVLETIDRDDLPAMAERKGRLLRDGLSELAKRHPLIGDIRGRGLACGMELVCDRQSREPARAETAKLIYRAYQLGLVVYYVGMNGNVLEFTPPLTITETDIHKALDLLDRAFSELSAVSNEEIAQFAGW,"[(49, 50), (75, 76), (78, 78), (108, 109), (112, 116), (135, 136), (138, 141), (144, 144), (148, 150), (152, 153), (204, 206), (209, 210), (236, 237), (242, 244), (246, 246), (250, 253), (263, 268), (276, 276), (293, 294), (296, 297), (396, 396), (398, 398)]" +CC(=O)NCCCCNCCCN,NCCCCNCCCN,MAIGYVWNTLYGWVDTGTGSLAAANLTARMQPISHHLAHPDTKRRFHELVCASGQIEHLTPIAAVAATDADILRAHSAAHLENMKRVSNLPTGGDTGDGITMMGNGGLEIARLSAGGAVELTRRVATGELSAGYALVNPPGHHAPHNAAMGFCIFNNTSVAAGYARAVLGMERVAILDWDVHHGNGTQDIWWNDPSVLTISLHQHLCFPPDSGYSTERGAGNGHGYNINVPLPPGSGNAAYLHAMDQVVLHALRAYRPQLIIVGSGFDASMLDPLARMMVTADGFRQMARRTIDCAADICDGRIVFVQEGGYSPHYLPFCGLAVIEELTGVRSLPDPYHEFLAGMGGNTLLDAERAAIEEIVPLLADIR,"[(142, 143), (178, 179), (181, 181), (183, 184), (201, 204), (207, 208), (264, 264), (266, 267), (269, 270), (273, 273), (275, 276), (285, 285), (309, 312)]" 
+CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2CC=C4[C@@]3(CC[C@@H](C4)O)C)C)C(C)C,OO,MTAQQHLSRRRMLGMAAFGAAALAGGTTIAAPRAAAAAKSAADNGGYVPAVVIGTGYGAAVSALRLGEAGVQTLMLEMGQLWNQPGPDGNIFCGMLNPDKRSSWFKNRTEAPLGSFLWLDVVNRNIDPYAGVLDRVNYDQMSVYVGRGVGGGSLVNGGMAVEPKRSYFEEILPRVDSSEMYDRYFPRANSMLRVNHIDTKWFEDTEWYKFARVSREQAGKAGLGTVFVPNVYDFGYMQREAAGEVPKSALATEVIYGNNHGKQSLDKTYLAAALGTGKVTIQTLHQVKTIRQTKDGGYALTVEQKDTDGKLLATKEISCRYLFLGAGSLGSTELLVRARDTGTLPNLNSEVGAGWGPNGNIMTARANHMWNPTGAHQSSIPALGIDAWDNSDSSVFAEIAPMPAGLETWVSLYLAITKNPQRGTFVYDAATDRAKLNWTRDQNAPAVNAAKALFDRINKANGTIYRYDLFGTQLKAFADDFCYHPLGGCVLGKATDDYGRVAGYKNLYVTDGSLIPGSVGVNPFVTITALAERNVERIIKQDVTAS,"[(15, 16), (51, 51), (55, 56), (59, 61), (75, 76), (78, 82), (85, 85), (123, 123), (125, 125), (130, 130), (147, 151), (153, 155), (160, 161), (191, 198), (214, 220), (222, 222), (226, 226), (230, 231), (234, 234), (253, 253), (285, 286), (288, 289), (293, 295), (297, 298), (321, 321), (325, 325), (342, 342), (351, 351), (359, 359), (365, 366), (375, 375), (379, 380), (432, 433), (464, 464), (470, 474), (481, 482), (484, 485), (487, 488)]" +N[C@@H](Cc1ccc(O)c(O)c1)C(=O)O,NCCc1ccc(O)c(O)c1,MNASEFRRRGKEMVDYMANYMEGIEGRQVYPDVEPGYLRPLIPAAAPQEPDTFEDIINDVEKIIMPGVTHWHSPYFFAYFPTASSYPAMLADMLCGAIGCIGFSWAASPACTELETVMMDWLGKMLELPKAFLNEKAGEGGGVIQGSASEATLVALLAARTKVIHRLQAASPELTQAAIMEKLVAYSSDQAHSSVERAGLIGGVKLKAIPSDGNFAMRASALQEALERDKAAGLIPFFMVATLGTTTCCSFDNLLEVGPICNKEDIWLHVDAAYAGSAFICPEFRHLLNGVEFADSFNFNPHKWLLVNFDCSAMWVKKRTDLTGAFRLDPTYLKHSHQDSGLITDYRHWQIPLGRRFRSLKMWFVFRMYGVKGLQAYIRKHVQLSHEFESLVRQDPRFEICVEVILGLVCFRLKGSNKVNEALLQRINSAKKIHLVPCHLRDKFVLRFAICSRTVESAHVQRAWEHIKELAADVLRAERE,"[(146, 147), (150, 153), (190, 190), (192, 192), (194, 194), (197, 197), (244, 245), (247, 248), (271, 271), (273, 274), (277, 277), (298, 299), (301, 303), (311, 313), (447, 447)]" 
+O,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,MSKLFSTVNSARHSVPLGGMRDYVHIKKLEMNTVLGPDSWNQLMPQKCLLSLDMGTDFSKSAATDDLKYSLNYAVISRDLTNFVSKKKNWGSVSNLAKSVSQFVMDKYSGVECLNLEVQADTTHIRSDHISCIIQQERGNPESQEFDVVRISELKMLTLIGVFTFERLKKQYVTLDIKLPWPKKAELPPPVQSIIDNVVKFVEESNFKTVEALVESVSAVIAHNEYFQKFPDSPLVVKVLKLNAITATEGVGVSCIREPREIAMVNIPYLSSIHESSDIKFQLSSSQNTPIEGKNTWKRAFLAFGSNIGDRFKHIQMALQLLSREKTVKLRNISSIFESEPMYFKDQTPFMNGCVEVETLLTPSELLKLCKKIEYEELQRVKHFDNGPRTIDLDIVMFLNSAGEDIIVNEPDLNIPHPRMLERTFVLEPLCELISPVHLHPVTAEPIVDHLKQLYDKQHDEDTLWKLVPLPYRSGVEPRFLKFKTATKLDEFTGETNRITVSPTYIMAIFNATPDSFSDGGEHFADIESQLNDIIKLCKDALYLHESVIIDVGGCSTRPNSIQASEEEEIRRSIPLIKAIRESTELPQDKVILSIDTYRSNVAKEAIKVGVDIINDISGGLFDSNMFAVIAENPEICYILSHTRGDISTMNRLAHYENFALGDSIQQEFVHNTDIQQLDDLKDKTVLIRNVGQEIGERYIKAIDNGVKRWQILIDPGLGFAKTWKQNLQIIRHIPILKNYSFTMNSNNSQVYVNLRNMPVLLGPSRKKFIGHITKDVDAKQRDFATGAVVASCIGFGSDMVRVHDVKNCSKSIKLADAIYKGLE,"[(509, 510), (512, 513), (519, 519), (553, 553), (555, 557), (559, 561), (567, 567), (594, 595), (597, 598), (601, 603), (610, 614), (616, 619), (651, 652), (675, 676), (687, 687), (708, 710), (713, 714), (716, 719), (721, 722), (729, 729), (755, 759), (762, 766), (768, 771), (774, 774), (800, 801), (805, 807), (809, 810), (820, 820), (826, 826), (829, 830), (833, 833), (838, 838), (840, 842)]" +CC(C)=CCC/C(C)=C/COP(=O)([O-])OP(=O)([O-])[O-],CC1=CC[C@@H]2[C@H](C1)C2(C)C,MSKILVFGHQNPDSDAIGSSVAFAYLAKEAWGLDTEAVALGTPNEETAYVLDYFGVQAPRVVESAKAEGVETVILTDHNEFQQSISDIKDVTVYGVVDHHRVANFETANPLYMRLEPVGSASSIVYRMFKENGVSVPKELAGLLLSGLISDTLLLKSPTTHASDIPVAKELAELAGVNLEEYGLEMLKAGTNLSSKTAAELIDIDAKTFELNGEAVRVAQVNTVDINDILARQEEIEVAIQEAIVTEGYSDFVLMITDIVNSNSEILALGSNMAKVEAAFEFTLENNHAFLAGAVSRKKQVVPQLTESYNA,"[(7, 8), (10, 12), (14, 14), (16, 20), (40, 40), (44, 44), (47, 47), (75, 76), (78, 79), (83, 83), (97, 98), (100, 101), (115, 122), (124, 124), (146, 150), (152, 153), (157, 157), (159, 159), (205, 205), (207, 207), (298, 298)]" 
+O,O=[N+]([O-])c1ccc(O)cc1,MDIMNEKVKKIIEFMDKNSIDAVLIAKNPNVYYISGASPLAGGYILITGESATLYVPELEYEMAKEESNIPVEKFKKMDEFYKALEGIKSLGIESSLPYGFIEELKKKANIKEFKKVDDVIRDMRIIKSEKEIKIIEKACEIADKAVMAAIEEITEGKKEREVAAKVEYLMKMNGAEKPAFDTIIASGYRSALPHGVASDKRIERGDLVVIDLGALYQHYNSDITRTIVVGSPNEKQKEIYEIVLEAQKKAVESAKPGITAKELDSIARNIIAEYGYGEYFNHSLGHGVGLEVHEWPRVSQYDETVLREGMVITIEPGIYIPKIGGVRIEDTILITKNGSKRLTKTERELI,"[(140, 140), (181, 184), (210, 211), (213, 214), (221, 222), (224, 226), (248, 248), (281, 281), (283, 286), (288, 289), (293, 295), (297, 299), (312, 315), (317, 318), (320, 320), (328, 329), (331, 332), (344, 344)]" +NC(=O)CC[C@H](N)C(=O)O,N[C@@H](CCC(=O)[O-])C(=O)[O-],MNFYSAYQHGFVRVAACTHHTTIGDPAANAASVLDMARACHDDGAALAVFPELTLSGYSIEDVLLQDSLLDAVEDALLDLVTESADLLPVLVVGAPLRHRHRIYNTAVVIHRGAVLGVVPKSYLPTYREFYERRQMAPGDGERGTIRIGGADVAFGTDLLFAASDLPGFVLHVEICEDMFVPMPPSAEAALAGATVLANLSGSPITIGRAEDRRLLARSASARCLAAYVYAAAGEGESTTDLAWDGQTMIWENGALLAESERFPKGVRRSVADVDTELLRSERLRMGTFDDNRRHHRELTESFRRIDFALDPPAGDIGLLREVERFPFVPADPQRLQQDCYEAYNIQVSGLEQRLRALDYPKVVIGVSGGLDSTHALIVATHAMDREGRPRSDILAFALPGFATGEHTKNNAIKLARALGVTFSEIDIGDTARLMLHTIGHPYSVGEKVYDVTFENVQAGLRTDYLFRIANQRGGIVLGTGDLSELALGWSTYGVGDQMSHYNVNAGVPKTLIQHLIRWVISAGEFGEKVGEVLQSVLDTEITPELIPTGEEELQSSEAKVGPFALQDFSLFQVLRYGFRPSKIAFLAWHAWNDAERGNWPPGFPKSERPSYSLAEIRHWLQIFVQRFYSFSQFKRSALPNGPKVSHGGALSPRGDWRAPSDMSARIWLDQIDREVPKG,"[(52, 52), (58, 58), (125, 126), (128, 130), (177, 177), (201, 202), (204, 208), (210, 214), (230, 230), (232, 232), (243, 244), (350, 353), (355, 358), (361, 365), (374, 377), (396, 399), (410, 412), (423, 423), (452, 455), (457, 461), (466, 470), (472, 474), (476, 479), (481, 484), (486, 489), (494, 500), (502, 503), (505, 505), (510, 510), (513, 514), (517, 517), (537, 538), (541, 542), (557, 557), (560, 561), (564, 564), (627, 627), (630, 634), (636, 641), (659, 660), (662, 663)]" 
+O=O,O=C[O-],MPQLEASLELDFQSESYKDAYSRINAIVIEGEQEAFDNYNRLAEMLPDQRDELHKLAKMEQRHMKGFMACGKNLSVTPDMGFAQKFFERLHENFKAAAAEGKVVTCLLIQSLIIECFAIAAYNIYIPVADAFARKITEGVVRDEYLHRNFGEEWLKANFDASKAELEEANRQNLPLVWLMLNEVADDARELGMERESLVEDFMIAYGEALENIGFTTREIMRMSAYGLAAV,"[(28, 31), (33, 36), (39, 39), (56, 59), (61, 62), (64, 68), (109, 114), (116, 120), (122, 122), (139, 140), (143, 146), (148, 151)]" +C[C@H](CCC(=O)NCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)O)C,NCC(=O)O,MCTGLALETKDGLHLFGRNMDIEYSFNQSIIFIPRNFKCVNKSNKKELTTKYAVLGMGTIFDDYPTFADGMNEKGLGCAGLNFPVYVSYSKEDIEGKTNIPVYNFLLWVLANFSSVEEVKEALKNANIVDIPISENIPNTTLHWMISDITGKSIVVEQTKEKLNVFDNNIGVLTNSPTFDWHVANLNQYVGLRYNQVPEFKLGDQSLTALGQGTGLVGLPGDFTPASRFIRVAFLRDAMIKNDKDSIDLIEFFHILNNVAMVRGSTRTVEEKSDLTQYTSCMCLEKGIYYYNTYENNQINAIDMNKENLDGNEIKTYKYNKTLSINHVN,"[(3, 5), (16, 17), (19, 21), (68, 69), (79, 81), (83, 84), (140, 143), (173, 176), (228, 228), (252, 252), (256, 256), (278, 280)]" +C1CCNC(=O)[C@H](C1)N,C1CCNC(=O)[C@@H](C1)N,MTKALYDRDGAAIGNLQKLRFFPLAISGGRGARLIEENGRELIDLSGAWGAASLGYGHPAIVAAVSAAAANPAGATILSASNAPAVTLAERLLASFPGEGTHKIWFGHSGSDANEAAYRAIVKATGRSGVIAFAGAYHGCTVGSMAFSGHSVQADAAKADGLILLPYPDPYRPYRNDPTGDAILTLLTEKLAAVPAGSIGAAFIEPIQSDGGLIVPPDGFLRKFADICRAHGILVVCDEVKVGLARSGRLHCFEHEGFVPDILVLGKGLGGGLPLSAVIAPAEILDCASAFAMQTLHGNPISAAAGLAVLETIDRDDLPAMAERKGRLLRDGLSELAKRHPLIGDIRGRGLACGMELVCDRQSREPARAETAKLIYRAYQLGLVVYYVGMNGNVLEFTPPLTITETDIHKALDLLDRAFSELSAVSNEEIAQFAGW,"[(49, 50), (75, 76), (78, 78), (108, 109), (112, 116), (135, 136), (138, 141), (144, 144), (148, 150), (152, 153), (204, 206), (209, 210), (236, 237), (242, 244), (246, 246), (250, 253), (263, 268), (276, 276), (293, 294), (296, 297), (396, 396), (398, 398)]" 
"""Example: optimize an enzyme sequence with the GT4SD enzeptional framework.

The script syncs the enzeptional scorers into the local GT4SD cache, reads one
experiment (substrate, product, sequence, mutable intervals) from a CSV file,
builds an ``EnzymeOptimizer`` and runs a short optimization.
"""

import ast
import logging
from typing import List, Tuple

import pandas as pd
from gt4sd.configuration import GT4SDConfiguration, sync_algorithm_with_s3
from gt4sd.frameworks.enzeptional.core import EnzymeOptimizer, SequenceMutator
from gt4sd.frameworks.enzeptional.processing import HFandTAPEModelUtility


def initialize_environment() -> str:
    """Synchronize with GT4SD S3 storage and return the scorer path.

    Returns:
        Path of the feasibility scorer (``model.pkl``) below the local GT4SD
        cache directory.
    """
    # NOTE: For those interested in optimizing kcat values, it is important to
    # adjust the scorer path to reflect this focus, thereby selecting the
    # appropriate model for kcat optimization. The specification of the scaler,
    # located within the same directory as the `scorer.pkl`, is mandatory for
    # accurate model performance.
    configuration = GT4SDConfiguration.get_instance()
    sync_algorithm_with_s3("proteins/enzeptional/scorers", module="properties")
    return (
        f"{configuration.gt4sd_local_cache_path}"
        "/properties/proteins/enzeptional/scorers/feasibility/model.pkl"
    )


def load_experiment_parameters(
    csv_path: str = "data.csv", row_index: int = 1
) -> Tuple[str, str, str, List[Tuple[int, int]]]:
    """Load the parameters of one experiment from a CSV file.

    The file must provide the columns ``substrates``, ``products``,
    ``sequences`` and ``intervals``; the ``intervals`` cell holds a Python
    literal such as ``"[(5, 10), (20, 25)]"``.

    Args:
        csv_path: CSV file to read. A relative path is resolved against the
            current working directory.
        row_index: Zero-based position of the data row to use. The default
            of 1 selects the second data row of the file.

    Returns:
        A tuple ``(substrate_smiles, product_smiles, sequence, intervals)``.

    Raises:
        ValueError, SyntaxError: If the ``intervals`` cell is not a valid
            Python literal.
    """
    row = pd.read_csv(csv_path).iloc[row_index]
    # literal_eval only accepts Python literals, so a crafted CSV cell cannot
    # execute code the way it could with eval().
    intervals = ast.literal_eval(row["intervals"])
    return row["substrates"], row["products"], row["sequences"], intervals


def setup_optimizer(
    substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path
):
    """Set up and return the optimizer with all necessary components configured.

    Args:
        substrate_smiles: SMILES string of the substrate.
        product_smiles: SMILES string of the product.
        sample_sequence: Enzyme sequence used as the optimization start point.
        intervals: Intervals forwarded to ``EnzymeOptimizer``
            (presumably the sequence regions allowed to mutate; confirm
            against the framework documentation).
        scorer_path: Path of the scorer file, as returned by
            ``initialize_environment``.

    Returns:
        A configured ``EnzymeOptimizer`` instance.
    """
    # One ESM-2 checkpoint serves as embedding model, tokenizer and unmasking
    # model; one ChemBERTa checkpoint serves as chemistry model and tokenizer.
    protein_model_path = "facebook/esm2_t33_650M_UR50D"
    chem_model_path = "seyonec/ChemBERTa-zinc-base-v1"

    protein_model = HFandTAPEModelUtility(
        embedding_model_path=protein_model_path,
        tokenizer_path=protein_model_path,
    )
    mutation_config = {
        "type": "language-modeling",
        "embedding_model_path": protein_model_path,
        "tokenizer_path": protein_model_path,
        "unmasking_model_path": protein_model_path,
    }
    mutator = SequenceMutator(
        sequence=sample_sequence, mutation_config=mutation_config
    )

    return EnzymeOptimizer(
        sequence=sample_sequence,
        protein_model=protein_model,
        substrate_smiles=substrate_smiles,
        product_smiles=product_smiles,
        chem_model_path=chem_model_path,
        chem_tokenizer_path=chem_model_path,
        scorer_filepath=scorer_path,
        mutator=mutator,
        intervals=intervals,
        batch_size=5,
        top_k=3,
        selection_ratio=0.25,
        perform_crossover=True,
        crossover_type="single_point",
        concat_order=["substrate", "sequence", "product"],
    )


def optimize_sequences(optimizer):
    """Optimize sequences using the configured optimizer.

    Args:
        optimizer: Optimizer built by ``setup_optimizer``.

    Returns:
        The result of ``optimizer.optimize``, which ``main`` unpacks as the
        pair ``(optimized_sequences, iteration_info)``.
    """
    return optimizer.optimize(
        num_iterations=3, num_sequences=5, num_mutations=5, time_budget=3600
    )


def main() -> None:
    """Run the full example: sync scorers, load data, optimize, log results."""
    logging.basicConfig(level=logging.INFO)
    scorer_path = initialize_environment()
    (
        substrate_smiles,
        product_smiles,
        sample_sequence,
        intervals,
    ) = load_experiment_parameters()
    optimizer = setup_optimizer(
        substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path
    )
    optimized_sequences, iteration_info = optimize_sequences(optimizer)
    logging.info("Optimization completed.")
    # Report the results instead of discarding them when the script exits.
    logging.info("Optimized sequences: %s", optimized_sequences)
    logging.info("Iteration info: %s", iteration_info)


if __name__ == "__main__":
    main()