% references.bib
@article{deamont2014superconductivity,
title={Superconductivity of In and Sn Samples},
author={Deamont, George and Foreman, Sam},
year={2014}
}
@inproceedings{foreman2018rg,
title={RG-inspired machine learning for lattice field theory},
author={Foreman, Sam and Giedt, Joel and Meurice, Yannick and Unmuth-Yockey, Judah},
booktitle={EPJ Web of Conferences},
volume={175},
pages={11025},
year={2018},
organization={EDP Sciences}
}
@article{hubler2018large,
title={Large energy density in three-plate nanocapacitors due to Coulomb blockade},
author={Hubler, A and Foreman, S and Liu, J and Wortsmann, L},
journal={Journal of Applied Physics},
volume={123},
number={10},
year={2018},
publisher={AIP Publishing}
}
@article{foreman2018examples,
title={Examples of renormalization group transformations for image sets},
author={Foreman, Samuel and Giedt, Joel and Meurice, Yannick and Unmuth-Yockey, Judah},
journal={Physical Review E},
volume={98},
number={5},
pages={052129},
year={2018},
publisher={American Physical Society}
}
@inproceedings{foreman2018machine,
title={Machine learning inspired analysis of the Ising model transition},
author={Foreman, Samuel and Giedt, Joel and Meurice, Yannick and Unmuth-Yockey, Judah},
booktitle={Lattice 2018},
year={2018}
}
@phdthesis{foreman2019learning,
title={Learning Better Physics: A Machine Learning Approach to Lattice Gauge Theory},
author={Foreman, Samuel Alfred},
year={2019},
school={University of Iowa}
}
@article{foreman2020machine,
title={Machine Learning and Neural Networks for Field Theory},
author={Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
year={2020}
}
@article{foreman2021deep,
title={Deep Learning Hamiltonian Monte Carlo},
author={Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal={arXiv preprint arXiv:2105.03418},
year={2021}
}
@article{foreman2021hmc,
title={HMC with normalizing flows},
author={Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin, Xiao-Yong and Osborn, James C and Tomiya, Akio},
journal={arXiv preprint arXiv:2112.01586},
year={2021}
}
@article{foreman2021leapfroglayers,
title={LeapfrogLayers: A Trainable Framework for Effective Topological Sampling},
author={Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal={arXiv preprint arXiv:2112.01582},
year={2021}
}
@article{liu2017energy,
title={Energy storage in quantum resonators},
author={Liu, Jiaqi and Hubler, Alfred W and Foreman, Samuel Alfred and Ott, Katharina},
year={2017}
}
@article{boyda2022applications,
title={Applications of machine learning to lattice quantum field theory},
author={Boyda, Denis and Cal{\`\i}, Salvatore and Foreman, Sam and Funcke, Lena and Hackett, Daniel C and Lin, Yin and Aarts, Gert and Alexandru, Andrei and Jin, Xiao-Yong and Lucini, Biagio and others},
journal={arXiv preprint arXiv:2202.05838},
year={2022}
}
@article{kronfeld2022lattice,
title={Lattice QCD and particle physics},
author={Kronfeld, Andreas S and Bhattacharya, Tanmoy and Blum, Thomas and Christ, Norman H and DeTar, Carleton and Detmold, William and Edwards, Robert and Hasenfratz, Anna and Lin, Huey-Wen and Mukherjee, Swagato and others},
journal={arXiv preprint arXiv:2207.07641},
year={2022}
}
@article{zvyagin2023genslms,
title={GenSLMs: Genome-scale language models reveal SARS-CoV-2 evolutionary dynamics},
author={Zvyagin, Maxim and Brace, Alexander and Hippe, Kyle and Deng, Yuntian and Zhang, Bin and Bohorquez, Cindy Orozco and Clyde, Austin and Kale, Bharat and Perez-Rivera, Danilo and Ma, Heng and others},
journal={The International Journal of High Performance Computing Applications},
volume={37},
number={6},
pages={683--705},
year={2023},
publisher={SAGE Publications Sage UK: London, England}
}
@article{emani2023comprehensive,
title={A Comprehensive Performance Study of Large Language Models on Novel AI Accelerators},
author={Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and Vishwanath, Venkatram and Papka, Michael E},
journal={arXiv preprint arXiv:2310.04607},
year={2023}
}
@article{song2023deepspeed4science,
title={DeepSpeed4Science Initiative: Enabling Large-Scale Scientific Discovery through Sophisticated AI System Technologies},
author={Song, Shuaiwen Leon and Kruft, Bonnie and Zhang, Minjia and Li, Conglong and Chen, Shiyang and Zhang, Chengming and Tanaka, Masahiro and Wu, Xiaoxia and Rasley, Jeff and Awan, Ammar Ahmad and others},
journal={arXiv preprint arXiv:2310.04610},
year={2023}
}
@inproceedings{dharuman2023protein,
title={Protein Generation via Genome-scale Language Models with Bio-physical Scoring},
author={Dharuman, Gautham and Ward, Logan and Ma, Heng and Setty, Priyanka V and Gokdemir, Ozan and Foreman, Sam and Emani, Murali and Hippe, Kyle and Brace, Alexander and Keipert, Kristopher and others},
booktitle={Proceedings of the SC'23 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis},
pages={95--101},
year={2023}
}
@article{foreman2023mlmc,
title={MLMC: Machine Learning Monte Carlo for Lattice Gauge Theory},
author={Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal={arXiv preprint arXiv:2312.08936},
year={2023}
}
@article{shanahan2022snowmass,
title={Snowmass 2021 computational frontier CompF03 topical group report: Machine learning},
author={Shanahan, Phiala and Terao, Kazuhiro and Whiteson, Daniel},
journal={arXiv preprint arXiv:2209.07559},
year={2022}
}
@article{cheng2024thorough,
title={Thorough Characterization and Analysis of Large Transformer Model Training At-Scale},
author={Cheng, Scott and Lin, Jun-Liang and Emani, Murali and Raskar, Siddhisanket and Foreman, Sam and Xie, Zhen and Vishwanath, Venkatram and Kandemir, Mahmut Taylan},
journal={Proceedings of the ACM on Measurement and Analysis of Computing Systems},
volume={8},
number={1},
pages={1--25},
year={2024},
publisher={ACM New York, NY, USA}
}
@article{leung2024communities,
title={Intro to HPC Bootcamp: Engaging New Communities Through Energy Justice Projects},
author={Leung, Mary Ann and Cahill, Katharine and Hartman-Baker, Rebecca and Kinsley, Paige and McInnes, Lois Curfman and Parete-Koon, Suzanne and Abraham, Subil and Barrier, Lacy Beach and Chen, Gladys and DeStefano, Lizanne and others},
journal={Journal of Computational Science Education},
volume={15},
number={1},
year={2024}
}
@inproceedings{arcomano2023applications,
title={Applications of a Foundation Model Approach for Weather and Climate},
author={Arcomano, Troy and Wikner, Alexander and Maulik, Romit and Kotamarthi, Veerabhadra Rao and Foreman, Sam},
booktitle={AGU Fall Meeting Abstracts},
volume={2023},
pages={GC22C--06},
year={2023}
}
@inproceedings{emani2024toward,
title={Toward a holistic performance evaluation of large language models across diverse {AI} accelerators},
author={Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and Vishwanath, Venkatram and Papka, Michael E and Shanmugavelu, Sanjif and others},
booktitle={2024 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)},
pages={1--10},
year={2024},
organization={IEEE}
}
@article{parete2024intro,
title={Intro to HPC Bootcamp: Engaging New Communities Through Energy Justice Projects},
author={Parete-Koon, Suzanne and Sandoval, Michael and Leland, Kellen and Abraham, Subil and Leung, Mary Ann and Hartman-Baker, Rebecca and Kinsley, Paige and McInnes, Lois and Ramprakash, Sreeranjani and Beach Barrier, Lacy and others},
journal={Journal of Computational Science Education},
volume={15},
number={1},
year={2024},
publisher={Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United States)}
}
@inproceedings{dharuman2024mprot,
title={MProt-DPO: Breaking the ExaFLOPS Barrier for Multimodal Protein Design Workflows with Direct Preference Optimization},
author={Dharuman, Gautham and Hippe, Kyle and Brace, Alexander and Foreman, Sam and Hatanp{\"a}{\"a}, V{\"a}in{\"o} and Sastry, Varuni K and Zheng, Huihuo and Ward, Logan and Muralidharan, Servesh and Vasan, Archit and others},
booktitle={SC24: International Conference for High Performance Computing, Networking, Storage and Analysis},
pages={74--86},
year={2024},
organization={IEEE Computer Society}
}
@misc{wei2022emergentabilitieslargelanguage,
title={Emergent Abilities of Large Language Models},
author={Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and William Fedus},
year={2022},
eprint={2206.07682},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2206.07682},
}
@misc{song2023ds4sci,
title = {DeepSpeed4Science Initiative: Enabling Large-Scale Scientific
Discovery through Sophisticated AI System Technologies},
author = {Shuaiwen Leon Song and Bonnie Kruft and Minjia Zhang and Conglong
Li and Shiyang Chen and Chengming Zhang and Masahiro Tanaka and
Xiaoxia Wu and Jeff Rasley and Ammar Ahmad Awan and Connor Holmes
and Martin Cai and Adam Ghanem and Zhongzhu Zhou and Yuxiong He and
Pete Luferenko and Divya Kumar and Jonathan Weyn and Ruixiong Zhang
and Sylwester Klocek and Volodymyr Vragov and Mohammed AlQuraishi
and Gustaf Ahdritz and Christina Floristean and Cristina Negri and
Rao Kotamarthi and Venkatram Vishwanath and Arvind Ramanathan and
Sam Foreman and Kyle Hippe and Troy Arcomano and Romit Maulik and
Maxim Zvyagin and Alexander Brace and Bin Zhang and Cindy Orozco
Bohorquez and Austin Clyde and Bharat Kale and Danilo Perez-Rivera
and Heng Ma and Carla M. Mann and Michael Irvin and J. Gregory
Pauloski and Logan Ward and Valerie Hayot and Murali Emani and Zhen
Xie and Diangen Lin and Maulik Shukla and Ian Foster and James J.
Davis and Michael E. Papka and Thomas Brettin and Prasanna
Balaprakash and Gina Tourassi and John Gounley and Heidi Hanson and
Thomas E Potok and Massimiliano Lupo Pasini and Kate Evans and Dan
Lu and Dalton Lunga and Junqi Yin and Sajal Dash and Feiyi Wang and
Mallikarjun Shankar and Isaac Lyngaas and Xiao Wang and Guojing
Cong and Pei Zhang and Ming Fan and Siyan Liu and Adolfy Hoisie and
Shinjae Yoo and Yihui Ren and William Tang and Kyle Felker and
Alexey Svyatkovskiy and Hang Liu and Ashwin Aji and Angela Dalton
and Michael Schulte and Karl Schulz and Yuntian Deng and Weili Nie
and Josh Romero and Christian Dallago and Arash Vahdat and Chaowei
Xiao and Thomas Gibbs and Anima Anandkumar and Rick Stevens},
year = {2023},
eprint = {2310.04610},
archivePrefix = {arXiv},
primaryClass = {cs.AI},
url = {https://arxiv.org/abs/2310.04610},
}
@misc{Burdi:2023climrr,
  title = {The Climate Risk \& Resilience Portal (ClimRR) Metadata and Data Dictionary},
  author = {Burdi, C. and Branham, J. and Wall, T.},
  year = {2023},
  note = {Available at \url{https://anl.app.box.com/s/hmkkgkrkzxxocfe9kpgrzk2gfc4gizp8/file/1055145398460}},
  url = {https://dub.sh/ClimRR-Metadata},
}
@misc{wittig2023progress,
title = {Progress on $(g-2)_\mu$ from Lattice QCD},
author = {Hartmut Wittig},
year = {2023},
eprint = {2306.04165},
archivePrefix = {arXiv},
primaryClass = {hep-ph},
}
@article{Duane:1987de,
author = "Duane, S. and Kennedy, A. D. and Pendleton, B. J. and Roweth, D.",
title = "{Hybrid Monte Carlo}",
doi = "10.1016/0370-2693(87)91197-X",
journal = "Phys. Lett. B",
volume = "195",
pages = "216--222",
year = "1987",
}
@article{Shanahan:2022ifi,
author = "Shanahan, Phiala and others",
title = "{Snowmass 2021 Computational Frontier CompF03 Topical Group Report:
Machine Learning}",
eprint = "2209.07559",
archivePrefix = "arXiv",
primaryClass = "physics.comp-ph",
reportNumber = "FERMILAB-CONF-22-719-ND-PPD-QIS-SCD",
month = "9",
year = "2022",
}
@inproceedings{Boyda:2022nmh,
author = "Boyda, Denis and others",
title = "{Applications of Machine Learning to Lattice Quantum Field Theory}",
booktitle = "{Snowmass 2021}",
eprint = "2202.05838",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
reportNumber = "MIT-CTP/5405",
month = "2",
year = "2022",
}
@article{Foreman:2021ljl,
author = "Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin,
Xiao-Yong and Osborn, James C. and Tomiya, Akio",
title = "{HMC with Normalizing Flows}",
eprint = "2112.01586",
archivePrefix = "arXiv",
primaryClass = "cs.LG",
doi = "10.22323/1.396.0073",
journal = "PoS",
volume = "LATTICE2021",
pages = "073",
year = "2022",
}
@article{Foreman:2021rhs,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling}",
eprint = "2112.01582",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
doi = "10.22323/1.396.0508",
journal = "PoS",
volume = "LATTICE2021",
pages = "508",
year = "2022",
}
@inproceedings{Foreman:2021ixr,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{Deep Learning Hamiltonian Monte Carlo}",
booktitle = "{9th International Conference on Learning Representations}",
eprint = "2105.03418",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
month = "5",
year = "2021",
}
@online{foreman2023climate,
author = {Foreman, Sam},
title = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
date = {2023-08-07},
url = {https://saforem2.github.io/climate-analysis},
langid = {en},
}
@misc{foreman2023-l2hmcqcd,
  title = {l2hmc-qcd},
  author = {Foreman, Sam},
  date = {2023-08-19},
  url = {https://saforem2.github.io/l2hmc-qcd},
  langid = {en},
}
@misc{Montgomery_2023,
title = {Mastering language models},
url = {https://towardsdatascience.com/mastering-language-models-32e1d891511a},
journal = {Medium},
publisher = {Towards Data Science},
author = {Montgomery, Samuel},
year = {2023},
month = {Oct},
}
@misc{yang2023harnessing,
title = {Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and
Beyond},
author = {Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han
and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
year = {2023},
eprint = {2304.13712},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Popel_2018,
doi = {10.2478/pralin-2018-0002},
url = {https://doi.org/10.2478/pralin-2018-0002},
year = 2018,
month = {apr},
publisher = {Charles University in Prague, Karolinum Press},
volume = {110},
number = {1},
pages = {43--70},
author = {Martin Popel and Ond{\v{r}}ej Bojar},
title = {Training Tips for the Transformer Model},
journal = {The Prague Bulletin of Mathematical Linguistics},
}
@misc{vaswani2017attention,
title = {Attention Is All You Need},
author = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob
Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and
Illia Polosukhin},
year = {2017},
eprint = {1706.03762},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@misc{yao2023tree,
title = {Tree of Thoughts: Deliberate Problem Solving with Large Language
Models},
author = {Shunyu Yao and Dian Yu and Jeffrey Zhao and Izhak Shafran and
Thomas L. Griffiths and Yuan Cao and Karthik Narasimhan},
year = {2023},
eprint = {2305.10601},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Zvyagin2022.10.10.511571,
author = {Maxim Zvyagin and Alexander Brace and Kyle Hippe and Yuntian Deng
and Bin Zhang and Cindy Orozco Bohorquez and Austin Clyde and
Bharat Kale and Danilo Perez-Rivera and Heng Ma and Carla M. Mann
and Michael Irvin and J. Gregory Pauloski and Logan Ward and
Valerie Hayot-Sasson and Murali Emani and Sam Foreman and Zhen Xie
and Diangen Lin and Maulik Shukla and Weili Nie and Josh Romero and
Christian Dallago and Arash Vahdat and Chaowei Xiao and Thomas
Gibbs and Ian Foster and James J. Davis and Michael E. Papka and
Thomas Brettin and Rick Stevens and Anima Anandkumar and Venkatram
Vishwanath and Arvind Ramanathan},
title = {GenSLMs: Genome-scale language models reveal SARS-CoV-2
evolutionary dynamics},
elocation-id = {2022.10.10.511571},
year = {2022},
doi = {10.1101/2022.10.10.511571},
publisher = {Cold Spring Harbor Laboratory},
abstract = {We seek to transform how new and emergent variants of
pandemic-causing viruses, specifically SARS-CoV-2, are identified
and classified. By adapting large language models (LLMs) for
genomic data, we build genome-scale language models (GenSLMs)
which can learn the evolutionary landscape of SARS-CoV-2 genomes.
By pretraining on over 110 million prokaryotic gene sequences and
finetuning a SARS-CoV-2-specific model on 1.5 million genomes, we
show that GenSLMs can accurately and rapidly identify variants of
concern. Thus, to our knowledge, GenSLMs represents one of the
first whole genome scale foundation models which can generalize
to other prediction tasks. We demonstrate scaling of GenSLMs on
GPU-based supercomputers and AI-hardware accelerators utilizing
1.63 Zettaflops in training runs with a sustained performance of
121 PFLOPS in mixed precision and peak of 850 PFLOPS. We present
initial scientific insights from examining GenSLMs in tracking
evolutionary dynamics of SARS-CoV-2, paving the path to realizing
this on large biological data.},
URL = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571},
eprint = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571.full.pdf},
journal = {bioRxiv},
}