@article{Hinton06,
author = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
journal = {Neural Computation},
pages = {1527--1554},
title = {A Fast Learning Algorithm for Deep Belief Nets},
volume = {18},
year = {2006}
}
@book{goodfellow2016deep,
title = {Deep learning},
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
volume = {1},
year = {2016},
publisher = {MIT Press}
}
@article{wang2023far,
title = {How Far Can Camels Go? Exploring the State of Instruction Tuning on Open Resources},
author = {Wang, Yizhong and Ivison, Hamish and Dasigi, Pradeep and Hessel, Jack and Khot, Tushar and Chandu, Khyathi Raghavi and Wadden, David and MacMillan, Kelsey and Smith, Noah A and Beltagy, Iz and others},
journal = {arXiv preprint arXiv:2306.04751},
year = {2023}
}
@article{kuznetsova2020open,
title = {The open images dataset v4: Unified image classification, object detection, and visual relationship detection at scale},
author = {Kuznetsova, Alina and Rom, Hassan and Alldrin, Neil and Uijlings, Jasper and Krasin, Ivan and Pont-Tuset, Jordi and Kamali, Shahab and Popov, Stefan and Malloci, Matteo and Kolesnikov, Alexander and others},
journal = {International Journal of Computer Vision},
volume = {128},
number = {7},
pages = {1956--1981},
year = {2020},
publisher = {Springer}
}
@inproceedings{biten2022let,
title = {Let there be a clock on the beach: Reducing object hallucination in image captioning},
author = {Biten, Ali Furkan and G{\'o}mez, Llu{\'\i}s and Karatzas, Dimosthenis},
booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages = {1381--1390},
year = {2022}
}
@inproceedings{macleod2017understanding,
title = {Understanding blind people's experiences with computer-generated captions of social media images},
author = {MacLeod, Haley and Bennett, Cynthia L and Morris, Meredith Ringel and Cutrell, Edward},
booktitle = {Proceedings of the 2017 CHI conference on human factors in computing systems},
pages = {5988--5999},
year = {2017}
}
@article{shi2023replug,
title = {Replug: Retrieval-augmented black-box language models},
author = {Shi, Weijia and Min, Sewon and Yasunaga, Michihiro and Seo, Minjoon and James, Rich and Lewis, Mike and Zettlemoyer, Luke and Yih, Wen-tau},
journal = {arXiv preprint arXiv:2301.12652},
year = {2023}
}
@article{mckenna2023sources,
title = {Sources of Hallucination by Large Language Models on Inference Tasks},
author = {McKenna, Nick and Li, Tianyi and Cheng, Liang and Hosseini, Mohammad Javad and Johnson, Mark and Steedman, Mark},
journal = {arXiv preprint arXiv:2305.14552},
year = {2023}
}
@article{glaese2022improving,
title = {Improving alignment of dialogue agents via targeted human judgements},
author = {Glaese, Amelia and McAleese, Nat and Tr{\k{e}}bacz, Maja and Aslanides, John and Firoiu, Vlad and Ewalds, Timo and Rauh, Maribeth and Weidinger, Laura and Chadwick, Martin and Thacker, Phoebe and others},
journal = {arXiv preprint arXiv:2209.14375},
year = {2022}
}
@article{ouyang2022training,
title = {Training language models to follow instructions with human feedback},
author = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
journal = {Advances in Neural Information Processing Systems},
volume = {35},
pages = {27730--27744},
year = {2022}
}
@article{bai2022training,
title = {Training a helpful and harmless assistant with reinforcement learning from human feedback},
author = {Bai, Yuntao and Jones, Andy and Ndousse, Kamal and Askell, Amanda and Chen, Anna and DasSarma, Nova and Drain, Dawn and Fort, Stanislav and Ganguli, Deep and Henighan, Tom and others},
journal = {arXiv preprint arXiv:2204.05862},
year = {2022}
}
@article{OpenAI2023GPT4TR,
title = {GPT-4 Technical Report},
author = {OpenAI},
journal = {ArXiv},
year = {2023},
volume = {abs/2303.08774}
}
@article{zhu2023minigpt,
title = {MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models},
author = {Zhu, Deyao and Chen, Jun and Shen, Xiaoqian and Li, Xiang and Elhoseiny, Mohamed},
journal = {arXiv preprint arXiv:2304.10592},
year = {2023}
}
@article{li2023otter,
title = {Otter: A Multi-Modal Model with In-Context Instruction Tuning},
author = {Li, Bo and Zhang, Yuanhan and Chen, Liangyu and Wang, Jinghao and Yang, Jingkang and Liu, Ziwei},
journal = {arXiv preprint arXiv:2305.03726},
year = {2023}
}
@article{gao2023llama,
title = {Llama-adapter v2: Parameter-efficient visual instruction model},
author = {Gao, Peng and Han, Jiaming and Zhang, Renrui and Lin, Ziyi and Geng, Shijie and Zhou, Aojun and Zhang, Wei and Lu, Pan and He, Conghui and Yue, Xiangyu and others},
journal = {arXiv preprint arXiv:2304.15010},
year = {2023}
}
@misc{zhang2023llamaadapter,
title = {LLaMA-Adapter: Efficient Fine-tuning of Language Models with Zero-init Attention},
author = {Renrui Zhang and Jiaming Han and Chris Liu and Peng Gao and Aojun Zhou and Xiangfei Hu and Shilin Yan and Pan Lu and Hongsheng Li and Yu Qiao},
year = {2023},
eprint = {2303.16199},
archiveprefix = {arXiv},
primaryclass = {cs.CV}
}
@article{brown2020language,
title = {Language models are few-shot learners},
author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
journal = {Advances in neural information processing systems},
volume = {33},
pages = {1877--1901},
year = {2020}
}
@article{chiang2023vicuna,
title = {Vicuna: An open-source chatbot impressing GPT-4 with 90\%* ChatGPT quality},
author = {Chiang, Wei-Lin and Li, Zhuohan and Lin, Zi and Sheng, Ying and Wu, Zhanghao and Zhang, Hao and Zheng, Lianmin and Zhuang, Siyuan and Zhuang, Yonghao and Gonzalez, Joseph E and others},
journal = {See https://vicuna.lmsys.org (accessed 14 April 2023)},
year = {2023}
}
@article{dai2023instructblip,
title = {Instructblip: Towards general-purpose vision-language models with instruction tuning},
author = {Dai, Wenliang and Li, Junnan and Li, Dongxu and Tiong, Anthony Meng Huat and Zhao, Junqi and Wang, Weisheng and Li, Boyang and Fung, Pascale and Hoi, Steven},
journal = {arXiv preprint arXiv:2305.06500},
year = {2023}
}
@inproceedings{du2022glm,
title = {GLM: General Language Model Pretraining with Autoregressive Blank Infilling},
author = {Du, Zhengxiao and Qian, Yujie and Liu, Xiao and Ding, Ming and Qiu, Jiezhong and Yang, Zhilin and Tang, Jie},
booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {320--335},
year = {2022}
}
@article{longpre2023flan,
title = {The flan collection: Designing data and methods for effective instruction tuning},
author = {Longpre, Shayne and Hou, Le and Vu, Tu and Webson, Albert and Chung, Hyung Won and Tay, Yi and Zhou, Denny and Le, Quoc V and Zoph, Barret and Wei, Jason and others},
journal = {arXiv preprint arXiv:2301.13688},
year = {2023}
}
@article{liu2023mmbench,
title = {MMBench: Is Your Multi-modal Model an All-around Player?},
author = {Liu, Yuan and Duan, Haodong and Zhang, Yuanhan and Li, Bo and Zhang, Songyang and Zhao, Wangbo and Yuan, Yike and Wang, Jiaqi and He, Conghui and Liu, Ziwei and others},
journal = {arXiv preprint arXiv:2307.06281},
year = {2023}
}
@article{hu2021lora,
title = {Lora: Low-rank adaptation of large language models},
author = {Hu, Edward J and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Wang, Lu and Chen, Weizhu},
journal = {arXiv preprint arXiv:2106.09685},
year = {2021}
}
@article{ye2023mplug,
title = {mplug-owl: Modularization empowers large language models with multimodality},
author = {Ye, Qinghao and Xu, Haiyang and Xu, Guohai and Ye, Jiabo and Yan, Ming and Zhou, Yiyang and Wang, Junyang and Hu, Anwen and Shi, Pengcheng and Shi, Yaya and others},
journal = {arXiv preprint arXiv:2304.14178},
year = {2023}
}
@article{li2023blip,
title = {Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models},
author = {Li, Junnan and Li, Dongxu and Savarese, Silvio and Hoi, Steven},
journal = {arXiv preprint arXiv:2301.12597},
year = {2023}
}
@article{radford2019language,
title = {Language models are unsupervised multitask learners},
author = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
journal = {OpenAI blog},
volume = {1},
number = {8},
pages = {9},
year = {2019}
}
@article{li2023mimic,
title = {MIMIC-IT: Multi-Modal In-Context Instruction Tuning},
author = {Bo Li and Yuanhan Zhang and Liangyu Chen and Jinghao Wang and Fanyi Pu and Jingkang Yang and Chunyuan Li and Ziwei Liu},
year = {2023},
eprint = {2306.05425},
archiveprefix = {arXiv},
primaryclass = {cs.CV}
}
@inproceedings{vqav2,
title = {Making the v in vqa matter: Elevating the role of image understanding in visual question answering},
author = {Goyal, Yash and Khot, Tejas and Summers-Stay, Douglas and Batra, Dhruv and Parikh, Devi},
booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
pages = {6904--6913},
year = {2017}
}
@article{coco_caption,
title = {Microsoft coco captions: Data collection and evaluation server},
author = {Chen, Xinlei and Fang, Hao and Lin, Tsung-Yi and Vedantam, Ramakrishna and Gupta, Saurabh and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
journal = {arXiv preprint arXiv:1504.00325},
year = {2015}
}
@inproceedings{hudson2019gqa,
title = {Gqa: A new dataset for real-world visual reasoning and compositional question answering},
author = {Hudson, Drew A and Manning, Christopher D},
booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages = {6700--6709},
year = {2019}
}
@inproceedings{ok-vqa,
title = {Ok-vqa: A visual question answering benchmark requiring external knowledge},
author = {Marino, Kenneth and Rastegari, Mohammad and Farhadi, Ali and Mottaghi, Roozbeh},
booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages = {3195--3204},
year = {2019}
}
@inproceedings{Xu2023LVLMeHubAC,
title = {LVLM-eHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models},
author = {Peng Xu and Wenqi Shao and Kaipeng Zhang and Peng Gao and Shuo Liu and Meng Lei and Fanqing Meng and Siyuan Huang and Yu Jiao Qiao and Ping Luo},
year = {2023}
}
@misc{zheng2023judging,
title = {Judging LLM-as-a-judge with MT-Bench and Chatbot Arena},
author = {Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica},
year = {2023},
eprint = {2306.05685},
archiveprefix = {arXiv},
primaryclass = {cs.CL}
}
@book{oaksford2007bayesian,
title = {Bayesian rationality: The probabilistic approach to human reasoning},
author = {Oaksford, Mike and Chater, Nick},
year = {2007},
publisher = {Oxford University Press}
}
@book{fodor1983modularity,
title = {The modularity of mind},
author = {Fodor, Jerry A},
year = {1983},
publisher = {MIT press}
}
@inproceedings{nocaps,
title = {Nocaps: Novel object captioning at scale},
author = {Agrawal, Harsh and Desai, Karan and Wang, Yufei and Chen, Xinlei and Jain, Rishabh and Johnson, Mark and Batra, Dhruv and Parikh, Devi and Lee, Stefan and Anderson, Peter},
booktitle = {Proceedings of the IEEE/CVF international conference on computer vision},
pages = {8948--8957},
year = {2019}
}
@article{flickr30k,
title = {From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions},
author = {Young, Peter and Lai, Alice and Hodosh, Micah and Hockenmaier, Julia},
journal = {Transactions of the Association for Computational Linguistics},
volume = {2},
pages = {67--78},
year = {2014},
publisher = {MIT Press}
}
@inproceedings{vizwiz,
title = {Vizwiz grand challenge: Answering visual questions from blind people},
author = {Gurari, Danna and Li, Qing and Stangl, Abigale J and Guo, Anhong and Lin, Chi and Grauman, Kristen and Luo, Jiebo and Bigham, Jeffrey P},
booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
pages = {3608--3617},
year = {2018}
}
@inproceedings{textvqa,
title = {Towards vqa models that can read},
author = {Singh, Amanpreet and Natarajan, Vivek and Shah, Meet and Jiang, Yu and Chen, Xinlei and Batra, Dhruv and Parikh, Devi and Rohrbach, Marcus},
booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages = {8317--8326},
year = {2019}
}
@article{scienceqa,
title = {Learn to explain: Multimodal reasoning via thought chains for science question answering},
author = {Lu, Pan and Mishra, Swaroop and Xia, Tanglin and Qiu, Liang and Chang, Kai-Wei and Zhu, Song-Chun and Tafjord, Oyvind and Clark, Peter and Kalyan, Ashwin},
journal = {Advances in Neural Information Processing Systems},
volume = {35},
pages = {2507--2521},
year = {2022}
}
@inproceedings{youcook2,
title = {Towards automatic learning of procedures from web instructional videos},
author = {Zhou, Luowei and Xu, Chenliang and Corso, Jason},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
volume = {32},
number = {1},
year = {2018}
}
@article{laurenccon2023obelisc,
title = {OBELISC: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents},
author = {Lauren{\c{c}}on, Hugo and Saulnier, Lucile and Tronchon, L{\'e}o and Bekman, Stas and Singh, Amanpreet and Lozhkov, Anton and Wang, Thomas and Karamcheti, Siddharth and Rush, Alexander M and Kiela, Douwe and others},
journal = {arXiv preprint arXiv:2306.16527},
year = {2023}
}
@inproceedings{singh2019towards,
title = {Towards VQA Models That Can Read},
author = {Singh, Amanpreet and Natarajan, Vivek and Shah, Meet and Jiang, Yu and Chen, Xinlei and Parikh, Devi and Rohrbach, Marcus},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {8317--8326},
year = {2019}
}
@article{zhou2017places,
title = {Places: A 10 Million Image Database for Scene Recognition},
author = {Zhou, Bolei and Lapedriza, Agata and Khosla, Aditya and Oliva, Aude and Torralba, Antonio},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
year = {2017},
publisher = {IEEE}
}
@misc{2023internlm,
title = {InternLM: A Multilingual Language Model with Progressively Enhanced Capabilities},
author = {InternLM Team},
howpublished = {\url{https://github.com/InternLM/InternLM-techreport}},
year = {2023}
}
@misc{W3Cschool,
title = {W3C school},
howpublished = {\url{https://www.w3schools.com/}},
year = {2023}
}
@misc{ARAS,
doi = {10.48550/ARXIV.2209.09393},
url = {https://arxiv.org/abs/2209.09393},
author = {Duan, Haodong and Zhao, Yue and Chen, Kai and Xiong, Yuanjun and Lin, Dahua},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
title = {Mitigating Representation Bias in Action Recognition: Algorithms and Benchmarks},
publisher = {arXiv},
year = {2022},
copyright = {arXiv.org perpetual, non-exclusive license}
}
@inproceedings{johnson2017clevr,
title = {Clevr: A diagnostic dataset for compositional language and elementary visual reasoning},
author = {Johnson, Justin and Hariharan, Bharath and Van Der Maaten, Laurens and Fei-Fei, Li and Lawrence Zitnick, C and Girshick, Ross},
booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
pages = {2901--2910},
year = {2017}
}
@inproceedings{li2017dual,
title = {Dual-glance model for deciphering social relationships},
author = {Li, Junnan and Wong, Yongkang and Zhao, Qi and Kankanhalli, Mohan S},
booktitle = {Proceedings of the IEEE international conference on computer vision},
pages = {2650--2659},
year = {2017}
}
@article{koniq10k,
author = {Hosu, V. and Lin, H. and Sziranyi, T. and Saupe, D.},
journal = {IEEE Transactions on Image Processing},
title = {KonIQ-10k: An Ecologically Valid Database for Deep Learning of Blind Image Quality Assessment},
year = {2020},
volume = {29},
pages = {4041--4056}
}
@article{Liu2022VisualSR,
title = {Visual Spatial Reasoning},
author = {Fangyu Liu and Guy Edward Toh Emerson and Nigel Collier},
journal = {Transactions of the Association for Computational Linguistics},
year = {2023}
}
@article{huang2023c,
title = {C-eval: A multi-level multi-discipline chinese evaluation suite for foundation models},
author = {Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and others},
journal = {arXiv preprint arXiv:2305.08322},
year = {2023}
}
@article{hendrycks2020measuring,
title = {Measuring massive multitask language understanding},
author = {Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob},
journal = {arXiv preprint arXiv:2009.03300},
year = {2020}
}
@misc{gong2023multimodalgpt,
title = {MultiModal-GPT: A Vision and Language Model for Dialogue with Humans},
author = {Tao Gong and Chengqi Lyu and Shilong Zhang and Yudong Wang and Miao Zheng and Qian Zhao and Kuikun Liu and Wenwei Zhang and Ping Luo and Kai Chen},
year = {2023},
eprint = {2305.04790},
archiveprefix = {arXiv},
primaryclass = {cs.CV}
}
@article{chen2023shikra,
title = {Shikra: Unleashing Multimodal LLM's Referential Dialogue Magic},
author = {Chen, Keqin and Zhang, Zhao and Zeng, Weili and Zhang, Richong and Zhu, Feng and Zhao, Rui},
journal = {arXiv preprint arXiv:2306.15195},
year = {2023}
}
@article{peng2023kosmos,
title = {Kosmos-2: Grounding Multimodal Large Language Models to the World},
author = {Peng, Zhiliang and Wang, Wenhui and Dong, Li and Hao, Yaru and Huang, Shaohan and Ma, Shuming and Wei, Furu},
journal = {arXiv preprint arXiv:2306.14824},
year = {2023}
}
@article{su2023pandagpt,
title = {PandaGPT: One Model To Instruction-Follow Them All},
author = {Su, Yixuan and Lan, Tian and Li, Huayang and Xu, Jialu and Wang, Yan and Cai, Deng},
journal = {arXiv preprint arXiv:2305.16355},
year = {2023}
}
@article{Fu2023MMEAC,
title = {MME: A Comprehensive Evaluation Benchmark for Multimodal Large Language Models},
author = {Chaoyou Fu and Peixian Chen and Yunhang Shen and Yulei Qin and Mengdan Zhang and Xu Lin and Zhenyu Qiu and Wei Lin and Jinrui Yang and Xiawu Zheng and Ke Li and Xing Sun and Rongrong Ji},
journal = {ArXiv},
year = {2023},
volume = {abs/2306.13394},
url = {https://api.semanticscholar.org/CorpusID:259243928}
}
@article{schuhmann2021laion,
title = {Laion-400m: Open dataset of clip-filtered 400 million image-text pairs},
author = {Schuhmann, Christoph and Vencu, Richard and Beaumont, Romain and Kaczmarczyk, Robert and Mullis, Clayton and Katta, Aarush and Coombes, Theo and Jitsev, Jenia and Komatsuzaki, Aran},
journal = {arXiv preprint arXiv:2111.02114},
year = {2021}
}
@incollection{Bengio+chapter2007,
author = {Bengio, Yoshua and LeCun, Yann},
booktitle = {Large Scale Kernel Machines},
publisher = {MIT Press},
title = {Scaling Learning Algorithms Towards {AI}},
year = {2007}
}
@article{sun2023principle,
title = {Principle-driven self-alignment of language models from scratch with minimal human supervision},
author = {Sun, Zhiqing and Shen, Yikang and Zhou, Qinhong and Zhang, Hongxin and Chen, Zhenfang and Cox, David and Yang, Yiming and Gan, Chuang},
journal = {arXiv preprint arXiv:2305.03047},
year = {2023}
}
@misc{alpaca,
author = {Rohan Taori and Ishaan Gulrajani and Tianyi Zhang and Yann Dubois and Xuechen Li and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto},
title = {Stanford Alpaca: An Instruction-following LLaMA model},
year = {2023},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/tatsu-lab/stanford_alpaca}}
}
@misc{textdavinci,
author = {OpenAI},
title = {Model index for researchers},
year = 2023,
howpublished = {\url{https://platform.openai.com/docs/model-index-for-researchers}}
}
@article{devlin2018bert,
title = {{BERT}: {P}re-training of deep bidirectional transformers for language understanding},
author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
journal = {arXiv preprint arXiv:1810.04805},
year = {2018}
}
@article{liu2019roberta,
title = {{Ro{BERT}a: A robustly optimized {BERT} pretraining approach}},
author = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
journal = {arXiv preprint arXiv:1907.11692},
year = {2019}
}
@article{wei2022chain,
title = {Chain-of-thought prompting elicits reasoning in large language models},
author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Chi, Ed and Le, Quoc and Zhou, Denny},
journal = {NeurIPS},
year = {2022}
}
@article{raffel2019exploring,
title = {Exploring the limits of transfer learning with a unified text-to-text transformer},
author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J},
journal = {arXiv preprint arXiv:1910.10683},
year = {2019}
}
@article{wei2022emergent,
title = {Emergent abilities of large language models},
author = {Wei, Jason and Tay, Yi and Bommasani, Rishi and Raffel, Colin and Zoph, Barret and Borgeaud, Sebastian and Yogatama, Dani and Bosma, Maarten and Zhou, Denny and Metzler, Donald and others},
journal = {TMLR},
year = {2022}
}
@article{srivastava2022beyond,
title = {Beyond the imitation game: Quantifying and extrapolating the capabilities of language models},
author = {Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R. and Santoro, Adam and Gupta, Aditya and Garriga-Alonso, Adri{\`a} and others},
journal = {arXiv preprint arXiv:2206.04615},
year = {2022}
}
@article{xu2023baize,
title = {Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data},
author = {Xu, Canwen and Guo, Daya and Duan, Nan and McAuley, Julian},
journal = {arXiv preprint arXiv:2304.01196},
year = {2023}
}
@incollection{pytorch,
title = {Py{T}orch: An Imperative Style, High-Performance Deep Learning Library},
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
booktitle = {Advances in Neural Information Processing Systems 32},
editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett},
pages = {8024--8035},
year = {2019},
publisher = {Curran Associates, Inc.},
url = {http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf}
}
@article{zhou2022least,
title = {Least-to-most prompting enables complex reasoning in large language models},
author = {Zhou, Denny and Sch{\"a}rli, Nathanael and Hou, Le and Wei, Jason and Scales, Nathan and Wang, Xuezhi and Schuurmans, Dale and Bousquet, Olivier and Le, Quoc and Chi, Ed},
journal = {arXiv preprint arXiv:2205.10625},
year = {2022}
}
@article{dai2019transformer,
title = {Transformer-{XL}: Attentive language models beyond a fixed-length context},
author = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and Carbonell, Jaime and Le, Quoc V. and Salakhutdinov, Ruslan},
journal = {arXiv preprint arXiv:1901.02860},
year = {2019}
}
@article{kingma2014adam,
title = {Adam: A method for stochastic optimization},
author = {Kingma, Diederik P. and Ba, Jimmy},
journal = {arXiv preprint arXiv:1412.6980},
year = {2014}
}
@incollection{lecun2012efficient,
title = {Efficient backprop},
author = {LeCun, Yann A. and Bottou, L{\'e}on and Orr, Genevieve B and M{\"u}ller, Klaus-Robert},
booktitle = {Neural networks: Tricks of the trade},
pages = {9--48},
year = {2012},
publisher = {Springer}
}
@article{ba2016layer,
title = {Layer normalization},
author = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E.},
journal = {arXiv preprint arXiv:1607.06450},
year = {2016}
}
@article{kirkpatrick2017overcoming,
title = {Overcoming catastrophic forgetting in neural networks},
author = {Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness, Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan, John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others},
journal = {Proceedings of the national academy of sciences},
volume = {114},
number = {13},
pages = {3521--3526},
year = {2017},
publisher = {National Acad Sciences}
}
@article{vaswani2017attention,
title = {Attention is all you need},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
journal = {NeurIPS},
year = {2017}
}
@inproceedings{he2016deep,
title = {Deep residual learning for image recognition},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
pages = {770--778},
year = {2016}
}
@article{steel1960principles,
title = {Principles and procedures of statistics},
author = {Steel, Robert George Douglas and Torrie, James Hiram and others},
journal = {Principles and procedures of statistics.},
year = {1960},
publisher = {McGraw-Hill Book Company, Inc., New York, Toronto, London}
}
@article{humphreys2022data,
title = {A data-driven approach for learning to control computers},
author = {Humphreys, Peter C and Raposo, David and Pohlen, Toby and Thornton, Gregory and Chhaparia, Rachita and Muldal, Alistair and Abramson, Josh and Georgiev, Petko and Goldin, Alex and Santoro, Adam and others},
journal = {arXiv preprint arXiv:2202.08137},
year = {2022}
}
@article{ho2016generative,
title = {Generative adversarial imitation learning},
author = {Ho, Jonathan and Ermon, Stefano},
journal = {Advances in Neural Information Processing Systems},
volume = {29},
year = {2016}
}
@inproceedings{codevilla2018end,
title = {End-to-end driving via conditional imitation learning},
author = {Codevilla, Felipe and M{\"u}ller, Matthias and L{\'o}pez, Antonio and Koltun, Vladlen and Dosovitskiy, Alexey},
booktitle = {2018 IEEE international conference on robotics and automation (ICRA)},
pages = {4693--4700},
year = {2018},
organization = {IEEE}
}
% Nvidia self-driving
@article{bojarski2016end,
title = {End to end learning for self-driving cars},
author = {Bojarski, Mariusz and Del Testa, Davide and Dworakowski, Daniel and Firner, Bernhard and Flepp, Beat and Goyal, Prasoon and Jackel, Lawrence D and Monfort, Mathew and Muller, Urs and Zhang, Jiakai and others},
journal = {arXiv preprint arXiv:1604.07316},
year = {2016}
}
@article{coulom2007computing,
title = {Computing “{E}lo ratings” of move patterns in the game of {G}o},
author = {Coulom, R{\'e}mi},
journal = {ICGA journal},
volume = {30},
number = {4},
pages = {198--208},
year = {2007},
publisher = {IOS Press}
}
@inproceedings{hester2018deep,
title = {Deep q-learning from demonstrations},
author = {Hester, Todd and Vecerik, Matej and Pietquin, Olivier and Lanctot, Marc and Schaul, Tom and Piot, Bilal and Horgan, Dan and Quan, John and Sendonaris, Andrew and Osband, Ian and others},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
volume = {32},
number = {1},
year = {2018}
}
@inproceedings{oh2016control,
title = {Control of memory, active perception, and action in {M}inecraft},
author = {Oh, Junhyuk and Chockalingam, Valliappa and Lee, Honglak and others},
booktitle = {International Conference on Machine Learning},
pages = {2790--2799},
year = {2016},
organization = {PMLR}
}
@article{kanitscheider2021multi,
title = {Multi-task curriculum learning in a complex, visual, hard-exploration domain: {M}inecraft},
author = {Kanitscheider, Ingmar and Huizinga, Joost and Farhi, David and Guss, William Hebgen and Houghton, Brandon and Sampedro, Raul and Zhokhov, Peter and Baker, Bowen and Ecoffet, Adrien and Tang, Jie and others},
journal = {arXiv preprint arXiv:2106.14876},
year = {2021}
}
@inproceedings{scheller2020sample,
title = {Sample efficient reinforcement learning through learning from demonstrations in {M}inecraft},
author = {Scheller, Christian and Schraner, Yanick and Vogel, Manfred},
booktitle = {NeurIPS 2019 Competition and Demonstration Track},
pages = {67--76},
year = {2020},
organization = {PMLR}
}
@article{guss2019minerl,
title = {Mine{RL}: A large-scale dataset of {M}inecraft demonstrations},
author = {Guss, William H and Houghton, Brandon and Topin, Nicholay and Wang, Phillip and Codel, Cayden and Veloso, Manuela and Salakhutdinov, Ruslan},
journal = {arXiv preprint arXiv:1907.13440},
year = {2019}
}
@inproceedings{tessler2017deep,
title = {A deep hierarchical approach to lifelong learning in {M}inecraft},
author = {Tessler, Chen and Givony, Shahar and Zahavy, Tom and Mankowitz, Daniel and Mannor, Shie},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
volume = {31},
number = {1},
year = {2017}
}
@article{twinfinite2021played,
author = {Twinfinite Staff},
date = {2021-12-18},
title = {Most Played Games in 2021, Ranked by Peak Concurrent Players},
journal = {Twinfinite},
url = {https://twinfinite.net/2021/12/most-played-games-in-2020-ranked-by-peak-concurrent-players/},
urldate = {2021-12-18}
}
@article{burda2018exploration,
title = {Exploration by random network distillation},
author = {Burda, Yuri and Edwards, Harrison and Storkey, Amos and Klimov, Oleg},
journal = {arXiv preprint arXiv:1810.12894},
year = {2018}
}
@article{bellemare2016unifying,
title = {Unifying count-based exploration and intrinsic motivation},
author = {Bellemare, Marc and Srinivasan, Sriram and Ostrovski, Georg and Schaul, Tom and Saxton, David and Munos, Remi},
journal = {Advances in Neural Information Processing Systems},
volume = {29},
year = {2016}
}
@article{ecoffet2021first,
title = {First return, then explore},
author = {Ecoffet, Adrien and Huizinga, Joost and Lehman, Joel and Stanley, Kenneth O and Clune, Jeff},
journal = {Nature},
volume = {590},
number = {7847},
pages = {580--586},
year = {2021},
publisher = {Nature Publishing Group}
}
@book{sutton2018reinforcement,
title = {Reinforcement learning: An introduction},
author = {Sutton, Richard S and Barto, Andrew G},
year = {2018},
publisher = {MIT press}
}
@article{jaderberg2019human,
title = {Human-level performance in 3{D} multiplayer games with population-based reinforcement learning},
author = {Jaderberg, Max and Czarnecki, Wojciech M and Dunning, Iain and Marris, Luke and Lever, Guy and Castaneda, Antonio Garcia and Beattie, Charles and Rabinowitz, Neil C and Morcos, Ari S and Ruderman, Avraham and others},
journal = {Science},
volume = {364},
number = {6443},
pages = {859--865},
year = {2019},
publisher = {American Association for the Advancement of Science}
}
@inproceedings{bender2021dangers,
title = {On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?},
author = {Bender, Emily M and Gebru, Timnit and McMillan-Major, Angelina and Shmitchell, Shmargaret},
booktitle = {Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency},
pages = {610--623},
year = {2021}
}
@article{dauphin2014identifying,
title = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
author = {Dauphin, Yann N and Pascanu, Razvan and Gulcehre, Caglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Yoshua},
journal = {Advances in Neural Information Processing Systems},
volume = {27},
year = {2014}
}
@article{yosinski2014transferable,
title = {How transferable are features in deep neural networks?},
author = {Yosinski, Jason and Clune, Jeff and Bengio, Yoshua and Lipson, Hod},
journal = {Advances in Neural Information Processing Systems},
volume = {27},
year = {2014}
}
@article{baker2019emergent,
title = {Emergent tool use from multi-agent autocurricula},
author = {Baker, Bowen and Kanitscheider, Ingmar and Markov, Todor and Wu, Yi and Powell, Glenn and McGrew, Bob and Mordatch, Igor},
journal = {arXiv preprint arXiv:1909.07528},
year = {2019}
}
@article{berner2019dota,
title = {Dota 2 with large scale deep reinforcement learning},
author = {Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
journal = {arXiv preprint arXiv:1912.06680},
year = {2019}
}
@article{team2021open,
title = {Open-ended learning leads to generally capable agents},
author = {Team, Open Ended Learning and Stooke, Adam and Mahajan, Anuj and Barros, Catarina and Deck, Charlie and Bauer, Jakob and Sygnowski, Jakub and Trebacz, Maja and Jaderberg, Max and Mathieu, Michael and others},
journal = {arXiv preprint arXiv:2107.12808},
year = {2021}
}
@inproceedings{radford2021learning,
title = {Learning transferable visual models from natural language supervision},
author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others},
booktitle = {International Conference on Machine Learning},
pages = {8748--8763},
year = {2021},
organization = {PMLR}
}
@article{ramesh2022hierarchical,
title = {Hierarchical text-conditional image generation with {CLIP} latents},
author = {Ramesh, Aditya and Dhariwal, Prafulla and Nichol, Alex and Chu, Casey and Chen, Mark},
journal = {arXiv preprint arXiv:2204.06125},
year = {2022}
}
@article{giusti2015machine,
title = {A machine learning approach to visual perception of forest trails for mobile robots},
author = {Giusti, Alessandro and Guzzi, J{\'e}r{\^o}me and Cire{\c{s}}an, Dan C and He, Fang-Lin and Rodr{\'\i}guez, Juan P and Fontana, Flavio and Faessler, Matthias and Forster, Christian and Schmidhuber, J{\"u}rgen and Di Caro, Gianni and others},
journal = {IEEE Robotics and Automation Letters},
volume = {1},
number = {2},
pages = {661--667},
year = {2015},
publisher = {IEEE}
}
@incollection{sammut1992learning,
title = {Learning to fly},
author = {Sammut, Claude and Hurst, Scott and Kedzier, Dana and Michie, Donald},
booktitle = {Machine Learning Proceedings 1992},
pages = {385--393},
year = {1992},
publisher = {Elsevier}
}
@article{hussein2017imitation,
title = {Imitation learning: A survey of learning methods},
author = {Hussein, Ahmed and Gaber, Mohamed Medhat and Elyan, Eyad and Jayne, Chrisina},
journal = {ACM Computing Surveys (CSUR)},
volume = {50},
number = {2},
pages = {1--35},
year = {2017},
publisher = {ACM New York, NY, USA}
}
@inproceedings{behbahani2019learning,
title = {Learning from demonstration in the wild},
author = {Behbahani, Feryal and Shiarlis, Kyriacos and Chen, Xi and Kurin, Vitaly and Kasewa, Sudhanshu and Stirbu, Ciprian and Gomes, Joao and Paul, Supratik and Oliehoek, Frans A and Messias, Joao and others},
booktitle = {2019 International Conference on Robotics and Automation (ICRA)},
pages = {775--781},
year = {2019},
organization = {IEEE}
}
@inproceedings{edwards2019imitating,
title = {Imitating latent policies from observation},
author = {Edwards, Ashley and Sahni, Himanshu and Schroecker, Yannick and Isbell, Charles},
booktitle = {International conference on machine learning},
pages = {1755--1763},
year = {2019},
organization = {PMLR}
}
@inproceedings{liu2018imitation,
title = {Imitation from observation: Learning to imitate behaviors from raw video via context translation},
author = {Liu, YuXuan and Gupta, Abhishek and Abbeel, Pieter and Levine, Sergey},
booktitle = {2018 IEEE International Conference on Robotics and Automation (ICRA)},
pages = {1118--1125},
year = {2018},
organization = {IEEE}
}
% AlphaGo: uses behavior cloning (BC) on expert games to initialize the policy
@article{silver2016mastering,
title = {Mastering the game of {G}o with deep neural networks and tree search},
author = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
journal = {Nature},
volume = {529},
number = {7587},
pages = {484--489},
year = {2016},
publisher = {Nature Publishing Group}
}
% Recent ViT scaling-law paper from Google
@article{zhai2021scaling,
author = {Xiaohua Zhai and Alexander Kolesnikov and Neil Houlsby and Lucas Beyer},
title = {Scaling Vision Transformers},
journal = {CoRR},
volume = {abs/2106.04560},
year = {2021},
url = {https://arxiv.org/abs/2106.04560},
eprinttype = {arXiv},
eprint = {2106.04560},
}
@article{kudithipudi2022biological,
title = {Biological underpinnings for lifelong learning machines},
author = {Kudithipudi, Dhireesha and Aguilar-Simon, Mario and Babb, Jonathan and Bazhenov, Maxim and Blackiston, Douglas and Bongard, Josh and Brna, Andrew P and Chakravarthi Raja, Suraj and Cheney, Nick and Clune, Jeff and others},
journal = {Nature Machine Intelligence},
volume = {4},
number = {3},
pages = {196--210},
year = {2022},
publisher = {Nature Publishing Group}
}
% Instagram pretraining paper
@inproceedings{mahajan2018exploring,
title = {Exploring the limits of weakly supervised pretraining},
author = {Mahajan, Dhruv and Girshick, Ross and Ramanathan, Vignesh and He, Kaiming and Paluri, Manohar and Li, Yixuan and Bharambe, Ashwin and Van Der Maaten, Laurens},
booktitle = {Proceedings of the European conference on computer vision (ECCV)},
pages = {181--196},
year = {2018}
}
@article{vinyals2019grandmaster,
title = {Grandmaster level in {S}tar{C}raft {II} using multi-agent reinforcement learning},
author = {Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M and Mathieu, Micha{\"e}l and Dudzik, Andrew and Chung, Junyoung and Choi, David H and Powell, Richard and Ewalds, Timo and Georgiev, Petko and others},
journal = {Nature},
volume = {575},
number = {7782},
pages = {350--354},
year = {2019},
publisher = {Nature Publishing Group}
}
@article{bommasani2021opportunities,
title = {On the opportunities and risks of foundation models},
author = {Bommasani, Rishi and Hudson, Drew A and Adeli, Ehsan and Altman, Russ and Arora, Simran and von Arx, Sydney and Bernstein, Michael S and Bohg, Jeannette and Bosselut, Antoine and Brunskill, Emma and others},
journal = {arXiv preprint arXiv:2108.07258},
year = {2021}
}
@article{argall2009survey,
title = {A survey of robot learning from demonstration},
author = {Argall, Brenna D and Chernova, Sonia and Veloso, Manuela and Browning, Brett},
journal = {Robotics and autonomous systems},
volume = {57},
number = {5},
pages = {469--483},
year = {2009},
publisher = {Elsevier}
}
@article{schaal1999imitation,
title = {Is imitation learning the route to humanoid robots?},
author = {Schaal, Stefan},
journal = {Trends in cognitive sciences},
volume = {3},
number = {6},
pages = {233--242},
year = {1999},
publisher = {Elsevier}
}
@inproceedings{ng2000algorithms,
title = {Algorithms for inverse reinforcement learning},
author = {Ng, Andrew Y and Russell, Stuart J and others},
booktitle = {ICML},
volume = {1},
pages = {2},
year = {2000}
}
@article{pomerleau1988alvinn,
title = {Alvinn: An autonomous land vehicle in a neural network},
author = {Pomerleau, Dean A},
journal = {Advances in Neural Information Processing Systems},
volume = {1},
year = {1988}
}
@inproceedings{sermanet2018time,
title = {Time-contrastive networks: Self-supervised learning from video},
author = {Sermanet, Pierre and Lynch, Corey and Chebotar, Yevgen and Hsu, Jasmine and Jang, Eric and Schaal, Stefan and Levine, Sergey and Brain, Google},
booktitle = {2018 IEEE international conference on robotics and automation (ICRA)},
pages = {1134--1141},
year = {2018},
organization = {IEEE}
}
@inproceedings{calandra2015learning,
title = {Learning inverse dynamics models with contacts},
author = {Calandra, Roberto and Ivaldi, Serena and Deisenroth, Marc Peter and Rueckert, Elmar and Peters, Jan},
booktitle = {2015 IEEE International Conference on Robotics and Automation (ICRA)},
pages = {3186--3191},
year = {2015},
organization = {IEEE}
}
@article{christiano2016transfer,
title = {Transfer from simulation to real world through learning deep inverse dynamics model},