Here are some resources about understanding and leveraging the in-context / prompt learning abilities of LLMs
Securing Reliability: A Brief Overview on Enhancing In-Context Learning for Foundation Models
paper link: here
citation:
@misc{huang2024securing,
title={Securing Reliability: A Brief Overview on Enhancing In-Context Learning for Foundation Models},
author={Yunpeng Huang and Yaonan Gu and Jingwei Xu and Zhihong Zhu and Zhaorun Chen and Xiaoxing Ma},
year={2024},
eprint={2402.17671},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
A survey for in-context learning
paper link: here
citation:
@article{dong2022survey,
title={A survey for in-context learning},
author={Dong, Qingxiu and Li, Lei and Dai, Damai and Zheng, Ce and Wu, Zhiyong and Chang, Baobao and Sun, Xu and Xu, Jingjing and Sui, Zhifang},
journal={arXiv preprint arXiv:2301.00234},
year={2022}
}
Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing
paper link: here
citation:
@article{liu2023pre,
title={Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing},
author={Liu, Pengfei and Yuan, Weizhe and Fu, Jinlan and Jiang, Zhengbao and Hayashi, Hiroaki and Neubig, Graham},
journal={ACM Computing Surveys},
volume={55},
number={9},
pages={1--35},
year={2023},
publisher={ACM New York, NY}
}
How Do Transformers Learn In-Context Beyond Simple Functions? A Case Study on Learning with Representations
paper link: here
citation:
@article{guo2023transformers,
title={How Do Transformers Learn In-Context Beyond Simple Functions? A Case Study on Learning with Representations},
author={Guo, Tianyu and Hu, Wei and Mei, Song and Wang, Huan and Xiong, Caiming and Savarese, Silvio and Bai, Yu},
journal={arXiv preprint arXiv:2310.10616},
year={2023}
}
Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection
paper link: here
citation:
@article{bai2023transformers,
title={Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection},
author={Bai, Yu and Chen, Fan and Wang, Huan and Xiong, Caiming and Mei, Song},
journal={arXiv preprint arXiv:2306.04637},
year={2023}
}
Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting
paper link: here
citation:
@article{turpin2023language,
title={Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting},
author={Turpin, Miles and Michael, Julian and Perez, Ethan and Bowman, Samuel R},
journal={arXiv preprint arXiv:2305.04388},
year={2023}
}
Larger language models do in-context learning differently
paper link: here
citation:
@misc{wei2023larger,
title={Larger language models do in-context learning differently},
author={Jerry Wei and Jason Wei and Yi Tay and Dustin Tran and Albert Webson and Yifeng Lu and Xinyun Chen and Hanxiao Liu and Da Huang and Denny Zhou and Tengyu Ma},
year={2023},
eprint={2303.03846},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
PathFinder: Guided Search over Multi-Step Reasoning Paths
paper link: here
citation:
@misc{golovneva2023pathfinder,
title={PathFinder: Guided Search over Multi-Step Reasoning Paths},
author={Olga Golovneva and Sean O'Brien and Ramakanth Pasunuru and Tianlu Wang and Luke Zettlemoyer and Maryam Fazel-Zarandi and Asli Celikyilmaz},
year={2023},
eprint={2312.05180},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Graph of thoughts: Solving elaborate problems with large language models
paper link: here
citation:
@article{besta2023graph,
title={Graph of thoughts: Solving elaborate problems with large language models},
author={Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Michal and Niewiadomski, Hubert and Nyczyk, Piotr and others},
journal={arXiv preprint arXiv:2308.09687},
year={2023}
}
Tree of thoughts: Deliberate problem solving with large language models
paper link: here
citation:
@article{yao2023tree,
title={Tree of thoughts: Deliberate problem solving with large language models},
author={Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L and Cao, Yuan and Narasimhan, Karthik},
journal={arXiv preprint arXiv:2305.10601},
year={2023}
}
Self-consistency improves chain of thought reasoning in language models
paper link: here
citation:
@article{wang2022self,
title={Self-consistency improves chain of thought reasoning in language models},
author={Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc and Chi, Ed and Narang, Sharan and Chowdhery, Aakanksha and Zhou, Denny},
journal={arXiv preprint arXiv:2203.11171},
year={2022}
}
Chain-of-thought prompting elicits reasoning in large language models
paper link: here
citation:
@article{wei2022chain,
title={Chain-of-thought prompting elicits reasoning in large language models},
author={Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Xia, Fei and Chi, Ed and Le, Quoc V and Zhou, Denny and others},
journal={Advances in Neural Information Processing Systems},
volume={35},
pages={24824--24837},
year={2022}
}
ICL Markup: Structuring In-Context Learning using Soft-Token Tags
paper link: here
citation:
@misc{brunet2023icl,
title={ICL Markup: Structuring In-Context Learning using Soft-Token Tags},
author={Marc-Etienne Brunet and Ashton Anderson and Richard Zemel},
year={2023},
eprint={2312.07405},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Flexible visual prompts for in-context learning in computer vision
paper link: here
citation:
@misc{foster2023flexible,
title={Flexible visual prompts for in-context learning in computer vision},
author={Thomas Foster and Ioana Croitoru and Robert Dorfman and Christoffer Edlund and Thomas Varsavsky and Jon Almazán},
year={2023},
eprint={2312.06592},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Plum: Prompt Learning using Metaheuristic
paper link: here
citation:
@article{pan2023plum,
title={Plum: Prompt Learning using Metaheuristic},
author={Pan, Rui and Xing, Shuo and Diao, Shizhe and Liu, Xiang and Shum, Kashun and Zhang, Jipeng and Zhang, Tong},
journal={arXiv preprint arXiv:2311.08364},
year={2023}
}
DDCoT: Duty-distinct chain-of-thought prompting for multimodal reasoning in language models
paper link: here
citation:
@article{zheng2023ddcot,
title={Ddcot: Duty-distinct chain-of-thought prompting for multimodal reasoning in language models},
author={Zheng, Ge and Yang, Bin and Tang, Jiajin and Zhou, Hong-Yu and Yang, Sibei},
journal={arXiv preprint arXiv:2310.16436},
year={2023}
}
Evoke: Evoking Critical Thinking Abilities in LLMs via Reviewer-Author Prompt Editing
paper link: here
citation:
@misc{hu2023evoke,
title={Evoke: Evoking Critical Thinking Abilities in LLMs via Reviewer-Author Prompt Editing},
author={Xinyu Hu and Pengfei Tang and Simiao Zuo and Zihan Wang and Bowen Song and Qiang Lou and Jian Jiao and Denis Charles},
year={2023},
eprint={2310.13855},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
LoraHub: Efficient Cross-Task Generalization via Dynamic LoRA Composition
paper link: here
github link: here
hfhub link: here
citation:
@misc{huang2024lorahub,
title={LoraHub: Efficient Cross-Task Generalization via Dynamic LoRA Composition},
author={Chengsong Huang and Qian Liu and Bill Yuchen Lin and Tianyu Pang and Chao Du and Min Lin},
year={2024},
eprint={2307.13269},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Joint Prompt Optimization of Stacked LLMs using Variational Inference
paper link: here
citation:
@inproceedings{sordoni2023joint,
title={Joint Prompt Optimization of Stacked LLMs using Variational Inference},
author={Sordoni, Alessandro and Yuan, Xingdi and C{\^o}t{\'e}, Marc-Alexandre and Pereira, Matheus and Trischler, Adam and Xiao, Ziang and Hosseini, Arian and Niedtner, Friederike and Le Roux, Nicolas},
booktitle={Thirty-seventh Conference on Neural Information Processing Systems},
year={2023}
}
Dr.ICL: Demonstration-Retrieved In-context Learning
paper link: here
citation:
@misc{luo2023dricl,
title={Dr.ICL: Demonstration-Retrieved In-context Learning},
author={Man Luo and Xin Xu and Zhuyun Dai and Panupong Pasupat and Mehran Kazemi and Chitta Baral and Vaiva Imbrasaite and Vincent Y Zhao},
year={2023},
eprint={2305.14128},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Query Expansion by Prompting Large Language Models
paper link: here
citation:
@misc{jagerman2023query,
title={Query Expansion by Prompting Large Language Models},
author={Rolf Jagerman and Honglei Zhuang and Zhen Qin and Xuanhui Wang and Michael Bendersky},
year={2023},
eprint={2305.03653},
archivePrefix={arXiv},
primaryClass={cs.IR}
}
EvoPrompting: Language Models for Code-Level Neural Architecture Search
paper link: here
citation:
@article{chen2023evoprompting,
title={EvoPrompting: Language Models for Code-Level Neural Architecture Search},
author={Chen, Angelica and Dohan, David M and So, David R},
journal={arXiv preprint arXiv:2302.14838},
year={2023}
}
General-Purpose In-Context Learning by Meta-Learning Transformers
paper link: here
citation:
@misc{kirsch2024generalpurpose,
title={General-Purpose In-Context Learning by Meta-Learning Transformers},
author={Louis Kirsch and James Harrison and Jascha Sohl-Dickstein and Luke Metz},
year={2024},
eprint={2212.04458},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
ProgPrompt: Generating situated robot task plans using large language models
paper link: here
citation:
@inproceedings{singh2023progprompt,
title={Progprompt: Generating situated robot task plans using large language models},
author={Singh, Ishika and Blukis, Valts and Mousavian, Arsalan and Goyal, Ankit and Xu, Danfei and Tremblay, Jonathan and Fox, Dieter and Thomason, Jesse and Garg, Animesh},
booktitle={2023 IEEE International Conference on Robotics and Automation (ICRA)},
pages={11523--11530},
year={2023},
organization={IEEE}
}
RLPrompt: Optimizing discrete text prompts with reinforcement learning
paper link: here
citation:
@article{deng2022rlprompt,
title={Rlprompt: Optimizing discrete text prompts with reinforcement learning},
author={Deng, Mingkai and Wang, Jianyu and Hsieh, Cheng-Ping and Wang, Yihan and Guo, Han and Shu, Tianmin and Song, Meng and Xing, Eric P and Hu, Zhiting},
journal={arXiv preprint arXiv:2205.12548},
year={2022}
}
MaPLe: Multi-modal prompt learning
paper link: here
citation:
@inproceedings{khattak2023maple,
title={Maple: Multi-modal prompt learning},
author={Khattak, Muhammad Uzair and Rasheed, Hanoona and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={19113--19122},
year={2023}
}
Structured prompting: Scaling in-context learning to 1,000 examples
paper link: here
citation:
@article{hao2022structured,
title={Structured prompting: Scaling in-context learning to 1,000 examples},
author={Hao, Yaru and Sun, Yutao and Dong, Li and Han, Zhixiong and Gu, Yuxian and Wei, Furu},
journal={arXiv preprint arXiv:2212.06713},
year={2022}
}
Large language models are human-level prompt engineers
paper link: here
citation:
@article{zhou2022large,
title={Large language models are human-level prompt engineers},
author={Zhou, Yongchao and Muresanu, Andrei Ioan and Han, Ziwen and Paster, Keiran and Pitis, Silviu and Chan, Harris and Ba, Jimmy},
journal={arXiv preprint arXiv:2211.01910},
year={2022}
}
Large language models can self-improve
paper link: here
citation:
@article{huang2022large,
title={Large language models can self-improve},
author={Huang, Jiaxin and Gu, Shixiang Shane and Hou, Le and Wu, Yuexin and Wang, Xuezhi and Yu, Hongkun and Han, Jiawei},
journal={arXiv preprint arXiv:2210.11610},
year={2022}
}
Conditional prompt learning for vision-language models
paper link: here
citation:
@inproceedings{zhou2022conditional,
title={Conditional prompt learning for vision-language models},
author={Zhou, Kaiyang and Yang, Jingkang and Loy, Chen Change and Liu, Ziwei},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={16816--16825},
year={2022}
}
PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts
paper link: here
citation:
@misc{bach2022promptsource,
title={PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts},
author={Stephen H. Bach and Victor Sanh and Zheng-Xin Yong and Albert Webson and Colin Raffel and Nihal V. Nayak and Abheesht Sharma and Taewoon Kim and M Saiful Bari and Thibault Fevry and Zaid Alyafeai and Manan Dey and Andrea Santilli and Zhiqing Sun and Srulik Ben-David and Canwen Xu and Gunjan Chhablani and Han Wang and Jason Alan Fries and Maged S. Al-shaibani and Shanya Sharma and Urmish Thakker and Khalid Almubarak and Xiangru Tang and Dragomir Radev and Mike Tian-Jian Jiang and Alexander M. Rush},
year={2022},
eprint={2202.01279},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
Calibrate before use: Improving few-shot performance of language models
paper link: here
citation:
@inproceedings{zhao2021calibrate,
title={Calibrate before use: Improving few-shot performance of language models},
author={Zhao, Zihao and Wallace, Eric and Feng, Shi and Klein, Dan and Singh, Sameer},
booktitle={International Conference on Machine Learning},
pages={12697--12706},
year={2021},
organization={PMLR}
}
Multitask prompted training enables zero-shot task generalization
paper link: here
citation:
@article{sanh2021multitask,
title={Multitask prompted training enables zero-shot task generalization},
author={Sanh, Victor and Webson, Albert and Raffel, Colin and Bach, Stephen H and Sutawika, Lintang and Alyafeai, Zaid and Chaffin, Antoine and Stiegler, Arnaud and Scao, Teven Le and Raja, Arun and others},
journal={arXiv preprint arXiv:2110.08207},
year={2021}
}