In-context / Prompt Learning Abilities of LLMs

Here are some resources for understanding and leveraging the in-context / prompt learning abilities of LLMs.
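
Before diving into the papers, a minimal sketch of the core idea may help: in-context learning conditions a frozen model on a few input-output demonstrations plus a new query, with no weight updates. The helper, demonstrations, and `call_llm` below are purely illustrative stand-ins, not from any specific paper.

```python
def build_icl_prompt(demonstrations, query):
    """Concatenate labeled demonstrations and an unlabeled query into one prompt."""
    blocks = [f"Input: {x}\nOutput: {y}" for x, y in demonstrations]
    blocks.append(f"Input: {query}\nOutput:")
    return "\n\n".join(blocks)

demos = [
    ("The movie was a delight.", "positive"),
    ("I want my money back.", "negative"),
]
prompt = build_icl_prompt(demos, "A slow start, but a wonderful finish.")
# completion = call_llm(prompt)  # hypothetical completion API; expected label: "positive"
```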

Survey

Securing Reliability: A Brief Overview on Enhancing In-Context Learning for Foundation Models

paper link: here

citation:

@misc{huang2024securing,
      title={Securing Reliability: A Brief Overview on Enhancing In-Context Learning for Foundation Models}, 
      author={Yunpeng Huang and Yaonan Gu and Jingwei Xu and Zhihong Zhu and Zhaorun Chen and Xiaoxing Ma},
      year={2024},
      eprint={2402.17671},
      archivePrefix={arXiv},
      primaryClass={cs.LG}
}

A survey for in-context learning

paper link: here

citation:

@article{dong2022survey,
  title={A survey for in-context learning},
  author={Dong, Qingxiu and Li, Lei and Dai, Damai and Zheng, Ce and Wu, Zhiyong and Chang, Baobao and Sun, Xu and Xu, Jingjing and Sui, Zhifang},
  journal={arXiv preprint arXiv:2301.00234},
  year={2022}
}

Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing

paper link: here

citation:

@article{liu2023pre,
  title={Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing},
  author={Liu, Pengfei and Yuan, Weizhe and Fu, Jinlan and Jiang, Zhengbao and Hayashi, Hiroaki and Neubig, Graham},
  journal={ACM Computing Surveys},
  volume={55},
  number={9},
  pages={1--35},
  year={2023},
  publisher={ACM New York, NY}
}

Empirical Study

How Do Transformers Learn In-Context Beyond Simple Functions? A Case Study on Learning with Representations

paper link: here

citation:

@article{guo2023transformers,
  title={How Do Transformers Learn In-Context Beyond Simple Functions? A Case Study on Learning with Representations},
  author={Guo, Tianyu and Hu, Wei and Mei, Song and Wang, Huan and Xiong, Caiming and Savarese, Silvio and Bai, Yu},
  journal={arXiv preprint arXiv:2310.10616},
  year={2023}
}

Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection

paper link: here

citation:

@article{bai2023transformers,
  title={Transformers as Statisticians: Provable In-Context Learning with In-Context Algorithm Selection},
  author={Bai, Yu and Chen, Fan and Wang, Huan and Xiong, Caiming and Mei, Song},
  journal={arXiv preprint arXiv:2306.04637},
  year={2023}
}

Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting

paper link: here

citation:

@article{turpin2023language,
  title={Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting},
  author={Turpin, Miles and Michael, Julian and Perez, Ethan and Bowman, Samuel R},
  journal={arXiv preprint arXiv:2305.04388},
  year={2023}
}

Larger language models do in-context learning differently

paper link: here

citation:

@misc{wei2023larger,
      title={Larger language models do in-context learning differently}, 
      author={Jerry Wei and Jason Wei and Yi Tay and Dustin Tran and Albert Webson and Yifeng Lu and Xinyun Chen and Hanxiao Liu and Da Huang and Denny Zhou and Tengyu Ma},
      year={2023},
      eprint={2303.03846},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

Chain-of-Thoughts

PathFinder: Guided Search over Multi-Step Reasoning Paths

paper link: here

citation:

@misc{golovneva2023pathfinder,
      title={PathFinder: Guided Search over Multi-Step Reasoning Paths}, 
      author={Olga Golovneva and Sean O'Brien and Ramakanth Pasunuru and Tianlu Wang and Luke Zettlemoyer and Maryam Fazel-Zarandi and Asli Celikyilmaz},
      year={2023},
      eprint={2312.05180},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

Graph of thoughts: Solving elaborate problems with large language models (GoT)

paper link: here

citation:

@article{besta2023graph,
  title={Graph of thoughts: Solving elaborate problems with large language models},
  author={Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Michal and Niewiadomski, Hubert and Nyczyk, Piotr and others},
  journal={arXiv preprint arXiv:2308.09687},
  year={2023}
}

Tree of thoughts: Deliberate problem solving with large language models (ToT)

paper link: here

citation:

@article{yao2023tree,
  title={Tree of thoughts: Deliberate problem solving with large language models},
  author={Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L and Cao, Yuan and Narasimhan, Karthik},
  journal={arXiv preprint arXiv:2305.10601},
  year={2023}
}
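
As a rough illustration of the ToT idea (not the authors' released implementation): partial solutions are expanded with several candidate "thoughts" per step, each candidate is scored by the model, and only the best few are kept for the next level of the search. `propose_thoughts` and `score_state` are hypothetical LLM-backed helpers.

```python
def tree_of_thoughts(problem, propose_thoughts, score_state, depth=3, breadth=2, k=3):
    """Breadth-first search over partial solutions, keeping `breadth` best states per level."""
    frontier = [""]  # each state is the text of a partial solution so far
    for _ in range(depth):
        candidates = []
        for state in frontier:
            for thought in propose_thoughts(problem, state, k):  # k proposals per state
                candidates.append(state + "\n" + thought)
        # Keep only the highest-scoring partial solutions for the next level.
        candidates.sort(key=lambda s: score_state(problem, s), reverse=True)
        frontier = candidates[:breadth]
    return frontier[0]
```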

Self-consistency improves chain of thought reasoning in language models

paper link: here

citation:

@article{wang2022self,
  title={Self-consistency improves chain of thought reasoning in language models},
  author={Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc and Chi, Ed and Narang, Sharan and Chowdhery, Aakanksha and Zhou, Denny},
  journal={arXiv preprint arXiv:2203.11171},
  year={2022}
}
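
A minimal sketch of the self-consistency decoding strategy: sample several chain-of-thought completions at nonzero temperature, extract the final answer from each, and return the most frequent one. `sample_llm` and `parse_answer` are hypothetical stand-ins for your sampling call and answer extractor.

```python
from collections import Counter

def self_consistency(prompt, sample_llm, parse_answer, n_samples=10):
    answers = []
    for _ in range(n_samples):
        completion = sample_llm(prompt, temperature=0.7)  # diverse reasoning paths
        answers.append(parse_answer(completion))          # e.g. the text after "The answer is"
    # Marginalize over reasoning paths by majority vote on the final answers.
    return Counter(answers).most_common(1)[0][0]
```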

Chain-of-thought prompting elicits reasoning in large language models (CoT)

paper link: here

citation:

@article{wei2022chain,
  title={Chain-of-thought prompting elicits reasoning in large language models},
  author={Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Xia, Fei and Chi, Ed and Le, Quoc V and Zhou, Denny and others},
  journal={Advances in Neural Information Processing Systems},
  volume={35},
  pages={24824--24837},
  year={2022}
}
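
A minimal sketch of few-shot chain-of-thought prompting: each exemplar shows intermediate reasoning before the final answer (the tennis-ball exemplar below is adapted from the paper), which nudges the model to produce step-by-step reasoning for the new question. `call_llm` is a hypothetical completion call.

```python
COT_EXEMPLAR = (
    "Q: Roger has 5 tennis balls. He buys 2 cans of 3 tennis balls each. "
    "How many tennis balls does he have now?\n"
    "A: Roger started with 5 balls. 2 cans of 3 balls each is 6 balls. "
    "5 + 6 = 11. The answer is 11.\n\n"
)

def cot_prompt(question):
    """Prepend a worked exemplar so the completion includes reasoning steps."""
    return COT_EXEMPLAR + f"Q: {question}\nA:"

# answer = call_llm(cot_prompt("A baker has 23 cupcakes and sells 9. How many are left?"))
```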

Miscellaneous

ICL Markup: Structuring In-Context Learning using Soft-Token Tags

paper link: here

citation:

@misc{brunet2023icl,
      title={ICL Markup: Structuring In-Context Learning using Soft-Token Tags}, 
      author={Marc-Etienne Brunet and Ashton Anderson and Richard Zemel},
      year={2023},
      eprint={2312.07405},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

Flexible visual prompts for in-context learning in computer vision

paper link: here

citation:

@misc{foster2023flexible,
      title={Flexible visual prompts for in-context learning in computer vision}, 
      author={Thomas Foster and Ioana Croitoru and Robert Dorfman and Christoffer Edlund and Thomas Varsavsky and Jon Almazán},
      year={2023},
      eprint={2312.06592},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}

Plum: Prompt Learning using Metaheuristic

paper link: here

citation:

@article{pan2023plum,
  title={Plum: Prompt Learning using Metaheuristic},
  author={Pan, Rui and Xing, Shuo and Diao, Shizhe and Liu, Xiang and Shum, Kashun and Zhang, Jipeng and Zhang, Tong},
  journal={arXiv preprint arXiv:2311.08364},
  year={2023}
}

DDCoT: Duty-distinct chain-of-thought prompting for multimodal reasoning in language models

paper link: here

citation:

@article{zheng2023ddcot,
  title={Ddcot: Duty-distinct chain-of-thought prompting for multimodal reasoning in language models},
  author={Zheng, Ge and Yang, Bin and Tang, Jiajin and Zhou, Hong-Yu and Yang, Sibei},
  journal={arXiv preprint arXiv:2310.16436},
  year={2023}
}

Evoke: Evoking Critical Thinking Abilities in LLMs via Reviewer-Author Prompt Editing

paper link: here

citation:

@misc{hu2023evoke,
      title={Evoke: Evoking Critical Thinking Abilities in LLMs via Reviewer-Author Prompt Editing}, 
      author={Xinyu Hu and Pengfei Tang and Simiao Zuo and Zihan Wang and Bowen Song and Qiang Lou and Jian Jiao and Denis Charles},
      year={2023},
      eprint={2310.13855},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

LoraHub: Efficient Cross-Task Generalization via Dynamic LoRA Composition

paper link: here

github link: here

hfhub link: here

citation:

@misc{huang2024lorahub,
      title={LoraHub: Efficient Cross-Task Generalization via Dynamic LoRA Composition}, 
      author={Chengsong Huang and Qian Liu and Bill Yuchen Lin and Tianyu Pang and Chao Du and Min Lin},
      year={2024},
      eprint={2307.13269},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

Joint Prompt Optimization of Stacked LLMs using Variational Inference

paper link: here

citation:

@inproceedings{sordoni2023joint,
  title={Joint Prompt Optimization of Stacked LLMs using Variational Inference},
  author={Sordoni, Alessandro and Yuan, Xingdi and C{\^o}t{\'e}, Marc-Alexandre and Pereira, Matheus and Trischler, Adam and Xiao, Ziang and Hosseini, Arian and Niedtner, Friederike and Le Roux, Nicolas},
  booktitle={Thirty-seventh Conference on Neural Information Processing Systems},
  year={2023}
}

Dr.ICL: Demonstration-Retrieved In-context Learning

paper link: here

citation:

@misc{luo2023dricl,
      title={Dr.ICL: Demonstration-Retrieved In-context Learning}, 
      author={Man Luo and Xin Xu and Zhuyun Dai and Panupong Pasupat and Mehran Kazemi and Chitta Baral and Vaiva Imbrasaite and Vincent Y Zhao},
      year={2023},
      eprint={2305.14128},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

Query Expansion by Prompting Large Language Models

paper link: here

citation:

@misc{jagerman2023query,
      title={Query Expansion by Prompting Large Language Models}, 
      author={Rolf Jagerman and Honglei Zhuang and Zhen Qin and Xuanhui Wang and Michael Bendersky},
      year={2023},
      eprint={2305.03653},
      archivePrefix={arXiv},
      primaryClass={cs.IR}
}
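
A minimal sketch of LLM-based query expansion in the spirit of this paper: prompt the model to write a short answer for the query, then concatenate the original query (repeated, so its terms still dominate) with the generated text before handing it to the retriever. The prompt wording, repetition factor, and `call_llm` helper are illustrative assumptions.

```python
def expand_query(query, call_llm, repeats=5):
    """Expand a search query with LLM-generated text before retrieval."""
    expansion = call_llm(f"Answer the following query:\n{query}\nAnswer:")
    # Repeat the original query so its terms still dominate the expanded representation.
    return " ".join([query] * repeats + [expansion])

# expanded = expand_query("symptoms of vitamin d deficiency", call_llm)
# hits = bm25_index.search(expanded)  # hypothetical sparse retriever
```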

EvoPrompting: Language Models for Code-Level Neural Architecture Search

paper link: here

citation:

@article{chen2023evoprompting,
  title={EvoPrompting: Language Models for Code-Level Neural Architecture Search},
  author={Chen, Angelica and Dohan, David M and So, David R},
  journal={arXiv preprint arXiv:2302.14838},
  year={2023}
}

General-Purpose In-Context Learning by Meta-Learning Transformers

paper link: here

citation:

@misc{kirsch2024generalpurpose,
      title={General-Purpose In-Context Learning by Meta-Learning Transformers}, 
      author={Louis Kirsch and James Harrison and Jascha Sohl-Dickstein and Luke Metz},
      year={2024},
      eprint={2212.04458},
      archivePrefix={arXiv},
      primaryClass={cs.LG}
}

Progprompt: Generating situated robot task plans using large language models

paper link: here

citation:

@inproceedings{singh2023progprompt,
  title={Progprompt: Generating situated robot task plans using large language models},
  author={Singh, Ishika and Blukis, Valts and Mousavian, Arsalan and Goyal, Ankit and Xu, Danfei and Tremblay, Jonathan and Fox, Dieter and Thomason, Jesse and Garg, Animesh},
  booktitle={2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages={11523--11530},
  year={2023},
  organization={IEEE}
}

RLPrompt: Optimizing Discrete Text Prompts with Reinforcement Learning

paper link: here

citation:

@article{deng2022rlprompt,
  title={Rlprompt: Optimizing discrete text prompts with reinforcement learning},
  author={Deng, Mingkai and Wang, Jianyu and Hsieh, Cheng-Ping and Wang, Yihan and Guo, Han and Shu, Tianmin and Song, Meng and Xing, Eric P and Hu, Zhiting},
  journal={arXiv preprint arXiv:2205.12548},
  year={2022}
}

MaPLe: Multi-modal prompt learning

paper link: here

citation:

@inproceedings{khattak2023maple,
  title={Maple: Multi-modal prompt learning},
  author={Khattak, Muhammad Uzair and Rasheed, Hanoona and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={19113--19122},
  year={2023}
}

Structured prompting: Scaling in-context learning to 1,000 examples

paper link: here

citation:

@article{hao2022structured,
  title={Structured prompting: Scaling in-context learning to 1,000 examples},
  author={Hao, Yaru and Sun, Yutao and Dong, Li and Han, Zhixiong and Gu, Yuxian and Wei, Furu},
  journal={arXiv preprint arXiv:2212.06713},
  year={2022}
}

Large language models are human-level prompt engineers

paper link: here

citation:

@article{zhou2022large,
  title={Large language models are human-level prompt engineers},
  author={Zhou, Yongchao and Muresanu, Andrei Ioan and Han, Ziwen and Paster, Keiran and Pitis, Silviu and Chan, Harris and Ba, Jimmy},
  journal={arXiv preprint arXiv:2211.01910},
  year={2022}
}
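
A minimal sketch in the spirit of automatic prompt engineering: ask the model to propose candidate instructions that would explain a handful of input-output pairs, score each candidate by how many held-out examples it solves, and keep the best one. `call_llm` and `execute_with_instruction` are hypothetical helpers, not the paper's released code.

```python
def search_instruction(demos, heldout, call_llm, execute_with_instruction, n_candidates=8):
    demo_text = "\n".join(f"Input: {x} Output: {y}" for x, y in demos)
    meta_prompt = (
        "I gave a friend an instruction. Based on the instruction they produced "
        f"the following input-output pairs:\n{demo_text}\nThe instruction was:"
    )
    # Sample several candidate instructions at high temperature for diversity.
    candidates = [call_llm(meta_prompt, temperature=1.0) for _ in range(n_candidates)]

    def score(instruction):
        return sum(execute_with_instruction(instruction, x) == y for x, y in heldout)

    return max(candidates, key=score)
```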

Large language models can self-improve

paper link: here

citation:

@article{huang2022large,
  title={Large language models can self-improve},
  author={Huang, Jiaxin and Gu, Shixiang Shane and Hou, Le and Wu, Yuexin and Wang, Xuezhi and Yu, Hongkun and Han, Jiawei},
  journal={arXiv preprint arXiv:2210.11610},
  year={2022}
}

Conditional prompt learning for vision-language models

paper link: here

citation:

@inproceedings{zhou2022conditional,
  title={Conditional prompt learning for vision-language models},
  author={Zhou, Kaiyang and Yang, Jingkang and Loy, Chen Change and Liu, Ziwei},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={16816--16825},
  year={2022}
}

PromptSource: An integrated development environment and repository for natural language prompts

paper link: here

citation:

@misc{bach2022promptsource,
      title={PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts}, 
      author={Stephen H. Bach and Victor Sanh and Zheng-Xin Yong and Albert Webson and Colin Raffel and Nihal V. Nayak and Abheesht Sharma and Taewoon Kim and M Saiful Bari and Thibault Fevry and Zaid Alyafeai and Manan Dey and Andrea Santilli and Zhiqing Sun and Srulik Ben-David and Canwen Xu and Gunjan Chhablani and Han Wang and Jason Alan Fries and Maged S. Al-shaibani and Shanya Sharma and Urmish Thakker and Khalid Almubarak and Xiangru Tang and Dragomir Radev and Mike Tian-Jian Jiang and Alexander M. Rush},
      year={2022},
      eprint={2202.01279},
      archivePrefix={arXiv},
      primaryClass={cs.LG}
}

Calibrate before use: Improving few-shot performance of language models

paper link: here

citation:

@inproceedings{zhao2021calibrate,
  title={Calibrate before use: Improving few-shot performance of language models},
  author={Zhao, Zihao and Wallace, Eric and Feng, Shi and Klein, Dan and Singh, Sameer},
  booktitle={International Conference on Machine Learning},
  pages={12697--12706},
  year={2021},
  organization={PMLR}
}
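
A minimal sketch of contextual calibration as described in this paper: estimate the model's label bias with a content-free input such as "N/A", then rescale the label probabilities for real inputs so that the content-free input would map to a uniform prediction. `label_probs` is a hypothetical helper returning the model's probability for each label given the few-shot prompt and an input.

```python
import numpy as np

def calibrated_predict(prompt, x, labels, label_probs):
    p_cf = np.array(label_probs(prompt, "N/A", labels))  # bias on a content-free input
    p_cf /= p_cf.sum()
    p = np.array(label_probs(prompt, x, labels))
    p /= p.sum()
    p_cal = p / p_cf                                      # apply W = diag(p_cf)^-1, b = 0
    p_cal /= p_cal.sum()
    return labels[int(np.argmax(p_cal))]
```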

Multitask prompted training enables zero-shot task generalization

paper link: here

citation:

@article{sanh2021multitask,
  title={Multitask prompted training enables zero-shot task generalization},
  author={Sanh, Victor and Webson, Albert and Raffel, Colin and Bach, Stephen H and Sutawika, Lintang and Alyafeai, Zaid and Chaffin, Antoine and Stiegler, Arnaud and Scao, Teven Le and Raja, Arun and others},
  journal={arXiv preprint arXiv:2110.08207},
  year={2021}
}