Here are some resources about Code LLMs
tag: OpenCoder
| INF
paper link: here
code link: here
modelhub link: here
homepage link: here
citation:
@misc{huang2024opencoderopencookbooktoptier,
  title         = {{OpenCoder}: The Open Cookbook for Top-Tier Code Large Language Models},
  author        = {Siming Huang and Tianhao Cheng and Jason Klein Liu and Jiaran Hao and Liuyihan Song and Yang Xu and J. Yang and J. H. Liu and Chenchen Zhang and Linzheng Chai and Ruifeng Yuan and Zhaoxiang Zhang and Jie Fu and Qian Liu and Ge Zhang and Zili Wang and Yuan Qi and Yinghui Xu and Wei Chu},
  year          = {2024},
  eprint        = {2411.04905},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2411.04905},
}
tag: Phi-1.5
| Phi
| SLM
| Microsoft
paper link: here
model link: here
followup work: here
citation:
@misc{li2023textbooksneediiphi15,
  title         = {Textbooks Are All You Need {II}: {phi-1.5} technical report},
  author        = {Yuanzhi Li and Sébastien Bubeck and Ronen Eldan and Del Giorno, Allie and Suriya Gunasekar and Yin Tat Lee},
  year          = {2023},
  eprint        = {2309.05463},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2309.05463},
}
tag: Phi-1
| Phi
| SLM
| Microsoft
paper link: here
model link: here
followup work: here
citation:
@misc{gunasekar2023textbooksneed,
  title         = {Textbooks Are All You Need},
  author        = {Suriya Gunasekar and Yi Zhang and Jyoti Aneja and Caio César Teodoro Mendes and Del Giorno, Allie and Sivakanth Gopi and Mojan Javaheripi and Piero Kauffmann and Gustavo de Rosa and Olli Saarikivi and Adil Salim and Shital Shah and Harkirat Singh Behl and Xin Wang and Sébastien Bubeck and Ronen Eldan and Adam Tauman Kalai and Yin Tat Lee and Yuanzhi Li},
  year          = {2023},
  eprint        = {2306.11644},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2306.11644},
}
tag: MultiPL-E
| TSE23
paper link: here
code link: here
leaderboard link: here
citation:
@article{cassano2023multipl,
  title     = {{MultiPL-E}: A Scalable and Polyglot Approach to Benchmarking Neural Code Generation},
  author    = {Cassano, Federico and Gouwar, John and Nguyen, Daniel and Nguyen, Sydney and Phipps-Costin, Luna and Pinckney, Donald and Yee, Ming-Ho and Zi, Yangtian and Anderson, Carolyn Jane and Feldman, Molly Q and others},
  journal   = {IEEE Transactions on Software Engineering},
  year      = {2023},
  publisher = {IEEE},
}
tag: HumanEval
| ICSE24
| OpenAI
paper link: here
code link: here
dataset link: here
citation:
@misc{chen2021evaluating,
  title         = {Evaluating Large Language Models Trained on Code},
  author        = {Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
  year          = {2021},
  eprint        = {2107.03374},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2107.03374},
}