Here are some resources about Pipeline Parallelism for LLM Training
tag: Controllable Memory
| ZBPP
| Sea AILab
| NUS
paper link: here
github link: here
citation:
% arXiv preprint 2405.15362 (primary class cs.LG).
@misc{qi2024pipelineparallelismcontrollablememory,
  author        = {Penghui Qi and Xinyi Wan and Nyamdavaa Amar and Min Lin},
  title         = {Pipeline Parallelism with Controllable Memory},
  year          = {2024},
  eprint        = {2405.15362},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2405.15362},
}
tag: ZBPP
| ZB1P
| ZB2P
| ICLR24
| Sea AILab
paper link: here
github link: here
follow-up work: here
citation:
% arXiv preprint 2401.10241 (primary class cs.DC).
@misc{qi2023zerobubblepipelineparallelism,
  author        = {Penghui Qi and Xinyi Wan and Guangxing Huang and Min Lin},
  title         = {Zero Bubble Pipeline Parallelism},
  year          = {2023},
  eprint        = {2401.10241},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2401.10241},
}
tag: BPipe
| Memory Balance
| ICML23
paper link: here
citation:
% PMLR volume 202 proceedings entry.
% month uses the standard three-letter macro so styles can format/localise it;
% the conference day range is kept in eventdate (read by biblatex, ignored by
% classic BibTeX). The whole word BPipe is braced to keep its capitalisation.
@inproceedings{kim2023bpipe,
  author    = {Kim, Taebum and Kim, Hyoungjoo and Yu, Gyeong-In and Chun, Byung-Gon},
  title     = {{BPipe}: Memory-Balanced Pipeline Parallelism for Training Large Language Models},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  editor    = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {16639--16653},
  publisher = {PMLR},
  year      = {2023},
  month     = jul,
  eventdate = {2023-07-23/2023-07-29},
  url       = {https://proceedings.mlr.press/v202/kim23l.html},
}
tag: BFPP
| MLSys23
paper link: here
citation:
% arXiv preprint 2211.05953 (primary class cs.DC).
@misc{lamypoirier2023breadthfirstpipelineparallelism,
  author        = {Joel Lamy-Poirier},
  title         = {Breadth-First Pipeline Parallelism},
  year          = {2023},
  eprint        = {2211.05953},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2211.05953},
}
tag: Chimera
| Bidirectional PP
| SC21
paper link: here
citation:
% ACM (SC '21) proceedings entry.
% booktitle is required for inproceedings and was missing. The conference city
% is stored in venue rather than location, because biblatex treats location as
% the publisher's place (the same role as address). The title is stored in
% title case; styles can still downcase it.
@inproceedings{li2021chimera,
  author    = {Li, Shigang and Hoefler, Torsten},
  title     = {Chimera: Efficiently Training Large-Scale Neural Networks with Bidirectional Pipelines},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '21},
  year      = {2021},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  venue     = {St. Louis, Missouri},
  articleno = {27},
  numpages  = {14},
  isbn      = {9781450384421},
  doi       = {10.1145/3458817.3476145},
  url       = {https://doi.org/10.1145/3458817.3476145},
  keywords  = {pipeline parallelism, operator parallelism, model parallelism, distributed deep learning, data parallelism},
}
tag: GEMS
| SC20
paper link: here
citation:
% IEEE (SC20) proceedings entry.
% Empty volume/number fields are dropped (they only produce "empty field"
% warnings), the page range uses a double hyphen, and acronyms in the title
% are brace-protected against sentence-casing styles.
@inproceedings{jain2020gems,
  author    = {Jain, Arpan and Awan, Ammar Ahmad and Aljuhani, Asmaa M. and Hashmi, Jahanzeb Maqbool and Anthony, Quentin G. and Subramoni, Hari and Panda, Dhableswar K. and Machiraju, Raghu and Parwani, Anil},
  title     = {{GEMS}: {GPU}-Enabled Memory-Aware Model-Parallelism System for Distributed {DNN} Training},
  booktitle = {SC20: International Conference for High Performance Computing, Networking, Storage and Analysis},
  year      = {2020},
  pages     = {1--15},
  keywords  = {Training;Histopathology;Computational modeling;High performance computing;Graphics processing units;Distributed databases;Data models;DNN;Model Parallelism;Keras;TensorFlow;Eager Execution;MPI},
  doi       = {10.1109/SC41405.2020.00049},
}
tag: DAPPLE
| PPoPP21
| Alibaba Group
paper link: here
citation:
% arXiv preprint 2007.01045 (primary class cs.DC).
% The acronym is brace-protected so sentence-casing styles do not print
% "Dapple"; url is added to match the other arXiv entries in this list.
@misc{fan2020dapple,
  author        = {Shiqing Fan and Yi Rong and Chen Meng and Zongyan Cao and Siyu Wang and Zhen Zheng and Chuan Wu and Guoping Long and Jun Yang and Lixue Xia and Lansong Diao and Xiaoyong Liu and Wei Lin},
  title         = {{DAPPLE}: A Pipelined Data Parallel Approach for Training Large Models},
  year          = {2020},
  eprint        = {2007.01045},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2007.01045},
}
tag: PipeDream-2BW
| PipeDream-Flush
| Double-Buffered Weight Updates
| ICML21
| Microsoft
| Stanford University
paper link: here
citation:
% arXiv preprint 2006.09503 (primary class cs.LG).
% DNN is brace-protected against sentence-casing styles; url is added to
% match the other arXiv entries in this list.
@misc{narayanan2021memoryefficient,
  author        = {Deepak Narayanan and Amar Phanishayee and Kaiyu Shi and Xie Chen and Matei Zaharia},
  title         = {Memory-Efficient Pipeline-Parallel {DNN} Training},
  year          = {2021},
  eprint        = {2006.09503},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2006.09503},
}
tag: PipeMare
| MLSys21
| Stanford University
paper link: here
citation:
% arXiv preprint 1910.05124 (primary class cs.DC).
% Names use the "Last, First" form so the two-word surname "De Sa" is not
% split (in "First Last" form BibTeX takes only "Sa" as the surname). The
% accent is written as a LaTeX escape so classic 8-bit BibTeX sorts and prints
% it correctly. PipeMare and DNN are brace-protected against sentence casing.
@misc{yang2020pipemareasynchronouspipelineparallel,
  author        = {Yang, Bowen and Zhang, Jian and Li, Jonathan and R{\'e}, Christopher and Aberger, Christopher R. and De Sa, Christopher},
  title         = {{PipeMare}: Asynchronous Pipeline Parallel {DNN} Training},
  year          = {2020},
  eprint        = {1910.05124},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/1910.05124},
}
tag: Pipe-Torch
| CBD19
| SEU
paper link: here
citation:
% IEEE (CBD 2019) proceedings entry.
% Empty volume/number fields are dropped, the page range uses a double
% hyphen, and Pipe-Torch / GPU are brace-protected against sentence casing.
@inproceedings{zhan2019pipetorch,
  author    = {Zhan, Jun and Zhang, Jinghui},
  title     = {{Pipe-Torch}: Pipeline-Based Distributed Deep Learning in a {GPU} Cluster with Heterogeneous Networking},
  booktitle = {2019 Seventh International Conference on Advanced Cloud and Big Data (CBD)},
  year      = {2019},
  pages     = {55--60},
  keywords  = {Graphics processing units;Training;Parallel processing;Computational modeling;Data models;Pipelines;Load modeling;Distributed deep learning;Pipeline-hybrid parallelism;Heterogeneous network environment},
  doi       = {10.1109/CBD.2019.00020},
}
tag: GPipe
| NIPS19
| Google
paper link: here
citation:
% arXiv preprint 1811.06965 (primary class cs.CV).
% GPipe is brace-protected so sentence-casing styles do not print "Gpipe";
% url is added to match the other arXiv entries in this list.
@misc{huang2019gpipe,
  author        = {Yanping Huang and Youlong Cheng and Ankur Bapna and Orhan Firat and Mia Xu Chen and Dehao Chen and HyoukJoong Lee and Jiquan Ngiam and Quoc V. Le and Yonghui Wu and Zhifeng Chen},
  title         = {{GPipe}: Efficient Training of Giant Neural Networks using Pipeline Parallelism},
  year          = {2019},
  eprint        = {1811.06965},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/1811.06965},
}
tag: PipeDream
| SOSP19
| Microsoft
| CMU
| Stanford University
paper link: here
citation:
% ACM (SOSP '19) proceedings entry.
% The citation key is kept unchanged for existing citations, although the
% listed first author is Narayanan and the year is 2019.
% The page range uses an ASCII double hyphen instead of a Unicode en dash, the
% conference city is stored in venue rather than location (which biblatex
% treats as the publisher's place, the same role as address), and PipeDream /
% DNN are brace-protected in a title-cased title.
@inproceedings{harlap2018pipedream,
  author    = {Narayanan, Deepak and Harlap, Aaron and Phanishayee, Amar and Seshadri, Vivek and Devanur, Nikhil R. and Ganger, Gregory R. and Gibbons, Phillip B. and Zaharia, Matei},
  title     = {{PipeDream}: Generalized Pipeline Parallelism for {DNN} Training},
  booktitle = {Proceedings of the 27th ACM Symposium on Operating Systems Principles},
  series    = {SOSP '19},
  year      = {2019},
  pages     = {1--15},
  numpages  = {15},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  venue     = {Huntsville, Ontario, Canada},
  isbn      = {9781450368735},
  doi       = {10.1145/3341301.3359646},
  url       = {https://doi.org/10.1145/3341301.3359646},
}