#!/bin/bash
# merge_base.sh: merge a LLaMA base model with Chinese LoRA weights,
# convert the result to GGML, quantize it, and GPTQ-compress it.
set -e

# Assign the command-line arguments to variables, falling back to default values.
BASE_MODEL="${1:-"openaccess-ai-collective/wizard-mega-13b"}"
MODEL_PATH="${2:-"models/wizard-mega_chinese-13b-plus"}"
LORA_WEIGHT="${3:-"ziqingyang/chinese-llama-plus-lora-13b,ziqingyang/chinese-alpaca-plus-lora-13b,Chinese-Vicuna/Chinese-Vicuna-lora-13b-belle-and-guanaco"}"
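# Intermediate GGML precision and final quantization scheme; other llama.cpp
# quantization types (e.g. q4_0, q8_0) could be substituted for q5_0.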
OUT_TYPE="f16"
QUANT_TYPE="q5_0"
mkdir -p "$MODEL_PATH"
echo "The BASE_MODEL is: $BASE_MODEL"
echo "The LORA_WEIGHT is: $LORA_WEIGHT"
echo "The output model path is: $MODEL_PATH, the output type is: $OUT_TYPE"
echo "Merge the base '$BASE_MODEL' with '$LORA_WEIGHT' LoRA weight"
echo python tools/merge/merge_llama_with_chinese_lora.py --base_model "$BASE_MODEL" --lora_model "$LORA_WEIGHT" --output_dir "$MODEL_PATH"
python tools/merge/merge_llama_with_chinese_lora.py --base_model "$BASE_MODEL" --lora_model "$LORA_WEIGHT" --output_dir "$MODEL_PATH"
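# Sanity check: the GGML conversion below expects consolidated.00.pth in
# $MODEL_PATH (assumption: the merge script writes it there).
if [ ! -f "$MODEL_PATH/consolidated.00.pth" ]; then
    echo "Merge failed: $MODEL_PATH/consolidated.00.pth not found" >&2
    exit 1
fi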
echo python tools/convert_ggml.py --outtype "$OUT_TYPE" --outfile "$MODEL_PATH/ggml-$OUT_TYPE.bin" "$MODEL_PATH/consolidated.00.pth"
python tools/convert_ggml.py --outtype "$OUT_TYPE" --outfile "$MODEL_PATH/ggml-$OUT_TYPE.bin" "$MODEL_PATH/consolidated.00.pth"
echo "quantize the model ggml-$OUT_TYPE.bin $QUANT_TYPE"
echo tools/quantize $MODEL_PATH/ggml-$OUT_TYPE.bin $MODEL_PATH/ggml-$QUANT_TYPE.bin $QUANT_TYPE
tools/quantize $MODEL_PATH/ggml-$OUT_TYPE.bin $MODEL_PATH/ggml-$QUANT_TYPE.bin $QUANT_TYPE
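# Optional smoke test of the quantized model (assumption: a llama.cpp-style
# main binary is built alongside tools/quantize; adjust the path as needed):
#   tools/main -m "$MODEL_PATH/ggml-$QUANT_TYPE.bin" -p "Hello"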
echo "Compress the model to 4bit 128 group safetensors format. Need to install gptq.llama first"
echo "Refer to https://github.com/0cc4m/GPTQ-for-LLaMa"
echo python -m gptq.llama $MODEL_PATH c5 --wbits 4 --true-sequential --act-order --save_safetensors $MODEL_PATH/gptq-4bit-128g.safetensors
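# Note: the output filename says 128g, but no --groupsize 128 is passed below;
# if 128-group quantization is intended, that flag likely needs to be added
# (assumption based on GPTQ-for-LLaMa's command-line options).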
CUDA_VISIBLE_DEVICES=0 python -m gptq.llama "$MODEL_PATH" c4 --wbits 4 --true-sequential --act-order \
    --save_safetensors "$MODEL_PATH/gptq-4bit-128g.safetensors"
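# List the produced artifacts when everything succeeds.
echo "Done. Output files in $MODEL_PATH:"
ls -lh "$MODEL_PATH"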