
Commit cc1f6cd

Update README
1 parent 1ff170b commit cc1f6cd

File tree

2 files changed: +3 -8 lines

README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -165,7 +165,7 @@ python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode in
 
 To run with int4, just pass the int4 checkpoint to generate.py.
 ```bash
-python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.pth --compile
+python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.pth --compile --device $DEVICE
 ```
 
 ## Speculative Sampling
````

quantize.py

Lines changed: 2 additions & 7 deletions
````diff
@@ -402,12 +402,7 @@ def __init__(self, mod, groupsize=128, inner_k_tiles=8, padding=True):
         assert inner_k_tiles in [2, 4, 8]
 
     @torch.no_grad()
-    def create_quantized_state_dict(self, use_cuda = True):
-        if use_cuda and torch.cuda.is_available():
-            device="cuda"
-        else:
-            device="cpu"
-
+    def create_quantized_state_dict(self):
         cur_state_dict = self.mod.state_dict()
         for fqn, mod in self.mod.named_modules():
             if isinstance(mod, torch.nn.Linear):
@@ -430,7 +425,7 @@ def create_quantized_state_dict(self, use_cuda = True):
                             "and that groupsize and inner_k_tiles*16 evenly divide into it")
                         continue
                 weight_int4pack, scales_and_zeros = prepare_int4_weight_and_scales_and_zeros(
-                    weight.to(torch.bfloat16).to(device=device), self.groupsize, self.inner_k_tiles
+                    weight.to(torch.bfloat16), self.groupsize, self.inner_k_tiles
                 )
                 cur_state_dict[f"{fqn}.weight"] = weight_int4pack.to('cpu')
                 cur_state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros.to('cpu')
````
