Remove oneshot_device #1568

Merged · 6 commits · Jun 25, 2025
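
The removed `oneshot_device` argument was already deprecated and nonfunctional, so migrating past this change amounts to dropping the keyword and handling device placement when loading the model. A minimal before/after sketch, assuming `oneshot` is importable from the top-level `llmcompressor` package (the tests below import `train` that way); the model stub and dataset come from the tests in this diff, while "recipe.yaml" is a hypothetical placeholder:

import torch
from transformers import AutoModelForCausalLM
from llmcompressor import oneshot

# Pick a device up front instead of passing oneshot_device.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
model = model.to(device)

# Before: oneshot(model=model, recipe="recipe.yaml", oneshot_device=device)
# After: the argument is gone; device placement happened above.
oneshot(
    model=model,
    dataset="open_platypus",
    recipe="recipe.yaml",  # hypothetical recipe path
    num_calibration_samples=64,
)
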
7 changes: 0 additions & 7 deletions src/llmcompressor/args/model_arguments.py
@@ -80,13 +80,6 @@ class ModelArguments:
         default=True,
         metadata={"help": "Whether to compress sparse models during save"},
     )
-    oneshot_device: Optional[str] = field(
-        default="cuda",
-        metadata={
-            "help": "This argument is deprecated and nonfunctional "
-            "and will be removed in future release"
-        },
-    )
     model_revision: str = field(
         default="main",
         metadata={
2 changes: 0 additions & 2 deletions src/llmcompressor/entrypoints/oneshot.py
@@ -208,7 +208,6 @@ def oneshot(
     tie_word_embeddings: bool = False,
     trust_remote_code_model: bool = False,
     save_compressed: bool = True,
-    oneshot_device: str = "cuda:0",
     model_revision: str = "main",
     # Recipe arguments
     recipe: Optional[Union[str, List[str]]] = None,
@@ -259,7 +258,6 @@
     :param trust_remote_code_model: Whether to allow for custom models to execute
         their own modeling files.
     :param save_compressed: Whether to compress sparse models during save.
-    :param oneshot_device: Device to run oneshot calibration on.
     :param model_revision: The specific model version to use (can be branch name,
         tag, or commit id).

@@ -40,7 +40,6 @@ def preprocessing_func(example):
             recipe=None,
             num_train_epochs=self.num_train_epochs,
             concatenate_data=concatenate_data,
-            oneshot_device=self.device,
             text_column="text",
             dataset_path=dataset_path,
             preprocessing_func=preprocessing_func,
@@ -24,7 +24,6 @@ def test_finetune_without_recipe(self):
         from llmcompressor import train

         recipe_str = None
-        device = "cuda:0"

         concatenate_data = False
         max_steps = 50
@@ -38,7 +37,6 @@ def test_finetune_without_recipe(self):
             max_steps=max_steps,
             concatenate_data=concatenate_data,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):
@@ -31,7 +31,6 @@ def _test_oneshot_and_finetune(self):
             splits=splits,
             recipe=self.recipe,
             num_calibration_samples=64,
-            oneshot_device=self.device,
             dataset_config_name=self.dataset_config_name,
             concatenate_data=self.concat_txt,
             output_dir=self.output,
3 changes: 0 additions & 3 deletions tests/llmcompressor/transformers/finetune/test_safetensors.py
@@ -24,7 +24,6 @@ def setUp(self):
     def test_safetensors(self):
         from llmcompressor import train

-        device = "cuda:0"
         output_dir = self.output / "output1"
         max_steps = 10
         splits = {"train": "train[:10%]"}
@@ -35,7 +34,6 @@ def test_safetensors(self):
             output_dir=output_dir,
             max_steps=max_steps,
             splits=splits,
-            oneshot_device=device,
         )

         assert os.path.exists(output_dir / "model.safetensors")
@@ -49,7 +47,6 @@ def test_safetensors(self):
             output_dir=new_output_dir,
             max_steps=max_steps,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):
1 change: 0 additions & 1 deletion tests/llmcompressor/transformers/gptq/test_oneshot.py
@@ -77,7 +77,6 @@ def test_oneshot_application(self):
             dataset=self.dataset,
             output_dir=self.output,
             recipe=self.recipe,
-            oneshot_device=self.device,
             num_calibration_samples=9,
         )
         model_loaded = AutoModelForCausalLM.from_pretrained(
@@ -39,7 +39,6 @@ def _test_consecutive_runs(
             num_calibration_samples=num_calibration_samples,
             recipe=self.first_recipe,
             output_dir=self.output_first,
-            oneshot_device=self.device,
         )

         first_model = AutoModelForCausalLM.from_pretrained(
@@ -68,7 +67,6 @@ def _test_consecutive_runs(
             num_calibration_samples=num_calibration_samples,
             recipe=self.second_recipe,
             output_dir=self.output_second,
-            oneshot_device=self.device,
         )

         second_model = AutoModelForCausalLM.from_pretrained(
@@ -60,7 +60,6 @@ def test_mask_structure_preserved(self):
             num_calibration_samples=num_calibration_samples,
             recipe=self.initial_pruning_only_recipe,
             output_dir=self.output_first,
-            oneshot_device=self.device,
             save_compressed=False,
         )
         targetted_layer = first_tiny_model.model.layers[0].self_attn.k_proj
@@ -82,7 +81,6 @@ def test_mask_structure_preserved(self):
             num_calibration_samples=num_calibration_samples,
             recipe=self.subsequent_prune_and_quant_recipe,
             output_dir=self.output_second,
-            oneshot_device=self.device,
             save_compressed=False,
         )

@@ -59,7 +59,6 @@ def _test_oneshot_completion(self, model_name: str = None):
             model=self.model,
             dataset=self.dataset,
             splits={"calibration": f"train[:{self.num_samples}]"},
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=512,
             num_calibration_samples=self.num_samples,
2 changes: 0 additions & 2 deletions tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
@@ -33,7 +33,6 @@ def test_sparsities(self):
         model = oneshot(
             model=self.model,
             dataset=self.dataset,
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=128,
             num_calibration_samples=64,
@@ -82,7 +81,6 @@ def test_sparsities_gpu(self):
         model = oneshot(
             model=self.model,
             dataset=self.dataset,
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=128,
             num_calibration_samples=64,
@@ -31,7 +31,6 @@ def test_oneshot_with_modifier_object(self):
             SparseGPTModifier(sparsity=0.5, targets=[r"re:model.layers.\d+$"])
         ]

-        device = "cuda:0"
         concatenate_data = False
         num_calibration_samples = 64
         output_dir = self.output / "oneshot_out"
@@ -45,7 +44,6 @@ def test_oneshot_with_modifier_object(self):
             recipe=recipe_str,
             concatenate_data=concatenate_data,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):
@@ -47,9 +47,6 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
expected_sparsity = 0.5
model_path = "nm-testing/llama2.c-stories15M"
device = "cuda:0"
if not torch.cuda.is_available():
device = "cpu"
dataset = "open_platypus"
concatenate_data = False
num_calibration_samples = 64
@@ -66,7 +63,6 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
         recipe=recipe_str,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         precision=dtype,
         clear_sparse_session=False,
     )
@@ -166,9 +162,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
"tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
)
model_path = "nm-testing/llama2.c-stories15M"
device = "cuda:0"
if not torch.cuda.is_available():
device = "cpu"
device = "cuda:0" if not torch.cuda.is_available() else "cpu"
dataset = "open_platypus"
concatenate_data = False
num_calibration_samples = 16
@@ -182,7 +176,6 @@ def test_quant_model_reload(format, dtype, tmp_path):
         recipe=recipe_str,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         precision=dtype,
         clear_sparse_session=False,
     )
@@ -362,9 +355,7 @@ def test_model_shared_tensors_gpu(
 def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
     from llmcompressor.pytorch.model_load.helpers import get_session_model

-    device = "cuda"
-    if not torch.cuda.is_available():
-        device = "cpu"
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 64
@@ -378,7 +369,6 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
         recipe=recipe,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         clear_sparse_session=False,
     )

@@ -446,9 +436,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
     ],
 )
 def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp_path):
-    device = "cuda"
-    if not torch.cuda.is_available():
-        device = "cpu"
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 64
@@ -462,7 +450,6 @@ def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp_path):
         recipe=recipe,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         clear_sparse_session=False,
     )

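
Across the compression tests above, the three-line device fallback collapses into a single ternary, and device selection stays with the test code rather than being threaded through `oneshot_device`. A minimal sketch of the resulting idiom, reusing the model stub from this diff:

import torch
from transformers import AutoModelForCausalLM

# Prefer the first CUDA device; fall back to CPU on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M").to(device)

This keeps behavior identical to the removed if-block while making clear that the `oneshot` entrypoint no longer owns device placement.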