Skip to content

[GPTQ] Change actorder default to "static" #1425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/llmcompressor/modifiers/quantization/gptq/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ class GPTQModifier(Modifier, QuantizationMixin):
:param block_size: Used to determine number of columns to compress in one pass
:param dampening_frac: Amount of dampening to apply to H, as a fraction of the
diagonal norm
:param actorder: order in which weight columns are quantized. For more information
on actorder options, see https://github.com/vllm-project/vllm/pull/8135
:param actorder: order in which weight columns are quantized. Defaults to "static"
activation ordering, which achieves best accuracy recovery with no runtime cost.
For more information, see https://github.com/vllm-project/vllm/pull/8135
:param offload_hessians: Set to True for decreased memory usage but increased
runtime.

Expand Down Expand Up @@ -110,7 +111,7 @@ class GPTQModifier(Modifier, QuantizationMixin):
sequential_targets: Union[str, List[str], None] = None
block_size: int = 128
dampening_frac: Optional[float] = 0.01
actorder: Optional[Union[ActivationOrdering, Sentinel]] = None
actorder: Optional[Union[ActivationOrdering, Sentinel]] = Sentinel("static")
offload_hessians: bool = False

# private variables
Expand Down
20 changes: 11 additions & 9 deletions tests/llmcompressor/modifiers/quantization/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,25 @@ def q_config_kwargs(config_0, config_1):
@pytest.mark.parametrize(
"has_actorder,actorder,config_0,config_1,expected_0,expected_1",
[
# defaults to None if nothing provided
(False, None, None, None, None, None),
# defaults to "static" if nothing provided
(False, "N/A", None, None, "static", "static"),
# modifier overrides config if no config provided
(True, "group", None, None, "group", "group"),
# modifier overrides if config partially matches anyways
(True, "group", None, "group", "group", "group"),
(True, "group", "group", None, "group", "group"),
# modifier errors if conflict with config
# modifier errors if explicitly conflicts with config
(True, "static", None, "group", "error", "error"),
(True, "static", "group", None, "error", "error"),
(True, "group", None, "static", "error", "error"),
(True, "group", "static", None, "error", "error"),
# modifier does not override if not provided
(False, "N/A", None, None, None, None),
(False, "N/A", None, "static", None, "static"),
(False, "N/A", "static", None, "static", None),
# modifier default ("static") fills in any config that leaves actorder unset
(False, "N/A", None, "static", "static", "static"),
(False, "N/A", "static", None, "static", "static"),
(False, "N/A", "static", "static", "static", "static"),
(False, "N/A", None, "group", None, "group"),
(False, "N/A", "group", None, "group", None),
# modifier does not override set config values
(False, "N/A", None, "group", "static", "group"),
(False, "N/A", "group", None, "group", "static"),
(False, "N/A", "group", "group", "group", "group"),
],
)
Expand Down