1 file changed: +4 -3 lines changed
src/llmcompressor/modifiers/quantization/gptq
@@ -75,8 +75,9 @@ class GPTQModifier(Modifier, QuantizationMixin):
75
75
:param block_size: Used to determine number of columns to compress in one pass
76
76
:param dampening_frac: Amount of dampening to apply to H, as a fraction of the
77
77
diagonal norm
78
- :param actorder: order in which weight columns are quantized. For more information,
79
- on actorder options, see https://github.com/vllm-project/vllm/pull/8135
78
+ :param actorder: order in which weight columns are quantized. Defaults to "static"
79
+ activation ordering, which achieves the best accuracy recovery with no runtime cost.
80
+ For more information, see https://github.com/vllm-project/vllm/pull/8135
80
81
:param offload_hessians: Set to True for decreased memory usage but increased
81
82
runtime.
82
83
@@ -109,7 +110,7 @@ class GPTQModifier(Modifier, QuantizationMixin):
109
110
sequential_targets : Union [str , List [str ], None ] = None
110
111
block_size : int = 128
111
112
dampening_frac : Optional [float ] = 0.01
112
- actorder : Optional [ActivationOrdering ] = None
113
+ actorder : Optional [ActivationOrdering ] = ActivationOrdering . STATIC
113
114
offload_hessians : bool = False
114
115
115
116
# private variables
You can’t perform that action at this time.
0 commit comments