
Commit 3213a7d

do not overwrite schemes
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 045bc8b commit 3213a7d

7 files changed (+42 lines added, -15 lines removed)


src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 11 additions & 2 deletions
@@ -138,20 +138,29 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         :param state: session state storing input model and calibration data
         """
         # apply config to model and prepare calibration hooks
-        QuantizationMixin.attach_scheme_and_observers(self, state.model)
-        QuantizationMixin.register_calibration_hooks(self, state.model)
+        if QuantizationMixin.has_config(self):
+            QuantizationMixin.attach_scheme_and_observers(self, state.model)
+            QuantizationMixin.register_calibration_hooks(self, state.model)

         # prepare module names
         self._module_names = {m: name for name, m in state.model.named_modules()}

         # register hooks
+        added_hook = False
         for module in state.model.modules():
             if getattr_chain(module, "quantization_scheme.weights", None) is not None:
                 # HACK: previously, embeddings were not quantized because they were not
                 # accessible by the layer compressor. For now, we manually ignore it,
                 # but in the FUTURE this should be ignored by the user
                 if not isinstance(module, torch.nn.Embedding):
                     self.register_hook(module, self.calibrate_module, "forward")
+                    added_hook = True
+
+        if not added_hook:
+            raise ValueError(
+                "GPTQModifier requires a quantization config be specified by this "
+                "modifier or a modifier preceding it"
+            )

         # prepare for calibration
         state.model.apply(apply_calibration_status)
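
The effect of this hunk: GPTQModifier only applies its own scheme when one was actually configured, and it now fails loudly if, after all preceding modifiers have run, no non-embedding module carries a weight quantization scheme. A minimal sketch of that final check in isolation (the helper name is hypothetical; the attribute layout mirrors the getattr_chain call above):

import torch

def any_weight_scheme_attached(model: torch.nn.Module) -> bool:
    # Illustrative stand-in for the added_hook bookkeeping: True as soon as one
    # non-embedding module has a quantization_scheme with a weights entry.
    for module in model.modules():
        scheme = getattr(module, "quantization_scheme", None)
        if scheme is not None and getattr(scheme, "weights", None) is not None:
            if not isinstance(module, torch.nn.Embedding):
                return True
    return False

# After this commit, GPTQModifier.on_initialize effectively does:
#     if not any_weight_scheme_attached(state.model):
#         raise ValueError("GPTQModifier requires a quantization config ...")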

src/llmcompressor/modifiers/quantization/quantization/base.py

Lines changed: 5 additions & 0 deletions
@@ -59,6 +59,11 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         Then, according to the module's quantization scheme, observers and calibration
         hooks are added. These hooks are disabled until the modifier starts.
         """
+        if not QuantizationMixin.has_config(self):
+            raise ValueError(
+                "QuantizationModifier requires that quantization fields be specified"
+            )
+
         QuantizationMixin.attach_scheme_and_observers(self, state.model)
         state.model.apply(disable_quantization)  # disable quantization until start

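
In practice this means a QuantizationModifier left entirely at its defaults now raises at initialization instead of attaching nothing. A hedged usage sketch (the import path and the "W4A16" preset name are assumptions, not part of this diff):

from llmcompressor.modifiers.quantization import QuantizationModifier

# All fields at their defaults: has_config() is False, so on_initialize raises.
unconfigured = QuantizationModifier()

# Setting any quantization field (scheme, config_groups, kv_cache_scheme,
# non-default targets or ignore) satisfies the new check.
configured = QuantizationModifier(scheme="W4A16", ignore=["lm_head"])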

src/llmcompressor/modifiers/quantization/quantization/mixin.py

Lines changed: 9 additions & 0 deletions
@@ -97,6 +97,15 @@ def register_calibration_hooks(self, model: torch.nn.Module):
         """
         model.apply(self._initialize_hooks)

+    def has_config(self) -> bool:
+        return not (
+            self.config_groups is None
+            and self.targets == ["Linear"]
+            and self.ignore == []
+            and self.scheme is None
+            and self.kv_cache_scheme is None
+        )
+
     def resolve_quantization_config(self) -> QuantizationConfig:
         """
         Returns the quantization config specified by this modifier
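
has_config treats the mixin's default field values (config_groups=None, targets=["Linear"], ignore=[], scheme=None, kv_cache_scheme=None) as "no config specified". A standalone restatement of that decision, using a hypothetical dataclass in place of the mixin:

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class QuantFields:
    # Hypothetical stand-in mirroring the mixin's fields and defaults
    config_groups: Optional[dict] = None
    targets: list = field(default_factory=lambda: ["Linear"])
    ignore: list = field(default_factory=list)
    scheme: Optional[str] = None
    kv_cache_scheme: Optional[dict] = None

    def has_config(self) -> bool:
        # "no config" only when every field is still at its default
        return not (
            self.config_groups is None
            and self.targets == ["Linear"]
            and self.ignore == []
            and self.scheme is None
            and self.kv_cache_scheme is None
        )

assert QuantFields().has_config() is False                   # all defaults: nothing specified
assert QuantFields(scheme="W4A16").has_config() is True      # any explicit field counts
assert QuantFields(ignore=["lm_head"]).has_config() is True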

tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_weight.yaml

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,6 @@
 test_stage:
   quant_modifiers:
-    GPTQModifier:
-      block_size: 128
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
@@ -14,4 +13,6 @@ test_stage:
           actorder: "weight"
           input_activations: null
           output_activations: null
-      targets: ["Linear"]
+      targets: ["Linear"]
+    GPTQModifier:
+      block_size: 128
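
This recipe and the three below all apply the same split: the quantization scheme and its targets move onto a QuantizationModifier, while GPTQModifier keeps only its GPTQ-specific block_size (plus its own targets in the last recipe). A hedged sketch of running one of these recipes with llmcompressor's oneshot entrypoint (import path, model id, dataset name, and argument values are placeholders, not part of this commit):

from llmcompressor import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # placeholder model id
    dataset="open_platypus",                      # placeholder calibration dataset
    recipe="tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_weight.yaml",
    num_calibration_samples=64,
    max_seq_length=512,
)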
Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,6 @@
 test_stage:
   quant_modifiers:
-    GPTQModifier:
-      block_size: 128
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
@@ -12,4 +11,6 @@ test_stage:
           strategy: "channel"
           input_activations: null
           output_activations: null
-      targets: ["Linear"]
+      targets: ["Linear"]
+    GPTQModifier:
+      block_size: 128

tests/llmcompressor/transformers/compression/recipes/new_quant_group.yaml

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,6 @@
 test_stage:
   quant_modifiers:
-    GPTQModifier:
-      block_size: 128
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
@@ -13,4 +12,6 @@ test_stage:
           group_size: 128
           input_activations: null
           output_activations: null
-      targets: ["Linear"]
+      targets: ["Linear"]
+    GPTQModifier:
+      block_size: 128
Lines changed: 5 additions & 4 deletions
@@ -1,8 +1,6 @@
 test_stage:
   quant_modifiers:
-    GPTQModifier:
-      block_size: 128
-      targets: ["re:model.layers.\\d+$"]
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
@@ -13,4 +11,7 @@ test_stage:
           strategy: "tensor"
           input_activations: null
           output_activations: null
-      targets: ["Linear", "Embedding"]
+      targets: ["Linear", "Embedding"]
+    GPTQModifier:
+      block_size: 128
+      targets: ["re:model.layers.\\d+$"]
