Skip to content

Commit 42a767c

Browse files
committed
Check numerical equivalence / closeness between different kernel preferences
Summary: This PR checks that different kernel preferences for Float8Tensor (AUTO, TORCH and FBGEMM) produce similar numerics. The triton implementation and the torchao implementation currently differ slightly, and we need to decide whether to fix that. The main difference seems to be that the triton implementation uses: ``` a_scale = MAX_FP8 / max_abs then a_scale = 1.0 / a_scale a_fp8 = a * a_scale ``` while torch does: ``` a_scale = max_abs / MAX_FP8 a_fp8 = a / a_scale ``` The hp_value_lb and hp_value_ub settings are also slightly different. Test Plan: python test/quantization/quantize_/workflows/float8/test_float8_tensor.py -k test_kernel_preference_numerical_equivalence Reviewers: Subscribers: Tasks: Tags: stack-info: PR: #2651, branch: jerryzh168/stack/15
1 parent 79e89ff commit 42a767c

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

test/quantization/quantize_/workflows/float8/test_float8_tensor.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,49 @@ def test_slice(self, granularity):
259259
sqnr = compute_error(res, res_ref)
260260
self.assertTrue(sqnr > 15, f"sqnr: {sqnr}")
261261

262+
@common_utils.parametrize("granularity", [PerTensor(), PerRow()])
263+
# Inputs are (M,..), K, N
264+
@common_utils.parametrize(
265+
"sizes",
266+
[
267+
((128,), 256, 128),
268+
((32, 128), 64, 256),
269+
],
270+
)
271+
def test_kernel_preference_numerical_equivalence(self, granularity, sizes):
272+
"""Test different kernel preferences have the same numerics for float8 dynamic activation
273+
and float8 weight config
274+
"""
275+
M, N, K = sizes
276+
dtype = torch.bfloat16
277+
input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda")
278+
# Create a linear layer with bfloat16 dtype
279+
model = ToyLinearModel(K, N).eval().to(dtype).to("cuda")
280+
281+
kernel_preferences = [
282+
KernelPreference.AUTO,
283+
KernelPreference.TORCH,
284+
KernelPreference.FBGEMM,
285+
]
286+
quantized_outputs = {}
287+
for kp in kernel_preferences:
288+
config = Float8DynamicActivationFloat8WeightConfig(
289+
granularity=granularity, kernel_preference=kp
290+
)
291+
quantized_model = copy.deepcopy(model)
292+
quantize_(quantized_model, config)
293+
quantized_outputs[kp] = quantized_model(input_tensor)
294+
295+
from torchao.quantization.utils import compute_error
296+
297+
kp_and_res = list(quantized_outputs.items())
298+
for i in range(1, len(kp_and_res)):
299+
kp, res = kp_and_res[i]
300+
self.assertTrue(
301+
compute_error(res, kp_and_res[0][1]) > 28,
302+
f"mismatch between {kp=} and {kp_and_res[0]=}, {sizes=}, {granularity=}",
303+
)
304+
262305
@common_utils.parametrize("granularity", [PerTensor(), PerRow()])
263306
def test_slice_preserves_aliasing(self, granularity):
264307
config = Float8DynamicActivationFloat8WeightConfig(

0 commit comments

Comments
 (0)