Skip to content

Commit c608b78

Browse files
committed
Check numerical equivalence / closeness between different kernel preferences
Summary: This PR checks that the different kernel preferences for Float8Tensor (AUTO, TORCH, and FBGEMM) produce similar numerics. The triton implementation and the torchao implementation currently differ slightly, and we need to decide whether to fix that. The main difference appears to be that the triton implementation uses: ``` a_scale = MAX_FP8 / max_abs then a_scale = 1.0 / a_scale a_fp8 = a * a_scale ``` while torch does: ``` a_scale = max_abs / MAX_FP8 a_fp8 = a / a_scale ``` Test Plan: python test/quantization/quantize_/workflows/float8/test_float8_tensor.py -k test_kernel_preference_numerical_equivalence Reviewers: Subscribers: Tasks: Tags: stack-info: PR: #2651, branch: jerryzh168/stack/15
1 parent 5ae457c commit c608b78

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

test/quantization/quantize_/workflows/float8/test_float8_tensor.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,49 @@ def test_slice(self, granularity):
257257
sqnr = compute_error(res, res_ref)
258258
self.assertTrue(sqnr > 15, f"sqnr: {sqnr}")
259259

260+
    @common_utils.parametrize("granularity", [PerTensor(), PerRow()])
    # Inputs are (M,..), K, N
    @common_utils.parametrize(
        "sizes",
        [
            ((128,), 256, 128),
            ((32, 128), 64, 256),
        ],
    )
    def test_kernel_preference_numerical_equivalence(self, granularity, sizes):
        """Test different kernel preferences have the same numerics for float8 dynamic activation
        and float8 weight config
        """
        # NOTE(review): the comment on the parametrization says sizes are
        # (M, K, N), but this unpacks them as (M, N, K). The shapes used below
        # remain self-consistent (input is (*M, K) and the linear maps K -> N),
        # only the labels are swapped relative to the comment — confirm which
        # convention is intended.
        M, N, K = sizes
        dtype = torch.bfloat16
        input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda")
        # Create a linear layer with bfloat16 dtype
        model = ToyLinearModel(K, N).eval().to(dtype).to("cuda")

        # Quantize an identical copy of the model once per kernel preference,
        # collecting each variant's output on the same input tensor.
        kernel_preferences = [
            KernelPreference.AUTO,
            KernelPreference.TORCH,
            KernelPreference.FBGEMM,
        ]
        quantized_outputs = {}
        for kp in kernel_preferences:
            config = Float8DynamicActivationFloat8WeightConfig(
                granularity=granularity, kernel_preference=kp
            )
            # deepcopy so each preference quantizes the same bf16 weights
            quantized_model = copy.deepcopy(model)
            quantize_(quantized_model, config)
            quantized_outputs[kp] = quantized_model(input_tensor)

        from torchao.quantization.utils import compute_error

        # Compare every preference's output against the first entry (AUTO);
        # an SQNR above 28 dB is treated as numerically equivalent here.
        kp_and_res = list(quantized_outputs.items())
        for i in range(1, len(kp_and_res)):
            kp, res = kp_and_res[i]
            self.assertTrue(
                compute_error(res, kp_and_res[0][1]) > 28,
                f"mismatch between {kp=} and {kp_and_res[0]=}, {sizes=}, {granularity=}",
            )
260303
@common_utils.parametrize("granularity", [PerTensor(), PerRow()])
261304
def test_slice_preserves_aliasing(self, granularity):
262305
config = Float8DynamicActivationFloat8WeightConfig(

0 commit comments

Comments
 (0)