
Commit adfe570

Disable a flaky test (#257)

Summary: att
Test Plan: python test/integration/test_integration.py
Reviewers:
Subscribers:
Tasks:
Tags:
Co-authored-by: Mark Saroufim <[email protected]>

1 parent e0affd6 · commit adfe570

File tree

2 files changed: +2 −1 lines changed


test/integration/test_integration.py (1 addition, 0 deletions)

@@ -1104,6 +1104,7 @@ def test_weight_only_quant(self):
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @torch.no_grad()
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skip("This test is flaky, we'll enable later")
     def test_weight_only_quant_force_mixed_mm(self, device, dtype):
         if device != "cuda":
             self.skipTest(f"weight_only_quant_force_mixed_mm can't be constructed on {device}")
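For context, @unittest.skip(reason) from the standard library unconditionally disables a test, while @unittest.skipIf gates it on a condition; the hunk above simply stacks an unconditional skip on top of the existing CUDA check. A minimal self-contained sketch of the same pattern (the class and test names here are illustrative, not from the repository):

    import unittest

    import torch


    class ExampleQuantTests(unittest.TestCase):
        # Illustrative only: mirrors the decorator stacking in the diff above.
        @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
        @unittest.skip("This test is flaky, we'll enable later")
        def test_flaky_mixed_mm_path(self):
            # With @unittest.skip applied, this body never executes; the runner
            # reports the test as skipped with the given reason string.
            self.fail("should not run while the test is disabled")


    if __name__ == "__main__":
        unittest.main()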

test/quantization/test_quant_api.py (1 addition, 1 deletion)

@@ -563,7 +563,7 @@ def get_per_token_block_size(x):
         input_eps = 1e-5
         input_quant_min = -127
         input_quant_max = 127
-        input_quant_func = lambda x: AffineQuantizedTensor.from_float(x, input_mapping_type, get_per_token_block_size(x), input_target_dtype, eps=input_eps, quant_min=input_quant_min, quant_max=input_quant_max, scale_dtype=torch.float)
+        input_quant_func = lambda x: AffineQuantizedTensor.from_float(x, input_mapping_type, get_per_token_block_size(x), input_target_dtype, eps=input_eps, quant_min=input_quant_min, quant_max=input_quant_max, scale_dtype=torch.float32 if x.dtype == torch.float16 else None)

         # use 1024 so that we don't need padding
         m = ToyLinearModel(1024, 1024, 1024).eval().to(torch.bfloat16).to("cuda")
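The second hunk changes only the scale_dtype argument: scales are kept in float32 when the input is float16, instead of always being forced to torch.float. The sketch below is an assumption about what that argument controls, written in plain PyTorch rather than against AffineQuantizedTensor: it computes a per-token absmax scale for an int8 range and shows that the float16 and float32 scale computations can diverge for small activations, since float16 loses precision near zero.

    import torch

    # Hypothetical illustration (not the torchao implementation): per-token symmetric
    # int8 scales computed with an explicit scale dtype, analogous to scale_dtype above.
    def per_token_scale(x: torch.Tensor, quant_max: int = 127, scale_dtype=None) -> torch.Tensor:
        amax = x.abs().amax(dim=-1, keepdim=True)  # one scale per token (last-dim row)
        if scale_dtype is not None:
            amax = amax.to(scale_dtype)            # widen before dividing
        return amax / quant_max

    x_fp16 = torch.randn(4, 1024, dtype=torch.float16) * 1e-4   # small activations
    scale_fp16 = per_token_scale(x_fp16)                         # stays in float16
    scale_fp32 = per_token_scale(x_fp16, scale_dtype=torch.float32)

    # The float16 result can land in the subnormal range and lose precision,
    # so quantizing with the float32 scale round-trips more accurately.
    print(scale_fp16.dtype, scale_fp32.dtype)
    print((scale_fp16.float() - scale_fp32).abs().max())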
