|
3 | 3 | #
|
4 | 4 | # This source code is licensed under the BSD 3-Clause license found in the
|
5 | 5 | # LICENSE file in the root directory of this source tree.
|
6 |
| -import pytest |
| 6 | +from unittest import skipIf |
| 7 | + |
7 | 8 | import torch
|
| 9 | +from torch.testing._internal.common_utils import ( |
| 10 | + TestCase, |
| 11 | + instantiate_parametrized_tests, |
| 12 | + parametrize, |
| 13 | + run_tests, |
| 14 | +) |
8 | 15 | from torch.utils._triton import has_triton
|
9 | 16 |
|
10 | 17 | from torchao.dtypes.uintx.bitpacking import pack, pack_cpu, unpack, unpack_cpu
|
|
# Axes along which pack/unpack round-trips are exercised: first, last, and a middle dim.
dimensions = (0, -1, 1)
|
14 | 21 |
|
15 | 22 |
|
16 |
class TestBitpacking(TestCase):
    """Round-trip tests for uintx bit-packing.

    Each test packs a uint8 tensor at a given bit width and checks that
    unpacking reproduces the original values exactly.
    """

    def tearDown(self):
        # Reset the dynamo cache between tests so compiled artifacts from
        # one parametrization cannot leak into the next.
        torch._dynamo.reset()

    @parametrize("bit_width", bit_widths)
    @parametrize("dim", dimensions)
    def test_CPU(self, bit_width, dim):
        # Values must fit in `bit_width` bits, hence the [0, 2**bit_width) range.
        test_tensor = torch.randint(
            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8, device="cpu"
        )
        packed = pack_cpu(test_tensor, bit_width, dim=dim)
        unpacked = unpack_cpu(packed, bit_width, dim=dim)
        assert unpacked.allclose(test_tensor)

    @skipIf(not torch.cuda.is_available(), "CUDA not available")
    @parametrize("bit_width", bit_widths)
    @parametrize("dim", dimensions)
    def test_GPU(self, bit_width, dim):
        test_tensor = torch.randint(
            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8
        ).cuda()
        packed = pack(test_tensor, bit_width, dim=dim)
        unpacked = unpack(packed, bit_width, dim=dim)
        assert unpacked.allclose(test_tensor)

    # skipIf reasons are passed positionally here for consistency with the
    # other decorators in this class.
    @skipIf(not torch.cuda.is_available(), "CUDA not available")
    @skipIf(not has_triton(), "unsupported without triton")
    @parametrize("bit_width", bit_widths)
    @parametrize("dim", dimensions)
    def test_compile(self, bit_width, dim):
        torch._dynamo.config.specialize_int = True
        # Bug fix: the original version discarded the torch.compile results
        # and then called the eager pack/unpack, so the compiled path was
        # never actually exercised. Bind and use the compiled callables.
        pack_compiled = torch.compile(pack, fullgraph=True)
        unpack_compiled = torch.compile(unpack, fullgraph=True)
        test_tensor = torch.randint(
            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8
        ).cuda()
        packed = pack_compiled(test_tensor, bit_width, dim=dim)
        unpacked = unpack_compiled(packed, bit_width, dim=dim)
        assert unpacked.allclose(test_tensor)

    # these test cases are for the example pack walk through in the bitpacking.py file
    @skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pack_example(self):
        test_tensor = torch.tensor(
            [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
        ).cuda()
        shard_4, shard_2 = pack(test_tensor, 6)
        # Expected shard values are taken from the worked example in bitpacking.py.
        assert (
            torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4)
        )
        assert torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2)
        unpacked = unpack([shard_4, shard_2], 6)
        assert unpacked.allclose(test_tensor)

    def test_pack_example_CPU(self):
        test_tensor = torch.tensor(
            [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
        )
        shard_4, shard_2 = pack(test_tensor, 6)
        # Expected shard values are taken from the worked example in bitpacking.py.
        assert torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4)
        assert torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2)
        unpacked = unpack([shard_4, shard_2], 6)
        assert unpacked.allclose(test_tensor)
69 | 89 |
|
# Expand every @parametrize decorator on the class into concrete, individually
# addressable test methods (required by the torch parametrization framework).
instantiate_parametrized_tests(TestBitpacking)


if __name__ == "__main__":
    run_tests()
0 commit comments