@@ -1,4 +1,5 @@
 import torch
+from tests.core import copy_to_cpu, allclose, call_module, call_func
 
 from deeplink_ext.internevo_ops.flash_attention import (
     FlashSelfAttention,
@@ -11,64 +12,59 @@
 
 
 def test_self_attention():
-    batch = 8
-    seqlen = 32
-    nheads = 16
-    headdim = 64
+    batch, seqlen, nheads, headdim = [8, 32, 16, 64]
 
-    q_ref = torch.rand([batch, seqlen, nheads, headdim], requires_grad=True)
-    k_ref = torch.rand([batch, seqlen, nheads, headdim], requires_grad=True)
-    v_ref = torch.rand([batch, seqlen, nheads, headdim], requires_grad=True)
-    qkv_ref = torch.stack([q_ref, k_ref, v_ref], 2)
-    q_ext = q_ref.clone().detach().to(torch.float16).cuda().requires_grad_()
-    k_ext = k_ref.clone().detach().to(torch.float16).cuda().requires_grad_()
-    v_ext = v_ref.clone().detach().to(torch.float16).cuda().requires_grad_()
-
-    model_ref = SelfAttention()
-    model_ext = FlashSelfAttention()
-    out_ref = model_ref(None, q_ref, k_ref, v_ref, None)
-    out_ext = model_ext(None, q_ext, k_ext, v_ext, None)
-    out_ref.backward(torch.ones_like(out_ref))
-    out_ext.backward(torch.ones_like(out_ext))
-
-    assert torch.allclose(
-        out_ext.cpu(), out_ref.to(torch.float16), rtol=1e-3, atol=1e-3
+    q_gpu = torch.rand(
+        [batch, seqlen, nheads, headdim],
+        dtype=torch.float16,
+        requires_grad=True,
+        device="cuda",
     )
-    assert torch.allclose(
-        q_ext.grad.cpu(), q_ref.grad.to(torch.float16), rtol=1e-3, atol=1e-3
+    k_gpu = torch.rand(
+        [batch, seqlen, nheads, headdim],
+        dtype=torch.float16,
+        requires_grad=True,
+        device="cuda",
     )
-    assert torch.allclose(
-        k_ext.grad.cpu(), k_ref.grad.to(torch.float16), rtol=1e-3, atol=1e-3
+    v_gpu = torch.rand(
+        [batch, seqlen, nheads, headdim],
+        dtype=torch.float16,
+        requires_grad=True,
+        device="cuda",
+    )
+
+    q_cpu, k_cpu, v_cpu = copy_to_cpu([q_gpu, k_gpu, v_gpu])
+    ouput_forward_cpu, grads_cpu = call_module(
+        SelfAttention(), None, q_cpu, k_cpu, v_cpu, None
     )
-    assert torch.allclose(
-        v_ext.grad.cpu(), v_ref.grad.to(torch.float16), rtol=1e-3, atol=1e-3
+    ouput_forward_gpu, grads_gpu = call_module(
+        FlashSelfAttention().cuda(), None, q_gpu, k_gpu, v_gpu, None
     )
+    assert allclose(ouput_forward_cpu, ouput_forward_gpu, rtol=1e-3, atol=1e-3)
+    assert allclose(grads_cpu, grads_gpu, rtol=1e-3, atol=1e-3)
 
 
 def test_cross_attention():
-    batch = 8
-    seqlen = 32
-    nheads = 16
-    headdim = 64
+    batch, seqlen, nheads, headdim = [8, 32, 16, 64]
 
-    q_ref = torch.rand([batch, seqlen, nheads, headdim], requires_grad=True)
-    kv_ref = torch.rand([batch, seqlen, 2, nheads, headdim], requires_grad=True)
-    q_ext = q_ref.clone().detach().to(torch.float16).cuda().requires_grad_()
-    kv_ext = kv_ref.clone().detach().to(torch.float16).cuda().requires_grad_()
-
-    model_ref = CrossAttention()
-    model_ext = FlashCrossAttention()
-    out_ref = model_ref(q_ref, kv_ref)
-    out_ext = model_ext(q_ext, kv_ext)
-    out_ref.backward(torch.ones_like(out_ref))
-    out_ext.backward(torch.ones_like(out_ext))
-
-    assert torch.allclose(
-        out_ext.cpu(), out_ref.to(torch.float16), rtol=1e-3, atol=1e-3
+    q_gpu = torch.rand(
+        [batch, seqlen, nheads, headdim],
+        dtype=torch.float16,
+        requires_grad=True,
+        device="cuda",
     )
-    assert torch.allclose(
-        q_ext.grad.cpu(), q_ref.grad.to(torch.float16), rtol=1e-3, atol=1e-3
+    kv_gpu = torch.rand(
+        [batch, seqlen, 2, nheads, headdim],
+        dtype=torch.float16,
+        requires_grad=True,
+        device="cuda",
     )
-    assert torch.allclose(
-        kv_ext.grad.cpu(), kv_ref.grad.to(torch.float16), rtol=1e-3, atol=1e-3
+
+    q_cpu, kv_cpu = copy_to_cpu([q_gpu, kv_gpu])
+    ouput_forward_cpu, grads_cpu = call_module(CrossAttention(), q_cpu, kv_cpu)
+    ouput_forward_gpu, grads_gpu = call_module(
+        FlashCrossAttention().cuda(), q_gpu, kv_gpu
     )
+
+    assert allclose(ouput_forward_cpu, ouput_forward_gpu, rtol=1e-3, atol=1e-3)
+    assert allclose(grads_cpu, grads_gpu, rtol=1e-3, atol=1e-3)
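
Note on the new helpers: copy_to_cpu, call_module, and allclose are imported from tests.core, whose implementation is not part of this diff. Judging from the hand-written code they replace, copy_to_cpu and call_module plausibly behave like the sketch below; the exact signatures, dtype handling, and gradient collection are assumptions, not the actual tests.core code.

# Hypothetical sketch of the tests.core helpers (not the real implementation).
import torch


def copy_to_cpu(tensors):
    # Make detached CPU float32 copies of the GPU float16 inputs and turn
    # gradient tracking back on, so they can feed the reference module.
    return [t.detach().clone().cpu().float().requires_grad_() for t in tensors]


def call_module(module, *args):
    # Run the forward pass, backpropagate a gradient of ones (mirroring the
    # removed out.backward(torch.ones_like(out)) calls), and gather the
    # gradients of every tensor argument that tracks gradients.
    output = module(*args)
    output.backward(torch.ones_like(output))
    grads = [
        arg.grad
        for arg in args
        if isinstance(arg, torch.Tensor) and arg.requires_grad
    ]
    return output, grads

With helpers of this shape, each test only builds its inputs; the forward/backward bookkeeping and gradient collection live in one place instead of being repeated per tensor.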
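
The allclose helper, as called above, has to accept both the single output tensors and the lists of gradients returned by call_module, and compare a CPU reference against a float16 CUDA result. A minimal sketch, again an assumption rather than the real tests.core code:

# Hypothetical sketch of the allclose helper (assumed behaviour only).
import torch


def allclose(reference, actual, rtol=1e-5, atol=1e-8):
    # Recurse over lists/tuples so grads_cpu and grads_gpu can be compared
    # element by element.
    if isinstance(reference, (list, tuple)):
        return all(
            allclose(r, a, rtol=rtol, atol=atol) for r, a in zip(reference, actual)
        )
    # Bring both tensors to the CPU and the reference dtype before comparing.
    return torch.allclose(
        actual.detach().cpu().to(reference.dtype),
        reference.detach().cpu(),
        rtol=rtol,
        atol=atol,
    )

The 1e-3 tolerances carried over from the old torch.allclose asserts keep the float16-vs-reference comparison workable; the refactored tests also draw the random inputs directly in float16 on the GPU, so, assuming copy_to_cpu only copies (or upcasts) them, the CPU reference path starts from exactly the same values instead of values rounded down from float32.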