remove unused kernels: matmul_mxf8_bf16_nt and matmul_mxf8_bf16_tt

BlackSamorez · BlackSamorez · commit 0a21d8cdddac · 2025-10-28T14:06:06.000+01:00
diff --git a/inference_lib/setup.py b/inference_lib/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="fp_quant",
-    version="0.3.0",
+    version="0.3.1",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     author="Andrei Panferov",
diff --git a/inference_lib/src/fp_quant/module/linear_fns.py b/inference_lib/src/fp_quant/module/linear_fns.py
@@ -15,8 +15,6 @@
     matmul_mxf4_bf16_tn_op,
     matmul_nvf4_bf16_tn_op,
     matmul_mxf8_bf16_tn_op,
-    matmul_mxf8_bf16_nt_op,
-    matmul_mxf8_bf16_tt_op,
     matmul_mxf8_bf16_nn_op,
     # Backward quantization
     backward_t_bf16_op,
diff --git a/inference_lib/src/fp_quant/module/qutlass_ops.py b/inference_lib/src/fp_quant/module/qutlass_ops.py
@@ -12,8 +12,6 @@
         matmul_mxf4_bf16_tn,
         matmul_nvf4_bf16_tn,
         matmul_mxf8_bf16_tn,
-        matmul_mxf8_bf16_tt,
-        matmul_mxf8_bf16_nt,
         matmul_mxf8_bf16_nn,
         # Backward quantization
         backward_t_bf16,
@@ -192,50 +190,6 @@ def _(x, w, xs, ws, alpha):
     return x.new_empty(x.shape[0], w.shape[0], dtype=torch.bfloat16)
 
 
-@torch.library.custom_op("fp_quant::matmul_mxf8_bf16_nt_op", mutates_args=())
-def matmul_mxf8_bf16_nt_op(
-    x: torch.Tensor,
-    w: torch.Tensor,
-    xs: torch.Tensor,
-    ws: torch.Tensor,
-    alpha: torch.Tensor,
-) -> torch.Tensor:
-    return matmul_mxf8_bf16_nt(
-        x,
-        w,
-        to_blocked_qutlass(xs, use_triton_kernel=True),
-        to_blocked_qutlass(ws, use_triton_kernel=True).view(torch.float8_e8m0fnu),
-        alpha,
-    )
-
-
-@matmul_mxf8_bf16_nt_op.register_fake
-def _(x, w, xs, ws, alpha):
-    return x.new_empty(x.shape[1], w.shape[1], dtype=torch.bfloat16)
-
-
-@torch.library.custom_op("fp_quant::matmul_mxf8_bf16_tt_op", mutates_args=())
-def matmul_mxf8_bf16_tt_op(
-    x: torch.Tensor,
-    w: torch.Tensor,
-    xs: torch.Tensor,
-    ws: torch.Tensor,
-    alpha: torch.Tensor,
-) -> torch.Tensor:
-    return matmul_mxf8_bf16_tt(
-        x,
-        w,
-        to_blocked_qutlass(xs, use_triton_kernel=True),
-        to_blocked_qutlass(ws, use_triton_kernel=True).view(torch.float8_e8m0fnu),
-        alpha,
-    )
-
-
-@matmul_mxf8_bf16_tt_op.register_fake
-def _(x, w, xs, ws, alpha):
-    return x.new_empty(x.shape[0], w.shape[1], dtype=torch.bfloat16)
-
-
 @torch.library.custom_op("fp_quant::matmul_mxf8_bf16_nn_op", mutates_args=())
 def matmul_mxf8_bf16_nn_op(
     x: torch.Tensor,