
Commit 0a21d8c

remove unused kernels

Parent: e980b45

File tree (3 files changed: +1, -49 lines):

  inference_lib/setup.py
  inference_lib/src/fp_quant/module/linear_fns.py
  inference_lib/src/fp_quant/module/qutlass_ops.py

inference_lib/setup.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 setup(
     name="fp_quant",
-    version="0.3.0",
+    version="0.3.1",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     author="Andrei Panferov",

inference_lib/src/fp_quant/module/linear_fns.py

Lines changed: 0 additions & 2 deletions
@@ -15,8 +15,6 @@
     matmul_mxf4_bf16_tn_op,
     matmul_nvf4_bf16_tn_op,
     matmul_mxf8_bf16_tn_op,
-    matmul_mxf8_bf16_nt_op,
-    matmul_mxf8_bf16_tt_op,
     matmul_mxf8_bf16_nn_op,
     # Backward quantization
     backward_t_bf16_op,

inference_lib/src/fp_quant/module/qutlass_ops.py

Lines changed: 0 additions & 46 deletions
@@ -12,8 +12,6 @@
     matmul_mxf4_bf16_tn,
     matmul_nvf4_bf16_tn,
     matmul_mxf8_bf16_tn,
-    matmul_mxf8_bf16_tt,
-    matmul_mxf8_bf16_nt,
     matmul_mxf8_bf16_nn,
     # Backward quantization
     backward_t_bf16,
@@ -192,50 +190,6 @@ def _(x, w, xs, ws, alpha):
     return x.new_empty(x.shape[0], w.shape[0], dtype=torch.bfloat16)


-@torch.library.custom_op("fp_quant::matmul_mxf8_bf16_nt_op", mutates_args=())
-def matmul_mxf8_bf16_nt_op(
-    x: torch.Tensor,
-    w: torch.Tensor,
-    xs: torch.Tensor,
-    ws: torch.Tensor,
-    alpha: torch.Tensor,
-) -> torch.Tensor:
-    return matmul_mxf8_bf16_nt(
-        x,
-        w,
-        to_blocked_qutlass(xs, use_triton_kernel=True),
-        to_blocked_qutlass(ws, use_triton_kernel=True).view(torch.float8_e8m0fnu),
-        alpha,
-    )
-
-
-@matmul_mxf8_bf16_nt_op.register_fake
-def _(x, w, xs, ws, alpha):
-    return x.new_empty(x.shape[1], w.shape[1], dtype=torch.bfloat16)
-
-
-@torch.library.custom_op("fp_quant::matmul_mxf8_bf16_tt_op", mutates_args=())
-def matmul_mxf8_bf16_tt_op(
-    x: torch.Tensor,
-    w: torch.Tensor,
-    xs: torch.Tensor,
-    ws: torch.Tensor,
-    alpha: torch.Tensor,
-) -> torch.Tensor:
-    return matmul_mxf8_bf16_tt(
-        x,
-        w,
-        to_blocked_qutlass(xs, use_triton_kernel=True),
-        to_blocked_qutlass(ws, use_triton_kernel=True).view(torch.float8_e8m0fnu),
-        alpha,
-    )
-
-
-@matmul_mxf8_bf16_tt_op.register_fake
-def _(x, w, xs, ws, alpha):
-    return x.new_empty(x.shape[0], w.shape[1], dtype=torch.bfloat16)
-
-
 @torch.library.custom_op("fp_quant::matmul_mxf8_bf16_nn_op", mutates_args=())
 def matmul_mxf8_bf16_nn_op(
     x: torch.Tensor,