From d637a669d425dcff00ba45df90ca896f8251d9ac Mon Sep 17 00:00:00 2001
From: Janani Sriram
Date: Thu, 28 Aug 2025 10:08:40 -0700
Subject: [PATCH] Validate exhaustive autotuning for FP8 Inductor templates
 (#355)

Summary:

X-link: https://github.com/pytorch/pytorch/pull/161442

Validate exhaustive autotuning for FP8 Inductor templates: the scaled MM
templates require `block_k >= 32`. Before this change, exhaustive autotuning
defaulted to a limited set of autotuning configs, because the constraints of
exhaustive autotuning on FP8 shapes had not been tested.

Reviewed By: coconutruben

Differential Revision: D80958642
---
 tritonbench/operators/fp8_gemm/fp8_gemm.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tritonbench/operators/fp8_gemm/fp8_gemm.py b/tritonbench/operators/fp8_gemm/fp8_gemm.py
index e35e34bb8..7acf87c1d 100644
--- a/tritonbench/operators/fp8_gemm/fp8_gemm.py
+++ b/tritonbench/operators/fp8_gemm/fp8_gemm.py
@@ -17,6 +17,10 @@
 from .tutorial import matmul as tutorial_matmul
 
+torch._dynamo.config.recompile_limit = (
+    10000  # Set a high recompile limit to allow for exhaustive autotuning
+)
+
 logger = logging.getLogger(__name__)
 
 try:
     from .persistent import (
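
Usage sketch (not part of the patch): a minimal example of how exhaustive GEMM
autotuning might be exercised for an FP8 scaled matmul once the recompile
limit is raised as above. The `max_autotune_gemm_search_space` knob, the
tensor shapes, and the direct `torch._scaled_mm` call are illustrative
assumptions, not code taken from `fp8_gemm.py`.

    import torch
    import torch._dynamo
    import torch._inductor.config as inductor_config

    # Mirror the patch: allow many recompiles so autotuning can cover many shapes.
    torch._dynamo.config.recompile_limit = 10000
    # Assumed Inductor knob requesting the full GEMM autotuning search space.
    inductor_config.max_autotune_gemm_search_space = "EXHAUSTIVE"

    def fp8_scaled_mm(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        # Per-tensor scales; torch._scaled_mm expects the second operand column-major.
        scale_a = torch.tensor(1.0, device=a.device)
        scale_b = torch.tensor(1.0, device=b.device)
        return torch._scaled_mm(
            a, b.t(), scale_a=scale_a, scale_b=scale_b, out_dtype=torch.bfloat16
        )

    compiled_mm = torch.compile(fp8_scaled_mm, mode="max-autotune")

    if torch.cuda.is_available():
        # Hypothetical problem sizes; K is large enough for block_k >= 32 tiles.
        m, n, k = 1024, 1024, 1024
        a = torch.randn(m, k, device="cuda").to(torch.float8_e4m3fn)
        b = torch.randn(n, k, device="cuda").to(torch.float8_e4m3fn)
        out = compiled_mm(a, b)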