[Mosaic GPU][NFC] Always set exact to False in tmem_alloc and tmem_dealloc lowerings.

allanrenucci · Google-ML-Automation · commit 998e6baf2091 · 2025-08-29T03:29:00.000-07:00
`exact` is a validation mechanism; it does not affect lowering. The instruction is already validated by the op verifier.

PiperOrigin-RevId: 800827106
diff --git a/jax/experimental/mosaic/gpu/dialect_lowering.py b/jax/experimental/mosaic/gpu/dialect_lowering.py
@@ -1520,15 +1520,14 @@ def _tmem_alloc_op_lowering_rule(
   ncols = output_shape[1] // op.packing.value
 
   with mgpu_utils.when(ctx.single_warp_per_block_predicate):
-    tcgen05.tmem_alloc(op.smem_ptr, ncols, op.collective, op.exact)
+    tcgen05.tmem_alloc(op.smem_ptr, ncols, op.collective, exact=False)
   gpu.barrier()
   tmem_addr = memref.load(op.smem_ptr, [])
 
   cast_op = builtin.UnrealizedConversionCastOp(
       [op.result.type], [tmem_addr]
   )
   cast_op.attributes["collective"] = op.collective
-  cast_op.attributes["exact"] = op.exact
   cast_op.attributes["packing"] = op.packing
 
   return [cast_op.result]
@@ -1552,14 +1551,13 @@ def _tmem_dealloc_op_lowering_rule(
   i32 = ir.IntegerType.get_signless(32)
   conversion_cast, [tmem_addr] = _undo_conversion_cast(op.tmem_ref, [i32])
   collective = ir.BoolAttr(conversion_cast.attributes["collective"]).value
-  exact = ir.BoolAttr(conversion_cast.attributes["exact"]).value
   packing = ir.IntegerAttr(conversion_cast.attributes["packing"]).value
 
   output_shape = ir.MemRefType(op.tmem_ref.type).shape
   ncols = output_shape[1] // packing
 
   with mgpu_utils.when(ctx.single_warp_per_block_predicate):
-    tcgen05.tmem_dealloc(tmem_addr, ncols, collective, exact)
+    tcgen05.tmem_dealloc(tmem_addr, ncols, collective, exact=False)
 
   return []
 
diff --git a/tests/mosaic/gpu_test.py b/tests/mosaic/gpu_test.py
@@ -4466,15 +4466,13 @@ def setUp(self):
       self.skipTest("Only works on GPU with capability sm_100a or sm_101a")
 
   @parameterized.named_parameters(
-      ("exact", (128, 64), jnp.bfloat16, 1, True, False, 64),
-      ("non-exact", (128, 77), jnp.bfloat16, 1, False, False, 128),
-      ("exact-packed", (128, 128), jnp.bfloat16, 2, True, False, 64),
-      ("non-exact-packed", (128, 120), jnp.bfloat16, 2, False, False, 64),
-      ("collective-exact", (128, 64), jnp.bfloat16, 1, True, True, 64),
+      ("unpacked", (128, 77), jnp.bfloat16, 1, False, 128),
+      ("packed", (128, 128), jnp.bfloat16, 2, False, 64),
+      ("collective", (128, 64), jnp.bfloat16, 1, True, 64),
   )
   @unittest.skip("Layout inference fails for trivial load/store kernels.")
   def test_tmem_alloc_dealloc(
-      self, shape, dtype, packing, exact, collective, expected_allocated_columns
+      self, shape, dtype, packing, collective, expected_allocated_columns
   ):
     tmem_type = ir.MemRefType.get(
         shape,
@@ -4493,7 +4491,7 @@ def body(
           result=tmem_type,
           smem_ptr=tmem,
           collective=collective,
-          exact=exact,
+          exact=False,
           packing=packing,
       )