ROCm
diff --git a/example/01_gemm/common.hpp b/example/01_gemm/common.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/example/01_gemm/gemm_xdl_lds_direct_load_fp32_tf32.cpp b/example/01_gemm/gemm_xdl_lds_direct_load_fp32_tf32.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp32_tf32.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp32_tf32.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp
Lines changed: 5 additions & 6 deletions
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
Lines changed: 17 additions & 2 deletions
diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
Lines changed: 5 additions & 5 deletions
diff --git a/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp
Lines changed: 1 addition & 1 deletion
@@ -313,7 +313,7 @@ bool parse_cmd_args<ProblemSizeSplitK>(int argc,
 template <typename DataType, typename GemmType = DataType>
 inline __host__ __device__ constexpr double get_rtol()
 {
-    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)
+    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)
     {
         return 1e-3;
     }
@@ -358,7 +358,7 @@ inline __host__ __device__ constexpr double get_rtol()
 template <typename DataType, typename GemmType = DataType>
 inline __host__ __device__ constexpr double get_atol()
 {
-    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)
+    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)
     {
         return 1e-3;
     }
 
@@ -21,7 +21,7 @@ using BDataType        = F32;
 using AccDataType      = F32;
 using CShuffleDataType = F32;
 using CDataType        = F32;
-using GemmDataType     = ck::xf32_t;
+using GemmDataType     = ck::tf32_t;
 
 using ALayout = Row;
 using BLayout = Col;
 
@@ -30,7 +30,7 @@ void print_helper_msg()
 template <typename DataType, typename GemmType = DataType>
 inline __host__ __device__ constexpr double get_rtol()
 {
-    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)
+    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)
     {
         return 1e-3;
     }
@@ -75,7 +75,7 @@ inline __host__ __device__ constexpr double get_rtol()
 template <typename DataType, typename GemmType = DataType>
 inline __host__ __device__ constexpr double get_atol()
 {
-    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)
+    if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)
     {
         return 1e-3;
     }
 
@@ -14,7 +14,7 @@ using WeiDataType      = float;
 using AccDataType      = float;
 using CShuffleDataType = float;
 using OutDataType      = float;
-using GemmDataType     = ck::xf32_t;
+using GemmDataType     = ck::tf32_t;
 
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;
 
@@ -49,8 +49,8 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
 
     using ThisThreadBlock = ThisThreadBlock<BlockSize>;
 
-    using ComputeTypeA  = conditional_t<is_same_v<ComputeTypeA_, ck::xf32_t>, float, ComputeTypeA_>;
-    using ComputeTypeB  = conditional_t<is_same_v<ComputeTypeB_, ck::xf32_t>, float, ComputeTypeB_>;
+    using ComputeTypeA  = conditional_t<is_same_v<ComputeTypeA_, ck::tf32_t>, float, ComputeTypeA_>;
+    using ComputeTypeB  = conditional_t<is_same_v<ComputeTypeB_, ck::tf32_t>, float, ComputeTypeB_>;
     using GemmDataTypeA = ComputeTypeA_;
     using GemmDataTypeB = ComputeTypeB_;
 
@@ -177,11 +177,10 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
 
         static_assert(MPerBlock % (MPerXDL * MRepeat) == 0 && NPerBlock % (NPerXDL * NRepeat) == 0,
                       "wrong!");
-        if constexpr(is_same_v<ComputeTypeA, ck::xf32_t> || is_same_v<ComputeTypeB, ck::xf32_t>)
+        if constexpr(is_same_v<ComputeTypeA, ck::tf32_t> || is_same_v<ComputeTypeB, ck::tf32_t>)
         {
-            static_assert(
-                is_same_v<ComputeTypeA_, ComputeTypeA_>,
-                "ComputeTypeA and ComputeTypeB must be both xf32_t when one of them is xf32_t");
+            static_assert(is_same_v<ComputeTypeA_, ComputeTypeA_>,
+                          "ComputeTypeA and ComputeTypeB must be same when one of them is tf32");
         }
     }
 
 
@@ -1043,8 +1043,6 @@ struct DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle
 
         float RunGemm(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            ::std::cout << __FILE__ << ":" << __LINE__
-                        << " DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle" << std::endl;
             if(stream_config.log_level_ > 0)
             {
                 arg.Print();
@@ -1657,6 +1655,23 @@ struct DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle
                                                              arg.block_2_etile_map_);
             }
         }
+        if constexpr(is_same_v<AComputeDataType, ck::tf32_t> ||
+                     is_same_v<BComputeDataType, ck::tf32_t>)
+
+        {
+            if(!(ck::get_device_name() == "gfx942"))
+            {
+                std::cout << "TF32 is enabled on gfx942 only" << std::endl;
+                return false;
+            }
+            if constexpr(!is_same_v<AComputeDataType, BComputeDataType>)
+            {
+                std::cout << "ComputeDataType for A and B should be same while using TF32"
+                          << std::endl;
+                return false;
+            }
+        }
+        return true;
     }
 
     bool IsSupportedArgument(const BaseArgument* p_arg) override
 
@@ -108,9 +108,9 @@ struct GridwiseGemmMultipleD_xdl_cshuffle
         conditional_t<is_same_v<BComputeDataType_, ck::half_t>, ck::bhalf_t, BComputeDataType_>;
 #else
     using AComputeDataType =
-        conditional_t<is_same_v<AComputeDataType_, ck::xf32_t>, float, AComputeDataType_>;
+        conditional_t<is_same_v<AComputeDataType_, ck::tf32_t>, float, AComputeDataType_>;
     using BComputeDataType =
-        conditional_t<is_same_v<BComputeDataType_, ck::xf32_t>, float, BComputeDataType_>;
+        conditional_t<is_same_v<BComputeDataType_, ck::tf32_t>, float, BComputeDataType_>;
     using GemmDataTypeA = AComputeDataType_;
     using GemmDataTypeB = BComputeDataType_;
 #endif
 
@@ -169,9 +169,9 @@ struct GridwiseGemmMultipleD_Xdl_CShuffle_LdsDirectLoad
         conditional_t<is_same_v<AComputeDataType_, ck::half_t>, ck::bhalf_t, AComputeDataType_>;
 #else
     using AComputeDataType =
-        conditional_t<is_same_v<AComputeDataType_, ck::xf32_t>, float, AComputeDataType_>;
+        conditional_t<is_same_v<AComputeDataType_, ck::tf32_t>, float, AComputeDataType_>;
     using BComputeDataType =
-        conditional_t<is_same_v<BComputeDataType_, ck::xf32_t>, float, BComputeDataType_>;
+        conditional_t<is_same_v<BComputeDataType_, ck::tf32_t>, float, BComputeDataType_>;
     using GemmDataTypeA = AComputeDataType_;
     using GemmDataTypeB = BComputeDataType_;
 #endif
 
@@ -78,7 +78,7 @@ enum struct MfmaInstr
     mfma_f32_16x16x128f8f6f4,
     mfma_scale_f32_32x32x64f8f6f4,
     mfma_scale_f32_16x16x128f8f6f4,
-    mfma_f32_16x16x8xf32, // xf32
+    mfma_f32_16x16x8xf32, // tf32
     mfma_f32_32x32x4xf32,
     // gfx11
     wmma_f32_16x16x16_f16,
@@ -1273,13 +1273,13 @@ struct MfmaSelector
     }
 
     template <>
-    constexpr auto GetMfma<xf32_t, 32, 32>()
+    constexpr auto GetMfma<tf32_t, 32, 32>()
     {
         return MfmaInstr::mfma_f32_32x32x4xf32;
     }
 
     template <>
-    constexpr auto GetMfma<xf32_t, 16, 16>()
+    constexpr auto GetMfma<tf32_t, 16, 16>()
     {
         return MfmaInstr::mfma_f32_16x16x8xf32;
     }
@@ -1998,12 +1998,12 @@ struct XdlopsGemm
     {
         static_assert(
             is_same<base_type, double>::value || is_same<base_type, float>::value ||
-                is_same<base_type, xf32_t>::value || is_same<base_type, half_t>::value ||
+                is_same<base_type, tf32_t>::value || is_same<base_type, half_t>::value ||
                 is_same<base_type, bhalf_t>::value || is_same<base_type, int8_t>::value ||
                 is_same<base_type, f8_t>::value || is_same<base_type, bf8_t>::value ||
                 (is_same<base_type, f8_t>::value && is_same<additional_type, bf8_t>::value) ||
                 (is_same<base_type, bf8_t>::value && is_same<additional_type, f8_t>::value),
-            "base_type must be double, float, xf32_t, half, bfloat16, int8_t, f8_t or bf8_t!");
+            "base_type must be double, float, tf32_t, half, bfloat16, int8_t, f8_t or bf8_t!");
 
         static_for<0, KPack / mfma_instr.k_per_blk, 1>{}([&](auto k) {
             if constexpr(!TransposeC)
 
@@ -1636,7 +1636,7 @@ struct intrin_mfma_f32_16x16x32bf8f8<16, 16>
     }
 };
 
-/******************* xf32  *************************************/
+/******************* tf32  *************************************/
 template <index_t MPerWave, index_t NPerWave>
 struct intrin_mfma_f32_16x16x8xf32;
Original file line number	Diff line number	Diff line change
`@@ -313,7 +313,7 @@ bool parse_cmd_args<ProblemSizeSplitK>(int argc,`
`313`	`313`	`template <typename DataType, typename GemmType = DataType>`
`314`	`314`	`inline __host__ __device__ constexpr double get_rtol()`
`315`	`315`	`{`
`316`		`- if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)`
	`316`	`+ if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)`
`317`	`317`	`{`
`318`	`318`	`return 1e-3;`
`319`	`319`	`}`
`@@ -358,7 +358,7 @@ inline __host__ __device__ constexpr double get_rtol()`
`358`	`358`	`template <typename DataType, typename GemmType = DataType>`
`359`	`359`	`inline __host__ __device__ constexpr double get_atol()`
`360`	`360`	`{`
`361`		`- if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)`
	`361`	`+ if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)`
`362`	`362`	`{`
`363`	`363`	`return 1e-3;`
`364`	`364`	`}`
Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,7 @@ void print_helper_msg()`
`30`	`30`	`template <typename DataType, typename GemmType = DataType>`
`31`	`31`	`inline __host__ __device__ constexpr double get_rtol()`
`32`	`32`	`{`
`33`		`- if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)`
	`33`	`+ if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)`
`34`	`34`	`{`
`35`	`35`	`return 1e-3;`
`36`	`36`	`}`
`@@ -75,7 +75,7 @@ inline __host__ __device__ constexpr double get_rtol()`
`75`	`75`	`template <typename DataType, typename GemmType = DataType>`
`76`	`76`	`inline __host__ __device__ constexpr double get_atol()`
`77`	`77`	`{`
`78`		`- if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::xf32_t>)`
	`78`	`+ if constexpr(std::is_same_v<DataType, float> && std::is_same_v<GemmType, ck::tf32_t>)`
`79`	`79`	`{`
`80`	`80`	`return 1e-3;`
`81`	`81`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1636,7 +1636,7 @@ struct intrin_mfma_f32_16x16x32bf8f8<16, 16>`
`1636`	`1636`	`}`
`1637`	`1637`	`};`
`1638`	`1638`
`1639`		`-/***************** xf32 ***********************************/`
	`1639`	`+/***************** tf32 ***********************************/`
`1640`	`1640`	`template <index_t MPerWave, index_t NPerWave>`
`1641`	`1641`	`struct intrin_mfma_f32_16x16x8xf32;`
`1642`	`1642`