[AMDGPU] Check legality of both operands before swap (#148843)

ptrojahn · paultrojahnamd · web-flow · commit 70e1a3ceada9 · 2025-07-15T15:55:26.000-04:00
When trying to fold an SGPR into the second operand to a DPP add, si-fold-operands correctly determines that this is not possible and attempts to swap the second and third operand. This succeeds even if the third operand is an SGPR, creating an illegal dpp add with two SGPR operands. We need to check both operands if they are legal in their new position. This causes a crash at compile time for a test in triton on gfx12: https://github.com/triton-lang/triton/blob/345c633787e90a7f94864de3035346eb5de1781f/python/test/unit/language/test_core.py#L2718 Co-authored-by: Paul Trojahn <paul.trojahn@amd.com>
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2813,12 +2813,14 @@ bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0,
   if ((int)OpIdx1 != Src0Idx && MO0->isReg()) {
     if (!DefinedRC1)
       return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN;
-    return isLegalRegOperand(MI, OpIdx1, *MO0);
+    return isLegalRegOperand(MI, OpIdx1, *MO0) &&
+           (!MO1->isReg() || isLegalRegOperand(MI, OpIdx0, *MO1));
   }
   if ((int)OpIdx0 != Src0Idx && MO1->isReg()) {
     if (!DefinedRC0)
       return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN;
-    return isLegalRegOperand(MI, OpIdx0, *MO1);
+    return (!MO0->isReg() || isLegalRegOperand(MI, OpIdx1, *MO0)) &&
+           isLegalRegOperand(MI, OpIdx0, *MO1);
   }
 
   // No need to check 64-bit literals since swapping does not bring new
diff --git a/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir b/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: fold_commute_sgprs
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; CHECK-LABEL: name: fold_commute_sgprs
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; CHECK-NEXT: [[V_ADD_NC_U16_fake16_e64_dpp:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp [[COPY1]], 0, [[COPY2]], 0, [[COPY3]], 0, 0, 280, 15, 15, 1, implicit $exec
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:vgpr_32 = COPY %1:sreg_32
+    %3:vgpr_32 = COPY %0:sreg_32
+    %4:sreg_32 = COPY $sgpr1
+    %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %2:vgpr_32, 0, %3:vgpr_32, 0, %4:sreg_32, 0, 0, 280, 15, 15, 1, implicit $exec
+...