diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c5f938831243..8e985ea6af53f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2807,12 +2807,14 @@ bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0, if ((int)OpIdx1 != Src0Idx && MO0->isReg()) { if (!DefinedRC1) return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN; - return isLegalRegOperand(MI, OpIdx1, *MO0); + return isLegalRegOperand(MI, OpIdx1, *MO0) && + (!MO1->isReg() || isLegalRegOperand(MI, OpIdx0, *MO1)); } if ((int)OpIdx0 != Src0Idx && MO1->isReg()) { if (!DefinedRC0) return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN; - return isLegalRegOperand(MI, OpIdx0, *MO1); + return (!MO0->isReg() || isLegalRegOperand(MI, OpIdx1, *MO0)) && + isLegalRegOperand(MI, OpIdx0, *MO1); } // No need to check 64-bit literals since swapping does not bring new diff --git a/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir b/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir new file mode 100644 index 0000000000000..c6bc248f13388 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: fold_commute_sgprs +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: fold_commute_sgprs + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; CHECK-NEXT: [[V_ADD_NC_U16_fake16_e64_dpp:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp [[COPY1]], 0, [[COPY2]], 0, [[COPY3]], 0, 0, 280, 15, 15, 1, implicit $exec + %0:sreg_32 = COPY $sgpr0 + %1:sreg_32 = IMPLICIT_DEF + %2:vgpr_32 = COPY %1:sreg_32 + %3:vgpr_32 = COPY %0:sreg_32 + %4:sreg_32 = COPY $sgpr1 + %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %2:vgpr_32, 0, %3:vgpr_32, 0, %4:sreg_32, 0, 0, 280, 15, 15, 1, implicit $exec +...