Skip to content

Commit 9e87804

Browse files
davidjwoo authored and igcbot committed
Add helper lane mode to wave intrinsics
Added helper lane mode argument to wave intrinsics. Much like with GenISA_WaveShuffleIndex, this argument denotes that helper lanes should be active for this instruction when its value is 1.
1 parent 3c9d129 commit 9e87804

File tree

10 files changed

+103
-47
lines changed

10 files changed

+103
-47
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20657,6 +20657,7 @@ void EmitPass::emitWavePrefix(WavePrefixIntrinsic* I)
2065720657
if (CI->isAllOnesValue())
2065820658
Mask = nullptr;
2065920659
}
20660+
m_encoder->SetSubSpanDestination(false);
2066020661
emitScan(
2066120662
I->getSrc(), I->getOpKind(), I->isInclusiveScan(), Mask, false);
2066220663
}
@@ -20707,6 +20708,7 @@ void EmitPass::emitWaveAll(llvm::GenIntrinsicInst* inst)
2070720708
uint64_t identity = 0;
2070820709
GetReductionOp(op, inst->getOperand(0)->getType(), identity, opCode, type);
2070920710
CVariable* dst = m_destination;
20711+
m_encoder->SetSubSpanDestination(false);
2071020712
emitReductionAll(opCode, identity, type, false, src, dst);
2071120713
}
2071220714

@@ -20720,6 +20722,7 @@ void EmitPass::emitWaveClustered(llvm::GenIntrinsicInst* inst)
2072020722
uint64_t identity = 0;
2072120723
GetReductionOp(op, inst->getOperand(0)->getType(), identity, opCode, type);
2072220724
CVariable *dst = m_destination;
20725+
m_encoder->SetSubSpanDestination(false);
2072320726
emitReductionClustered(opCode, identity, type, false, clusterSize, src, dst);
2072420727
}
2072520728

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,13 @@ namespace IGC
11221122
MatchShuffleBroadCast(*GII) ||
11231123
MatchWaveShuffleIndex(*GII);
11241124
break;
1125+
case GenISAIntrinsic::GenISA_WaveBallot:
1126+
case GenISAIntrinsic::GenISA_WaveInverseBallot:
1127+
case GenISAIntrinsic::GenISA_WaveAll:
1128+
case GenISAIntrinsic::GenISA_WaveClustered:
1129+
case GenISAIntrinsic::GenISA_WavePrefix:
1130+
match = MatchWaveInstruction(*GII);
1131+
break;
11251132
case GenISAIntrinsic::GenISA_simdBlockRead:
11261133
case GenISAIntrinsic::GenISA_simdBlockWrite:
11271134
match = MatchBlockReadWritePointer(*GII) ||
@@ -4944,6 +4951,34 @@ namespace IGC
49444951
return MatchSingleInstruction(I);
49454952
}
49464953

4954+
// Pattern-match a wave intrinsic (WaveAll, WaveBallot, WaveInverseBallot,
// WaveClustered, WavePrefix) and record whether it asks for helper lanes.
//
// Every one of these intrinsics carries a trailing "helperLaneMode" operand;
// when that operand is the constant 1, m_NeedVMask is set. The instruction
// itself is then matched as a single instruction.
//
// The operand position is the last argument of each intrinsic's prototype:
//   WaveAll            (Ty, char, int)              -> operand 2
//   WaveBallot         (bool, int)                  -> operand 1
//   WaveInverseBallot  (int, int)                   -> operand 1
//   WaveClustered      (Ty, char, int, int)         -> operand 3
//   WavePrefix         (Ty, char, bool, bool, int)  -> operand 4
//
// Returns the result of MatchSingleInstruction(I).
bool CodeGenPatternMatch::MatchWaveInstruction(llvm::GenIntrinsicInst& I)
{
    unsigned int helperLaneIndex = 0;
    switch (I.getIntrinsicID())
    {
    case GenISAIntrinsic::GenISA_WaveBallot:
    case GenISAIntrinsic::GenISA_WaveInverseBallot:
        helperLaneIndex = 1;
        break;
    case GenISAIntrinsic::GenISA_WaveAll:
        helperLaneIndex = 2;
        break;
    case GenISAIntrinsic::GenISA_WaveClustered:
        helperLaneIndex = 3;
        break;
    case GenISAIntrinsic::GenISA_WavePrefix:
        // WavePrefix has an extra bool "mask" operand at position 3, so its
        // helperLaneMode is one slot further along than WaveClustered's.
        helperLaneIndex = 4;
        break;
    default:
        // Not a wave intrinsic handled here: do not interpret operand 0 as a
        // helper-lane flag, just match the instruction as-is.
        IGC_ASSERT(false);
        return MatchSingleInstruction(I);
    }

    llvm::Value* helperLaneMode = I.getArgOperand(helperLaneIndex);
    // helperLaneMode is expected to be a compile-time constant: 0 = not used, 1 = used.
    if (int_cast<int>(cast<ConstantInt>(helperLaneMode)->getSExtValue()) == 1)
    {
        m_NeedVMask = true;
    }
    return MatchSingleInstruction(I);
}
4981+
49474982
bool CodeGenPatternMatch::MatchRegisterRegion(llvm::GenIntrinsicInst& I)
49484983
{
49494984
struct MatchRegionPattern : public Pattern

IGC/Compiler/CISACodeGen/PatternMatchPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ namespace IGC
188188
bool MatchBranch(llvm::BranchInst& I);
189189
bool MatchShuffleBroadCast(llvm::GenIntrinsicInst& I);
190190
bool MatchWaveShuffleIndex(llvm::GenIntrinsicInst& I);
191+
bool MatchWaveInstruction(llvm::GenIntrinsicInst& I);
191192
bool MatchRegisterRegion(llvm::GenIntrinsicInst& I);
192193

193194
Pattern* Match(llvm::Instruction& inst);

IGC/Compiler/CISACodeGen/PixelShaderLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,7 @@ bool PixelShaderLowering::optBlendState(
10981098
// ifany(src.a != 1.0) ? RTIndex : RTIndex + 4
10991099
Constant* f1 = ConstantFP::get(colorOut.color[3]->getType(), 1.0);
11001100
Value* ane1 = irb.CreateFCmpUNE(colorOut.color[3], f1);
1101-
Value* ane1_ballot = irb.CreateCall(fBallot, { ane1 });
1101+
Value* ane1_ballot = irb.CreateCall(fBallot, { ane1, irb.getInt32(0) });
11021102
Value* any = irb.CreateICmpNE(ane1_ballot, irb.getInt32(0));
11031103
colorOut.blendStateIndex = irb.CreateSelect(any,
11041104
irb.getInt32(colorOut.RTindex),

IGC/Compiler/CISACodeGen/PromoteInt8Type.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,28 +1141,32 @@ void PromoteInt8Type::promoteIntrinsic()
11411141
iArgs.push_back(GII->getArgOperand(2));
11421142
break;
11431143
}
1144-
case GenISAIntrinsic::GenISA_WaveAll:
1144+
case GenISAIntrinsic::GenISA_WaveClustered:
11451145
{
1146-
// prototype Ty <waveall> (Ty, char)
1146+
// prototype:
1147+
// Ty <clustered> (Ty, char, int, int)
11471148
iArgs.push_back(GII->getArgOperand(1));
1149+
iArgs.push_back(GII->getArgOperand(2));
1150+
iArgs.push_back(GII->getArgOperand(3));
11481151
break;
11491152
}
11501153
case GenISAIntrinsic::GenISA_WavePrefix:
11511154
{
1152-
// prototype: Ty <waveprefix> (Ty, char, bool, bool)
1155+
// prototype: Ty <waveprefix> (Ty, char, bool, bool, int)
11531156
iArgs.push_back(GII->getArgOperand(1));
11541157
iArgs.push_back(GII->getArgOperand(2));
11551158
iArgs.push_back(GII->getArgOperand(3));
1159+
iArgs.push_back(GII->getArgOperand(4));
11561160
break;
11571161
}
11581162
case GenISAIntrinsic::GenISA_QuadPrefix:
11591163
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
1160-
case GenISAIntrinsic::GenISA_WaveClustered:
1164+
case GenISAIntrinsic::GenISA_WaveAll:
11611165
{
11621166
// prototype:
11631167
// Ty <quadprefix> (Ty, char, bool)
11641168
// Ty <shuffleIndex> (Ty, int, int)
1165-
// Ty <clustered> (Ty, char, int)
1169+
// Ty <waveall> (Ty, char, int)
11661170
iArgs.push_back(GII->getArgOperand(1));
11671171
iArgs.push_back(GII->getArgOperand(2));
11681172
break;

IGC/Compiler/Optimizer/OCLBIUtils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,7 @@ class CWaveBallotIntrinsic : public CCommand
10051005
}
10061006

10071007
m_args.push_back(truncInst);
1008+
m_args.push_back(IRB.getInt32(0));
10081009
replaceGenISACallInst(isaId);
10091010
}
10101011
};

IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,15 +506,15 @@ void SubGroupFuncsResolution::subGroupArithmetic(CallInst& CI, WaveOps op, Group
506506
Value* waveCall = nullptr;
507507
if (groupType == GroupOperationReduce)
508508
{
509-
Value* args[2] = { arg, opVal };
509+
Value* args[3] = { arg, opVal, IRB.getInt32(0) };
510510
Function* waveAll = GenISAIntrinsic::getDeclaration(CI.getCalledFunction()->getParent(),
511511
GenISAIntrinsic::GenISA_WaveAll,
512512
arg->getType());
513513
waveCall = IRB.CreateCall(waveAll, args);
514514
}
515515
else if (groupType == GroupOperationScan)
516516
{
517-
Value* args[4] = { arg, opVal, IRB.getInt1(false), IRB.getInt1(true) };
517+
Value* args[5] = { arg, opVal, IRB.getInt1(false), IRB.getInt1(true), IRB.getInt32(0) };
518518
Function* waveScan = GenISAIntrinsic::getDeclaration(CI.getCalledFunction()->getParent(),
519519
GenISAIntrinsic::GenISA_WavePrefix,
520520
arg->getType());
@@ -523,7 +523,7 @@ void SubGroupFuncsResolution::subGroupArithmetic(CallInst& CI, WaveOps op, Group
523523
else if (groupType == GroupOperationClusteredReduce)
524524
{
525525
Value* clusterSize = CI.getOperand(1);
526-
Value* args[3] = { arg, opVal, clusterSize };
526+
Value* args[4] = { arg, opVal, clusterSize, IRB.getInt32(0) };
527527
Function* waveClustered = GenISAIntrinsic::getDeclaration(CI.getCalledFunction()->getParent(),
528528
GenISAIntrinsic::GenISA_WaveClustered,
529529
arg->getType());

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -651,13 +651,15 @@
651651
"GenISA_WaveAll": ["Accumulate all the active lanes",
652652
[("anyint", ""),
653653
[(0, "value"),
654-
("char", "IGC:WaveOps")],
654+
("char", "IGC:WaveOps"),
655+
("int", "helperLaneMode : 0: not used; 1: used.")],
655656
"Convergent,InaccessibleMemOnly"]],
656657
####################################################################################################
657658
"GenISA_WaveBallot": ["All lanes get the same value",
658659
[("int", "return a bitfield with 1 for active lane with input true, "+\
659660
"0 for the rest."),
660-
[("bool", "Bool b")],
661+
[("bool", "Bool b"),
662+
("int", "helperLaneMode : 0: not used; 1: used.")],
661663
"Convergent,InaccessibleMemOnly"]],
662664
####################################################################################################
663665
"GenISA_WaveClustered": ["Accumulate all active lanes within consecutive input clusters and "+\
@@ -669,20 +671,23 @@
669671
"n-th output cluster"),
670672
[(0, "value"),
671673
("char", "IGC::WaveOps"),
672-
("int", "size - must be a compile time constant and assumed > 1")],
674+
("int", "size - must be a compile time constant and assumed > 1"),
675+
("int", "helperLaneMode : 0: not used; 1: used.")],
673676
"Convergent,InaccessibleMemOnly"]],
674677
####################################################################################################
675678
"GenISA_WaveInverseBallot": ["",
676679
[("bool", "return value of n-th bit from the input bitfield"),
677-
[("int", "bitfield")],
680+
[("int", "bitfield"),
681+
("int", "helperLaneMode : 0: not used; 1: used.")],
678682
"Convergent,InaccessibleMemOnly"]],
679683
####################################################################################################
680684
"GenISA_WavePrefix": ["Accumulate and keep the intermediate results in each lane",
681685
[("anyint", "result"),
682686
[(0, "value"),
683687
("char", "IGC::WaveOps"),
684688
("bool", "type - either exclusive(0) or inclusive(1) operation"),
685-
("bool", "mask - subset of lanes to participate in the computation.")],
689+
("bool", "mask - subset of lanes to participate in the computation."),
690+
("int", "helperLaneMode : 0: not used; 1: used.")],
686691
"Convergent,InaccessibleMemOnly"]],
687692
####################################################################################################
688693
"GenISA_WaveShuffleIndex": ["Read from a specific lane",

IGC/LLVM3DBuilder/BuiltinsFrontend.hpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -761,30 +761,32 @@ class LLVM3DBuilder : public llvm::IGCIRBuilder<T, InserterTyDef()>
761761
llvm::Value* create_discard(llvm::Value* condition);
762762
llvm::Value* create_runtime(llvm::Value* offset);
763763
llvm::CallInst* create_countbits(llvm::Value* src);
764-
llvm::Value* create_waveBallot(llvm::Value* src);
765-
llvm::Value* create_waveInverseBallot(llvm::Value* src);
764+
llvm::Value* create_waveBallot(llvm::Value* src, llvm::Value* helperLaneMode = nullptr);
765+
llvm::Value* create_waveInverseBallot(llvm::Value* src, llvm::Value* helperLaneMode = nullptr);
766766
llvm::Value* create_waveshuffleIndex(llvm::Value* src, llvm::Value* index, llvm::Value* helperLaneMode = nullptr);
767-
llvm::Value* create_waveAll(llvm::Value* src, llvm::Value* type);
767+
llvm::Value* create_waveAll(llvm::Value* src, llvm::Value* type, llvm::Value* helperLaneMode = nullptr);
768768
llvm::Value* create_wavePrefix(
769769
llvm::Value* src, llvm::Value* type, bool inclusive,
770-
llvm::Value *Mask = nullptr);
770+
llvm::Value *Mask = nullptr, llvm::Value* helperLaneMode = nullptr);
771771
llvm::Value* create_wavePrefixBitCount(
772-
llvm::Value* src, llvm::Value *Mask = nullptr);
773-
llvm::Value* create_waveMatch(llvm::Instruction *inst, llvm::Value *src);
772+
llvm::Value* src, llvm::Value *Mask = nullptr, llvm::Value* helperLaneMode = nullptr);
773+
llvm::Value* create_waveMatch(llvm::Instruction *inst, llvm::Value *src, llvm::Value* helperLaneMode = nullptr);
774774
llvm::Value* create_waveMultiPrefix(
775775
llvm::Instruction *I,
776776
llvm::Value *Val,
777777
llvm::Value *Mask,
778-
IGC::WaveOps OpKind);
778+
IGC::WaveOps OpKind,
779+
llvm::Value* helperLaneMode = nullptr);
779780
llvm::Value* create_waveMultiPrefixBitCount(
780781
llvm::Instruction *I,
781782
llvm::Value *Val,
782-
llvm::Value *Mask);
783+
llvm::Value *Mask,
784+
llvm::Value* helperLaneMode = nullptr);
783785
llvm::Value* create_quadPrefix(llvm::Value* src, llvm::Value* type, bool inclusive = false);
784786
llvm::Value* get32BitLaneID();
785787
llvm::Value* getSimdSize();
786-
llvm::Value* getFirstLaneID();
787-
llvm::Value* readFirstLane(llvm::Value* src);
788+
llvm::Value* getFirstLaneID(llvm::Value* helperLaneMode = nullptr);
789+
llvm::Value* readFirstLane(llvm::Value* src, llvm::Value* helperLaneMode = nullptr);
788790

789791
void VectorToScalars(
790792
llvm::Value* vector,

0 commit comments

Comments
 (0)