Skip to content

Commit 941ba38

Browse files
YuriPlyakhin and igcbot
authored and committed
Fix predicated store sub-DW value handling
This change fixes the handling of predicated stores of sub-DW values when the stored value is non-uniform. The predicate alone is not enough to calculate the correct lane offset, so the offset is now computed from `EMASK & Predicate`.
1 parent f68235f commit 941ba38

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12368,12 +12368,17 @@ CVariable *EmitPass::UniformCopy(CVariable *var, bool doSub) {
1236812368

1236912369
/// Uniform copy that allows reusing the offset (`off`) calculated by a previous call.
1237012370
/// This allows avoiding redundant code.
12371-
CVariable *EmitPass::UniformCopy(CVariable *var, CVariable *&off, CVariable *eMask, bool doSub, bool safeGuard) {
12371+
CVariable *EmitPass::UniformCopy(CVariable *var, CVariable *&off, CVariable *eMask, bool doSub, bool safeGuard,
12372+
CVariable *predicate) {
1237212373
IGC_ASSERT_MESSAGE(!var->IsUniform(), "Expect non-uniform source!");
1237312374

1237412375
if (eMask == nullptr) {
1237512376
eMask = GetExecutionMask();
1237612377
}
12378+
if (predicate != nullptr) {
12379+
m_encoder->And(eMask, eMask, predicate);
12380+
m_encoder->Push();
12381+
}
1237712382
if (off == nullptr) {
1237812383
// Get offset to any 1s. For simplicity, use 'fbl' to find the lowest 1s.
1237912384
off = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
@@ -17762,8 +17767,8 @@ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS CacheOpts, bool UseA32, R
1776217767
if (!srcUniform) {
1776317768
if (predicate) {
1776417769
CVariable *offset = nullptr;
17765-
CVariable *eMask = CastFlagToVariable(predicateVar);
17766-
stVar = UniformCopy(stVar, offset, eMask);
17770+
CVariable *pMask = CastFlagToVariable(predicateVar);
17771+
stVar = UniformCopy(stVar, offset, nullptr, false, false, pMask);
1776717772
} else {
1776817773
stVar = UniformCopy(stVar);
1776917774
}

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -653,7 +653,7 @@ class EmitPass : public llvm::FunctionPass {
653653
CVariable *GetHalfExecutionMask();
654654
CVariable *UniformCopy(CVariable *var, bool doSub = false);
655655
CVariable *UniformCopy(CVariable *var, CVariable *&LaneOffset, CVariable *eMask = nullptr, bool doSub = false,
656-
bool safeGuard = false);
656+
bool safeGuard = false, CVariable *predicate = nullptr);
657657

658658
// generate loop header to process sample instruction with varying resource/sampler
659659
bool ResourceLoopHeader(const CVariable *destination, ResourceDescriptor &resource, SamplerDescriptor &sampler,

IGC/Compiler/tests/EmitVISAPass/predicated-store-subdw.ll

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,11 @@
2424
; CHECK: cmp.eq (M1, 32) [[F_LOC_IDX_MATCH:.*]] [[LOCAL_IDX]](0,0)<1;1,0> loc_idx(0,0)<0;1,0>
2525
; CHECK: and (M1, 32) [[F_LOC_IDX_MATCH]] [[F_LOC_IDX_MATCH]] [[F_LOC_IDYZ]]
2626
; CHECK: mov (M1_NM, 1) [[FVAR:.*]](0,0)<1> [[F_LOC_IDX_MATCH]]
27-
; CHECK: fbl (M1_NM, 1) [[OFFSET_ALIAS]](0,0)<1> [[FVAR]](0,0)<0;1,0>
27+
; CHECK: setp (M1_NM, 32) [[F_EMASK:.*]] 0x0:ud
28+
; CHECK: cmp.eq (M1, 32) [[F_EMASK]] [[DUMMY:.*]](0,0)<0;1,0> [[DUMMY]](0,0)<0;1,0>
29+
; CHECK: mov (M1_NM, 1) [[VAR_EMASK:.*]](0,0)<1> [[F_EMASK]]
30+
; CHECK: and (M1_NM, 1) [[VAR_EMASK]](0,0)<1> [[VAR_EMASK]](0,0)<0;1,0> [[FVAR]](0,0)<0;1,0>
31+
; CHECK: fbl (M1_NM, 1) [[OFFSET_ALIAS]](0,0)<1> [[VAR_EMASK]](0,0)<0;1,0>
2832
; CHECK: shl (M1_NM, 1) [[OFFSET_ALIAS]](0,0)<1> [[OFFSET_ALIAS]](0,0)<0;1,0> 0x1:w
2933
; CHECK: addr_add (M1_NM, 1) [[ADDR:.*]](0)<1> &{{.*}} [[OFFSET]](0,0)<0;1,0>
3034
; CHECK: mov (M1_NM, 1) [[DATA_ALIAS]](0,0)<1> r[[[ADDR]](0),0]<0;1,0>:hf

0 commit comments

Comments
 (0)