Skip to content

Commit 1b283e4

Browse files
wizardengineerfrabert
authored and committed
[CIR] Added support for psrldqi
1 parent 3843050 commit 1b283e4

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,47 @@ static mlir::Value emitX86PSLLDQIByteShift(CIRGenFunction &cgf,
196196
return builder.createBitcast(shuffleResult, resultType);
197197
}
198198

199+
/// Lowers a psrldq byte-shift builtin call to a shuffle over a byte vector.
///
/// \param cgf  Function-level codegen state; provides the builder and the
///             source location mapping.
/// \param E    The builtin call expression; only its location is used here.
/// \param Ops  Emitted call operands. Ops[0] is the vector being shifted and
///             Ops[1] is the constant shift amount, of which only the low
///             8 bits are used.
/// \returns A value of Ops[0]'s vector type: an all-zero vector when the
///          shift amount is 16 or more, otherwise the shuffled bytes bitcast
///          back to that type.
static mlir::Value emitX86PSRLDQIByteShift(CIRGenFunction &cgf,
                                           const CallExpr *E,
                                           ArrayRef<mlir::Value> Ops) {
  // Width in bytes of one 128-bit lane.
  constexpr unsigned laneBytes = 16;

  auto &builder = cgf.getBuilder();
  auto vecTy = cast<cir::VectorType>(Ops[0].getType());
  auto loc = cgf.getLoc(E->getExprLoc());
  const unsigned byteShift = getIntValueFromConstOp(Ops[1]) & 0xff;

  // Shifting a lane by 16 bytes or more leaves nothing behind: emit zero.
  if (byteShift >= laneBytes)
    return builder.getZero(loc, vecTy);

  // numElts is the element count of the i8 vector the operand is bitcast to
  // below. NOTE(review): presumably the builtin's vector type has 64-bit
  // elements, so size * 8 is the byte count -- confirm against the builtin
  // signature.
  auto numElts = vecTy.getSize() * 8;
  assert(numElts % 16 == 0 && "Expected a multiple of 16");

  // Mirrors the classic (OG) CodeGen lowering: the 256/512-bit forms of
  // psrldq operate on each 128-bit lane independently, so the mask is built
  // lane by lane.
  llvm::SmallVector<int64_t, 64> shuffleMask;
  for (unsigned laneBase = 0; laneBase < numElts; laneBase += laneBytes) {
    for (unsigned pos = 0; pos < laneBytes; ++pos) {
      unsigned src = pos + byteShift;
      // A source byte past the end of the lane is redirected beyond the
      // first operand's index range, i.e. into the all-zero second operand.
      if (src >= laneBytes)
        src += numElts - laneBytes;
      shuffleMask.push_back(src + laneBase);
    }
  }

  // Reinterpret the input as bytes and pair it with a zero vector of the
  // same type for the shuffle.
  auto i8VecTy = cir::VectorType::get(builder.getSInt8Ty(), numElts);
  mlir::Value asBytes = builder.createBitcast(Ops[0], i8VecTy);
  mlir::Value zeroBytes = builder.getZero(loc, i8VecTy);

  mlir::Value shifted =
      builder.createVecShuffle(loc, asBytes, zeroBytes, shuffleMask);

  // Hand the result back in the caller's original vector type.
  return builder.createBitcast(shifted, vecTy);
}
239+
199240
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
200241
const CallExpr *E) {
201242
if (BuiltinID == Builtin::BI__builtin_cpu_is)
@@ -1161,7 +1202,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
11611202
case X86::BI__builtin_ia32_psrldqi128_byteshift:
11621203
case X86::BI__builtin_ia32_psrldqi256_byteshift:
11631204
case X86::BI__builtin_ia32_psrldqi512_byteshift:
1164-
llvm_unreachable("psrldqi NYI");
1205+
emitX86PSRLDQIByteShift(*this, E, Ops);
11651206
case X86::BI__builtin_ia32_kshiftliqi:
11661207
case X86::BI__builtin_ia32_kshiftlihi:
11671208
case X86::BI__builtin_ia32_kshiftlisi:

llvm/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,5 @@ docs/_build
6666
.sw?
6767
#OS X specific files.
6868
.DS_store
69+
CMakePresets.json
70+
out/

0 commit comments

Comments
 (0)