Skip to content

Commit 7eb8fee

Browse files
wizardengineerfrabert
authored andcommitted
[CIR] Added fallback method for intrinsics
1 parent 1b283e4 commit 7eb8fee

File tree

3 files changed

+195
-7
lines changed

3 files changed

+195
-7
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 191 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,177 @@ static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
158158
return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, dstTy);
159159
}
160160

161+
// Helper function to convert builtin names to LLVM intrinsic names
162+
std::string CIRGenFunction::convertBuiltinToIntrinsicName(llvm::StringRef builtinName) {
163+
// Remove "__builtin_ia32_" prefix
164+
llvm::StringRef baseName = builtinName.drop_front(15); // "__builtin_ia32_".size() == 15
165+
166+
// Simple mapping for common patterns
167+
// This can be extended as needed
168+
static llvm::StringMap<std::string> intrinsicMap = {
169+
// Load/Store operations
170+
{"loadups", "llvm.x86.sse.loadu.ps"},
171+
{"loaddqu", "llvm.x86.sse2.loadu.dq"},
172+
{"storeups", "llvm.x86.sse.storeu.ps"},
173+
{"storedqu", "llvm.x86.sse2.storeu.dq"},
174+
{"movntdqa", "llvm.x86.sse41.movntdqa"},
175+
{"movntdq", "llvm.x86.sse2.movnt.dq"},
176+
177+
// Arithmetic operations
178+
{"addps", "llvm.x86.sse.add.ps"},
179+
{"subps", "llvm.x86.sse.sub.ps"},
180+
{"mulps", "llvm.x86.sse.mul.ps"},
181+
{"divps", "llvm.x86.sse.div.ps"},
182+
183+
// Cast operations (these might not need intrinsics)
184+
{"castps_si128", "llvm.x86.sse.cast.ps.si128"},
185+
{"castsi128_ps", "llvm.x86.sse.cast.si128.ps"},
186+
187+
// Set/Zero operations
188+
{"setzero_ps", "llvm.x86.sse.setzero.ps"},
189+
{"setzero_si128", "llvm.x86.sse2.setzero.si128"},
190+
191+
// Unpack operations
192+
{"unpacklo_epi8", "llvm.x86.sse2.punpcklbw.128"},
193+
{"unpackhi_epi8", "llvm.x86.sse2.punpckhbw.128"},
194+
{"unpacklo_epi16", "llvm.x86.sse2.punpcklwd.128"},
195+
{"unpackhi_epi16", "llvm.x86.sse2.punpckhwd.128"},
196+
197+
// K-mask shift operations (AVX-512)
198+
{"kshiftliqi", "llvm.x86.avx512.kshiftl.b"},
199+
{"kshiftlihi", "llvm.x86.avx512.kshiftl.w"},
200+
{"kshiftlisi", "llvm.x86.avx512.kshiftl.d"},
201+
{"kshiftlidi", "llvm.x86.avx512.kshiftl.q"},
202+
{"kshiftriqi", "llvm.x86.avx512.kshiftr.b"},
203+
{"kshiftrihi", "llvm.x86.avx512.kshiftr.w"},
204+
{"kshiftrisi", "llvm.x86.avx512.kshiftr.d"},
205+
{"kshiftridi", "llvm.x86.avx512.kshiftr.q"},
206+
207+
// Pack operations
208+
{"packsswb128", "llvm.x86.sse2.packsswb.128"},
209+
{"packssdw128", "llvm.x86.sse2.packssdw.128"},
210+
{"packuswb128", "llvm.x86.sse2.packuswb.128"},
211+
212+
// Conversion operations
213+
{"cvtps2dq", "llvm.x86.sse2.cvtps2dq"},
214+
{"cvtdq2ps", "llvm.x86.sse2.cvtdq2ps"},
215+
{"cvtpd2dq", "llvm.x86.sse2.cvtpd2dq"},
216+
217+
// Shuffle operations
218+
{"shufps", "llvm.x86.sse.shuf.ps"},
219+
{"pshuflw", "llvm.x86.sse2.pshufl.w"},
220+
{"pshufhw", "llvm.x86.sse2.pshufh.w"},
221+
{"palignr128", "llvm.x86.ssse3.palign.r.128"},
222+
{"palignr256", "llvm.x86.avx2.palign.r"},
223+
{"permdi256", "llvm.x86.avx2.permd"},
224+
225+
// AES operations
226+
{"aesdec128", "llvm.x86.aesni.aesdec"},
227+
{"aesenc128", "llvm.x86.aesni.aesenc"},
228+
229+
// Shift operations
230+
{"pslldqi128_byteshift", "llvm.x86.sse2.psll.dq"},
231+
{"pslldqi256_byteshift", "llvm.x86.avx2.psll.dq"},
232+
{"pslldqi512_byteshift", "llvm.x86.avx512.psll.dq.512"},
233+
234+
// Advanced math operations (using correct LLVM intrinsic names)
235+
{"sqrtps512", "llvm.x86.avx512.sqrt.ps.512"},
236+
{"sqrtpd512", "llvm.x86.avx512.sqrt.pd.512"},
237+
// Note: SSE sqrt doesn't have LLVM intrinsics - they become regular sqrt calls
238+
{"rcpps", "llvm.x86.sse.rcp.ps"},
239+
{"rsqrtps", "llvm.x86.sse.rsqrt.ps"},
240+
{"minpd", "llvm.x86.sse2.min.pd"},
241+
{"maxpd", "llvm.x86.sse2.max.pd"},
242+
243+
// Comparison operations
244+
{"pcmpeqb128", "llvm.x86.sse2.pcmpeq.b"},
245+
{"pcmpeqw128", "llvm.x86.sse2.pcmpeq.w"},
246+
{"pcmpeqd128", "llvm.x86.sse2.pcmpeq.d"},
247+
{"pcmpgtb128", "llvm.x86.sse2.pcmpgt.b"},
248+
{"cmpeqps", "llvm.x86.sse.cmp.ps"},
249+
{"cmpltps", "llvm.x86.sse.cmp.ps"},
250+
{"cmpleps", "llvm.x86.sse.cmp.ps"},
251+
252+
// Bit manipulation
253+
{"pand128", "llvm.x86.sse2.pand"},
254+
{"por128", "llvm.x86.sse2.por"},
255+
{"pxor128", "llvm.x86.sse2.pxor"},
256+
{"pandn128", "llvm.x86.sse2.pandn"},
257+
258+
// Mask operations (AVX-512)
259+
{"kandqi", "llvm.x86.avx512.kand.b"},
260+
{"korqi", "llvm.x86.avx512.kor.b"},
261+
{"kxorqi", "llvm.x86.avx512.kxor.b"},
262+
{"knotqi", "llvm.x86.avx512.knot.b"},
263+
264+
// Conversion operations
265+
{"cvtdq2ps256", "llvm.x86.avx.cvtdq2.ps.256"},
266+
{"cvtpd2ps", "llvm.x86.sse2.cvtpd2ps"},
267+
{"cvtps2dq256", "llvm.x86.avx.cvtps2dq.256"},
268+
269+
// Specialized operations
270+
{"pternlogd128", "llvm.x86.avx512.pternlog.d.128"},
271+
{"vpopcntd_128", "llvm.x86.avx512.vpopcnt.d.128"},
272+
{"vplzcntd_128", "llvm.x86.avx512.vplzcnt.d.128"},
273+
274+
// Gather/Scatter operations
275+
{"gathersiv4sf", "llvm.x86.avx2.gather.d.ps"},
276+
{"scattersiv4sf", "llvm.x86.avx512.scatter.dps.512"},
277+
278+
// Vector size operations
279+
{"extract128i256", "llvm.x86.avx2.vextracti128"},
280+
{"insert128i256", "llvm.x86.avx2.vinserti128"},
281+
{"pbroadcastd256", "llvm.x86.avx2.pbroadcastd.256"},
282+
283+
// String processing
284+
{"pcmpistri128", "llvm.x86.sse42.pcmpistri128"},
285+
{"pcmpistrm128", "llvm.x86.sse42.pcmpistrm128"},
286+
};
287+
288+
// Check if we have a direct mapping
289+
auto it = intrinsicMap.find(baseName);
290+
if (it != intrinsicMap.end()) {
291+
return it->second;
292+
}
293+
294+
// Fallback: For intrinsics without LLVM equivalents, create a function call
295+
// This allows the backend to handle it as a regular function call
296+
return ("__" + baseName).str(); // e.g., "__sqrtps" becomes a function call
297+
}
298+
299+
// Generic fallback for unsupported X86 intrinsics
300+
// This creates a function call with the intrinsic name preserved as a string
301+
mlir::Value CIRGenFunction::emitX86IntrinsicFallback(unsigned BuiltinID,
302+
const CallExpr *E,
303+
llvm::ArrayRef<mlir::Value> Ops) {
304+
// Get the builtin name from the BuiltinID
305+
std::string builtinName = getContext().BuiltinInfo.getName(BuiltinID);
306+
307+
// Only handle X86 intrinsics (they start with "__builtin_ia32_")
308+
llvm::StringRef nameRef(builtinName);
309+
if (!nameRef.starts_with("__builtin_ia32_")) {
310+
return nullptr;
311+
}
312+
313+
// Convert builtin name to intrinsic name
314+
// "__builtin_ia32_addps" -> "llvm.x86.sse.add.ps"
315+
std::string intrinsicName = convertBuiltinToIntrinsicName(nameRef);
316+
317+
// Get the return type
318+
mlir::Type returnType = convertType(E->getType());
319+
320+
// Create the fallback intrinsic call
321+
mlir::Location loc = getLoc(E->getExprLoc());
322+
323+
// Use LLVMIntrinsicCallOp to preserve the intrinsic name as a string
324+
// This allows the LLVM backend to handle it or emit an appropriate error
325+
auto intrinsicCall = builder.create<cir::LLVMIntrinsicCallOp>(
326+
loc, builder.getStringAttr(intrinsicName), returnType, Ops);
327+
328+
return intrinsicCall.getResult();
329+
}
330+
331+
161332
static mlir::Value emitX86PSLLDQIByteShift(CIRGenFunction &cgf,
162333
const CallExpr *E,
163334
ArrayRef<mlir::Value> Ops) {
@@ -206,7 +377,7 @@ static mlir::Value emitX86PSRLDQIByteShift(CIRGenFunction &cgf,
206377

207378
// If psrldq is shifting the vector more than 15 bytes, emit zero.
208379
if (shiftVal >= 16)
209-
return builder.getZero(loc, resultType);
380+
return builder.getZero(loc, resultType);
210381

211382
auto numElts = resultType.getSize() * 8;
212383
assert(numElts % 16 == 0 && "Expected a multiple of 16");
@@ -215,7 +386,7 @@ static mlir::Value emitX86PSRLDQIByteShift(CIRGenFunction &cgf,
215386

216387
// This correlates to the OG CodeGen
217388
// As stated in the OG, 256/512-bit psrldq operates on 128-bit lanes.
218-
// So we have to make sure we handle it.
389+
// So we have to make sure we handle it.
219390
for (unsigned l = 0; l < numElts; l += 16) {
220391
for (unsigned i = 0; i < 16; ++i) {
221392
unsigned idx = i + shiftVal;
@@ -265,6 +436,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
265436

266437
switch (BuiltinID) {
267438
default:
439+
// Try generic fallback for unknown X86 intrinsics
440+
if (auto fallbackResult = emitX86IntrinsicFallback(BuiltinID, E, Ops)) {
441+
return fallbackResult;
442+
}
268443
return nullptr;
269444
case X86::BI_mm_prefetch: {
270445
mlir::Value Address = builder.createPtrBitcast(Ops[0], VoidTy);
@@ -1202,17 +1377,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
12021377
case X86::BI__builtin_ia32_psrldqi128_byteshift:
12031378
case X86::BI__builtin_ia32_psrldqi256_byteshift:
12041379
case X86::BI__builtin_ia32_psrldqi512_byteshift:
1205-
emitX86PSRLDQIByteShift(*this, E, Ops);
1380+
return emitX86PSRLDQIByteShift(*this, E, Ops);
12061381
case X86::BI__builtin_ia32_kshiftliqi:
12071382
case X86::BI__builtin_ia32_kshiftlihi:
12081383
case X86::BI__builtin_ia32_kshiftlisi:
12091384
case X86::BI__builtin_ia32_kshiftlidi:
1210-
llvm_unreachable("kshiftl NYI");
1385+
// llvm_unreachable("kshiftl NYI");
1386+
// Try generic fallback for unknown X86 intrinsics
1387+
if (auto fallbackResult = emitX86IntrinsicFallback(BuiltinID, E, Ops)) {
1388+
return fallbackResult;
1389+
}
1390+
return nullptr;
12111391
case X86::BI__builtin_ia32_kshiftriqi:
12121392
case X86::BI__builtin_ia32_kshiftrihi:
12131393
case X86::BI__builtin_ia32_kshiftrisi:
12141394
case X86::BI__builtin_ia32_kshiftridi:
1215-
llvm_unreachable("kshiftr NYI");
1395+
// llvm_unreachable("kshiftr NYI");
1396+
// Try generic fallback for unknown X86 intrinsics
1397+
if (auto fallbackResult = emitX86IntrinsicFallback(BuiltinID, E, Ops)) {
1398+
return fallbackResult;
1399+
}
1400+
return nullptr;
1401+
12161402
// Rotate is a special case of funnel shift - 1st 2 args are the same.
12171403
case X86::BI__builtin_ia32_vprotb:
12181404
case X86::BI__builtin_ia32_vprotw:

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,6 +2499,10 @@ class CIRGenFunction : public CIRGenTypeCache {
24992499
mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &S);
25002500

25012501
mlir::Value emitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
2502+
// Fallback support for unsupported intrinsics
2503+
mlir::Value emitX86IntrinsicFallback(unsigned BuiltinID, const CallExpr *E,
2504+
llvm::ArrayRef<mlir::Value> Ops);
2505+
std::string convertBuiltinToIntrinsicName(llvm::StringRef builtinName);
25022506

25032507
/// CIR build helpers
25042508
/// -----------------

llvm/.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,3 @@ docs/_build
6666
.sw?
6767
#OS X specific files.
6868
.DS_store
69-
CMakePresets.json
70-
out/

0 commit comments

Comments
 (0)