@@ -158,6 +158,188 @@ static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
158
158
return cgf.getBuilder ().createCast (loc, cir::CastKind::integral, mask, dstTy);
159
159
}
160
160
161
+ // Helper function to convert builtin names to LLVM intrinsic names
162
+ std::string CIRGenFunction::convertBuiltinToIntrinsicName (llvm::StringRef builtinName) {
163
+ // Remove "__builtin_ia32_" prefix
164
+ llvm::StringRef baseName = builtinName.drop_front (15 ); // "__builtin_ia32_".size() == 15
165
+
166
+ // Simple mapping for common patterns
167
+ // This can be extended as needed
168
+ llvm::StringMap<std::string> intrinsicMap = {
169
+ // Load/Store operations
170
+ {" loadups" , " llvm.x86.sse.loadu.ps" },
171
+ {" loaddqu" , " llvm.x86.sse2.loadu.dq" },
172
+ {" storeups" , " llvm.x86.sse.storeu.ps" },
173
+ {" storedqu" , " llvm.x86.sse2.storeu.dq" },
174
+
175
+ // Arithmetic operations
176
+ {" addps" , " llvm.x86.sse.add.ps" },
177
+ {" subps" , " llvm.x86.sse.sub.ps" },
178
+ {" mulps" , " llvm.x86.sse.mul.ps" },
179
+ {" divps" , " llvm.x86.sse.div.ps" },
180
+
181
+ // Cast operations (these might not need intrinsics)
182
+ {" castps_si128" , " llvm.x86.sse.cast.ps.si128" },
183
+ {" castsi128_ps" , " llvm.x86.sse.cast.si128.ps" },
184
+
185
+ // Set/Zero operations
186
+ {" setzero_ps" , " llvm.x86.sse.setzero.ps" },
187
+ {" setzero_si128" , " llvm.x86.sse2.setzero.si128" },
188
+
189
+ // Unpack operations
190
+ {" unpacklo_epi8" , " llvm.x86.sse2.punpcklbw.128" },
191
+ {" unpackhi_epi8" , " llvm.x86.sse2.punpckhbw.128" },
192
+ {" unpacklo_epi16" , " llvm.x86.sse2.punpcklwd.128" },
193
+ {" unpackhi_epi16" , " llvm.x86.sse2.punpckhwd.128" },
194
+
195
+ // K-mask shift operations (AVX-512)
196
+ {" kshiftliqi" , " llvm.x86.avx512.kshiftl.b" },
197
+ {" kshiftlihi" , " llvm.x86.avx512.kshiftl.w" },
198
+ {" kshiftlisi" , " llvm.x86.avx512.kshiftl.d" },
199
+ {" kshiftlidi" , " llvm.x86.avx512.kshiftl.q" },
200
+ {" kshiftriqi" , " llvm.x86.avx512.kshiftr.b" },
201
+ {" kshiftrihi" , " llvm.x86.avx512.kshiftr.w" },
202
+ {" kshiftrisi" , " llvm.x86.avx512.kshiftr.d" },
203
+ {" kshiftridi" , " llvm.x86.avx512.kshiftr.q" },
204
+
205
+ // Pack operations
206
+ {" packsswb128" , " llvm.x86.sse2.packsswb.128" },
207
+ {" packssdw128" , " llvm.x86.sse2.packssdw.128" },
208
+ {" packuswb128" , " llvm.x86.sse2.packuswb.128" },
209
+
210
+ // Conversion operations
211
+ {" cvtps2dq" , " llvm.x86.sse2.cvtps2dq" },
212
+ {" cvtdq2ps" , " llvm.x86.sse2.cvtdq2ps" },
213
+ {" cvtpd2dq" , " llvm.x86.sse2.cvtpd2dq" },
214
+
215
+ // Comparison operations
216
+ {" pcmpeqd128" , " llvm.x86.sse2.pcmpeq.d" },
217
+ {" pcmpgtb128" , " llvm.x86.sse2.pcmpgt.b" },
218
+
219
+ // Shuffle operations
220
+ {" shufps" , " llvm.x86.sse.shuf.ps" },
221
+ {" pshuflw" , " llvm.x86.sse2.pshufl.w" },
222
+ {" pshufhw" , " llvm.x86.sse2.pshufh.w" },
223
+
224
+ // AES operations
225
+ {" aesdec128" , " llvm.x86.aesni.aesdec" },
226
+ {" aesenc128" , " llvm.x86.aesni.aesenc" },
227
+
228
+ // Shift operations
229
+ {" pslldqi128_byteshift" , " llvm.x86.sse2.psll.dq" },
230
+ {" pslldqi256_byteshift" , " llvm.x86.avx2.psll.dq" },
231
+ {" pslldqi512_byteshift" , " llvm.x86.avx512.psll.dq.512" },
232
+
233
+ // Advanced math operations (using correct LLVM intrinsic names)
234
+ {" sqrtps512" , " llvm.x86.avx512.sqrt.ps.512" },
235
+ {" sqrtpd512" , " llvm.x86.avx512.sqrt.pd.512" },
236
+ // Note: SSE sqrt doesn't have LLVM intrinsics - they become regular sqrt calls
237
+ {" rcpps" , " llvm.x86.sse.rcp.ps" },
238
+ {" rsqrtps" , " llvm.x86.sse.rsqrt.ps" },
239
+ {" minpd" , " llvm.x86.sse2.min.pd" },
240
+ {" maxpd" , " llvm.x86.sse2.max.pd" },
241
+
242
+ // Shuffle operations
243
+ {" pshuflw" , " llvm.x86.sse2.pshufl.w" },
244
+ {" pshufhw" , " llvm.x86.sse2.pshufh.w" },
245
+ {" palignr128" , " llvm.x86.ssse3.palign.r.128" },
246
+ {" palignr256" , " llvm.x86.avx2.palign.r" },
247
+ {" permdi256" , " llvm.x86.avx2.permd" },
248
+
249
+ // Comparison operations
250
+ {" pcmpeqb128" , " llvm.x86.sse2.pcmpeq.b" },
251
+ {" pcmpeqw128" , " llvm.x86.sse2.pcmpeq.w" },
252
+ {" pcmpeqd128" , " llvm.x86.sse2.pcmpeq.d" },
253
+ {" cmpeqps" , " llvm.x86.sse.cmp.ps" },
254
+ {" cmpltps" , " llvm.x86.sse.cmp.ps" },
255
+ {" cmpleps" , " llvm.x86.sse.cmp.ps" },
256
+
257
+ // Bit manipulation
258
+ {" pand128" , " llvm.x86.sse2.pand" },
259
+ {" por128" , " llvm.x86.sse2.por" },
260
+ {" pxor128" , " llvm.x86.sse2.pxor" },
261
+ {" pandn128" , " llvm.x86.sse2.pandn" },
262
+
263
+ // Load/Store operations
264
+ {" loaddqu" , " llvm.x86.sse2.loadu.dq" },
265
+ {" storedqu" , " llvm.x86.sse2.storeu.dq" },
266
+ {" movntdqa" , " llvm.x86.sse41.movntdqa" },
267
+ {" movntdq" , " llvm.x86.sse2.movnt.dq" },
268
+
269
+ // Mask operations (AVX-512)
270
+ {" kandqi" , " llvm.x86.avx512.kand.b" },
271
+ {" korqi" , " llvm.x86.avx512.kor.b" },
272
+ {" kxorqi" , " llvm.x86.avx512.kxor.b" },
273
+ {" knotqi" , " llvm.x86.avx512.knot.b" },
274
+
275
+ // Conversion operations
276
+ {" cvtdq2ps256" , " llvm.x86.avx.cvtdq2.ps.256" },
277
+ {" cvtpd2ps" , " llvm.x86.sse2.cvtpd2ps" },
278
+ {" cvtps2dq256" , " llvm.x86.avx.cvtps2dq.256" },
279
+
280
+ // Specialized operations
281
+ {" pternlogd128" , " llvm.x86.avx512.pternlog.d.128" },
282
+ {" vpopcntd_128" , " llvm.x86.avx512.vpopcnt.d.128" },
283
+ {" vplzcntd_128" , " llvm.x86.avx512.vplzcnt.d.128" },
284
+
285
+ // Gather/Scatter operations
286
+ {" gathersiv4sf" , " llvm.x86.avx2.gather.d.ps" },
287
+ {" scattersiv4sf" , " llvm.x86.avx512.scatter.dps.512" },
288
+
289
+ // Vector size operations
290
+ {" extract128i256" , " llvm.x86.avx2.vextracti128" },
291
+ {" insert128i256" , " llvm.x86.avx2.vinserti128" },
292
+ {" pbroadcastd256" , " llvm.x86.avx2.pbroadcastd.256" },
293
+
294
+ // String processing
295
+ {" pcmpistri128" , " llvm.x86.sse42.pcmpistri128" },
296
+ {" pcmpistrm128" , " llvm.x86.sse42.pcmpistrm128" },
297
+ };
298
+
299
+ // Check if we have a direct mapping
300
+ auto it = intrinsicMap.find (baseName);
301
+ if (it != intrinsicMap.end ()) {
302
+ return it->second ;
303
+ }
304
+
305
+ // Fallback: For intrinsics without LLVM equivalents, create a function call
306
+ // This allows the backend to handle it as a regular function call
307
+ return (" __" + baseName).str (); // e.g., "__sqrtps" becomes a function call
308
+ }
309
+
310
+ // Generic fallback for unsupported X86 intrinsics
311
+ // This creates a function call with the intrinsic name preserved as a string
312
+ mlir::Value CIRGenFunction::emitX86IntrinsicFallback (unsigned BuiltinID,
313
+ const CallExpr *E,
314
+ llvm::ArrayRef<mlir::Value> Ops) {
315
+ // Get the builtin name from the BuiltinID
316
+ std::string builtinName = getContext ().BuiltinInfo .getName (BuiltinID);
317
+
318
+ // Only handle X86 intrinsics (they start with "__builtin_ia32_")
319
+ llvm::StringRef nameRef (builtinName);
320
+ if (!nameRef.starts_with (" __builtin_ia32_" )) {
321
+ return nullptr ;
322
+ }
323
+
324
+ // Convert builtin name to intrinsic name
325
+ // "__builtin_ia32_addps" -> "llvm.x86.sse.add.ps"
326
+ std::string intrinsicName = convertBuiltinToIntrinsicName (nameRef);
327
+
328
+ // Get the return type
329
+ mlir::Type returnType = convertType (E->getType ());
330
+
331
+ // Create the fallback intrinsic call
332
+ mlir::Location loc = getLoc (E->getExprLoc ());
333
+
334
+ // Use LLVMIntrinsicCallOp to preserve the intrinsic name as a string
335
+ // This allows the LLVM backend to handle it or emit an appropriate error
336
+ auto intrinsicCall = builder.create <cir::LLVMIntrinsicCallOp>(
337
+ loc, builder.getStringAttr (intrinsicName), returnType, Ops);
338
+
339
+ return intrinsicCall.getResult ();
340
+ }
341
+
342
+
161
343
static mlir::Value emitX86PSLLDQIByteShift (CIRGenFunction &cgf,
162
344
const CallExpr *E,
163
345
ArrayRef<mlir::Value> Ops) {
@@ -265,6 +447,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
265
447
266
448
switch (BuiltinID) {
267
449
default :
450
+ // Try generic fallback for unknown X86 intrinsics
451
+ if (auto fallbackResult = emitX86IntrinsicFallback (BuiltinID, E, Ops)) {
452
+ return fallbackResult;
453
+ }
268
454
return nullptr ;
269
455
case X86::BI_mm_prefetch: {
270
456
mlir::Value Address = builder.createPtrBitcast (Ops[0 ], VoidTy);
@@ -1202,17 +1388,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
1202
1388
case X86::BI__builtin_ia32_psrldqi128_byteshift:
1203
1389
case X86::BI__builtin_ia32_psrldqi256_byteshift:
1204
1390
case X86::BI__builtin_ia32_psrldqi512_byteshift:
1205
- emitX86PSRLDQIByteShift (*this , E, Ops);
1391
+ return emitX86PSRLDQIByteShift (*this , E, Ops);
1206
1392
case X86::BI__builtin_ia32_kshiftliqi:
1207
1393
case X86::BI__builtin_ia32_kshiftlihi:
1208
1394
case X86::BI__builtin_ia32_kshiftlisi:
1209
1395
case X86::BI__builtin_ia32_kshiftlidi:
1210
- llvm_unreachable (" kshiftl NYI" );
1396
+ // llvm_unreachable("kshiftl NYI");
1397
+ // Try generic fallback for unknown X86 intrinsics
1398
+ if (auto fallbackResult = emitX86IntrinsicFallback (BuiltinID, E, Ops)) {
1399
+ return fallbackResult;
1400
+ }
1401
+ return nullptr ;
1211
1402
case X86::BI__builtin_ia32_kshiftriqi:
1212
1403
case X86::BI__builtin_ia32_kshiftrihi:
1213
1404
case X86::BI__builtin_ia32_kshiftrisi:
1214
1405
case X86::BI__builtin_ia32_kshiftridi:
1215
- llvm_unreachable (" kshiftr NYI" );
1406
+ // llvm_unreachable("kshiftr NYI");
1407
+ // Try generic fallback for unknown X86 intrinsics
1408
+ if (auto fallbackResult = emitX86IntrinsicFallback (BuiltinID, E, Ops)) {
1409
+ return fallbackResult;
1410
+ }
1411
+ return nullptr ;
1412
+
1216
1413
// Rotate is a special case of funnel shift - 1st 2 args are the same.
1217
1414
case X86::BI__builtin_ia32_vprotb:
1218
1415
case X86::BI__builtin_ia32_vprotw:
0 commit comments