From cea94791ba408d559ef5e7ec6deea5b0ac4fd534 Mon Sep 17 00:00:00 2001
From: Arthur-Chang016
Date: Fri, 13 Jun 2025 03:46:06 +0000
Subject: [PATCH 1/3] [CIR][CIRGen][builtin][X86] Implement support for _mm_getcsr() intrinsic

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 18 ++++++++++++++++++
 clang/test/CIR/CodeGen/X86/builtins-x86.c  | 10 ++++++++++
 2 files changed, 28 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 96469248535b..1e417aee0fa0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -144,5 +144,23 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
             getLoc(E->getExprLoc()), builder.getStringAttr("x86.rdtsc"), intTy)
         .getResult();
   }
+  case X86::BI_mm_getcsr: {
+    // note that _mm_getcsr() returns uint, but llvm.x86.sse.stmxcsr takes i32
+    // pointer and returns void. So needs alloc extra memory to store the
+    // result.
+    auto loc = getLoc(E->getExprLoc());
+    mlir::Type voidTy = cir::VoidType::get(&getMLIRContext());
+    mlir::Type i32Ty = cir::IntType::get(&getMLIRContext(), 32, true);
+    auto i32PtrTy = builder.getPointerTo(i32Ty);
+    // Allocate memory for the result
+    auto alloca = builder.createAlloca(loc, i32PtrTy, i32Ty, "csrRes",
+                                       builder.getAlignmentAttr(4));
+    builder.create<cir::LLVMIntrinsicCallOp>(
+        loc, builder.getStringAttr("x86.sse.stmxcsr"), voidTy, alloca);
+    // Load the value from the allocated memory
+    auto loadResult =
+        builder.createAlignedLoad(loc, i32Ty, alloca, llvm::Align(4));
+    return loadResult;
+  }
   }
 }
diff --git a/clang/test/CIR/CodeGen/X86/builtins-x86.c b/clang/test/CIR/CodeGen/X86/builtins-x86.c
index eeebec717bcd..6eb5b388aa54 100644
--- a/clang/test/CIR/CodeGen/X86/builtins-x86.c
+++ b/clang/test/CIR/CodeGen/X86/builtins-x86.c
@@ -54,3 +54,13 @@ unsigned long long test_rdtsc() {
   // LLVM: call i64 @llvm.x86.rdtsc
 }
 
+unsigned int test_mm_getcsr() {
+  // CIR-LABEL: test_mm_getcsr
+  // LLVM-LABEL: test_mm_getcsr
+  return _mm_getcsr();
+  // 2 allocas: 1. for the return value, 1. for the csr result
+  // CIR: %{{.*}} = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"] {alignment = 4 : i64}
+  // CIR: %{{.*}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["csrRes"] {alignment = 4 : i64}
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.stmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
+  // LLVM: call void @llvm.x86.sse.stmxcsr(ptr {{%.*}})
+}

From 5c7c1603d2f6366903ca805461c68c050f6c1f7b Mon Sep 17 00:00:00 2001
From: Arthur-Chang016
Date: Fri, 13 Jun 2025 04:14:03 +0000
Subject: [PATCH 2/3] Implement support for _mm_setcsr() intrinsic

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 15 +++++++++++++++
 clang/test/CIR/CodeGen/X86/builtins-x86.c  |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1e417aee0fa0..31188d8f4c2f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -162,5 +162,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
         builder.createAlignedLoad(loc, i32Ty, alloca, llvm::Align(4));
     return loadResult;
   }
+  case X86::BI_mm_setcsr: {
+    auto loc = getLoc(E->getExprLoc());
+    mlir::Type voidTy = cir::VoidType::get(&getMLIRContext());
+    mlir::Type i32Ty = cir::IntType::get(&getMLIRContext(), 32, true);
+    auto i32PtrTy = builder.getPointerTo(i32Ty);
+    // Allocate memory for the argument
+    auto alloca = builder.createAlloca(loc, i32PtrTy, i32Ty, "csrVal",
+                                       builder.getAlignmentAttr(4));
+    // Store the value to be set
+    builder.createAlignedStore(loc, Ops[0], alloca, CharUnits::fromQuantity(4));
+    return builder
+        .create<cir::LLVMIntrinsicCallOp>(
+            loc, builder.getStringAttr("x86.sse.ldmxcsr"), voidTy, alloca)
+        .getResult();
+  }
   }
 }
diff --git a/clang/test/CIR/CodeGen/X86/builtins-x86.c b/clang/test/CIR/CodeGen/X86/builtins-x86.c
index 6eb5b388aa54..b337442b2fce 100644
--- a/clang/test/CIR/CodeGen/X86/builtins-x86.c
+++ b/clang/test/CIR/CodeGen/X86/builtins-x86.c
@@ -64,3 +64,11 @@ unsigned int test_mm_getcsr() {
   // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.stmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
   // LLVM: call void @llvm.x86.sse.stmxcsr(ptr {{%.*}})
 }
+
+void test_mm_setcsr() {
+  // CIR-LABEL: test_mm_setcsr
+  // LLVM-LABEL: test_mm_setcsr
+  _mm_setcsr(0);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.ldmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
+  // LLVM: call void @llvm.x86.sse.ldmxcsr(ptr {{%.*}})
+}

From 7800eb8466b66eb329178a14462a5369d1e44b6f Mon Sep 17 00:00:00 2001
From: Arthur-Chang016
Date: Fri, 13 Jun 2025 05:24:12 +0000
Subject: [PATCH 3/3] Add BI__builtin_ia32_stmxcsr() and BI__builtin_ia32_ldmxcsr() by fall through

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp |  2 ++
 clang/test/CIR/CodeGen/X86/builtins-x86.c  | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 31188d8f4c2f..fb8e780a2717 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -144,6 +144,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
             getLoc(E->getExprLoc()), builder.getStringAttr("x86.rdtsc"), intTy)
         .getResult();
   }
+  case X86::BI__builtin_ia32_stmxcsr:
   case X86::BI_mm_getcsr: {
     // note that _mm_getcsr() returns uint, but llvm.x86.sse.stmxcsr takes i32
     // pointer and returns void. So needs alloc extra memory to store the
@@ -162,6 +163,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
         builder.createAlignedLoad(loc, i32Ty, alloca, llvm::Align(4));
     return loadResult;
   }
+  case X86::BI__builtin_ia32_ldmxcsr:
   case X86::BI_mm_setcsr: {
     auto loc = getLoc(E->getExprLoc());
     mlir::Type voidTy = cir::VoidType::get(&getMLIRContext());
diff --git a/clang/test/CIR/CodeGen/X86/builtins-x86.c b/clang/test/CIR/CodeGen/X86/builtins-x86.c
index b337442b2fce..c4eaee5b012f 100644
--- a/clang/test/CIR/CodeGen/X86/builtins-x86.c
+++ b/clang/test/CIR/CodeGen/X86/builtins-x86.c
@@ -65,6 +65,16 @@ unsigned int test_mm_getcsr() {
   // LLVM: call void @llvm.x86.sse.stmxcsr(ptr {{%.*}})
 }
 
+unsigned int test__builtin_ia32_stmxcsr() {
+  // CIR-LABEL: test__builtin_ia32_stmxcsr
+  // LLVM-LABEL: test__builtin_ia32_stmxcsr
+  return __builtin_ia32_stmxcsr();
+  // CIR: %{{.*}} = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"] {alignment = 4 : i64}
+  // CIR: %{{.*}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["csrRes"] {alignment = 4 : i64}
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.stmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
+  // LLVM: call void @llvm.x86.sse.stmxcsr(ptr {{%.*}})
+}
+
 void test_mm_setcsr() {
   // CIR-LABEL: test_mm_setcsr
   // LLVM-LABEL: test_mm_setcsr
@@ -72,3 +82,11 @@ void test_mm_setcsr() {
   // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.ldmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
   // LLVM: call void @llvm.x86.sse.ldmxcsr(ptr {{%.*}})
 }
+
+void test__builtin_ia32_ldmxcsr() {
+  // CIR-LABEL: test__builtin_ia32_ldmxcsr
+  // LLVM-LABEL: test__builtin_ia32_ldmxcsr
+  __builtin_ia32_ldmxcsr(0);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.ldmxcsr" {{%.*}} : (!cir.ptr<!s32i>) -> !void
+  // LLVM: call void @llvm.x86.sse.ldmxcsr(ptr {{%.*}})
+}