llvm · RiverDave · Aug 17, 2025 · Aug 23, 2025
@@ -871,8 +871,39 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_insertf64x2_256:
   case X86::BI__builtin_ia32_inserti64x2_256:
   case X86::BI__builtin_ia32_insertf64x2_512:
-  case X86::BI__builtin_ia32_inserti64x2_512:
-    llvm_unreachable("insertf128 NYI");
+  case X86::BI__builtin_ia32_inserti64x2_512: {
+    unsigned dstNumElts = cast<cir::VectorType>(Ops[0].getType()).getSize();
+    unsigned srcNumElts = cast<cir::VectorType>(Ops[1].getType()).getSize();
+    unsigned subVectors = dstNumElts / srcNumElts;
+    unsigned index =
+        Ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+    assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
+    index &= subVectors - 1; // Remove any extra bits.
+    index *= srcNumElts;
+
+    int64_t indices[16];
+    for (unsigned i = 0; i != dstNumElts; ++i)
+      indices[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
+
+    cir::ConstantOp poisonVec =
+        builder.getConstant(getLoc(E->getExprLoc()),
+                            builder.getAttr<cir::PoisonAttr>(Ops[1].getType()));
+
+    mlir::Value op1 =
+        builder.createVecShuffle(getLoc(E->getExprLoc()), Ops[1], poisonVec,
+                                 ArrayRef(indices, dstNumElts));
+
+    for (unsigned i = 0; i != dstNumElts; ++i) {
+      if (i >= index && i < (index + srcNumElts))
+        indices[i] = (i - index) + dstNumElts;
+      else
+        indices[i] = i;
+    }
+
+    return builder.createVecShuffle(getLoc(E->getExprLoc()), Ops[0], op1,
+                                    ArrayRef(indices, dstNumElts));
+  }
+
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
     llvm_unreachable("pmovqd512_mask NYI");

diff --git a/clang/test/CIR/CodeGen/X86/avx-builtins.c b/clang/test/CIR/CodeGen/X86/avx-builtins.c
@@ -143,3 +143,36 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) {
   return _mm256_insert_epi64(x, b, 2);
 }
 #endif
+
+__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
+  // CIR-LABEL: test_mm256_insertf128_pd
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: test_mm256_insertf128_pd
+  // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(A, B, 0);
+}
+
+__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) {
+  // CIR-LABEL: test_mm256_insertf128_ps
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.float x 8>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: test_mm256_insertf128_ps
+  // LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(A, B, 1);
+}
+
+__m256i test_mm256_insertf128_si256(__m256i A, __m128i B) {
+  // CIR-LABEL: test_mm256_insertf128_si256
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 8>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i]
+
+  // LLVM-LABEL: test_mm256_insertf128_si256
+  // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_si256(A, B, 0);
+}
diff --git a/clang/test/CIR/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CIR/CodeGen/X86/avx-shuffle-builtins.c
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+// CIR-LABEL: @test_mm256_insertf128_pd_0(
+// CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<!cir.double x 4>>, !cir.vector<!cir.double x 4>
+// CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.double x 2>
+// CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.double x 4>
+
+
+// LLVM-LABEL: @test_mm256_insertf128_pd_0
+// LLVM:    [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
+// LLVM:    [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
+// LLVM-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+// LLVM:    ret <4 x double>
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_ps_0(
+// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
+
+// LLVM-LABEL: @test_mm256_insertf128_ps_0(
+// LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// LLVM:    ret <8 x float> %{{.*}}
+//
+__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_ps_1(
+// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
+
+// LLVM-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1(
+// LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// LLVM:    ret <8 x float> %{{.*}}
+//
+__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_si256_0(
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, %{{.*}} : !cir.vector<!s64i x 4>), !cir.vector<!s32i x 8>
+// CIR: [[TMP1:%.*]] = cir.cast(bitcast, %{{.*}} : !cir.vector<!s64i x 2>), !cir.vector<!s32i x 4>
+// CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.vector<!s32i x 8>), !cir.vector<!s64i x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!s64i x 4>
+
+// LLVM-LABEL: @test_mm256_insertf128_si256_0
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+// LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// LLVM:    ret <4 x i64> %{{.*}}
+//
+__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_si256_1(
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, %{{.*}} : !cir.vector<!s64i x 4>), !cir.vector<!s32i x 8>
+// CIR: [[TMP1:%.*]] = cir.cast(bitcast, %{{.*}} : !cir.vector<!s64i x 2>), !cir.vector<!s32i x 4>
+// CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!s32i x 8>
+// CIR: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.vector<!s32i x 8>), !cir.vector<!s64i x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!s64i x 4>
+
+// LLVM-LABEL: @test_mm256_insertf128_si256_1
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+// LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// LLVM:    ret <4 x i64> %{{.*}}
+//
+__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+  return _mm256_insertf128_si256(a, b, 1);
+}
diff --git a/clang/test/CIR/CodeGen/X86/avx2-builtins.c b/clang/test/CIR/CodeGen/X86/avx2-builtins.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) {
+
+  // CIR-LABEL: test0_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test0_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+__m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test1_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test1_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_inserti128_si256(a, b, 1);
+}
+
+// Immediate should be truncated to one bit.
+__m256i test2_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test2_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test2_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+
+
diff --git a/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c
@@ -14,3 +14,39 @@ __m512i test_mm512_movm_epi64(__mmask8 __A) {
   // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
   return _mm512_movm_epi64(__A); 
 }
+
+__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
+  // CIR-LABEL: test_mm512_insertf32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: @test_mm512_insertf32x8
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_insertf32x8(__A, __B, 1); 
+}
+
+__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
+  // CIR-LABEL: test_mm512_inserti32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_inserti32x8
+  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_inserti32x8(__A, __B, 1); 
+}
+
+__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
+  // CIR-LABEL: test_mm512_insertf64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i] : !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: @test_mm512_insertf64x2
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  return _mm512_insertf64x2(__A, __B, 3); 
+}
+
+__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
+  // CIR-LABEL: test_mm512_inserti64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: @test_mm512_inserti64x2
+  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  return _mm512_inserti64x2(__A, __B, 1); 
+}