Skip to content

Conversation

@maryammo
Copy link
Contributor

This commit adds two Clang builtins for PowerPC AMO load operations:

  • __builtin_amo_lwat for 32-bit unsigned operations
  • __builtin_amo_ldat for 64-bit unsigned operations

Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.

@maryammo maryammo added clang Clang issues not falling into any other category backend:PowerPC labels Nov 12, 2025
@llvmbot llvmbot added backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics llvm:ir labels Nov 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 12, 2025

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-powerpc

Author: Maryam Moghadas (maryammo)

Changes

This commit adds two Clang builtins for PowerPC AMO load operations:

  • __builtin_amo_lwat for 32-bit unsigned operations
  • __builtin_amo_ldat for 64-bit unsigned operations

Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.


Patch is 24.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167790.diff

14 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsPPC.def (+4)
  • (modified) clang/lib/Headers/CMakeLists.txt (+1)
  • (added) clang/lib/Headers/amo.h (+97)
  • (modified) clang/lib/Sema/SemaPPC.cpp (+15)
  • (added) clang/test/CodeGen/PowerPC/builtins-amo-err.c (+18)
  • (added) clang/test/CodeGen/PowerPC/builtins-ppc-amo.c (+58)
  • (added) clang/test/CodeGen/PowerPC/ppc-amo-header.c (+91)
  • (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+12)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+64-2)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+1)
  • (modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+10-3)
  • (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+11-4)
  • (modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.td (+1-1)
  • (added) llvm/test/CodeGen/PowerPC/amo-enable.ll (+51)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index cf8bdd2a429df..216b5fdb69ff7 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1001,6 +1001,10 @@ TARGET_BUILTIN(__builtin_darn_32, "i", "", "isa-v30-instructions")
 TARGET_BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "", "vsx")
 TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
 
+// AMO builtins
+TARGET_BUILTIN(__builtin_amo_lwat, "UiUi*UiIi", "", "isa-v30-instructions")
+TARGET_BUILTIN(__builtin_amo_ldat, "ULiULi*ULiIi", "", "isa-v30-instructions")
+
 // Set the floating point rounding mode
 BUILTIN(__builtin_setrnd, "di", "")
 
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 33fff7645df65..038859a513eb8 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -119,6 +119,7 @@ set(opencl_files
 
 set(ppc_files
   altivec.h
+  amo.h
   )
 
 set(ppc_htm_files
diff --git a/clang/lib/Headers/amo.h b/clang/lib/Headers/amo.h
new file mode 100644
index 0000000000000..fda2984b97626
--- /dev/null
+++ b/clang/lib/Headers/amo.h
@@ -0,0 +1,97 @@
+/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+\*===----------------------------------------------------------------------===*/
+
+/* This header provides compatibility wrappers for GCC's AMO functions.
+ * The functions here call Clang's underlying AMO builtins.
+ */
+
+#ifndef _AMO_H
+#define _AMO_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* AMO Load Operation Codes (FC values) */
+enum {
+  _AMO_LD_ADD = 0x00,  /* Fetch and Add */
+  _AMO_LD_XOR = 0x01,  /* Fetch and XOR */
+  _AMO_LD_IOR = 0x02,  /* Fetch and OR */
+  _AMO_LD_AND = 0x03,  /* Fetch and AND */
+  _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */
+  _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */
+  _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */
+  _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */
+  _AMO_LD_SWAP = 0x08  /* Swap */
+};
+
+/* 32-bit unsigned AMO load operations */
+static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP);
+}
+
+/* 64-bit unsigned AMO load operations */
+static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AMO_H */
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index bfa458d207b46..a6873900de9e1 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -87,6 +87,8 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
   case PPC::BI__builtin_ppc_fetch_and_andlp:
   case PPC::BI__builtin_ppc_fetch_and_orlp:
   case PPC::BI__builtin_ppc_fetch_and_swaplp:
+  case PPC::BI__builtin_amo_lwat:
+  case PPC::BI__builtin_amo_ldat:
     return true;
   }
   return false;
@@ -253,6 +255,19 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI,
   case PPC::BI__builtin_##Name:                                                \
     return BuiltinPPCMMACall(TheCall, BuiltinID, Types);
 #include "clang/Basic/BuiltinsPPC.def"
+  case PPC::BI__builtin_amo_lwat:
+  case PPC::BI__builtin_amo_ldat: {
+    llvm::APSInt Result;
+    if (SemaRef.BuiltinConstantArg(TheCall, 2, Result))
+      return true;
+    unsigned Val = Result.getZExtValue();
+    if ((Val <= 8) || Val == 16 || (Val >= 24 && Val <= 25) || Val == 28)
+      return false;
+    Expr *Arg = TheCall->getArg(2);
+    return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range)
+           << toString(Result, 10) << "0-8, 16, 24-25" << "28"
+           << Arg->getSourceRange();
+  }
   }
   llvm_unreachable("must return from switch");
 }
diff --git a/clang/test/CodeGen/PowerPC/builtins-amo-err.c b/clang/test/CodeGen/PowerPC/builtins-amo-err.c
new file mode 100644
index 0000000000000..77f456e427244
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-amo-err.c
@@ -0,0 +1,18 @@
+// RUN: not %clang_cc1 -triple powerpc-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=AIX32-ERROR
+// RUN: not %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=FC-ERROR
+
+void test_amo() {
+  unsigned int *ptr1, value1;
+  // AIX32-ERROR: error: this builtin is only available on 64-bit targets
+  __builtin_amo_lwat(ptr1, value1, 0);
+  // FC-ERROR: argument value 9 is outside the valid range [0-8, 16, 24-25, 28]
+  __builtin_amo_lwat(ptr1, value1, 9);
+
+  unsigned long int *ptr2, value2;
+  // AIX32-ERROR: error: this builtin is only available on 64-bit targets
+  __builtin_amo_ldat(ptr2, value2, 3);
+  // FC-ERROR: error: argument value 26 is outside the valid range [0-8, 16, 24-25, 28]
+  __builtin_amo_ldat(ptr2, value2, 26);
+}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c
new file mode 100644
index 0000000000000..2975b99786869
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c
@@ -0,0 +1,58 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
+// RUN:  -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX
+
+// CHECK-LABEL: define dso_local void @test_unsigned_lwat(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// AIX-LABEL: define void @test_unsigned_lwat(
+// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// AIX-NEXT:  [[ENTRY:.*:]]
+// AIX-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
+// AIX-NEXT:    store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// AIX-NEXT:    ret void
+//
+void test_unsigned_lwat(unsigned int *ptr, unsigned int value, unsigned int * resp) {
+  unsigned int res = __builtin_amo_lwat(ptr, value, 0);
+  *resp = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_unsigned_ldat(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
+// CHECK-NEXT:    store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// AIX-LABEL: define void @test_unsigned_ldat(
+// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT:  [[ENTRY:.*:]]
+// AIX-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
+// AIX-NEXT:    store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
+// AIX-NEXT:    ret void
+//
+void test_unsigned_ldat(unsigned long int *ptr, unsigned long int value, unsigned long int * resp) {
+  unsigned long int res = __builtin_amo_ldat(ptr, value, 3);
+  *resp = res;
+}
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"long", [[META4]], i64 0}
+//.
+// AIX: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// AIX: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// AIX: [[META5]] = !{!"Simple C/C++ TBAA"}
+// AIX: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// AIX: [[META7]] = !{!"long", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/PowerPC/ppc-amo-header.c b/clang/test/CodeGen/PowerPC/ppc-amo-header.c
new file mode 100644
index 0000000000000..f544cdef1e7d0
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/ppc-amo-header.c
@@ -0,0 +1,91 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+#include <amo.h>
+
+uint32_t test_lwat_add(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_add
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 0)
+  return amo_lwat_add(ptr, val);
+}
+
+uint32_t test_lwat_xor(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_xor
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 1)
+  return amo_lwat_xor(ptr, val);
+}
+
+uint32_t test_lwat_ior(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_ior
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 2)
+  return amo_lwat_ior(ptr, val);
+}
+
+uint32_t test_lwat_and(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_and
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 3)
+  return amo_lwat_and(ptr, val);
+}
+
+uint32_t test_lwat_umax(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_umax
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 4)
+  return amo_lwat_umax(ptr, val);
+}
+
+uint32_t test_lwat_umin(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_umin
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 6)
+  return amo_lwat_umin(ptr, val);
+}
+
+uint32_t test_lwat_swap(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_swap
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 8)
+  return amo_lwat_swap(ptr, val);
+}
+
+uint64_t test_ldat_add(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_add
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 0)
+  return amo_ldat_add(ptr, val);
+}
+
+uint64_t test_ldat_xor(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_xor
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 1)
+  return amo_ldat_xor(ptr, val);
+}
+
+uint64_t test_ldat_ior(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_ior
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 2)
+  return amo_ldat_ior(ptr, val);
+}
+
+uint64_t test_ldat_and(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_and
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 3)
+  return amo_ldat_and(ptr, val);
+}
+
+uint64_t test_ldat_umax(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_umax
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 4)
+  return amo_ldat_umax(ptr, val);
+}
+
+uint64_t test_ldat_umin(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_umin
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 6)
+  return amo_ldat_umin(ptr, val);
+}
+
+uint64_t test_ldat_swap(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_swap
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 8)
+  return amo_ldat_swap(ptr, val);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 636e88898a55e..54caa52f57826 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -2132,3 +2132,15 @@ let TargetPrefix = "ppc" in {
     Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
               [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
 }
+
+// AMO intrinsics
+let TargetPrefix = "ppc" in {
+ def int_ppc_amo_lwat : ClangBuiltin<"__builtin_amo_lwat">,
+    DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty,
+                           llvm_i32_ty, llvm_i32_ty],
+                          [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_amo_ldat : ClangBuiltin<"__builtin_amo_ldat">,
+    DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty,
+                           llvm_i64_ty, llvm_i32_ty],
+                          [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dd233e236e17f..ec5e61b724d87 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -677,6 +677,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   // To handle counter-based loop conditions.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
@@ -11633,6 +11634,29 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return Flags;
 }
 
+SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  unsigned IntrinsicID = Op.getConstantOperandVal(1);
+
+  SDLoc dl(Op);
+  switch (IntrinsicID) {
+  case Intrinsic::ppc_amo_lwat:
+  case Intrinsic::ppc_amo_ldat:
+    SDValue Ptr = Op.getOperand(2);
+    SDValue Val1 = Op.getOperand(3);
+    SDValue FC = Op.getOperand(4);
+    SDValue Ops[] = {Ptr, Val1, FC};
+    bool IsLwat = IntrinsicID == Intrinsic::ppc_amo_lwat;
+    unsigned Opcode = IsLwat ? PPC::LWAT_PSEUDO : PPC::LDAT_PSEUDO;
+    MachineSDNode *MNode = DAG.getMachineNode(
+        Opcode, dl, {IsLwat ? MVT::i32 : MVT::i64, MVT::Other}, Ops);
+    SDValue Result = SDValue(MNode, 0);
+    SDValue OutChain = SDValue(MNode, 1);
+    return DAG.getMergeValues({Result, OutChain}, dl);
+  }
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                SelectionDAG &DAG) const {
   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
@@ -12803,8 +12827,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerFP_ROUND(Op, DAG);
   case ISD::ROTL:               return LowerROTL(Op, DAG);
 
-  // For counter-based loop handling.
-  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
+  // For counter-based loop handling and AMO loads.
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerINTRINSIC_W_CHAIN(Op, DAG);
 
   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
 
@@ -14715,6 +14740,43 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
         .addImm(0)
         .addReg(Ptr);
+  } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
+             MI.getOpcode() == PPC::LDAT_PSEUDO) {
+    DebugLoc DL = MI.getDebugLoc();
+    Register DstReg = MI.getOperand(0).getReg();
+    Register PtrReg = MI.getOperand(1).getReg();
+    Register ValReg = MI.getOperand(2).getReg();
+    unsigned FC = MI.getOperand(3).getImm();
+    bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
+    Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    if (IsLwat)
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
+          .addImm(0)
+          .addReg(ValReg)
+          .addImm(PPC::sub_32);
+    else
+      Val64 = ValReg;
+    Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
+    Register PairWithVal = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), PairWithVal)
+        .addReg(Pair)
+        .addReg(Val64)
+        .addImm(PPC::sub_gp8_x1);
+    Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
+        .addReg(PairWithVal)
+        .addReg(PtrReg)
+        .addImm(FC);
+    Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
+        .addReg(PairResult, 0, PPC::sub_gp8_x0);
+    if (IsLwat)
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+          .addReg(Result64, 0, PPC::sub_32);
+    else
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+          .addReg(Result64);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d967018982734..839f797e2dfed 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1312,6 +1312,7 @@ namespace llvm {
                        EVT VT, SDValue V1, SDValue V2) const;
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fdca5ebc854ba..462535601e05e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -327,12 +327,19 @@ def LQARXL : XForm_1<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr),
                      "lqarx $RST, $addr, 1", IIC_LdStLQARX, []>,
                      isPPC64, isRecordForm;
 
-let hasExtraDefRegAllocReq = 1 in
-def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$RST), (ins g8rc:$RA, u5imm:$RB),
+let hasExtraDefRegAllocReq = 1, mayStore = 1 in
+def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB),
                          "ldat $RST, $RA, $RB"...
[truncated]

@maryammo maryammo requested review from RolandF77 and lei137 November 12, 2025 23:52
This commit adds two Clang builtins for PowerPC AMO load operations:
- __builtin_amo_lwat for 32-bit unsigned operations
- __builtin_amo_ldat for 64-bit unsigned operations

Also adds an amo.h header that maps GCC's AMO functions to these Clang
builtins for compatibility.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:PowerPC backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category llvm:ir

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants