
[CIR] Add BinOpOverflowOp and basic pointer arithmetic support #133118

Open · wants to merge 3 commits into base: main
59 changes: 59 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -889,6 +889,65 @@ def BinOp : CIR_Op<"binop", [Pure,
let hasVerifier = 1;
}


//===----------------------------------------------------------------------===//
// BinOpOverflowOp
//===----------------------------------------------------------------------===//

def BinOpOverflowKind : I32EnumAttr<
"BinOpOverflowKind",
"checked binary arithmetic operation kind",
[BinOpKind_Add, BinOpKind_Sub, BinOpKind_Mul]> {
let cppNamespace = "::cir";
}

def BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> {
let summary = "Perform binary integral arithmetic with overflow checking";
let description = [{
`cir.binop.overflow` performs binary arithmetic operations with overflow
checking on integral operands.

The `kind` argument specifies the kind of arithmetic operation to perform.
It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments
specify the input operands of the arithmetic operation. The types of `lhs`
and `rhs` must be the same.

`cir.binop.overflow` produces two SSA values. `result` is the result of the
arithmetic operation truncated to its specified type. `overflow` is a
boolean value indicating whether overflow happens during the operation.

    The exact semantics of this operation are as follows:

- `lhs` and `rhs` are promoted to an imaginary integral type that has
infinite precision.
- The arithmetic operation is performed on the promoted operands.
- The infinite-precision result is truncated to the type of `result`. The
truncated result is assigned to `result`.
- If the truncated result is equal to the un-truncated result, `overflow`
is assigned to false. Otherwise, `overflow` is assigned to true.
}];

let arguments = (ins Arg<BinOpOverflowKind, "arithmetic kind">:$kind,
CIR_IntType:$lhs, CIR_IntType:$rhs);
let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);

let assemblyFormat = [{
`(` $kind `,` $lhs `,` $rhs `)` `:` type($lhs) `,`
`(` type($result) `,` type($overflow) `)`
attr-dict
}];

let builders = [
OpBuilder<(ins "cir::IntType":$resultTy,
"cir::BinOpOverflowKind":$kind,
"mlir::Value":$lhs,
"mlir::Value":$rhs), [{
auto overflowTy = cir::BoolType::get($_builder.getContext());
build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
}]>
];
}

//===----------------------------------------------------------------------===//
// GlobalOp
//===----------------------------------------------------------------------===//
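For illustration, here is a minimal sketch of creating the new op through the builder declared above. The helper function, value names, and exact headers are assumptions made for the example, not part of this patch.

#include "clang/CIR/Dialect/IR/CIRDialect.h" // assumed header declaring the CIR ops and types
#include "mlir/IR/Builders.h"

static void buildCheckedAdd(mlir::OpBuilder &builder, mlir::Location loc,
                            mlir::Value lhs, mlir::Value rhs) {
  // Both operands share the same CIR integer type; reuse it as the result type.
  auto resultTy = mlir::cast<cir::IntType>(lhs.getType());
  // Uses the OpBuilder declared in the TableGen definition above.
  auto op = builder.create<cir::BinOpOverflowOp>(
      loc, resultTy, cir::BinOpOverflowKind::Add, lhs, rhs);
  mlir::Value sum = op.getResult();          // arithmetic result, truncated to resultTy
  mlir::Value overflowed = op.getOverflow(); // cir.bool overflow flag
  (void)sum;
  (void)overflowed;
}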
1 change: 1 addition & 0 deletions clang/include/clang/CIR/MissingFeatures.h
@@ -109,6 +109,7 @@ struct MissingFeatures {
static bool cgFPOptionsRAII() { return false; }
static bool metaDataNode() { return false; }
static bool fastMathFlags() { return false; }
static bool vlas() { return false; }
Review comment (Collaborator):

Suggested change:
-  static bool vlas() { return false; }
+  static bool variableLengthArrays() { return false; }

vla has a handful of other meanings that this could get confused for. Also, it gives me an immediate gag-reflex to see vla in code, so spelling it out eases me into them better :)


// Missing types
static bool dataMemberType() { return false; }
125 changes: 122 additions & 3 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -936,8 +936,107 @@ getUnwidenedIntegerType(const ASTContext &astContext, const Expr *e) {
static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf,
const BinOpInfo &op,
bool isSubtraction) {
cgf.cgm.errorNYI(op.loc, "pointer arithmetic");
return {};
// Must have binary (not unary) expr here. Unary pointer
// increment/decrement doesn't use this path.
const BinaryOperator *expr = cast<BinaryOperator>(op.e);

mlir::Value pointer = op.lhs;
Expr *pointerOperand = expr->getLHS();
mlir::Value index = op.rhs;
Expr *indexOperand = expr->getRHS();

// In a subtraction, the LHS is always the pointer.
if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
Review comment (Collaborator):

In the case of subtraction index is no longer accurate either, right? Since some_ptr - other_ptr is valid.

std::swap(pointer, index);
std::swap(pointerOperand, indexOperand);
}

bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();

// Some versions of glibc and gcc use idioms (particularly in their malloc
// routines) that add a pointer-sized integer (known to be a pointer value)
// to a null pointer in order to cast the value back to an integer or as
// part of a pointer alignment algorithm. This is undefined behavior, but
// we'd like to be able to compile programs that use it.
//
// Normally, we'd generate a GEP with a null-pointer base here in response
// to that code, but it's also UB to dereference a pointer created that
// way. Instead (as an acknowledged hack to tolerate the idiom) we will
// generate a direct cast of the integer value to a pointer.
//
// The idiom (p = nullptr + N) is not met if any of the following are true:
//
// The operation is subtraction.
// The index is not pointer-sized.
// The pointer type is not byte-sized.
//
if (BinaryOperator::isNullPointerArithmeticExtension(
cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS()))
return cgf.getBuilder().createIntToPtr(index, pointer.getType());
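// Illustrative example, not part of this change: the tolerated idiom is roughly
//   void *p = (char *)0 + n;   // n is an integer known to hold a pointer value
// which the branch above lowers to a single int-to-pointer cast instead of a
// GEP with a null base.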

// Differently from LLVM codegen, ABI bits for index sizes is handled during
Review comment (Contributor):

This comment could use some clarification. I was going to say that I didn't think the "Differently from LLVM codegen" part of the comment added anything useful, but then before I even submitted my comment I found I had to go to the LLVM IR codegen to see what the comment was talking about. Anyway, here's my suggested rewording.

"The LLVM IR codegen compares the size of the index variable to what the data layout requires, and extends it if necessary. Like other ABI-specific processing, this is deferred until lowering to the LLVM dialect."

// LLVM lowering.

// If this is subtraction, negate the index.
if (isSubtraction)
index = cgf.getBuilder().createNeg(index);
Review comment (Collaborator):

as I mentioned above, this assumes that the index is integral, which isn't necessarily true. Or did we do a conversion somewhere?


if (cgf.sanOpts.has(SanitizerKind::ArrayBounds))
cgf.cgm.errorNYI("array bounds sanitizer");

const PointerType *pointerType =
pointerOperand->getType()->getAs<PointerType>();
if (!pointerType) {
cgf.cgm.errorNYI("ObjC");
Review comment (Collaborator):

would love this to be slightly more detailed of a message here. Just saying ObjC is pretty vague.

return {};
}

QualType elementType = pointerType->getPointeeType();
if (const VariableArrayType *vla =
cgf.getContext().getAsVariableArrayType(elementType)) {

// The element count here is the total number of non-VLA elements.
// TODO(cir): Get correct VLA size here
assert(!cir::MissingFeatures::vlas());
mlir::Value numElements = cgf.getBuilder().getConstAPInt(
cgf.getLoc(op.loc), cgf.getBuilder().getUInt64Ty(), llvm::APInt(64, 0));

// GEP indexes are signed, and scaling an index isn't permitted to
Review comment (Contributor):

The first line of this comment from classic codegen was lost in the transition to the incubator. I think it's relevant and should be added here. The wording of the comment is also a bit awkward. How about this?

// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and signed-overflow while scaling an
// index isn't permitted, so we set the NSW flag on our explicit
// multiply if overflow is undefined behavior.

Review comment (Collaborator):

there are a few extra words perhaps in Andy's suggestion, but I'm generally in favor of clarifying this.

// signed-overflow, so we use the same semantics for our explicit
// multiply. We suppress this if overflow is not undefined behavior.
mlir::Type elemTy = cgf.convertTypeForMem(vla->getElementType());

index = cgf.getBuilder().createCast(cir::CastKind::integral, index,
numElements.getType());
index = cgf.getBuilder().createMul(index.getLoc(), index, numElements);
Review comment (Contributor):

Based on the comment above (with or without my rewording), this should sunk into the if-else below and should be createNSWMul in the else case. That's what happens in classic codegen.


if (cgf.getLangOpts().isSignedOverflowDefined()) {
assert(!cir::MissingFeatures::ptrStrideOp());
cgf.cgm.errorNYI("pointer stride");
} else {
pointer = cgf.emitCheckedInBoundsGEP(elemTy, pointer, index, isSigned,
isSubtraction, op.e->getExprLoc());
}

return pointer;
}
// Explicitly handle GNU void* and function pointer arithmetic extensions. The
// GNU void* casts amount to no-ops since our void* type is i8*, but this is
// future proof.
mlir::Type elemTy;
if (elementType->isVoidType() || elementType->isFunctionType())
elemTy = cgf.UInt8Ty;
Review comment (Contributor):

This looks a bit suspect. Before the transition to opaque pointers in ClangIR, the code here in the classic codegen did this:

if (elementType->isVoidType() || elementType->isFunctionType()) {
  Value *result = CGF.EmitCastToVoidPtr(pointer);
  result = CGF.Builder.CreateGEP(CGF.Int8Ty, result, index, "add.ptr");
  return CGF.Builder.CreateBitCast(result, pointer->getType());
}

That looks much more consistent with the comment above. The original code here was added in this commit:

42a8cd3

We should probably look at the test cases there to make sure this is doing the right thing.

else
elemTy = cgf.convertTypeForMem(elementType);

if (cgf.getLangOpts().isSignedOverflowDefined()) {
assert(!cir::MissingFeatures::ptrStrideOp());
cgf.cgm.errorNYI("pointer stride");
return pointer;
}

return cgf.emitCheckedInBoundsGEP(elemTy, pointer, index, isSigned,
isSubtraction, op.e->getExprLoc());
}

mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) {
@@ -1106,7 +1205,7 @@ mlir::Value ScalarExprEmitter::emitSub(const BinOpInfo &ops) {
// See more in `EmitSub` in CGExprScalar.cpp.
assert(!cir::MissingFeatures::ptrDiffOp());
cgf.cgm.errorNYI("ptrdiff");
return {};
return cgf.createDummyValue(loc, ops.fullType);
}

mlir::Value ScalarExprEmitter::emitShl(const BinOpInfo &ops) {
@@ -1402,3 +1501,23 @@ mlir::Value CIRGenFunction::emitScalarPrePostIncDec(const UnaryOperator *e,
return ScalarExprEmitter(*this, builder)
.emitScalarPrePostIncDec(e, lv, isInc, isPre);
}

mlir::Value CIRGenFunction::emitCheckedInBoundsGEP(
mlir::Type elemTy, mlir::Value ptr, ArrayRef<mlir::Value> idxList,
bool signedIndices, bool isSubtraction, SourceLocation loc) {
assert(!cir::MissingFeatures::ptrStrideOp());
if (idxList.size() != 1)
cgm.errorNYI("multi-index ptr arithmetic");

// TODO(cir): This should be a PtrStrideOp. For now we simply return the base
// pointer
mlir::Value gepVal = ptr;
Review comment (Contributor):

It seems like we should be emitting an NYI error here.


// If the pointer overflow sanitizer isn't enabled, do nothing.
if (!sanOpts.has(SanitizerKind::PointerOverflow))
return gepVal;

assert(!cir::MissingFeatures::pointerOverflowSanitizer());
cgm.errorNYI("pointer overflow sanitizer");
return gepVal;
}
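As context for the new emitPointerArithmetic path above, here is a minimal sketch of the source-level expressions it handles; names are illustrative and not taken from this patch's tests.

void pointer_arith_examples(int *p, long i) {
  int *q1 = p + i; // pointer + integer
  int *q2 = i + p; // integer + pointer: the operands are swapped so the pointer is on the left
  int *q3 = p - i; // pointer - integer: the index is negated before forming the GEP
  (void)q1; (void)q2; (void)q3;
  // Note: p - q (pointer minus pointer) is a pointer difference and goes through
  // emitSub's ptrdiff handling instead, which this patch still leaves as NYI.
}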
10 changes: 10 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -223,6 +223,16 @@ class CIRGenFunction : public CIRGenTypeCache {

void emitDecl(const clang::Decl &d);

/// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
/// detect undefined behavior when the pointer overflow sanitizer is enabled.
/// \p SignedIndices indicates whether any of the GEP indices are signed.
/// \p IsSubtraction indicates whether the expression used to form the GEP
/// is a subtraction.
mlir::Value emitCheckedInBoundsGEP(mlir::Type elemTy, mlir::Value ptr,
Review comment (Contributor):

I'm reorganizing the emit functions in #133017. Can we hold this until that is merged?

Review comment (Author):

Sure 👍

llvm::ArrayRef<mlir::Value> idxList,
bool signedIndices, bool isSubtraction,
SourceLocation loc);

void emitScalarInit(const clang::Expr *init, mlir::Location loc,
LValue lvalue, bool capturedByInit = false);

118 changes: 118 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -20,6 +20,7 @@
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Types.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
@@ -1117,6 +1118,122 @@ mlir::LogicalResult CIRToLLVMBinOpLowering::matchAndRewrite(
return mlir::LogicalResult::success();
}

mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite(
cir::BinOpOverflowOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
mlir::Location loc = op.getLoc();
BinOpOverflowKind arithKind = op.getKind();
IntType operandTy = op.getLhs().getType();
IntType resultTy = op.getResult().getType();

EncompassedTypeInfo encompassedTyInfo =
computeEncompassedTypeWidth(operandTy, resultTy);
mlir::IntegerType encompassedLLVMTy =
rewriter.getIntegerType(encompassedTyInfo.width);

mlir::Value lhs = adaptor.getLhs();
mlir::Value rhs = adaptor.getRhs();
if (operandTy.getWidth() < encompassedTyInfo.width) {
if (operandTy.isSigned()) {
lhs = rewriter.create<mlir::LLVM::SExtOp>(loc, encompassedLLVMTy, lhs);
rhs = rewriter.create<mlir::LLVM::SExtOp>(loc, encompassedLLVMTy, rhs);
} else {
lhs = rewriter.create<mlir::LLVM::ZExtOp>(loc, encompassedLLVMTy, lhs);
rhs = rewriter.create<mlir::LLVM::ZExtOp>(loc, encompassedLLVMTy, rhs);
}
}

std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign,
encompassedTyInfo.width);
auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName);

mlir::IntegerType overflowLLVMTy = rewriter.getI1Type();
auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral(
rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy});

auto callLLVMIntrinOp = rewriter.create<mlir::LLVM::CallIntrinsicOp>(
loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs});
mlir::Value intrinRet = callLLVMIntrinOp.getResult(0);

mlir::Value result = rewriter
.create<mlir::LLVM::ExtractValueOp>(
loc, intrinRet, ArrayRef<int64_t>{0})
.getResult();
mlir::Value overflow = rewriter
.create<mlir::LLVM::ExtractValueOp>(
loc, intrinRet, ArrayRef<int64_t>{1})
.getResult();

if (resultTy.getWidth() < encompassedTyInfo.width) {
mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy);
auto truncResult =
rewriter.create<mlir::LLVM::TruncOp>(loc, resultLLVMTy, result);

// Extend the truncated result back to the encompassing type to check for
// any overflows during the truncation.
mlir::Value truncResultExt;
if (resultTy.isSigned())
truncResultExt = rewriter.create<mlir::LLVM::SExtOp>(
loc, encompassedLLVMTy, truncResult);
else
truncResultExt = rewriter.create<mlir::LLVM::ZExtOp>(
loc, encompassedLLVMTy, truncResult);
auto truncOverflow = rewriter.create<mlir::LLVM::ICmpOp>(
loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result);

result = truncResult;
overflow = rewriter.create<mlir::LLVM::OrOp>(loc, overflow, truncOverflow);
}

mlir::Type boolLLVMTy =
getTypeConverter()->convertType(op.getOverflow().getType());
if (boolLLVMTy != rewriter.getI1Type())
overflow = rewriter.create<mlir::LLVM::ZExtOp>(loc, boolLLVMTy, overflow);

rewriter.replaceOp(op, mlir::ValueRange{result, overflow});

return mlir::success();
}
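// Illustrative shape of the lowering above for a 32-bit signed add where the
// operand and result widths already match (so no extension or truncation is
// emitted); the textual syntax is approximate:
//   %pair = llvm.call_intrinsic "llvm.sadd.with.overflow.i32"(%lhs, %rhs)
//             : (i32, i32) -> !llvm.struct<(i32, i1)>
//   %res  = llvm.extractvalue %pair[0] : !llvm.struct<(i32, i1)>
//   %ovf  = llvm.extractvalue %pair[1] : !llvm.struct<(i32, i1)>
// %ovf is then zero-extended if cir.bool converts to a wider integer type.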

std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName(
cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) {
// The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}`

std::string name = "llvm.";
Review comment (Author):

I wonder if this should use llvm::raw_string_ostream or Twine.

Review comment (Contributor):

I don't think so, but it could use llvm::Intrinsic::getName() with logic to figure out the needed intrinsic ID instead of building the name as a composite.

Review comment (Member):

Good point, incubator's CIRGenFunction::emitBuiltinExpr has a few examples on that

Review comment (Collaborator):

There is definitely value to at least doing a reserve here if the above suggestions don't help.


if (isSigned)
name.push_back('s');
else
name.push_back('u');

switch (opKind) {
case cir::BinOpOverflowKind::Add:
name.append("add.");
break;
case cir::BinOpOverflowKind::Sub:
name.append("sub.");
break;
case cir::BinOpOverflowKind::Mul:
name.append("mul.");
break;
}

name.append("with.overflow.i");
name.append(std::to_string(width));

return name;
}

CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo
CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth(
cir::IntType operandTy, cir::IntType resultTy) {
bool sign = operandTy.getIsSigned() || resultTy.getIsSigned();
unsigned width =
std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()),
resultTy.getWidth() + (sign && resultTy.isUnsigned()));
return {sign, width};
}
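// Worked example (illustrative): operand type u16, result type s32.
//   sign  = isSigned(u16) || isSigned(s32)                   = true
//   width = max(16 + 1 /* u16 needs a sign bit */, 32 + 0)   = 32
// The lowering therefore zero-extends the 16-bit operands to i32, calls
// llvm.sadd.with.overflow.i32 (for an add), and skips the truncation step
// because the result type is already 32 bits wide.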

static void prepareTypeConverter(mlir::LLVMTypeConverter &converter,
mlir::DataLayout &dataLayout) {
converter.addConversion([&](cir::PointerType type) -> mlir::Type {
@@ -1256,6 +1373,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
patterns.add<
// clang-format off
CIRToLLVMBinOpLowering,
CIRToLLVMBinOpOverflowOpLowering,
CIRToLLVMBrCondOpLowering,
CIRToLLVMBrOpLowering,
CIRToLLVMFuncOpLowering,