From ac0bc20c94738a473522a1c7b7ea37ab180a769a Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 20 Oct 2025 11:02:06 -0700 Subject: [PATCH 01/38] [OpenACC][CIR] Reduction combiner lowering for min/max (#163656) These two are lowered as if they are the expression: LHS = (LHS < RHS ) ? RHS : LHS; and LHS = (LHS < RHS ) ? LHS : RHS; This patch generates these expressions and ensures they are properly emitted into IR. Note: this is dependent on https://github.com/llvm/llvm-project/pull/163580 and cannot be merged until that one is (or the tests will fail). --- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp | 7 +- clang/lib/Sema/SemaOpenACC.cpp | 93 ++- .../combined-reduction-clause-default-ops.cpp | 536 ++++++++++++++++- .../combined-reduction-clause-float.cpp | 164 +++++- .../combined-reduction-clause-inline-ops.cpp | 142 ++++- .../combined-reduction-clause-int.cpp | 164 +++++- .../combined-reduction-clause-outline-ops.cpp | 144 ++++- .../compute-reduction-clause-default-ops.c | 557 +++++++++++++++++- .../compute-reduction-clause-default-ops.cpp | 536 ++++++++++++++++- .../compute-reduction-clause-float.c | 165 +++++- .../compute-reduction-clause-float.cpp | 163 ++++- .../compute-reduction-clause-inline-ops.cpp | 142 ++++- .../compute-reduction-clause-int.c | 164 +++++- .../compute-reduction-clause-int.cpp | 164 +++++- .../compute-reduction-clause-outline-ops.cpp | 142 ++++- .../compute-reduction-clause-unsigned-int.c | 164 +++++- .../loop-reduction-clause-default-ops.cpp | 536 ++++++++++++++++- .../loop-reduction-clause-float.cpp | 164 +++++- .../loop-reduction-clause-inline-ops.cpp | 142 ++++- .../loop-reduction-clause-int.cpp | 164 +++++- .../loop-reduction-clause-outline-ops.cpp | 142 ++++- .../combined-construct-reduction-clause.cpp | 8 +- .../compute-construct-reduction-clause.c | 18 +- .../compute-construct-reduction-clause.cpp | 17 +- 25 files changed, 4504 insertions(+), 137 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 22de85d90a3cf..5ff4cc4b833d9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13676,6 +13676,9 @@ def err_acc_reduction_recipe_no_op "not have a valid operation available">; def note_acc_reduction_recipe_noop_field : Note<"while forming combiner for compound type %0">; +def note_acc_reduction_combiner_forming + : Note<"while forming %select{|binary operator '%1'|conditional " + "operator|final assignment operator}0">; // AMDGCN builtins diagnostics def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index f638d391d55cd..be063033ddcfc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -590,15 +590,18 @@ void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( } else { // else we have to handle each individual field after after a // get-element. 
+ const CIRGenRecordLayout &layout = + cgf.cgm.getTypes().getCIRGenRecordLayout(rd); for (const auto &[field, combiner] : llvm::zip_equal(rd->fields(), combinerRecipes)) { mlir::Type fieldType = cgf.convertType(field->getType()); auto fieldPtr = cir::PointerType::get(fieldType); + unsigned fieldIndex = layout.getCIRFieldNo(field); mlir::Value lhsField = builder.createGetMember( - loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex()); + loc, fieldPtr, lhsArg, field->getName(), fieldIndex); mlir::Value rhsField = builder.createGetMember( - loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex()); + loc, fieldPtr, rhsArg, field->getName(), fieldIndex); emitSingleCombiner(lhsField, rhsField, combiner); } diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index ca99834ce8266..3bb8080f6e72c 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2996,6 +2996,8 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( case OpenACCReductionOperator::Max: case OpenACCReductionOperator::Min: + BinOp = BinaryOperatorKind::BO_LT; + break; case OpenACCReductionOperator::And: case OpenACCReductionOperator::Or: // We just want a 'NYI' error in the backend, so leave an empty combiner @@ -3011,26 +3013,80 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( assert(!VarTy->isArrayType() && "Only 1 level of array allowed"); + enum class CombinerFailureKind { + None = 0, + BinOp = 1, + Conditional = 2, + Assignment = 3, + }; + + auto genCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE) + -> std::pair { + ExprResult BinOpRes = + SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, RHSDRE, + /*ForFoldExpr=*/false); + switch (ReductionOperator) { + case OpenACCReductionOperator::Addition: + case OpenACCReductionOperator::Multiplication: + case OpenACCReductionOperator::BitwiseAnd: + case OpenACCReductionOperator::BitwiseOr: + case OpenACCReductionOperator::BitwiseXOr: + // These 5 are simple and are being done as compound operators, so we can + // immediately quit here. + return {BinOpRes, BinOpRes.isUsable() ? CombinerFailureKind::None + : CombinerFailureKind::BinOp}; + case OpenACCReductionOperator::Max: + case OpenACCReductionOperator::Min: { + // These are done as: + // LHS = (LHS < RHS) ? LHS : RHS; and LHS = (LHS < RHS) ? RHS : LHS; + // + // The BinOpRes should have been created with the less-than, so we just + // have to build the conditional and assignment. + if (!BinOpRes.isUsable()) + return {BinOpRes, CombinerFailureKind::BinOp}; + + // Create the correct conditional operator, swapping the results + // (true/false value) depending on min/max. + ExprResult CondRes; + if (ReductionOperator == OpenACCReductionOperator::Min) + CondRes = SemaRef.ActOnConditionalOp(Loc, Loc, BinOpRes.get(), LHSDRE, + RHSDRE); + else + CondRes = SemaRef.ActOnConditionalOp(Loc, Loc, BinOpRes.get(), RHSDRE, + LHSDRE); + + if (!CondRes.isUsable()) + return {CondRes, CombinerFailureKind::Conditional}; + + // Build assignment. + ExprResult Assignment = SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, + BinaryOperatorKind::BO_Assign, + LHSDRE, CondRes.get(), + /*ForFoldExpr=*/false); + return {Assignment, Assignment.isUsable() + ? 
CombinerFailureKind::None + : CombinerFailureKind::Assignment}; + } + case OpenACCReductionOperator::And: + case OpenACCReductionOperator::Or: + llvm_unreachable("And/Or not implemented, but should fail earlier"); + case OpenACCReductionOperator::Invalid: + llvm_unreachable("Invalid should have been caught above"); + } + }; + auto tryCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE, bool IncludeTrap) { - // TODO: OpenACC: we have to figure out based on the bin-op how to do the - // ones that we can't just use compound operators for. So &&, ||, max, and - // min aren't really clear what we could do here. if (IncludeTrap) { // Trap all of the errors here, we'll emit our own at the end. Sema::TentativeAnalysisScope Trap{SemaRef}; - - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); - } else { - return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, - RHSDRE, - /*ForFoldExpr=*/false); + return genCombiner(LHSDRE, RHSDRE); } + return genCombiner(LHSDRE, RHSDRE); }; struct CombinerAttemptTy { + CombinerFailureKind FailKind; VarDecl *LHS; DeclRefExpr *LHSDRE; VarDecl *RHS; @@ -3058,9 +3114,11 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( RHSDecl->getBeginLoc()}, Ty, clang::VK_LValue, RHSDecl, nullptr, NOUR_None); - ExprResult BinOpResult = tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); + std::pair BinOpResult = + tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); - return {LHSDecl, LHSDRE, RHSDecl, RHSDRE, BinOpResult.get()}; + return {BinOpResult.second, LHSDecl, LHSDRE, RHSDecl, RHSDRE, + BinOpResult.first.get()}; }; CombinerAttemptTy TopLevelCombinerInfo = formCombiner(VarTy); @@ -3081,12 +3139,20 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( } } + auto EmitFailureNote = [&](CombinerFailureKind CFK) { + if (CFK == CombinerFailureKind::BinOp) + return Diag(Loc, diag::note_acc_reduction_combiner_forming) + << CFK << BinaryOperator::getOpcodeStr(BinOp); + return Diag(Loc, diag::note_acc_reduction_combiner_forming) << CFK; + }; + // Since the 'root' level didn't fail, the only thing that could be successful // is a struct that we decompose on its individual fields. 
RecordDecl *RD = VarTy->getAsRecordDecl(); if (!RD) { Diag(Loc, diag::err_acc_reduction_recipe_no_op) << VarTy; + EmitFailureNote(TopLevelCombinerInfo.FailKind); tryCombiner(TopLevelCombinerInfo.LHSDRE, TopLevelCombinerInfo.RHSDRE, /*IncludeTrap=*/false); return true; @@ -3098,6 +3164,7 @@ bool SemaOpenACC::CreateReductionCombinerRecipe( if (!FieldCombinerInfo.Op || FieldCombinerInfo.Op->containsErrors()) { Diag(Loc, diag::err_acc_reduction_recipe_no_op) << FD->getType(); Diag(FD->getBeginLoc(), diag::note_acc_reduction_recipe_noop_field) << RD; + EmitFailureNote(FieldCombinerInfo.FailKind); tryCombiner(FieldCombinerInfo.LHSDRE, FieldCombinerInfo.RHSDRE, /*IncludeTrap=*/false); return true; diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index ee4fffef971e0..c1c2e4b715365 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = 
cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,7 +259,78 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] 
: !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int 
%[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -724,7 +866,100 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: 
%[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: 
cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -826,7 +1061,100 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member 
%[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1510,6 +1838,106 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} 
%[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1558,6 +1986,106 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], 
%[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp index 472e4ac0bb05b..853f345e53ddf 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp @@ -46,7 +46,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: 
!cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -59,7 +69,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -227,7 +247,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, 
!cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -258,7 +309,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -484,7 +566,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -520,7 +637,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// 
CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp index 112ff6567e26e..67e8460649f7e 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -119,7 +125,7 @@ void acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = 
cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -141,7 +147,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -631,7 +643,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -753,7 +792,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, 
!cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// 
CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp index 
7eaa822b86e9b..d74de8220225a 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -280,7 +300,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load 
{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -311,7 +362,38 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -700,7 +782,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// 
TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -736,7 +853,42 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// 
CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp index c2c0c77526f6d..a6df6c03f5c8e 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp @@ -7,7 +7,7 @@ struct HasOperatorsOutline { bool b; ~HasOperatorsOutline(); -HasOperatorsOutline &operator=(const HasOperatorsOutline &); + HasOperatorsOutline &operator=(const HasOperatorsOutline &); }; HasOperatorsOutline &operator+=(HasOperatorsOutline &, HasOperatorsOutline &); @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template void acc_combined() { @@ -109,7 +109,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -118,7 +124,7 @@ void 
acc_combined() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc parallel loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr reduction_operator init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -140,7 +146,13 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -630,7 +642,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // 
CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -752,7 +791,34 @@ void acc_combined() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// 
CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_combined() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = 
cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c index b439623e7d050..d65d5d4add0ac 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c @@ -160,7 +160,81 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -187,7 +261,81 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: 
%[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : 
!cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -710,7 +858,103 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} 
%[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} 
%[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -812,7 +1056,104 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : 
!cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast 
int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -1467,6 +1808,110 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = 
cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// 
CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -1515,6 +1960,110 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = 
cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield %[[RES_CAST]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast bool_to_int %[[RESULT]] : !cir.bool -> !s32i +// CHECK-NEXT: cir.yield 
%[[RES_CAST]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: %[[RES_CAST:.*]] = cir.cast int_to_bool %[[TERNARY]] : !s32i -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[RES_CAST]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index f99790277e5b7..f32fa2d2d6372 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// 
CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -188,7 +259,78 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// 
CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} 
%[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -724,7 +866,100 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : 
!cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// 
CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -826,7 +1061,100 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : 
!cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -1510,6 +1838,106 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, 
%[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -1558,6 +1986,106 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: 
cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c index 3e4583f261dae..9f7336727e5a9 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c @@ -45,7 +45,18 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: 
%[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -58,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -226,7 +247,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} 
%[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -257,7 +309,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -483,7 +566,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: 
cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -519,7 +637,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = 
cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp index 833cfad0708a1..ffd26319e9bfc 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp @@ -47,7 +47,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -60,7 +70,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -228,7 +248,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -259,7 +310,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : 
!cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -485,7 +567,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) 
: !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -521,6 +638,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp index ec4372d352626..1e367ee37a30d 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ 
void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -119,7 +125,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -141,7 +147,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -631,7 +643,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call 
@_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -753,7 +792,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = 
cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c index 0cee5c6b17903..2f42a5c63f149 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c @@ -46,7 +46,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -59,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -279,7 +299,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // 
CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -310,7 +361,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : 
!cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -699,7 +781,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, 
!cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -735,7 +852,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp index 822dd9f62cc2c..af7bcf3770fe1 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = 
cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -60,7 +70,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -280,7 +300,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// 
CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -311,7 +362,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -700,7 +782,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, 
!u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -736,7 +853,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, 
%[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp index 873bf5120ed88..ec890e2b1de65 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template void acc_compute() { @@ -109,7 +109,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -118,7 +124,7 @@ void acc_compute() { // CHECK-NEXT: } ; #pragma acc parallel reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr reduction_operator init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -140,7 +146,13 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr 
+// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -630,7 +642,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -752,7 +791,34 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: 
%[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store 
%[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c index b2d13628490f8..08daa702c47f8 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c @@ -46,7 +46,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: 
%[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !u32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -59,7 +69,17 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !u32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } ; @@ -278,7 +298,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], 
%[[LHS_STRIDE]] : !u32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -309,7 +360,38 @@ void acc_compute() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -698,7 +780,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// 
CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; @@ -734,7 +851,42 @@ void acc_compute() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = 
cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !u32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } ; diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp index 349e0fbc33a74..1a77c0f10a144 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp @@ -161,7 +161,78 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member 
%[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -188,7 +259,78 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = 
cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHSARG]][4] {name = "b"} : !cir.ptr 
-> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHSARG]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -724,7 +866,100 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: 
%[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: 
%[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -826,7 +1061,100 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} 
%[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// 
CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1510,6 +1838,106 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: 
%[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); @@ -1558,6 +1986,106 @@ void acc_loop() { // CHECK-NEXT: 
acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !s32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][1] {name = "u"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !u32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !u32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !u32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !u32i, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][2] {name = "f"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][2] {name = "f"} : !cir.ptr -> 
!cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.float, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][3] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.double, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.double +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.double +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.double, !cir.ptr +// +// CHECK-NEXT: %[[GET_MEM_LHS:.*]] = cir.get_member %[[LHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[GET_MEM_RHS:.*]] = cir.get_member %[[RHS_STRIDE]][4] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[LHS_CAST:.*]] = cir.cast bool_to_int %[[LHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: %[[RHS_CAST:.*]] = cir.cast bool_to_int %[[RHS_LOAD]] : !cir.bool -> !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_CAST]], %[[RHS_CAST]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_LHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[GET_MEM_RHS]] : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.bool +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[GET_MEM_LHS]] : !cir.bool, !cir.ptr +// +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i = 0; i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 8d9269bafc2fd..7faef7111a9c8 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp 
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -47,7 +47,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !cir.float, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -228,7 +248,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: 
%[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -259,7 +310,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -485,7 +567,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO 
OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -521,7 +638,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// 
CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !cir.float, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !cir.float +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !cir.float, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp index 1c895156b6171..43c9fbbce7533 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp @@ -17,7 +17,7 @@ struct HasOperatorsInline { bool &operator&&(HasOperatorsInline& other); bool &operator||(HasOperatorsInline& other); // For min/max - HasOperatorsInline &operator<(HasOperatorsInline& other); + bool operator<(HasOperatorsInline& other); HasOperatorsInline &operator=(HasOperatorsInline& other); }; @@ -110,7 +110,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -119,7 +125,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS18HasOperatorsInline : !cir.ptr reduction_operator init { // CHECK-NEXT: 
^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsInline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -141,7 +147,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -631,7 +643,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -753,7 +792,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = 
cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1528,6 +1594,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call 
@_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1605,6 +1703,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZN18HasOperatorsInlineltERS_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN18HasOperatorsInlineaSERS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff 
--git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp index 72e9d1f44e825..5353218866d47 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp @@ -47,7 +47,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -60,7 +70,17 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHSARG]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHSARG]] : !s32i, !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -280,7 +300,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: 
%[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -311,7 +362,38 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[MAX_IDX:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[ITR_CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[MAX_IDX]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[ITR_CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -700,7 +782,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); @@ -736,7 +853,42 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, 
%[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LHS_LOAD:.*]] = cir.load {{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[RHS_LOAD:.*]] = cir.load {{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[LHS_LOAD]], %[[RHS_LOAD]]) : !s32i, !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[CMP]], true { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[LHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }, false { +// CHECK-NEXT: %[[RESULT:.*]] = cir.load{{.*}} %[[RHS_STRIDE]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.yield %[[RESULT]] +// CHECK-NEXT: }) : (!cir.bool) -> !s32i +// CHECK-NEXT: cir.store{{.*}} %[[TERNARY]], %[[LHS_STRIDE]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } for(int i=0;i < 5; ++i); diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp index a36d41c1490f6..e193cfa1a5ab2 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp @@ -18,7 +18,7 @@ HasOperatorsOutline &operator^=(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator&&(HasOperatorsOutline &, HasOperatorsOutline &); bool &operator||(HasOperatorsOutline &, HasOperatorsOutline &); // For min/max -HasOperatorsOutline &operator<(HasOperatorsOutline &, HasOperatorsOutline &); +bool operator<(HasOperatorsOutline &, HasOperatorsOutline &); template void acc_loop() { @@ -109,7 +109,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -118,7 +124,7 @@ void acc_loop() { // CHECK-NEXT: } for(int i=0;i < 5; ++i); #pragma acc loop reduction(min:someVar) -// CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr 
reduction_operator init { +// CHECK: acc.reduction.recipe @reduction_min__ZTS19HasOperatorsOutline : !cir.ptr reduction_operator init { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr{{.*}}) // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !rec_HasOperatorsOutline, !cir.ptr, ["openacc.reduction.init", init] // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[ALLOCA]][0] {name = "i"} : !cir.ptr -> !cir.ptr @@ -140,7 +146,13 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr {{.*}}, %[[RHSARG:.*]]: !cir.ptr {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHSARG]], %[[RHSARG]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHSARG]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHSARG]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHSARG]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr {{.*}}, %[[ARG:.*]]: !cir.ptr {{.*}}): @@ -630,7 +642,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -752,7 +791,34 @@ void acc_loop() { // // CHECK-NEXT: } combiner { // CHECK-NEXT: 
^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}) -// TODO OpenACC: Expecting combination operation here +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !s64i, !cir.ptr, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[END_VAL:.*]] = cir.const #cir.int<5> : !s64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[END_VAL]]) : !s64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !s64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !s64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !s64i, !s64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !s64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}): @@ -1527,6 +1593,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> 
!cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): @@ -1604,6 +1702,38 @@ void acc_loop() { // CHECK-NEXT: acc.yield // CHECK-NEXT: } combiner { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[LHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[LHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[LHS_STRIDE:.*]] = cir.ptr_stride %[[LHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[RHS_DECAY:.*]] = cir.cast array_to_ptrdecay %[[RHSARG]] : !cir.ptr> -> !cir.ptr +// CHECK-NEXT: %[[RHS_STRIDE:.*]] = cir.ptr_stride %[[RHS_DECAY]], %[[ITR_LOAD]] : (!cir.ptr, !u64i) -> !cir.ptr +// CHECK-NEXT: %[[LT:.*]] = cir.call @_ZltR19HasOperatorsOutlineS0_(%[[LHS_STRIDE]], %[[RHS_STRIDE]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK-NEXT: %[[TERNARY:.*]] = cir.ternary(%[[LT]], true { +// CHECK-NEXT: cir.yield %[[LHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }, false { +// CHECK-NEXT: cir.yield %[[RHS_STRIDE]] : !cir.ptr +// CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +// CHECK-NEXT: cir.call @_ZN19HasOperatorsOutlineaSERKS_(%[[LHS_STRIDE]], %[[TERNARY]]) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield %[[LHSARG]] : 
!cir.ptr> // CHECK-NEXT: } destroy { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): diff --git a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp index 1b50336207305..c406644961942 100644 --- a/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/combined-construct-reduction-clause.cpp @@ -31,10 +31,12 @@ void uses(unsigned Parm) { #pragma acc serial loop reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} for(int i = 0; i < 5; ++i); #pragma acc kernels loop reduction(min: CoS, Array[I], Array[0:I]) diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c index 96c01d0aed7ba..cb490ed940c4c 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c @@ -59,10 +59,12 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or a composite of scalars. 
#pragma acc parallel reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -76,15 +78,17 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); struct CompositeHasComposite ChCArray[5]; - // expected-error@+6{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} + // expected-error@+7{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} - // expected-note@+4{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} - // expected-error@+3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@+5{{OpenACC 'reduction' variable reference must be a scalar variable or a composite of scalars, or an array, sub-array, or element of scalar types}} + // expected-error@+4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} + // expected-note@+2{{while forming binary operator '&='}} // expected-error@+1{{invalid operands to binary expression ('float' and 'float')}} #pragma acc parallel reduction(&: CoS, Array[I], ChCArray[0:I]) while (1); diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp index e3a487a94ec23..70dc3d6d88937 100644 --- a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp +++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp @@ -62,10 +62,12 @@ void uses(unsigned Parm) { // Vars in a reduction must be a scalar or 
a composite of scalars. #pragma acc parallel reduction(&: CoS, I, F) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} - // expected-error@-2{{invalid operands to binary expression ('float' and 'float')}} - // expected-error@-3{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} + // expected-note@-2{{while forming binary operator '&='}} + // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-error@-4{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-5{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-6{{while forming binary operator '&='}} + // expected-error@-7{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -78,7 +80,8 @@ void uses(unsigned Parm) { #pragma acc parallel reduction(&: CoS, Array[I], Array[0:I]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} @@ -221,7 +224,8 @@ void TemplUses(T Parm, U CoS, V ChC) { #pragma acc parallel reduction(&: CoS, Var, Parm) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+3{{invalid type 'struct CompositeOfScalars' used in OpenACC 'reduction' variable reference; type is not a scalar value}} // expected-note@#COS_FIELD{{used as field 'COS' of composite 'CompositeHasComposite'}} @@ -236,7 +240,8 @@ void TemplUses(T Parm, U CoS, V ChC) { #pragma acc parallel reduction(&: CoS, Array[Var], Array[0:Var]) // expected-error@-1{{variable of type 'float' referenced in OpenACC 'reduction' clause does not have a valid operation available}} // expected-note@#COS_FLOAT{{while forming combiner for compound type 'CompositeOfScalars'}} - // expected-error@-3{{invalid operands to binary expression ('float' and 'float')}} + // expected-note@-3{{while forming binary operator '&='}} + // expected-error@-4{{invalid operands to binary expression ('float' and 'float')}} while (1); // expected-error@+1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}} From 5c86520eb3f223de7a383ed696ace59978b98261 Mon Sep 17 00:00:00 
2001 From: Craig Topper Date: Mon, 20 Oct 2025 11:05:57 -0700 Subject: [PATCH 02/38] [RISCV] Remove shiftop/shiftopw PatFrags. NFC (#164050) Replace with PatGprShiftMaskXLen/PatGprShiftMask32 or using the ShiftMaskXLen/ShiftMask32 ComplexPattern direclty in patterns. This avoids various casts that were need to make a ComplexPattern work inside of a PatFrag. --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 24 ++++++++-------- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 34 +++++++++++------------ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 66717b96e4bc7..7c89686ebfb3c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1511,16 +1511,16 @@ def GIShiftMask32 : GIComplexOperandMatcher, GIComplexPatternEquiv; -class shiftop - : PatFrag<(ops node:$val, node:$count), - (operator node:$val, (XLenVT (shiftMaskXLen node:$count)))>; -class shiftopw - : PatFrag<(ops node:$val, node:$count), - (operator node:$val, (i64 (shiftMask32 node:$count)))>; +class PatGprShiftMaskXLen + : Pat<(OpNode GPR:$rs1, shiftMaskXLen:$rs2), + (Inst GPR:$rs1, shiftMaskXLen:$rs2)>; +class PatGprShiftMask32 + : Pat<(OpNode GPR:$rs1, shiftMask32:$rs2), + (Inst GPR:$rs1, shiftMask32:$rs2)>; -def : PatGprGpr, SLL>; -def : PatGprGpr, SRL>; -def : PatGprGpr, SRA>; +def : PatGprShiftMaskXLen; +def : PatGprShiftMaskXLen; +def : PatGprShiftMaskXLen; // This is a special case of the ADD instruction used to facilitate the use of a // fourth operand to emit a relocation on a symbol relating to this instruction. @@ -2203,9 +2203,9 @@ def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt), def : Pat<(i64 (sra (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)), (SRAIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>; -def : PatGprGpr, SLLW>; -def : PatGprGpr, SRLW>; -def : PatGprGpr, SRAW>; +def : PatGprShiftMask32; +def : PatGprShiftMask32; +def : PatGprShiftMask32; // Select W instructions if only the lower 32 bits of the result are used. 
def : PatGprGpr, ADDW>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 57fbaa04ec687..40d7341b745ef 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -506,8 +506,8 @@ def : Pat<(XLenVT (xor GPR:$rs1, invLogicImm:$rs2)), (XNOR GPR:$rs1, invLogicImm } // Predicates = [HasStdExtZbbOrZbkb] let Predicates = [HasStdExtZbbOrZbkb] in { -def : PatGprGpr, ROL>; -def : PatGprGpr, ROR>; +def : PatGprShiftMaskXLen; +def : PatGprShiftMaskXLen; def : PatGprImm; // There's no encoding for roli in the the 'B' extension as it can be @@ -517,29 +517,29 @@ def : Pat<(XLenVT (rotl GPR:$rs1, uimmlog2xlen:$shamt)), } // Predicates = [HasStdExtZbbOrZbkb] let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { -def : PatGprGpr, ROLW>; -def : PatGprGpr, RORW>; +def : PatGprShiftMask32; +def : PatGprShiftMask32; def : PatGprImm; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZbs] in { -def : Pat<(XLenVT (and (not (shiftop 1, (XLenVT GPR:$rs2))), GPR:$rs1)), - (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(XLenVT (and (not (shl 1, shiftMaskXLen:$rs2)), GPR:$rs1)), + (BCLR GPR:$rs1, shiftMaskXLen:$rs2)>; def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)), (BCLR GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (or (shiftop 1, (XLenVT GPR:$rs2)), GPR:$rs1)), - (BSET GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (xor (shiftop 1, (XLenVT GPR:$rs2)), GPR:$rs1)), - (BINV GPR:$rs1, GPR:$rs2)>; -def : Pat<(XLenVT (and (shiftop GPR:$rs1, (XLenVT GPR:$rs2)), 1)), - (BEXT GPR:$rs1, GPR:$rs2)>; - -def : Pat<(XLenVT (shiftop 1, (XLenVT GPR:$rs2))), - (BSET (XLenVT X0), GPR:$rs2)>; -def : Pat<(XLenVT (not (shiftop -1, (XLenVT GPR:$rs2)))), - (ADDI (XLenVT (BSET (XLenVT X0), GPR:$rs2)), -1)>; +def : Pat<(XLenVT (or (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)), + (BSET GPR:$rs1, shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (xor (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)), + (BINV GPR:$rs1, shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (and (srl GPR:$rs1, shiftMaskXLen:$rs2), 1)), + (BEXT GPR:$rs1, shiftMaskXLen:$rs2)>; + +def : Pat<(XLenVT (shl 1, shiftMaskXLen:$rs2)), + (BSET (XLenVT X0), shiftMaskXLen:$rs2)>; +def : Pat<(XLenVT (not (shl -1, shiftMaskXLen:$rs2))), + (ADDI (XLenVT (BSET (XLenVT X0), shiftMaskXLen:$rs2)), -1)>; def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)), (BCLRI GPR:$rs1, BCLRMask:$mask)>; From ad582e383369fc32fffb016be68b1ba7812f2325 Mon Sep 17 00:00:00 2001 From: Michal R Date: Mon, 20 Oct 2025 20:07:55 +0200 Subject: [PATCH 03/38] [BPF] Support for `DW_TAG_variant_part` in BTF generation (#155783) Variant part, represented by `DW_TAG_variant_part` is a structure with a discriminant and different variants, from which only one can be active and valid at the same time. The discriminant is the main difference between variant parts and unions represented by `DW_TAG_union` type. 
Variant parts are used by Rust enums, which look like: ```rust pub enum MyEnum { First { a: u32, b: i32 }, Second(u32), } ``` This type's debug info is the following `DICompositeType` with `DW_TAG_structure_type` tag: ```llvm !4 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyEnum", scope: !2, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !6, templateParams: !16, identifier: "faba668fd9f71e9b7cf3b9ac5e8b93cb") ``` With one element being also a `DICompositeType`, but with `DW_TAG_variant_part` tag: ```llvm !6 = !{!7} !7 = !DICompositeType(tag: DW_TAG_variant_part, scope: !4, file: !5, size: 96, align: 32, elements: !8, templateParams: !16, identifier: "e4aee046fc86d111657622fdcb8c42f7", discriminator: !21) ``` Which has a discriminator: ```llvm !21 = !DIDerivedType(tag: DW_TAG_member, scope: !4, file: !5, baseType: !13, size: 32, align: 32, flags: DIFlagArtificial) ``` Which then holds different variants as `DIDerivedType` elements with `DW_TAG_member` tag: ```llvm !8 = !{!9, !17} !9 = !DIDerivedType(tag: DW_TAG_member, name: "First", scope: !7, file: !5, baseType: !10, size: 96, align: 32, extraData: i32 0) !10 = !DICompositeType(tag: DW_TAG_structure_type, name: "First", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !11, templateParams: !16, identifier: "cc7748c842e275452db4205b190c8ff7") !11 = !{!12, !14} !12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !10, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) !13 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned) !14 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !10, file: !5, baseType: !15, size: 32, align: 32, offset: 64, flags: DIFlagPublic) !15 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) !16 = !{} !17 = !DIDerivedType(tag: DW_TAG_member, name: "Second", scope: !7, file: !5, baseType: !18, size: 96, align: 32, extraData: i32 1) !18 = !DICompositeType(tag: DW_TAG_structure_type, name: "Second", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !19, templateParams: !16, identifier: "a2094b1381f3082d504fbd0903aa7c06") !19 = !{!20} !20 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !18, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) ``` BPF backend was assuming that all the elements of any `DICompositeType` have tag `DW_TAG_member` and are instances of `DIDerivedType`. However, the single element of the outer composite type `!4` has tag `DW_TAG_variant_part` and is an instance of `DICompositeType`. The unconditional call of `cast` on all elements was causing an assertion failure when any Rust code with enums was compiled to the BPF target. Fix that by: * Handling `DW_TAG_variant_part` in `visitStructType`. * Replacing unconditional call of `cast` over `DICompositeType` elements with a `switch` statement, handling both `DW_TAG_member` and `DW_TAG_variant_part` and casting the element to an appropriate type (`DIDerivedType` or `DICompositeType`). 
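As a condensed illustration of the dispatch described in the bullets above (this is a standalone sketch, not the actual BTFDebug.cpp change; the real code is in the diff below), iterating the composite's elements by tag is what avoids the unconditional `cast<DIDerivedType>` that asserted on Rust enums:

```cpp
// Standalone sketch of the per-tag element dispatch; assumes the usual LLVM
// headers are available. The real implementation lives in BTFDebug.cpp.
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"

static void visitElements(const llvm::DICompositeType *CTy) {
  for (const llvm::DINode *Element : CTy->getElements()) {
    switch (Element->getTag()) {
    case llvm::dwarf::DW_TAG_member:
      // Ordinary field: a DIDerivedType, which is what the old code assumed
      // for every element.
      (void)llvm::cast<llvm::DIDerivedType>(Element);
      break;
    case llvm::dwarf::DW_TAG_variant_part:
      // Rust enum payload: a nested DICompositeType, so the old
      // cast<DIDerivedType> asserted here.
      (void)llvm::cast<llvm::DICompositeType>(Element);
      break;
    default:
      break;
    }
  }
}
```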
Fixes: https://github.com/llvm/llvm-project/issues/155778 --- llvm/lib/Target/BPF/BTFDebug.cpp | 112 ++++++++++++++++++---- llvm/test/CodeGen/BPF/BTF/variant-part.ll | 87 +++++++++++++++++ 2 files changed, 180 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/BTF/variant-part.ll diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index ba4b48990c647..9b5fc9d05e336 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -14,6 +14,7 @@ #include "BPF.h" #include "BPFCORE.h" #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -23,6 +24,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -301,21 +303,59 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { BTFType.NameOff = BDebug.addString(STy->getName()); + if (STy->getTag() == dwarf::DW_TAG_variant_part) { + // Variant parts might have a discriminator, which has its own memory + // location, and variants, which share the memory location afterwards. LLVM + // DI doesn't consider discriminator as an element and instead keeps + // it as a separate reference. + // To keep BTF simple, let's represent the structure as an union with + // discriminator as the first element. + // The offsets inside variant types are already handled correctly in the + // DI. + const auto *DTy = STy->getDiscriminator(); + if (DTy) { + struct BTF::BTFMember Discriminator; + + Discriminator.NameOff = BDebug.addString(DTy->getName()); + Discriminator.Offset = DTy->getOffsetInBits(); + const auto *BaseTy = DTy->getBaseType(); + Discriminator.Type = BDebug.getTypeId(BaseTy); + + Members.push_back(Discriminator); + } + } + // Add struct/union members. const DINodeArray Elements = STy->getElements(); for (const auto *Element : Elements) { struct BTF::BTFMember BTFMember; - const auto *DDTy = cast(Element); - BTFMember.NameOff = BDebug.addString(DDTy->getName()); - if (HasBitField) { - uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0; - BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits(); - } else { - BTFMember.Offset = DDTy->getOffsetInBits(); + switch (Element->getTag()) { + case dwarf::DW_TAG_member: { + const auto *DDTy = cast(Element); + + BTFMember.NameOff = BDebug.addString(DDTy->getName()); + if (HasBitField) { + uint8_t BitFieldSize = DDTy->isBitField() ? 
DDTy->getSizeInBits() : 0; + BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits(); + } else { + BTFMember.Offset = DDTy->getOffsetInBits(); + } + const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); + BTFMember.Type = BDebug.getTypeId(BaseTy); + break; + } + case dwarf::DW_TAG_variant_part: { + const auto *DCTy = dyn_cast(Element); + + BTFMember.NameOff = BDebug.addString(DCTy->getName()); + BTFMember.Offset = DCTy->getOffsetInBits(); + BTFMember.Type = BDebug.getTypeId(DCTy); + break; + } + default: + llvm_unreachable("Unexpected DI tag of a struct/union element"); } - const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); - BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } } @@ -672,16 +712,28 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, uint32_t &TypeId) { const DINodeArray Elements = CTy->getElements(); uint32_t VLen = Elements.size(); + // Variant parts might have a discriminator. LLVM DI doesn't consider it as + // an element and instead keeps it as a separate reference. But we represent + // it as an element in BTF. + if (CTy->getTag() == dwarf::DW_TAG_variant_part) { + const auto *DTy = CTy->getDiscriminator(); + if (DTy) { + visitTypeEntry(DTy); + VLen++; + } + } if (VLen > BTF::MAX_VLEN) return; // Check whether we have any bitfield members or not bool HasBitField = false; for (const auto *Element : Elements) { - auto E = cast(Element); - if (E->isBitField()) { - HasBitField = true; - break; + if (Element->getTag() == dwarf::DW_TAG_member) { + auto E = cast(Element); + if (E->isBitField()) { + HasBitField = true; + break; + } } } @@ -696,9 +748,22 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, // Visit all struct members. int FieldNo = 0; for (const auto *Element : Elements) { - const auto Elem = cast(Element); - visitTypeEntry(Elem); - processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + switch (Element->getTag()) { + case dwarf::DW_TAG_member: { + const auto Elem = cast(Element); + visitTypeEntry(Elem); + processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + break; + } + case dwarf::DW_TAG_variant_part: { + const auto Elem = cast(Element); + visitTypeEntry(Elem); + processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo); + break; + } + default: + llvm_unreachable("Unexpected DI tag of a struct/union element"); + } FieldNo++; } } @@ -781,16 +846,25 @@ void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion, void BTFDebug::visitCompositeType(const DICompositeType *CTy, uint32_t &TypeId) { auto Tag = CTy->getTag(); - if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_variant_part: // Handle forward declaration differently as it does not have members. 
if (CTy->isForwardDecl()) visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type, TypeId); else visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type, TypeId); - } else if (Tag == dwarf::DW_TAG_array_type) + break; + case dwarf::DW_TAG_array_type: visitArrayType(CTy, TypeId); - else if (Tag == dwarf::DW_TAG_enumeration_type) + break; + case dwarf::DW_TAG_enumeration_type: visitEnumType(CTy, TypeId); + break; + default: + llvm_unreachable("Unexpected DI tag of a composite type"); + } } bool BTFDebug::IsForwardDeclCandidate(const DIType *Base) { diff --git a/llvm/test/CodeGen/BPF/BTF/variant-part.ll b/llvm/test/CodeGen/BPF/BTF/variant-part.ll new file mode 100644 index 0000000000000..1071e618f601b --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/variant-part.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=bpfel -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; RUN: llc -mtriple=bpfeb -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; +; Source: +; #![no_std] +; #![no_main] +; +; pub enum MyEnum { +; First { a: u32, b: i32 }, +; Second(u32), +; } +; +; #[unsafe(no_mangle)] +; pub static X: MyEnum = MyEnum::First { a: 54, b: -23 }; +; +; #[cfg(not(test))] +; #[panic_handler] +; fn panic(_info: &core::panic::PanicInfo) -> ! { +; loop {} +; } +; Compilation flag: +; cargo +nightly rustc -Zbuild-std=core --target=bpfel-unknown-none -- --emit=llvm-bc +; llvm-extract --glob=X $(find target/ -name "*.bc" | head -n 1) -o variant-part.bc +; llvm-dis variant-part.bc -o variant-part.ll + +; ModuleID = 'variant-part.bc' +source_filename = "c0znihgkvro8hs0n88fgrtg6x" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "bpfel" + +@X = constant [12 x i8] c"\00\00\00\006\00\00\00\E9\FF\FF\FF", align 4, !dbg !0 + +!llvm.module.flags = !{!22, !23, !24, !25} +!llvm.ident = !{!26} +!llvm.dbg.cu = !{!27} + +; CHECK-BTF: [1] STRUCT 'MyEnum' size=12 vlen=1 +; CHECK-BTF-NEXT: '(anon)' type_id=3 bits_offset=0 +; CHECK-BTF-NEXT: [2] INT 'u32' size=4 bits_offset=0 nr_bits=32 encoding=(none) +; CHECK-BTF-NEXT: [3] UNION '(anon)' size=12 vlen=3 +; CHECK-BTF-NEXT: '(anon)' type_id=2 bits_offset=0 +; CHECK-BTF-NEXT: 'First' type_id=4 bits_offset=0 +; CHECK-BTF-NEXT: 'Second' type_id=6 bits_offset=0 +; CHECK-BTF-NEXT: [4] STRUCT 'First' size=12 vlen=2 +; CHECK-BTF-NEXT: 'a' type_id=2 bits_offset=32 +; CHECK-BTF-NEXT: 'b' type_id=5 bits_offset=64 +; CHECK-BTF-NEXT: [5] INT 'i32' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-BTF-NEXT: [6] STRUCT 'Second' size=12 vlen=1 +; CHECK-BTF-NEXT: '__0' type_id=2 bits_offset=32 +; CHECK-BTF-NEXT: [7] VAR 'X' type_id=1, linkage=global +; CHECK-BTF-NEXT: [8] DATASEC '.rodata' size=0 vlen=1 +; CHECK-BTF-NEXT: type_id=7 offset=0 size=12 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "X", scope: !2, file: !3, line: 10, type: !4, isLocal: false, isDefinition: true, align: 32) +!2 = !DINamespace(name: "variant_part", scope: null) +!3 = !DIFile(filename: "variant-part/src/main.rs", directory: "/tmp/variant-part", checksumkind: CSK_MD5, checksum: "b94cd53886ea8f14cbc116b36bc7dd36") +!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyEnum", scope: !2, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !6, templateParams: !16, identifier: "faba668fd9f71e9b7cf3b9ac5e8b93cb") +!5 = 
!DIFile(filename: "", directory: "") +!6 = !{!7} +!7 = !DICompositeType(tag: DW_TAG_variant_part, scope: !4, file: !5, size: 96, align: 32, elements: !8, templateParams: !16, identifier: "e4aee046fc86d111657622fdcb8c42f7", discriminator: !21) +!8 = !{!9, !17} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "First", scope: !7, file: !5, baseType: !10, size: 96, align: 32, extraData: i32 0) +!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "First", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !11, templateParams: !16, identifier: "cc7748c842e275452db4205b190c8ff7") +!11 = !{!12, !14} +!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !10, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!13 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned) +!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !10, file: !5, baseType: !15, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!15 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) +!16 = !{} +!17 = !DIDerivedType(tag: DW_TAG_member, name: "Second", scope: !7, file: !5, baseType: !18, size: 96, align: 32, extraData: i32 1) +!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "Second", scope: !4, file: !5, size: 96, align: 32, flags: DIFlagPublic, elements: !19, templateParams: !16, identifier: "a2094b1381f3082d504fbd0903aa7c06") +!19 = !{!20} +!20 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !18, file: !5, baseType: !13, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!21 = !DIDerivedType(tag: DW_TAG_member, scope: !4, file: !5, baseType: !13, size: 32, align: 32, flags: DIFlagArtificial) +!22 = !{i32 8, !"PIC Level", i32 2} +!23 = !{i32 7, !"PIE Level", i32 2} +!24 = !{i32 7, !"Dwarf Version", i32 4} +!25 = !{i32 2, !"Debug Info Version", i32 3} +!26 = !{!"rustc version 1.91.0-nightly (160e7623e 2025-08-26)"} +!27 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !28, producer: "clang LLVM (rustc version 1.91.0-nightly (160e7623e 2025-08-26))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !29, splitDebugInlining: false, nameTableKind: None) +!28 = !DIFile(filename: "variant-part/src/main.rs/@/c0znihgkvro8hs0n88fgrtg6x", directory: "/tmp/variant-part") +!29 = !{!0} From c01a22363034b98a21affde44b1b45ba5d022c71 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 20 Oct 2025 11:12:26 -0700 Subject: [PATCH 04/38] [lldb] Add try_lock to SBMutex (#164109) Add `try_lock` to confirm to Lockable, which is necessary to use it with `std::scoped_lock`. --- lldb/include/lldb/API/SBMutex.h | 4 ++++ lldb/source/API/SBMutex.cpp | 9 +++++++++ lldb/unittests/API/SBMutexTest.cpp | 5 +++++ 3 files changed, 18 insertions(+) diff --git a/lldb/include/lldb/API/SBMutex.h b/lldb/include/lldb/API/SBMutex.h index 717d5f86cbc1c..826ad077f159f 100644 --- a/lldb/include/lldb/API/SBMutex.h +++ b/lldb/include/lldb/API/SBMutex.h @@ -31,6 +31,10 @@ class LLDB_API SBMutex { /// Releases ownership of this lock. void unlock() const; + /// Tries to lock the mutex. Returns immediately. On successful lock + /// acquisition returns true, otherwise returns false. + bool try_lock() const; + private: // Private constructor used by SBTarget to create the Target API mutex. // Requires a friend declaration. 
diff --git a/lldb/source/API/SBMutex.cpp b/lldb/source/API/SBMutex.cpp index 445076b5a9174..c7844dec658cc 100644 --- a/lldb/source/API/SBMutex.cpp +++ b/lldb/source/API/SBMutex.cpp @@ -58,3 +58,12 @@ void SBMutex::unlock() const { if (m_opaque_sp) m_opaque_sp->unlock(); } + +bool SBMutex::try_lock() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_sp) + return m_opaque_sp->try_lock(); + + return false; +} diff --git a/lldb/unittests/API/SBMutexTest.cpp b/lldb/unittests/API/SBMutexTest.cpp index aafad59d58c17..18dc420086d0a 100644 --- a/lldb/unittests/API/SBMutexTest.cpp +++ b/lldb/unittests/API/SBMutexTest.cpp @@ -36,11 +36,16 @@ TEST_F(SBMutexTest, LockTest) { std::future f; { lldb::SBMutex lock = target.GetAPIMutex(); + + ASSERT_TRUE(lock.try_lock()); + lock.unlock(); + std::lock_guard lock_guard(lock); ASSERT_FALSE(locked.exchange(true)); f = std::async(std::launch::async, [&]() { ASSERT_TRUE(locked); + EXPECT_FALSE(lock.try_lock()); target.BreakpointCreateByName("foo", "bar"); ASSERT_FALSE(locked); }); From c0073a9170aaa4f3504f7cdf20758176bcb14ac1 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Mon, 20 Oct 2025 21:14:06 +0300 Subject: [PATCH 05/38] [GitHub][CI] Run clang-tidy in dedicated container (#164290) Tested in https://github.com/llvm/llvm-project/pull/160193. --- .github/workflows/pr-code-lint.yml | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml index 776ec4af9d2dc..e67b518149c2c 100644 --- a/.github/workflows/pr-code-lint.yml +++ b/.github/workflows/pr-code-lint.yml @@ -20,7 +20,7 @@ jobs: run: shell: bash container: - image: 'ghcr.io/llvm/ci-ubuntu-24.04:latest' + image: 'ghcr.io/llvm/ci-ubuntu-24.04-lint' timeout-minutes: 60 concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -31,6 +31,11 @@ jobs: with: fetch-depth: 2 + # FIXME: same as in ".github/workflows/pr-code-format.yml" + - name: Set Safe Directory + run: | + chown -R root $(pwd) + - name: Get changed files id: changed-files uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5 @@ -46,22 +51,6 @@ jobs: run: | echo "Changed files:" echo "$CHANGED_FILES" - - # The clang tidy version should always be upgraded to the first version - # of a release cycle (x.1.0) or the last version of a release cycle, or - # if there have been relevant clang-format backports. - - name: Install clang-tidy - uses: aminya/setup-cpp@a276e6e3d1db9160db5edc458e99a30d3b109949 # v1.7.1 - with: - clang-tidy: 21.1.0 - - - name: Setup Python env - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.13' - - - name: Install Python dependencies - run: python3 -m pip install -r llvm/utils/git/requirements_linting.txt # TODO: create special mapping for 'codegen' targets, for now build predefined set # TODO: add entrypoint in 'compute_projects.py' that only adds a project and its direct dependencies From a39704f496b25816fdafc8136c9eccd08df135a1 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Mon, 20 Oct 2025 11:33:06 -0700 Subject: [PATCH 06/38] [NFC] Add myself to CODEOWNERS for AMD dialects (#164289) Having taken on a maintainer role for these dialects, make it official with a CODEOWNERS entry. 
--------- Co-authored-by: Jakub Kuderski --- .github/CODEOWNERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 64fb60a486a7c..3a0a291ccb24c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -53,6 +53,13 @@ /mlir/include/mlir/Interfaces/DestinationStyleOpInterface.* @matthias-springer /mlir/lib/Interfaces/DestinationStyleOpInterface.* @matthias-springer +# AMDGPU and ROCDL dialects in MLIR. +/mlir/include/mlir/Dialect/AMDGPU @krzysz00 @kuhar +/mlir/lib/Dialect/AMDGPU @krzysz00 @kuhar +/mlir/lib/Conversion/*AMDGPU* @krzysz00 @kuhar +/mlir/lib/Conversion/*ToROCDL @krzysz00 @kuhar +/mlir/include/mlir/Dialect/LLVMIR/ROCDL* @krzysz00 @kuhar + # Bufferization Dialect in MLIR. /mlir/include/mlir/Dialect/Bufferization @matthias-springer /mlir/lib/Dialect/Bufferization @matthias-springer From 894eaf481542adefde861a7e39f769f21d3f4fa4 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Mon, 20 Oct 2025 14:35:30 -0400 Subject: [PATCH 07/38] [ADT] Deprecate StringSwitch Cases with 4+ args. NFC. (#164276) Suggest the `initializer_list` overload instead. 4+ args is an arbitrary number that allows for incremental deprecation without having too update too many call sites. For more context, see https://github.com/llvm/llvm-project/pull/163117. --- llvm/include/llvm/ADT/StringSwitch.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h index 26d568298207e..2262b1162e330 100644 --- a/llvm/include/llvm/ADT/StringSwitch.h +++ b/llvm/include/llvm/ADT/StringSwitch.h @@ -98,11 +98,13 @@ class StringSwitch { return CasesImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesImpl({S0, S1, S2, S3, S4}, Value); @@ -179,11 +181,13 @@ class StringSwitch { return CasesLowerImpl({S0, S1, S2}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, T Value) { return CasesLowerImpl({S0, S1, S2, S3}, Value); } + [[deprecated("Pass cases in std::initializer_list instead")]] StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2, StringLiteral S3, StringLiteral S4, T Value) { return CasesLowerImpl({S0, S1, S2, S3, S4}, Value); From 7e1f79c9935fe86b072119fdbb01ee25a80060e6 Mon Sep 17 00:00:00 2001 From: HighW4y2H3ll Date: Mon, 20 Oct 2025 12:10:34 -0700 Subject: [PATCH 08/38] [Bazel] Add more llvm tools (#163228) Adding llvm-ir2vec, llvm-ctxprof-util (and llvm-sim) in the Bazel configs. llvm-ctxprof-util and llvm-ir2vec are used in several LIT unit tests, and the missing binary is causing unit test failures. 
llvm-ctxprof-util: https://github.com/llvm/llvm-project/blob/15cde999d47c3edc7647faf5fd967f5d5d88416a/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll#L2 llvm-ir2vec: https://github.com/llvm/llvm-project/blob/55d4e92c8821d5543469118a76fe38db866377b7/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py#L60 llvm-sim: https://github.com/llvm/llvm-project/blob/894eaf481542adefde861a7e39f769f21d3f4fa4/llvm/test/lit.cfg.py#L268 --- .../llvm-project-overlay/llvm/BUILD.bazel | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 5357a6abe0ca1..b415a367d2c19 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -5671,6 +5671,57 @@ cc_binary( ], ) +cc_binary( + name = "llvm-sim", + testonly = True, + srcs = glob([ + "tools/llvm-sim/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":Core", + ":IRReader", + ":Support", + ":config", + ], +) + +cc_binary( + name = "llvm-ir2vec", + testonly = True, + srcs = glob([ + "tools/llvm-ir2vec/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":Core", + ":IRReader", + ":Support", + ":config", + ], +) + +cc_binary( + name = "llvm-ctxprof-util", + testonly = True, + srcs = glob([ + "tools/llvm-ctxprof-util/*.cpp", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Core", + ":Object", + ":ProfileData", + ":Support", + ":config", + ], +) + cc_binary( name = "obj2yaml", testonly = True, From 628404105b170a19273d74a5e7d9c6e3470fe58b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 20 Oct 2025 12:35:41 -0700 Subject: [PATCH 09/38] [ADT] Add llvm::countr_zero_constexpr (#164188) This patch implements llvm::countr_zero_constexpr, a constexpr version of llvm::countr_zero, in terms of llvm::popcount while making llvm::popcount a constexpr function at the same time. The new function is intended to serve as a marker. When we switch to C++20, we will most likely go through functions in llvm/ADT/bit.h and replace them with their counterparts from . With llvm::countr_zero_constexpr, we can easily replace its use with std::countr_zero. This patch reimplements ConstantLog2 in terms of the new function. --- llvm/include/llvm/ADT/bit.h | 24 +++++++++++++++++++----- llvm/include/llvm/Support/MathExtras.h | 4 +--- llvm/unittests/ADT/BitTest.cpp | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index 8b60b6998ca0b..5971b75045b6b 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -151,7 +151,7 @@ template >> /// Count the number of set bits in a value. /// Ex. popcount(0xF000F000) = 8 /// Returns 0 if Value is zero. -template [[nodiscard]] inline int popcount(T Value) noexcept { +template [[nodiscard]] constexpr int popcount(T Value) noexcept { static_assert(std::is_unsigned_v, "T must be an unsigned integer type"); static_assert(sizeof(T) <= 8, "T must be 8 bytes or less"); @@ -178,7 +178,23 @@ template [[nodiscard]] inline int popcount(T Value) noexcept { } /// Count number of 0's from the least significant bit to the most -/// stopping at the first 1. +/// stopping at the first 1. +/// +/// A constexpr version of countr_zero. +/// +/// Only unsigned integral types are allowed. +/// +/// Returns std::numeric_limits::digits on an input of 0. 
+template [[nodiscard]] constexpr int countr_zero_constexpr(T Val) { + static_assert(std::is_unsigned_v, + "Only unsigned integral types are allowed."); + // "(Val & -Val) - 1" generates a mask with all bits set up to (but not + // including) the least significant set bit of Val. + return llvm::popcount(static_cast>((Val & -Val) - 1)); +} + +/// Count number of 0's from the least significant bit to the most +/// stopping at the first 1. /// /// Only unsigned integral types are allowed. /// @@ -208,9 +224,7 @@ template [[nodiscard]] int countr_zero(T Val) { #endif } - // Fallback to popcount. "(Val & -Val) - 1" is a bitmask with all bits below - // the least significant 1 set. - return llvm::popcount(static_cast>((Val & -Val) - 1)); + return countr_zero_constexpr(Val); } /// Count number of 0's from the most significant bit to the least diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 412323354525b..9bbb8a2a30541 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -316,11 +316,9 @@ inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, /// Valid only for positive powers of two. template constexpr size_t ConstantLog2() { static_assert(llvm::isPowerOf2_64(kValue), "Value is not a valid power of 2"); - return 1 + ConstantLog2(); + return llvm::countr_zero_constexpr(kValue); } -template <> constexpr size_t ConstantLog2<1>() { return 0; } - template LLVM_DEPRECATED("Use ConstantLog2 instead", "ConstantLog2") constexpr size_t CTLog2() { diff --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp index 5b3df918e62b0..e8041bb6f7f58 100644 --- a/llvm/unittests/ADT/BitTest.cpp +++ b/llvm/unittests/ADT/BitTest.cpp @@ -286,6 +286,28 @@ TEST(BitTest, BitCeilConstexpr) { static_assert(llvm::bit_ceil_constexpr(257u) == 512); } +TEST(BitTest, CountrZeroConstexpr) { + static_assert(llvm::countr_zero_constexpr(0u) == 32); + static_assert(llvm::countr_zero_constexpr(1u) == 0); + static_assert(llvm::countr_zero_constexpr(2u) == 1); + static_assert(llvm::countr_zero_constexpr(3u) == 0); + static_assert(llvm::countr_zero_constexpr(4u) == 2); + static_assert(llvm::countr_zero_constexpr(8u) == 3); + static_assert(llvm::countr_zero_constexpr(0x80000000u) == 31); + + static_assert(llvm::countr_zero_constexpr(0ull) == 64); + static_assert(llvm::countr_zero_constexpr(1ull) == 0); + static_assert(llvm::countr_zero_constexpr(0x100000000ull) == 32); + static_assert(llvm::countr_zero_constexpr(0x8000000000000000ull) == 63); + + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits::max()) == 0); + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits::max()) == 0); + static_assert( + llvm::countr_zero_constexpr(std::numeric_limits::max()) == 0); +} + TEST(BitTest, CountlZero) { uint8_t Z8 = 0; uint16_t Z16 = 0; From ef87da06326e879ba74d0935673a8c18d0e69825 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Mon, 20 Oct 2025 15:41:12 -0400 Subject: [PATCH 10/38] [mlir][spirv] Remove invalid canon pattern for GL.Length (#164301) This rewrite does not preserve numerics: for example, we'd expect the maximum fp value to yield Inf instead of identity. `GL.Length` does not allow for fast math flags, so we need to remove this. Special cases (constants) can be handled via a folder if someone wants to implement one. 
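To make the numeric argument concrete, here is a small host-side illustration in plain C++ (not SPIR-V; it assumes scalar `GL.Length` is evaluated as `sqrt(x*x)`, which is what makes it overflow): for the largest finite float the length-style computation produces +inf, while the `FAbs` that the removed pattern substituted returns the value unchanged.

```cpp
// Plain C++ sketch of the numeric hazard, under the assumption above: the
// squared term overflows for FLT_MAX, so sqrt(x*x) yields +inf, whereas
// fabs(x) (what the removed rewrite produced) stays at FLT_MAX.
#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  float x = FLT_MAX;
  float sq = x * x;               // overflows to +inf in float precision
  float asLength = std::sqrt(sq); // sqrt(+inf) == +inf
  float asFAbs = std::fabs(x);    // stays FLT_MAX
  std::printf("sqrt(x*x) = %g, fabs(x) = %g\n", asLength, asFAbs);
  return 0;
}
```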
--- .../Dialect/SPIRV/IR/SPIRVCanonicalization.td | 8 ------- .../SPIRV/IR/SPIRVGLCanonicalization.cpp | 4 ++-- .../SPIRV/Transforms/gl-canonicalize.mlir | 22 ------------------- 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td index 39fbab8f37a2e..e8d2274d29aa0 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td @@ -75,11 +75,3 @@ def ConvertComparisonIntoClamp2_#CmpClampPair[0] : Pat< )), (CmpClampPair[1] $input, $min, $max)>; } - -//===----------------------------------------------------------------------===// -// spirv.GL.Length -> spirv.GL.FAbs -//===----------------------------------------------------------------------===// - -def ConvertGLLengthToGLFAbs : Pat< - (SPIRV_GLLengthOp SPIRV_Float:$operand), - (SPIRV_GLFAbsOp $operand)>; diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp index 46acb8c156fc6..3ad8057a58dc9 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVGLCanonicalization.cpp @@ -34,8 +34,8 @@ void populateSPIRVGLCanonicalizationPatterns(RewritePatternSet &results) { ConvertComparisonIntoClamp2_SPIRV_SLessThanOp, ConvertComparisonIntoClamp2_SPIRV_SLessThanEqualOp, ConvertComparisonIntoClamp2_SPIRV_ULessThanOp, - ConvertComparisonIntoClamp2_SPIRV_ULessThanEqualOp, - ConvertGLLengthToGLFAbs>(results.getContext()); + ConvertComparisonIntoClamp2_SPIRV_ULessThanEqualOp>( + results.getContext()); } } // namespace spirv } // namespace mlir diff --git a/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir index 33b877667512e..c1447b38f0a48 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/gl-canonicalize.mlir @@ -177,25 +177,3 @@ func.func @clamp_ulessthanequal(%input: i32, %min: i32, %max: i32) -> i32 { // CHECK-NEXT: spirv.ReturnValue [[RES]] spirv.ReturnValue %2 : i32 } - -// ----- - -//===----------------------------------------------------------------------===// -// spirv.GL.Length -//===----------------------------------------------------------------------===// - -// CHECK-LABEL: @convert_length_into_fabs_scalar -func.func @convert_length_into_fabs_scalar(%arg0 : f32) -> f32 { - //CHECK: spirv.GL.FAbs {{%.*}} : f32 - //CHECK-NOT: spirv.GL.Length - %0 = spirv.GL.Length %arg0 : f32 -> f32 - spirv.ReturnValue %0 : f32 -} - -// CHECK-LABEL: @dont_convert_length_into_fabs_vec -func.func @dont_convert_length_into_fabs_vec(%arg0 : vector<3xf32>) -> f32 { - //CHECK: spirv.GL.Length {{%.*}} : vector<3xf32> -> f32 - //CHECK-NOT: spirv.GL.FAbs - %0 = spirv.GL.Length %arg0 : vector<3xf32> -> f32 - spirv.ReturnValue %0 : f32 -} From c683f215e56bd12167eb7bf7e77473b7fad20def Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 20 Oct 2025 15:47:46 -0400 Subject: [PATCH 11/38] [NFC][Clang][AMDGPU] Fix upstream and downstream difference (#164304) These two files were left during the upstream of the corresponding feature. 
--- .../lib/Headers/__clang_hip_runtime_wrapper.h | 1 + .../test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index fb0ece96e1418..19ce7a5d2c86b 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -26,6 +26,7 @@ #define __managed__ __attribute__((managed)) #define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) +#define __no_cluster__ __attribute__((no_cluster)) #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl new file mode 100644 index 0000000000000..be822a6e55f58 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1250 -disable-llvm-passes -fno-ident -emit-llvm %s -o - | FileCheck %s + +kernel void foo(global int *p) { *p = 1; } +// CHECK: Function Attrs: convergent norecurse nounwind +// CHECK-LABEL: define dso_local amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: alwaysinline convergent norecurse nounwind +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: ret void +// +//. 
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } +// CHECK: attributes #[[ATTR2]] = { convergent nounwind } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META2:![0-9]+]] = !{i32 2, i32 0} +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4]] = !{!"none"} +// CHECK: [[META5]] = !{!"int*"} +// CHECK: [[META6]] = !{!""} +// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} +// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} +// CHECK: [[META11]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"int", [[META10]], i64 0} +//. From c375c414cb41a1e4a00f5a4673087c3430e83242 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Mon, 20 Oct 2025 13:04:10 -0700 Subject: [PATCH 12/38] [mlir][python] Add Pythonic wrappers for gpu ops (#163883) Add builders on the Python side that match builders in the C++ side, add tests for launching GPU kernels and regions, and correct some small documentation mistakes. This reflects the API decisions already made in the func dialect's Python bindings and makes use of the GPU dialect's bindings work more similar to C++ interface. 
--- mlir/docs/Dialects/GPU.md | 2 +- mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 2 +- mlir/python/mlir/dialects/gpu/__init__.py | 184 ++++++++++++++++++++- mlir/test/python/dialects/gpu/dialect.py | 96 ++++++++++- 4 files changed, 280 insertions(+), 4 deletions(-) diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index 8d4d2ca3e5743..c16ed57737e5b 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -121,7 +121,7 @@ func.func @main() { gpu.launch blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1) threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) { - gpu.printf "Hello from %d\n" %6 : index + gpu.printf "Hello from %d\n", %6 : index gpu.terminator } return diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 987fc13e0508d..a6c6038e1e224 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -584,7 +584,7 @@ def GPU_DynamicSharedMemoryOp : GPU_Op<"dynamic_shared_memory", [Pure]> This operation provides a memref pointer to the start of dynamic shared memory, often referred to as workgroup memory. It's important to note that this dynamic shared memory needs to be allocated at kernel launch. One can - conveniently utilize `the dynamic_shared_memory_size` parameter of + conveniently utilize the `dynamic_shared_memory_size` parameter of `gpu.launch` for this purpose. Examples: diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py index b14ea68938160..2fbcbb059f87a 100644 --- a/mlir/python/mlir/dialects/gpu/__init__.py +++ b/mlir/python/mlir/dialects/gpu/__init__.py @@ -6,7 +6,7 @@ from .._gpu_ops_gen import _Dialect from .._gpu_enum_gen import * from ..._mlir_libs._mlirDialectsGPU import * -from typing import Callable, Sequence, Union, Optional, List +from typing import Any, Callable, Sequence, Tuple, Union, Optional, List try: from ...ir import ( @@ -21,15 +21,24 @@ DictAttr, Attribute, DenseI32ArrayAttr, + Value, ) + from ...extras.meta import region_op + from ...extras import types as T + from ..arith import constant, ConstantOp from .._ods_common import ( get_default_loc_context as _get_default_loc_context, _cext as _ods_cext, + get_op_result_or_op_results, ) except ImportError as e: raise RuntimeError("Error loading imports from extension module") from e +def gpu_async_token(): + return Type.parse("!gpu.async.token") + + @_ods_cext.register_operation(_Dialect, replace=True) class GPUFuncOp(GPUFuncOp): __doc__ = GPUFuncOp.__doc__ @@ -151,3 +160,176 @@ def entry_block(self) -> Block: @property def arguments(self) -> Sequence[Type]: return self.function_type.value.inputs + + +def _convert_literal_to_constant(value: Union[int, ConstantOp, Value]) -> Value: + if isinstance(value, int): + return constant(T.index(), value) + elif isinstance(value, (ConstantOp, Value)): + return value + else: + raise ValueError(f"Invalid value: {value}") + + +@_ods_cext.register_operation(_Dialect, replace=True) +class LaunchFuncOp(LaunchFuncOp): + __doc__ = LaunchFuncOp.__doc__ + + def __init__( + self, + kernel: List[str], + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + kernel_operands: Optional[List[Value]] = None, + async_dependencies: Optional[List[Value]] = None, + dynamic_shared_memory_size: Optional[Value] = None, + async_object=None, + *, + loc=None, + ip=None, + ): + if async_dependencies is None: + async_dependencies = [] + async_token = None + if len(async_dependencies): + async_token 
= gpu_async_token() + + grid_size_x, grid_size_y, grid_size_z = map( + _convert_literal_to_constant, grid_size + ) + block_size_x, block_size_y, block_size_z = map( + _convert_literal_to_constant, block_size + ) + + super().__init__( + async_token, + async_dependencies, + kernel, + grid_size_x, + grid_size_y, + grid_size_z, + block_size_x, + block_size_y, + block_size_z, + kernel_operands, + dynamicSharedMemorySize=dynamic_shared_memory_size, + asyncObject=async_object, + loc=loc, + ip=ip, + ) + + +def launch_func( + kernel: List[str], + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + kernel_operands: Optional[List[Value]] = None, + async_dependencies: Optional[List[Value]] = None, + dynamic_shared_memory_size: Optional[Value] = None, + async_object=None, + *, + loc=None, + ip=None, +) -> Union[Value, List[Value], LaunchFuncOp]: + op = LaunchFuncOp( + kernel=kernel, + grid_size=grid_size, + block_size=block_size, + kernel_operands=kernel_operands, + async_dependencies=async_dependencies, + dynamic_shared_memory_size=dynamic_shared_memory_size, + async_object=async_object, + loc=loc, + ip=ip, + ) + results = op.results + if len(results) == 1: + return results[0] + elif len(results) > 1: + return results + else: + return op + + +def wait( + async_dependencies: Optional[List[Value]] = None, *, loc=None, ip=None +) -> Union[Value, List[Value], WaitOp]: + if async_dependencies is None: + async_dependencies = [] + return get_op_result_or_op_results( + WaitOp(gpu_async_token(), async_dependencies, loc=loc, ip=ip) + ) + + +@_ods_cext.register_operation(_Dialect, replace=True) +class LaunchOp(LaunchOp): + __doc__ = LaunchOp.__doc__ + + def __init__( + self, + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + async_dependencies=None, + dynamic_shared_memory_size: Optional[Value] = None, + *, + loc=None, + ip=None, + ): + if async_dependencies is None: + async_dependencies = [] + async_token = None + if len(async_dependencies): + async_token = gpu_async_token() + grid_size_x, grid_size_y, grid_size_z = map( + _convert_literal_to_constant, grid_size + ) + block_size_x, block_size_y, block_size_z = map( + _convert_literal_to_constant, block_size + ) + + super().__init__( + async_token, + async_dependencies, + grid_size_x, + grid_size_y, + grid_size_z, + block_size_x, + block_size_y, + block_size_z, + dynamicSharedMemorySize=dynamic_shared_memory_size, + loc=loc, + ip=ip, + ) + self.regions[0].blocks.append(*[T.index() for _ in range(12)]) + + +def launch_( + grid_size: Tuple[Any, Any, Any], + block_size: Tuple[Any, Any, Any], + async_dependencies=None, + dynamic_shared_memory_size: Optional[Value] = None, + *, + loc=None, + ip=None, +): + grid_size = tuple(map(_convert_literal_to_constant, grid_size)) + block_size = tuple(map(_convert_literal_to_constant, block_size)) + launch_op = LaunchOp( + grid_size, + block_size, + async_dependencies, + dynamic_shared_memory_size, + loc=loc, + ip=ip, + ) + return launch_op + + +launch = region_op(launch_, terminator=lambda *_args: terminator()) + + +_printf = printf + + +def printf(format, *args, loc=None, ip=None): + return _printf(format=format, args=args, loc=loc, ip=ip) diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 66c401886804c..3945c99c41091 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -2,7 +2,8 @@ from mlir.ir import * import mlir.ir as ir -import mlir.dialects.gpu as gpu +from mlir.dialects import 
gpu, func, arith, math +from mlir.extras import types as T import mlir.dialects.gpu.passes from mlir.passmanager import * @@ -157,3 +158,96 @@ def builder(func: gpu.GPUFuncOp) -> None: # CHECK: %[[VAL_0:.*]] = gpu.global_id x # CHECK: gpu.return # CHECK: } + + +# CHECK-LABEL: testGPULaunchFuncOp +@run +def testGPULaunchFuncOp(): + module = Module.create() + + module.operation.attributes["gpu.container_module"] = UnitAttr.get() + with InsertionPoint(module.body): + gpu_module = gpu.GPUModuleOp("gpu_module") + block = gpu_module.bodyRegion.blocks.append() + + with InsertionPoint(block): + gpu_func = gpu.GPUFuncOp( + FunctionType.get([], []), + "kernel", + body_builder=lambda func: gpu.return_([]), + kernel=True, + ) + + with InsertionPoint(module.body): + host = func.FuncOp(type=FunctionType.get([], []), name="host") + + with InsertionPoint(host.add_entry_block()): + c1 = arith.constant(T.index(), 1) + grid_sizes = (1, 1, 1) + block_sizes = (1, 1, 1) + token = gpu.wait() + token = gpu.launch_func( + async_dependencies=[token], + kernel=[gpu_module.sym_name.value, gpu_func.name.value], + grid_size=grid_sizes, + block_size=block_sizes, + kernel_operands=[], + ) + gpu.wait(async_dependencies=[token]) + func.ReturnOp([]) + + print(module) + + # CHECK-LABEL: gpu.module @gpu_module { + # CHECK: gpu.func @kernel() kernel { + # CHECK: gpu.return + # CHECK: } + # CHECK: } + + # CHECK-LABEL: func.func @host() { + # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index + # CHECK: %[[WAIT_0:.*]] = gpu.wait async + # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index + # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index + # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]]) + # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]] + # CHECK: return + # CHECK: } + + +# CHECK-LABEL: testGPULaunchOp +@run +def testGPULaunchOp(): + module = Module.create() + + with InsertionPoint(module.body): + host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf") + + entry_block = host.add_entry_block() + with InsertionPoint(entry_block): + c1 = arith.constant(T.index(), 1) + grid_sizes = (c1, c1, c1) + block_sizes = (c1, c1, c1) + + launch = gpu.launch(grid_sizes, block_sizes) + + op = launch(lambda *args: gpu.printf("%f", args[0])) + + with InsertionPoint(entry_block): + func.ReturnOp([]) + + print(module) + + # CHECK-LABEL: func.func @gpu_printf( + # CHECK-SAME: %[[ARG0:.*]]: f32) { + # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index + # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) { + # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index + # CHECK: gpu.terminator + # CHECK: } + # CHECK: return + # CHECK: } From c318f82a4ad4c91401daa177b8c8ab546901bb4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Mon, 20 Oct 2025 22:11:11 +0200 Subject: [PATCH 13/38] [clang-format] Respect 
ColumnLimit while aligning multiline expressions (#163863) Before the patch the added test case would indent the function and moving its second line beyond the column limit. Fixes #68122. --- clang/lib/Format/WhitespaceManager.cpp | 78 +++++++++++++++++++------- clang/unittests/Format/FormatTest.cpp | 24 ++++++++ 2 files changed, 83 insertions(+), 19 deletions(-) diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index b004d738588ab..65fc65e79fdc3 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -506,15 +506,15 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, MatchedIndices.clear(); }; - unsigned i = StartAt; - for (unsigned e = Changes.size(); i != e; ++i) { - auto &CurrentChange = Changes[i]; + unsigned I = StartAt; + for (unsigned E = Changes.size(); I != E; ++I) { + auto &CurrentChange = Changes[I]; if (CurrentChange.indentAndNestingLevel() < IndentAndNestingLevel) break; if (CurrentChange.NewlinesBefore != 0) { CommasBeforeMatch = 0; - EndOfSequence = i; + EndOfSequence = I; // Whether to break the alignment sequence because of an empty line. bool EmptyLineBreak = @@ -530,8 +530,8 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // A new line starts, re-initialize line status tracking bools. // Keep the match state if a string literal is continued on this line. - if (i == 0 || CurrentChange.Tok->isNot(tok::string_literal) || - Changes[i - 1].Tok->isNot(tok::string_literal)) { + if (I == 0 || CurrentChange.Tok->isNot(tok::string_literal) || + Changes[I - 1].Tok->isNot(tok::string_literal)) { FoundMatchOnLine = false; } LineIsComment = true; @@ -547,8 +547,8 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, IndentAndNestingLevel) { // Call AlignTokens recursively, skipping over this scope block. const auto StoppedAt = - AlignTokens(Style, Matches, Changes, i, ACS, RightJustify); - i = StoppedAt - 1; + AlignTokens(Style, Matches, Changes, I, ACS, RightJustify); + I = StoppedAt - 1; continue; } } @@ -559,7 +559,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // If there is more than one matching token per line, or if the number of // preceding commas, do not match anymore, end the sequence. 
if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) { - MatchedIndices.push_back(i); + MatchedIndices.push_back(I); AlignCurrentSequence(); } @@ -567,29 +567,69 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, FoundMatchOnLine = true; if (StartOfSequence == 0) - StartOfSequence = i; + StartOfSequence = I; unsigned ChangeWidthLeft = CurrentChange.StartOfTokenColumn; unsigned ChangeWidthAnchor = 0; unsigned ChangeWidthRight = 0; + unsigned CurrentChangeWidthRight = 0; if (RightJustify) if (ACS.PadOperators) ChangeWidthAnchor = CurrentChange.TokenLength; else ChangeWidthLeft += CurrentChange.TokenLength; else - ChangeWidthRight = CurrentChange.TokenLength; - for (unsigned j = i + 1; j != e && Changes[j].NewlinesBefore == 0; ++j) { - ChangeWidthRight += Changes[j].Spaces; + CurrentChangeWidthRight = CurrentChange.TokenLength; + const FormatToken *MatchingParenToEncounter = nullptr; + for (unsigned J = I + 1; + J != E && (Changes[J].NewlinesBefore == 0 || MatchingParenToEncounter); + ++J) { + const auto &Change = Changes[J]; + const auto *Tok = Change.Tok; + + if (Tok->MatchingParen) { + if (Tok->isOneOf(tok::l_paren, tok::l_brace, tok::l_square, + TT_TemplateOpener) && + !MatchingParenToEncounter) { + // If the next token is on the next line, we probably don't need to + // check the following lengths, because it most likely isn't aligned + // with the rest. + if (J + 1 != E && Changes[J + 1].NewlinesBefore == 0) + MatchingParenToEncounter = Tok->MatchingParen; + } else if (MatchingParenToEncounter == Tok->MatchingParen) { + MatchingParenToEncounter = nullptr; + } + } + + if (Change.NewlinesBefore != 0) { + ChangeWidthRight = std::max(ChangeWidthRight, CurrentChangeWidthRight); + const auto ChangeWidthStart = ChangeWidthLeft + ChangeWidthAnchor; + // If the position of the current token is columnwise before the begin + // of the alignment, we drop out here, because the next line does not + // have to be moved with the previous one(s) for the alignment. E.g.: + // int i1 = 1; | <- ColumnLimit | int i1 = 1; + // int j = 0; | Without the break -> | int j = 0; + // int k = bar( | We still want to align the = | int k = bar( + // argument1, | here, even if we can't move | argument1, + // argument2); | the following lines. | argument2); + if (static_cast(Change.Spaces) < ChangeWidthStart) + break; + CurrentChangeWidthRight = Change.Spaces - ChangeWidthStart; + } else { + CurrentChangeWidthRight += Change.Spaces; + } + // Changes are generally 1:1 with the tokens, but a change could also be // inside of a token, in which case it's counted more than once: once for // the whitespace surrounding the token (!IsInsideToken) and once for // each whitespace change within it (IsInsideToken). // Therefore, changes inside of a token should only count the space. - if (!Changes[j].IsInsideToken) - ChangeWidthRight += Changes[j].TokenLength; + if (!Change.IsInsideToken) + CurrentChangeWidthRight += Change.TokenLength; } + ChangeWidthRight = std::max(ChangeWidthRight, CurrentChangeWidthRight); + // If we are restricted by the maximum column width, end the sequence. 
unsigned NewLeft = std::max(ChangeWidthLeft, WidthLeft); unsigned NewAnchor = std::max(ChangeWidthAnchor, WidthAnchor); @@ -598,7 +638,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, if (Style.ColumnLimit != 0 && Style.ColumnLimit < NewLeft + NewAnchor + NewRight) { AlignCurrentSequence(); - StartOfSequence = i; + StartOfSequence = I; WidthLeft = ChangeWidthLeft; WidthAnchor = ChangeWidthAnchor; WidthRight = ChangeWidthRight; @@ -607,12 +647,12 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, WidthAnchor = NewAnchor; WidthRight = NewRight; } - MatchedIndices.push_back(i); + MatchedIndices.push_back(I); } - EndOfSequence = i; + EndOfSequence = I; AlignCurrentSequence(); - return i; + return I; } // Aligns a sequence of matching tokens, on the MinColumn column. diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a3ad978a9357d..ce68f91bef02a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20777,6 +20777,30 @@ TEST_F(FormatTest, AlignWithLineBreaks) { "}", Style); // clang-format on + + Style = getLLVMStyleWithColumns(70); + Style.AlignConsecutiveDeclarations.Enabled = true; + verifyFormat( + "ReturnType\n" + "MyFancyIntefaceFunction(Context *context,\n" + " ALongTypeName *response) noexcept override;\n" + "ReturnType func();", + Style); + + verifyFormat( + "ReturnType\n" + "MyFancyIntefaceFunction(B *context,\n" + " decltype(AFunc) *response) noexcept override;\n" + "ReturnType func();", + Style); + + Style.AlignConsecutiveAssignments.Enabled = true; + Style.ColumnLimit = 15; + verifyFormat("int i1 = 1;\n" + "k = bar(\n" + " argument1,\n" + " argument2);", + Style); } TEST_F(FormatTest, AlignWithInitializerPeriods) { From d4af5e6b0b4fef49c3277d4a13d279dfcc4e155a Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Tue, 21 Oct 2025 05:14:19 +0900 Subject: [PATCH 14/38] [Unittest][Cygwin] Set $PATH when running unittests (#163947) As the Cygwin platform requires $PATH to be set in order to run unittests, do the same as for the regular Windows target. 
--- clang-tools-extra/clangd/unittests/lit.cfg.py | 2 +- clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py | 2 +- clang-tools-extra/test/Unit/lit.cfg.py | 2 +- clang/test/Unit/lit.cfg.py | 2 +- llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg | 2 +- polly/test/Unit/lit.cfg | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/lit.cfg.py b/clang-tools-extra/clangd/unittests/lit.cfg.py index 33aa9e61f4ce9..4c3f0f028acdd 100644 --- a/clang-tools-extra/clangd/unittests/lit.cfg.py +++ b/clang-tools-extra/clangd/unittests/lit.cfg.py @@ -19,7 +19,7 @@ if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" diff --git a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py index 0963351abe3b1..010d28e036f83 100644 --- a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py +++ b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py @@ -11,7 +11,7 @@ if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" diff --git a/clang-tools-extra/test/Unit/lit.cfg.py b/clang-tools-extra/test/Unit/lit.cfg.py index b7376a02c89e1..0254829ed67e4 100644 --- a/clang-tools-extra/test/Unit/lit.cfg.py +++ b/clang-tools-extra/test/Unit/lit.cfg.py @@ -21,7 +21,7 @@ if platform.system() == "Darwin": shlibpath_var = "DYLD_LIBRARY_PATH" -elif platform.system() == "Windows": +elif platform.system() == "Windows" or sys.platform == "cygwin": shlibpath_var = "PATH" else: shlibpath_var = "LD_LIBRARY_PATH" diff --git a/clang/test/Unit/lit.cfg.py b/clang/test/Unit/lit.cfg.py index 37e91d0f8629f..ebe35a10e7f30 100644 --- a/clang/test/Unit/lit.cfg.py +++ b/clang/test/Unit/lit.cfg.py @@ -51,7 +51,7 @@ def find_shlibpath_var(): yield "LD_LIBRARY_PATH" elif platform.system() == "Darwin": yield "DYLD_LIBRARY_PATH" - elif platform.system() == "Windows": + elif platform.system() == "Windows" or sys.platform == "cygwin": yield "PATH" elif platform.system() == "AIX": yield "LIBPATH" diff --git a/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg b/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg index 9f93bac51456d..d3eb987922995 100644 --- a/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/googletest-cmd-wrapper/lit.cfg @@ -2,5 +2,5 @@ import lit.formats config.name = "googletest-cmd-wrapper" config.test_format = lit.formats.GoogleTest( - "DummySubDir", "Test" if "win32" in sys.platform else ".exe", [sys.executable] + "DummySubDir", "Test" if sys.platform in ["win32", "cygwin"] else ".exe", [sys.executable] ) diff --git a/polly/test/Unit/lit.cfg b/polly/test/Unit/lit.cfg index 6c450fbc54b5a..21d7bc4ab25c5 100644 --- a/polly/test/Unit/lit.cfg +++ b/polly/test/Unit/lit.cfg @@ -50,7 +50,7 @@ for var in [ if platform.system() == 'Darwin': shlibpath_var = 'DYLD_LIBRARY_PATH' -elif platform.system() == 'Windows': +elif platform.system() == 'Windows' or sys.platform == "cygwin": shlibpath_var = 'PATH' else: shlibpath_var = 'LD_LIBRARY_PATH' From f37b4459f050514542d755172855dc75ddda33f2 Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Mon, 20 Oct 2025 13:14:33 -0700 Subject: [PATCH 15/38] [NFC][OpenMP] Add 
small class-member use_device_ptr/addr unit tests. (#164039) Two of the tests are currently asserting, and two are emitting unexpected results. The asserting tests will be fixed using the ATTACH-style codegen from #153683. The other two involve `use_device_addr` on byrefs, and need more follow-up codegen changes, that have been noted in a FIXME comment. --- ...rget_data_use_device_addr_class_member.cpp | 34 +++++++++++++ ..._data_use_device_addr_class_member_ref.cpp | 34 +++++++++++++ ..._device_addr_class_member_ref_with_map.cpp | 49 +++++++++++++++++++ ..._use_device_addr_class_member_with_map.cpp | 43 ++++++++++++++++ ...arget_data_use_device_ptr_class_member.cpp | 34 +++++++++++++ ...t_data_use_device_ptr_class_member_ref.cpp | 34 +++++++++++++ ...e_device_ptr_class_member_ref_with_map.cpp | 36 ++++++++++++++ ...a_use_device_ptr_class_member_with_map.cpp | 36 ++++++++++++++ 8 files changed, 300 insertions(+) create mode 100644 offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp create mode 100644 offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp create mode 100644 offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp create mode 100644 offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp create mode 100644 offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp create mode 100644 offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp create mode 100644 offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp create mode 100644 offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp new file mode 100644 index 0000000000000..6fef34f665b66 --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f7() { +#pragma omp target data map(to : c) + { + void *mapped_ptr = omp_get_mapped_ptr(&c, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_addr(c) + { + printf("%d\n", &c == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f7(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp new file mode 100644 index 0000000000000..8ca02ddd0425c --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f8() { +#pragma omp target enter data map(to : d) + { + void *mapped_ptr = omp_get_mapped_ptr(&d, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_addr(d) + { + printf("%d\n", &d == mapped_ptr); // CHECK: 1 + } + } + } +}; 
+ +int main() { + ST s; + s.f8(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp new file mode 100644 index 0000000000000..5e8769eb3079d --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_ref_with_map.cpp @@ -0,0 +1,49 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f6() { + uintptr_t offset = (uintptr_t)&d - n; +#pragma omp target data map(to : m, d) + { + void *mapped_ptr = omp_get_mapped_ptr(&d, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(m, d) use_device_addr(d) + { + // FIXME: Clang is mapping class member references using: + // &this[0], &ref_ptee(this[0].d), 4, PTR_AND_OBJ + // but a load from `this[0]` cannot be used to compute the offset + // in the runtime, because for example in this case, it would mean + // that the base address of the pointee is a load from `n`, i.e. 111. + // clang should be emitting the following instead: + // &ref_ptr(this[0].d), &ref_ptee(this[0].d), 4, PTR_AND_OBJ + // And eventually, the following that's compatible with the + // ref/attach modifiers: + // &ref_ptee(this[0].[d])), &ref_ptee(this[0].d), TO | FROM + // &ref_ptr(this[0].d), &ref_ptee(this[0].d), 4, ATTACH + // EXPECTED: 1 0 + // CHECK: 0 1 + printf("%d %d\n", &d == mapped_ptr, + (uintptr_t)&d == (uintptr_t)mapped_ptr - offset); + } + } + } +}; + +int main() { + ST s; + s.f6(); +} diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp new file mode 100644 index 0000000000000..f5db4ecc66175 --- /dev/null +++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_class_member_with_map.cpp @@ -0,0 +1,43 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f5() { + uintptr_t offset = (uintptr_t)&c - (uintptr_t)this; +#pragma omp target data map(to : m, c) + { + void *mapped_ptr = omp_get_mapped_ptr(&c, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(m, c) use_device_addr(c) + { + // FIXME: RT is currently doing the translation for "&this[0]" instead + // of &this->c, for a map like: + // this, &this->c, ..., RETURN_PARAM + // We either need to fix RT, or emit a separate entry for such + // use_device_addr, even if there is a matching map entry already. 
+ // EXPECTED: 1 0 + // CHECK: 0 1 + printf("%d %d\n", &c == mapped_ptr, + (uintptr_t)&c == (uintptr_t)mapped_ptr - offset); + } + } + } +}; + +int main() { + ST s; + s.f5(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp new file mode 100644 index 0000000000000..b0253cdbe20d9 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f3() { +#pragma omp target data map(to : a[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(a, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_ptr(a) + { + printf("%d\n", a == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f3(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp new file mode 100644 index 0000000000000..4de34487c2b04 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f4() { +#pragma omp target data map(to : b[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(b, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data use_device_ptr(b) + { + printf("%d\n", b == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f4(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp new file mode 100644 index 0000000000000..27fda743b989e --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_with_map.cpp @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// XFAIL: * + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f2() { +#pragma omp target data map(to : b[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(b, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(b[0], m) use_device_ptr(b) + { + printf("%d\n", b == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f2(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp new file mode 100644 index 0000000000000..38a369659d13d --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_with_map.cpp @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// XFAIL: * + +#include +#include + +int x = 0; +int *y = &x; +int z = 0; + +struct ST { + int n = 111; + int *a = &x; + int *&b = y; + int c = 0; + int &d = z; + int m = 0; + + void f1() 
{ +#pragma omp target data map(to : a[0]) + { + void *mapped_ptr = omp_get_mapped_ptr(a, omp_get_default_device()); + printf("%d\n", mapped_ptr != NULL); // CHECK: 1 +#pragma omp target data map(a[0], m) use_device_ptr(a) + { + printf("%d\n", a == mapped_ptr); // CHECK: 1 + } + } + } +}; + +int main() { + ST s; + s.f1(); +} From 13498bc1d16005774a62b3dd04f1d2e5dd887674 Mon Sep 17 00:00:00 2001 From: Tomohiro Kashiwada Date: Tue, 21 Oct 2025 05:14:54 +0900 Subject: [PATCH 16/38] [clang-tools-extra][Unittest] Fix wrong reference to CMake configuration variable (#164147) `@SHLIBDIR@` is replaced by CMake's configuration function, so it must be in `lit.site.cfg.py.in` but not `lit.cfg.py`. `lit.cfg.py` must reference variables in generated `lit.site.cfg.py`. We didn't notice this problem because it only affects Windows (including MinGW and Cygwin) that are configured with either LLVM_LINK_LLVM_DYLIB=ON or BUILD_SHARED=ON. --- clang-tools-extra/clangd/unittests/lit.cfg.py | 2 +- clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/lit.cfg.py b/clang-tools-extra/clangd/unittests/lit.cfg.py index 4c3f0f028acdd..666e9879bb4ad 100644 --- a/clang-tools-extra/clangd/unittests/lit.cfg.py +++ b/clang-tools-extra/clangd/unittests/lit.cfg.py @@ -24,7 +24,7 @@ else: shlibpath_var = "LD_LIBRARY_PATH" config.environment[shlibpath_var] = os.path.pathsep.join( - ("@SHLIBDIR@", "@LLVM_LIBS_DIR@", config.environment.get(shlibpath_var, "")) + (config.shlibdir, config.llvm_libs_dir, config.environment.get(shlibpath_var, "")) ) # It is not realistically possible to account for all options that could diff --git a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py index 010d28e036f83..c4454df06b386 100644 --- a/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py +++ b/clang-tools-extra/include-cleaner/test/Unit/lit.cfg.py @@ -16,7 +16,7 @@ else: shlibpath_var = "LD_LIBRARY_PATH" config.environment[shlibpath_var] = os.path.pathsep.join( - ("@SHLIBDIR@", "@LLVM_LIBS_DIR@", config.environment.get(shlibpath_var, "")) + (config.shlibdir, config.llvm_libs_dir, config.environment.get(shlibpath_var, "")) ) # It is not realistically possible to account for all options that could From be9c083cf7ec7dc9b6e93f29981d7b7e71273241 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 20 Oct 2025 13:16:09 -0700 Subject: [PATCH 17/38] [CAS] Add OnDiskGraphDB and OnDiskKeyValueDB (#114102) Add OnDiskGraphDB and OnDiskKeyValueDB that can be used to implement ObjectStore and ActionCache respectively. Those are on-disk persistent storage that build upon OnDiskTrieHashMap and implements key functions that are required by LLVMCAS interfaces. This abstraction layer defines how the objects are hashed and stored on disk. OnDiskKeyValueDB is a basic OnDiskTrieHashMap while OnDiskGraphDB also defines: * How objects of various size are store on disk and are referenced by the trie nodes. * How to store the references from one stored object to another object that is referenced. In addition to basic APIs for ObjectStore and ActionCache, other advances database configuration features can be implemented in this layer without exposing to the users of the LLVMCAS interface. For example, OnDiskGraphDB has a faulty in function to fetch data from an upstream OnDiskGraphDB if the data is missing. 
--- llvm/include/llvm/CAS/OnDiskDataAllocator.h | 2 +- llvm/include/llvm/CAS/OnDiskGraphDB.h | 469 +++++ llvm/include/llvm/CAS/OnDiskKeyValueDB.h | 82 + llvm/lib/CAS/CMakeLists.txt | 2 + llvm/lib/CAS/OnDiskCommon.cpp | 56 +- llvm/lib/CAS/OnDiskCommon.h | 22 + llvm/lib/CAS/OnDiskDataAllocator.cpp | 6 +- llvm/lib/CAS/OnDiskGraphDB.cpp | 1755 +++++++++++++++++++ llvm/lib/CAS/OnDiskKeyValueDB.cpp | 113 ++ llvm/unittests/CAS/CASTestConfig.cpp | 14 + llvm/unittests/CAS/CASTestConfig.h | 11 + llvm/unittests/CAS/CMakeLists.txt | 21 +- llvm/unittests/CAS/OnDiskCommonUtils.h | 76 + llvm/unittests/CAS/OnDiskGraphDBTest.cpp | 310 ++++ llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp | 77 + 15 files changed, 3009 insertions(+), 7 deletions(-) create mode 100644 llvm/include/llvm/CAS/OnDiskGraphDB.h create mode 100644 llvm/include/llvm/CAS/OnDiskKeyValueDB.h create mode 100644 llvm/lib/CAS/OnDiskGraphDB.cpp create mode 100644 llvm/lib/CAS/OnDiskKeyValueDB.cpp create mode 100644 llvm/unittests/CAS/OnDiskCommonUtils.h create mode 100644 llvm/unittests/CAS/OnDiskGraphDBTest.cpp create mode 100644 llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h index 2809df800621b..b7099dce2069d 100644 --- a/llvm/include/llvm/CAS/OnDiskDataAllocator.h +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -64,7 +64,7 @@ class OnDiskDataAllocator { /// \returns the buffer that was allocated at \p create time, with size /// \p UserHeaderSize. - MutableArrayRef getUserHeader(); + MutableArrayRef getUserHeader() const; size_t size() const; size_t capacity() const; diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h new file mode 100644 index 0000000000000..83017a6a54fee --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -0,0 +1,469 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length +/// hash. This is the class that implements the database storage scheme without +/// exposing the hashing algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKGRAPHDB_H +#define LLVM_CAS_ONDISKGRAPHDB_H + +#include "llvm/ADT/PointerUnion.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// Standard 8 byte reference inside OnDiskGraphDB. +class InternalRef { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint64_t getRawData() const { return Data; } + + static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); } + static InternalRef getFromOffset(FileOffset Offset) { + return InternalRef(Offset.get()); + } + + friend bool operator==(InternalRef LHS, InternalRef RHS) { + return LHS.Data == RHS.Data; + } + +private: + InternalRef(FileOffset Offset) : Data((uint64_t)Offset.get()) {} + InternalRef(uint64_t Data) : Data(Data) {} + uint64_t Data; +}; + +/// Compact 4 byte reference inside OnDiskGraphDB for smaller references. 
+class InternalRef4B { +public: + FileOffset getFileOffset() const { return FileOffset(Data); } + uint32_t getRawData() const { return Data; } + + /// Shrink to 4B reference. + static std::optional tryToShrink(InternalRef Ref) { + uint64_t Offset = Ref.getRawData(); + if (Offset > UINT32_MAX) + return std::nullopt; + return InternalRef4B(Offset); + } + + operator InternalRef() const { + return InternalRef::getFromOffset(getFileOffset()); + } + +private: + friend class InternalRef; + InternalRef4B(uint32_t Data) : Data(Data) {} + uint32_t Data; +}; + +/// Array of internal node references. +class InternalRefArrayRef { +public: + size_t size() const { return Size; } + bool empty() const { return !Size; } + + class iterator + : public iterator_facade_base { + public: + bool operator==(const iterator &RHS) const { return I == RHS.I; } + InternalRef operator*() const { + if (auto *Ref = dyn_cast(I)) + return *Ref; + return InternalRef(*cast(I)); + } + bool operator<(const iterator &RHS) const { + assert(isa(I) == isa(RHS.I)); + if (auto *Ref = dyn_cast(I)) + return Ref < cast(RHS.I); + return cast(I) - + cast(RHS.I); + } + ptrdiff_t operator-(const iterator &RHS) const { + assert(isa(I) == isa(RHS.I)); + if (auto *Ref = dyn_cast(I)) + return Ref - cast(RHS.I); + return cast(I) - + cast(RHS.I); + } + iterator &operator+=(ptrdiff_t N) { + if (auto *Ref = dyn_cast(I)) + I = Ref + N; + else + I = cast(I) + N; + return *this; + } + iterator &operator-=(ptrdiff_t N) { + if (auto *Ref = dyn_cast(I)) + I = Ref - N; + else + I = cast(I) - N; + return *this; + } + InternalRef operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + iterator() = default; + + uint64_t getOpaqueData() const { return uintptr_t(I.getOpaqueValue()); } + + static iterator fromOpaqueData(uint64_t Opaque) { + return iterator( + PointerUnion::getFromOpaqueValue((void *) + Opaque)); + } + + private: + friend class InternalRefArrayRef; + explicit iterator( + PointerUnion I) + : I(I) {} + PointerUnion I; + }; + + bool operator==(const InternalRefArrayRef &RHS) const { + return size() == RHS.size() && std::equal(begin(), end(), RHS.begin()); + } + + iterator begin() const { return iterator(Begin); } + iterator end() const { return begin() + Size; } + + /// Array accessor. + InternalRef operator[](ptrdiff_t N) const { return begin()[N]; } + + bool is4B() const { return isa(Begin); } + bool is8B() const { return isa(Begin); } + + ArrayRef getBuffer() const { + if (is4B()) { + auto *B = cast(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size); + } + auto *B = cast(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); + } + + InternalRefArrayRef(std::nullopt_t = std::nullopt) { + // This is useful so that all the casts in the \p iterator functions can + // operate without needing to check for a null value. + static InternalRef PlaceHolder = InternalRef::getFromRawData(0); + Begin = &PlaceHolder; + } + + InternalRefArrayRef(ArrayRef Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + + InternalRefArrayRef(ArrayRef Refs) + : Begin(Refs.begin()), Size(Refs.size()) {} + +private: + PointerUnion Begin; + size_t Size = 0; +}; + +/// Reference to a node. The node's data may not be stored in the database. +/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance +/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not +/// comparable. 
+class ObjectID { +public: + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectID fromOpaqueData(uint64_t Opaque) { return ObjectID(Opaque); } + + friend bool operator==(const ObjectID &LHS, const ObjectID &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectID &LHS, const ObjectID &RHS) { + return !(LHS == RHS); + } + +private: + explicit ObjectID(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t Opaque; +}; + +/// Handle for a loaded node object. +class ObjectHandle { +public: + explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} + uint64_t getOpaqueData() const { return Opaque; } + + static ObjectHandle fromFileOffset(FileOffset Offset); + static ObjectHandle fromMemory(uintptr_t Ptr); + + friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return LHS.Opaque == RHS.Opaque; + } + friend bool operator!=(const ObjectHandle &LHS, const ObjectHandle &RHS) { + return !(LHS == RHS); + } + +private: + uint64_t Opaque; +}; + +/// Iterator for ObjectID. +class object_refs_iterator + : public iterator_facade_base { +public: + bool operator==(const object_refs_iterator &RHS) const { return I == RHS.I; } + ObjectID operator*() const { + return ObjectID::fromOpaqueData((*I).getRawData()); + } + bool operator<(const object_refs_iterator &RHS) const { return I < RHS.I; } + ptrdiff_t operator-(const object_refs_iterator &RHS) const { + return I - RHS.I; + } + object_refs_iterator &operator+=(ptrdiff_t N) { + I += N; + return *this; + } + object_refs_iterator &operator-=(ptrdiff_t N) { + I -= N; + return *this; + } + ObjectID operator[](ptrdiff_t N) const { return *(this->operator+(N)); } + + object_refs_iterator() = default; + object_refs_iterator(InternalRefArrayRef::iterator I) : I(I) {} + + uint64_t getOpaqueData() const { return I.getOpaqueData(); } + + static object_refs_iterator fromOpaqueData(uint64_t Opaque) { + return InternalRefArrayRef::iterator::fromOpaqueData(Opaque); + } + +private: + InternalRefArrayRef::iterator I; +}; + +using object_refs_range = llvm::iterator_range; + +/// On-disk CAS nodes database, independent of a particular hashing algorithm. +class OnDiskGraphDB { +public: + /// Associate data & references with a particular object ID. If there is + /// already a record for this object the operation is a no-op. \param ID the + /// object ID to associate the data & references with. \param Refs references + /// \param Data data buffer. + Error store(ObjectID ID, ArrayRef Refs, ArrayRef Data); + + /// \returns \p nullopt if the object associated with \p Ref does not exist. + Expected> load(ObjectID Ref); + + /// \returns the hash bytes digest for the object reference. + ArrayRef getDigest(ObjectID Ref) const { + // ObjectID should be valid to fetch Digest. + return cantFail(getDigest(getInternalRef(Ref))); + } + + /// Form a reference for the provided hash. The reference can be used as part + /// of a CAS object even if it's not associated with an object yet. + Expected getReference(ArrayRef Hash); + + /// Get an existing reference to the object \p Digest. + /// + /// Returns \p nullopt if the object is not stored in this CAS. + std::optional getExistingReference(ArrayRef Digest); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function will fault-in according to the policy. + Expected isMaterialized(ObjectID Ref); + + /// Check whether the object associated with \p Ref is stored in the CAS. + /// Note that this function does not fault-in. 
+ bool containsObject(ObjectID Ref) const { + return containsObject(Ref, /*CheckUpstream=*/true); + } + + /// \returns the data part of the provided object handle. + ArrayRef getObjectData(ObjectHandle Node) const; + + /// \returns the object referenced by the provided object handle. + object_refs_range getObjectRefs(ObjectHandle Node) const { + InternalRefArrayRef Refs = getInternalRefs(Node); + return make_range(Refs.begin(), Refs.end()); + } + + /// \returns Total size of stored objects. + /// + /// NOTE: There's a possibility that the returned size is not including a + /// large object if the process crashed right at the point of inserting it. + size_t getStorageSize() const; + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const; + + void print(raw_ostream &OS) const; + + /// Hashing function type for validation. + using HashingFuncT = function_ref>, ArrayRef, SmallVectorImpl &)>; + + /// Validate the OnDiskGraphDB. + /// + /// \param Deep if true, rehash all the objects to ensure no data + /// corruption in stored objects, otherwise just validate the structure of + /// CAS database. + /// \param Hasher is the hashing function used for objects inside CAS. + Error validate(bool Deep, HashingFuncT Hasher) const; + + /// How to fault-in nodes if an upstream database is used. + enum class FaultInPolicy { + /// Copy only the requested node. + SingleNode, + /// Copy the the entire graph of a node. + FullTree, + }; + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes + /// if they don't exist in the primary store. The upstream store is only used + /// for reading nodes, new nodes are only written to the primary store. + /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied + /// to primary store. This is recorded at creation time and subsequent opens + /// need to pass the same policy otherwise the \p open will fail. + static Expected> + open(StringRef Path, StringRef HashName, unsigned HashByteSize, + std::unique_ptr UpstreamDB = nullptr, + FaultInPolicy Policy = FaultInPolicy::FullTree); + + ~OnDiskGraphDB(); + +private: + /// Forward declaration for a proxy for an ondisk index record. + struct IndexProxy; + + enum class ObjectPresence { + Missing, + InPrimaryDB, + OnlyInUpstreamDB, + }; + + /// Check if object exists and if it is on upstream only. + Expected getObjectPresence(ObjectID Ref, + bool CheckUpstream) const; + + /// \returns true if object can be found in database. + bool containsObject(ObjectID Ref, bool CheckUpstream) const { + auto Presence = getObjectPresence(Ref, CheckUpstream); + if (!Presence) { + consumeError(Presence.takeError()); + return false; + } + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + return true; + } + } + + /// When \p load is called for a node that doesn't exist, this function tries + /// to load it from the upstream store and copy it to the primary one. 
+ Expected> faultInFromUpstream(ObjectID PrimaryID); + + /// Import the entire tree from upstream with \p UpstreamNode as root. + Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode); + /// Import only the \param UpstreamNode. + Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode); + + /// Found the IndexProxy for the hash. + Expected indexHash(ArrayRef Hash); + + /// Get path for creating standalone data file. + void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, + SmallVectorImpl &Path) const; + /// Create a standalone leaf file. + Error createStandaloneLeaf(IndexProxy &I, ArrayRef Data); + + /// \name Helper functions for internal data structures. + /// \{ + static InternalRef getInternalRef(ObjectID Ref) { + return InternalRef::getFromRawData(Ref.getOpaqueData()); + } + + static ObjectID getExternalReference(InternalRef Ref) { + return ObjectID::fromOpaqueData(Ref.getRawData()); + } + + static ObjectID getExternalReference(const IndexProxy &I); + + static InternalRef makeInternalRef(FileOffset IndexOffset); + + Expected> getDigest(InternalRef Ref) const; + + ArrayRef getDigest(const IndexProxy &I) const; + + Expected getIndexProxyFromRef(InternalRef Ref) const; + + IndexProxy + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; + + InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; + /// \} + + /// Get the atomic variable that keeps track of the standalone data storage + /// size. + std::atomic &standaloneStorageSize() const; + + /// Increase the standalone data size. + void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the standalone data size. + uint64_t getStandaloneStorageSize() const; + + // Private constructor. + OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr UpstreamDB, + FaultInPolicy Policy); + + /// Mapping from hash to object reference. + /// + /// Data type is TrieRecord. + OnDiskTrieRawHashMap Index; + + /// Storage for most objects. + /// + /// Data type is DataRecordHandle. + OnDiskDataAllocator DataPool; + + /// A StandaloneDataMap. + void *StandaloneData = nullptr; + + /// Path to the root directory. + std::string RootPath; + + /// Optional on-disk store to be used for faulting-in nodes. + std::unique_ptr UpstreamDB; + + /// The policy used to fault in data from upstream. + FaultInPolicy FIPolicy; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKGRAPHDB_H diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h new file mode 100644 index 0000000000000..b762518366c21 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskKeyValueDB, a key value storage database of fixed size +/// key and value. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H +#define LLVM_CAS_ONDISKKEYVALUEDB_H + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" + +namespace llvm::cas::ondisk { + +/// An on-disk key-value data store with the following properties: +/// * Keys are fixed length binary hashes with expected normal distribution. +/// * Values are buffers of the same size, specified at creation time. +/// * The value of a key cannot be changed once it is set. +/// * The value buffers returned from a key lookup have 8-byte alignment. +class OnDiskKeyValueDB { +public: + /// Associate a value with a key. + /// + /// \param Key the hash bytes for the key + /// \param Value the value bytes, same size as \p ValueSize parameter of + /// \p open call. + /// + /// \returns the value associated with the \p Key. It may be different than + /// \p Value if another value is already associated with this key. + Expected> put(ArrayRef Key, ArrayRef Value); + + /// \returns the value associated with the \p Key, or \p std::nullopt if the + /// key does not exist. + Expected>> get(ArrayRef Key); + + /// \returns Total size of stored data. + size_t getStorageSize() const { return Cache.size(); } + + /// \returns The precentage of space utilization of hard space limits. + /// + /// Return value is an integer between 0 and 100 for percentage. + unsigned getHardStorageLimitUtilization() const { + return Cache.size() * 100ULL / Cache.capacity(); + } + + /// Open the on-disk store from a directory. + /// + /// \param Path directory for the on-disk store. The directory will be created + /// if it doesn't exist. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param KeySize Size for the key hash bytes. + /// \param ValueName Identifier name for the values. + /// \param ValueSize Size for the value bytes. + static Expected> + open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize); + + using CheckValueT = + function_ref Data)>; + /// Validate the storage with a callback \p CheckValue to check the stored + /// value. 
+ Error validate(CheckValueT CheckValue) const; + +private: + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) + : ValueSize(ValueSize), Cache(std::move(Cache)) {} + + const size_t ValueSize; + OnDiskTrieRawHashMap Cache; +}; + +} // namespace llvm::cas::ondisk + +#endif // LLVM_CAS_ONDISKKEYVALUEDB_H diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index bca39b645af45..a2f8c49e50145 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_component_library(LLVMCAS ObjectStore.cpp OnDiskCommon.cpp OnDiskDataAllocator.cpp + OnDiskGraphDB.cpp + OnDiskKeyValueDB.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp index 25aa06bfe64da..281bde981457b 100644 --- a/llvm/lib/CAS/OnDiskCommon.cpp +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// #include "OnDiskCommon.h" -#include "llvm/Config/config.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Process.h" +#include #include #if __has_include() @@ -25,8 +26,44 @@ #include #endif +#if __has_include() +#include // statfs +#endif + using namespace llvm; +static uint64_t OnDiskCASMaxMappingSize = 0; + +Expected> cas::ondisk::getOverriddenMaxMappingSize() { + static std::once_flag Flag; + Error Err = Error::success(); + std::call_once(Flag, [&Err] { + ErrorAsOutParameter EAO(&Err); + constexpr const char *EnvVar = "LLVM_CAS_MAX_MAPPING_SIZE"; + auto Value = sys::Process::GetEnv(EnvVar); + if (!Value) + return; + + uint64_t Size; + if (StringRef(*Value).getAsInteger(/*auto*/ 0, Size)) + Err = createStringError(inconvertibleErrorCode(), + "invalid value for %s: expected integer", EnvVar); + OnDiskCASMaxMappingSize = Size; + }); + + if (Err) + return std::move(Err); + + if (OnDiskCASMaxMappingSize == 0) + return std::nullopt; + + return OnDiskCASMaxMappingSize; +} + +void cas::ondisk::setMaxMappingSize(uint64_t Size) { + OnDiskCASMaxMappingSize = Size; +} + std::error_code cas::ondisk::lockFileThreadSafe(int FD, sys::fs::LockKind Kind) { #if HAVE_FLOCK @@ -125,3 +162,20 @@ Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, return NewSize; // Pretend it worked. #endif } + +bool cas::ondisk::useSmallMappingSize(const Twine &P) { + // Add exceptions to use small database file here. +#if defined(__APPLE__) && __has_include() + // macOS tmpfs does not support sparse tails. + SmallString<128> PathStorage; + StringRef Path = P.toNullTerminatedStringRef(PathStorage); + struct statfs StatFS; + if (statfs(Path.data(), &StatFS) != 0) + return false; + + if (strcmp(StatFS.f_fstypename, "tmpfs") == 0) + return true; +#endif + // Default to use regular database file. + return false; +} diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h index 8b79ffe5c3158..ac00662a2e91e 100644 --- a/llvm/lib/CAS/OnDiskCommon.h +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -12,9 +12,31 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include +#include namespace llvm::cas::ondisk { +/// The version for all the ondisk database files. It needs to be bumped when +/// compatibility breaking changes are introduced. +constexpr StringLiteral CASFormatVersion = "v1"; + +/// Retrieves an overridden maximum mapping size for CAS files, if any, +/// speicified by LLVM_CAS_MAX_MAPPING_SIZE in the environment or set by +/// `setMaxMappingSize()`. 
If the value from environment is unreadable, returns +/// an error. +Expected> getOverriddenMaxMappingSize(); + +/// Set MaxMappingSize for ondisk CAS. This function is not thread-safe and +/// should be set before creaing any ondisk CAS and does not affect CAS already +/// created. Set value 0 to use default size. +void setMaxMappingSize(uint64_t Size); + +/// Whether to use a small file mapping for ondisk databases created in \p Path. +/// +/// For some file system that doesn't support sparse file, use a smaller file +/// mapping to avoid consuming too much disk space on creation. +bool useSmallMappingSize(const Twine &Path); + /// Thread-safe alternative to \c sys::fs::lockFile. This does not support all /// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library /// for now. diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp index 13bbd66139178..9c68bc48d533a 100644 --- a/llvm/lib/CAS/OnDiskDataAllocator.cpp +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -185,7 +185,7 @@ Expected> OnDiskDataAllocator::get(FileOffset Offset, return ArrayRef{Impl->File.getRegion().data() + Offset.get(), Size}; } -MutableArrayRef OnDiskDataAllocator::getUserHeader() { +MutableArrayRef OnDiskDataAllocator::getUserHeader() const { return Impl->Store.getUserHeader(); } @@ -221,7 +221,9 @@ Expected> OnDiskDataAllocator::get(FileOffset Offset, "OnDiskDataAllocator is not supported"); } -MutableArrayRef OnDiskDataAllocator::getUserHeader() { return {}; } +MutableArrayRef OnDiskDataAllocator::getUserHeader() const { + return {}; +} size_t OnDiskDataAllocator::size() const { return 0; } size_t OnDiskDataAllocator::capacity() const { return 0; } diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp new file mode 100644 index 0000000000000..72bb98c4bf65d --- /dev/null +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -0,0 +1,1755 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskGraphDB, an on-disk CAS nodes database, +/// independent of a particular hashing algorithm. It only needs to be +/// configured for the hash size and controls the schema of the storage. +/// +/// OnDiskGraphDB defines: +/// +/// - How the data is stored inside database, either as a standalone file, or +/// allocated inside a datapool. +/// - How references to other objects inside the same database is stored. They +/// are stored as internal references, instead of full hash value to save +/// space. +/// - How to chain databases together and import objects from upstream +/// databases. +/// +/// Here's a top-level description of the current layout: +/// +/// - db/index.: a file for the "index" table, named by \a +/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B +/// that are accessed atomically, describing the object kind and where/how +/// it's stored (including an optional file offset). See \a TrieRecord for +/// more details. +/// - db/data.: a file for the "data" table, named by \a +/// DataPoolTableName and managed by \a DataStore. New objects within +/// TrieRecord::MaxEmbeddedSize are inserted here as \a +/// TrieRecord::StorageKind::DataPool. 
+/// - db/obj..: a file storing an object outside the main +/// "data" table, named by its offset into the "index" table, with the +/// format of \a TrieRecord::StorageKind::Standalone. +/// - db/leaf..: a file storing a leaf node outside the +/// main "data" table, named by its offset into the "index" table, with +/// the format of \a TrieRecord::StorageKind::StandaloneLeaf. +/// - db/leaf+0..: a file storing a null-terminated leaf object +/// outside the main "data" table, named by its offset into the "index" table, +/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskGraphDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include +#include +#include + +#define DEBUG_TYPE "on-disk-cas" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral IndexTableName = "llvm.cas.index"; +static constexpr StringLiteral DataPoolTableName = "llvm.cas.data"; + +static constexpr StringLiteral IndexFilePrefix = "index."; +static constexpr StringLiteral DataPoolFilePrefix = "data."; + +static constexpr StringLiteral FilePrefixObject = "obj."; +static constexpr StringLiteral FilePrefixLeaf = "leaf."; +static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0."; + +static Error createCorruptObjectError(Expected> ID) { + if (!ID) + return ID.takeError(); + + return createStringError(llvm::errc::invalid_argument, + "corrupt object '" + toHex(*ID) + "'"); +} + +namespace { + +/// Trie record data: 8 bytes, atomic +/// - 1-byte: StorageKind +/// - 7-bytes: DataStoreOffset (offset into referenced file) +class TrieRecord { +public: + enum class StorageKind : uint8_t { + /// Unknown object. + Unknown = 0, + + /// data.vX: main pool, full DataStore record. + DataPool = 1, + + /// obj..vX: standalone, with a full DataStore record. + Standalone = 10, + + /// leaf..vX: standalone, just the data. File contents + /// exactly the data content and file size matches the data size. No refs. + StandaloneLeaf = 11, + + /// leaf+0..vX: standalone, just the data plus an + /// extra null character ('\0'). File size is 1 bigger than the data size. + /// No refs. + StandaloneLeaf0 = 12, + }; + + static StringRef getStandaloneFilePrefix(StorageKind SK) { + switch (SK) { + default: + llvm_unreachable("Expected standalone storage kind"); + case TrieRecord::StorageKind::Standalone: + return FilePrefixObject; + case TrieRecord::StorageKind::StandaloneLeaf: + return FilePrefixLeaf; + case TrieRecord::StorageKind::StandaloneLeaf0: + return FilePrefixLeaf0; + } + } + + enum Limits : int64_t { + /// Saves files bigger than 64KB standalone instead of embedding them. + MaxEmbeddedSize = 64LL * 1024LL - 1, + }; + + struct Data { + StorageKind SK = StorageKind::Unknown; + FileOffset Offset; + }; + + /// Pack StorageKind and Offset from Data into 8 byte TrieRecord. 
+ static uint64_t pack(Data D) { + assert(D.Offset.get() < (int64_t)(1ULL << 56)); + uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get(); + assert(D.SK != StorageKind::Unknown || Packed == 0); +#ifndef NDEBUG + Data RoundTrip = unpack(Packed); + assert(D.SK == RoundTrip.SK); + assert(D.Offset.get() == RoundTrip.Offset.get()); +#endif + return Packed; + } + + // Unpack TrieRecord into Data. + static Data unpack(uint64_t Packed) { + Data D; + if (!Packed) + return D; + D.SK = (StorageKind)(Packed >> 56); + D.Offset = FileOffset(Packed & (UINT64_MAX >> 8)); + return D; + } + + TrieRecord() : Storage(0) {} + + Data load() const { return unpack(Storage); } + bool compare_exchange_strong(Data &Existing, Data New); + +private: + std::atomic Storage; +}; + +/// DataStore record data: 4B + size? + refs? + data + 0 +/// - 4-bytes: Header +/// - {0,4,8}-bytes: DataSize (may be packed in Header) +/// - {0,4,8}-bytes: NumRefs (may be packed in Header) +/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned) +/// - +/// - 1-byte: 0-term +struct DataRecordHandle { + /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment + /// convenience to avoid computing padding later. + enum class NumRefsFlags : uint8_t { + Uses0B = 0U, + Uses1B = 1U, + Uses2B = 2U, + Uses4B = 3U, + Uses8B = 4U, + Max = Uses8B, + }; + + /// DataSize storage: 8B, 4B, 2B, or 1B. + enum class DataSizeFlags { + Uses1B = 0U, + Uses2B = 1U, + Uses4B = 2U, + Uses8B = 3U, + Max = Uses8B, + }; + + /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B. + enum class RefKindFlags { + InternalRef = 0U, + InternalRef4B = 1U, + Max = InternalRef4B, + }; + + enum Counts : int { + NumRefsShift = 0, + NumRefsBits = 3, + DataSizeShift = NumRefsShift + NumRefsBits, + DataSizeBits = 2, + RefKindShift = DataSizeShift + DataSizeBits, + RefKindBits = 1, + }; + static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) == + 0, + "Not enough bits"); + static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) == + 0, + "Not enough bits"); + + /// Layout of the DataRecordHandle and how to decode it. 
+ struct LayoutFlags { + NumRefsFlags NumRefs; + DataSizeFlags DataSize; + RefKindFlags RefKind; + + static uint64_t pack(LayoutFlags LF) { + unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) | + ((unsigned)LF.DataSize << DataSizeShift) | + ((unsigned)LF.RefKind << RefKindShift); +#ifndef NDEBUG + LayoutFlags RoundTrip = unpack(Packed); + assert(LF.NumRefs == RoundTrip.NumRefs); + assert(LF.DataSize == RoundTrip.DataSize); + assert(LF.RefKind == RoundTrip.RefKind); +#endif + return Packed; + } + static LayoutFlags unpack(uint64_t Storage) { + assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte"); + LayoutFlags LF; + LF.NumRefs = + (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1)); + LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) & + ((1U << DataSizeBits) - 1)); + LF.RefKind = + (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1)); + return LF; + } + }; + + /// Header layout: + /// - 1-byte: LayoutFlags + /// - 1-byte: 1B size field + /// - {0,2}-bytes: 2B size field + struct Header { + using PackTy = uint32_t; + PackTy Packed; + + static constexpr unsigned LayoutFlagsShift = + (sizeof(PackTy) - 1) * CHAR_BIT; + }; + + struct Input { + InternalRefArrayRef Refs; + ArrayRef Data; + }; + + LayoutFlags getLayoutFlags() const { + return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift); + } + + uint64_t getDataSize() const; + void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const; + uint32_t getNumRefs() const; + void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const; + int64_t getRefsRelOffset() const; + int64_t getDataRelOffset() const; + + static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) { + return DataRelOffset + DataSize + 1; + } + uint64_t getTotalSize() const { + return getDataRelOffset() + getDataSize() + 1; + } + + /// Describe the layout of data stored and how to decode from + /// DataRecordHandle. 
+ struct Layout { + explicit Layout(const Input &I); + + LayoutFlags Flags; + uint64_t DataSize = 0; + uint32_t NumRefs = 0; + int64_t RefsRelOffset = 0; + int64_t DataRelOffset = 0; + uint64_t getTotalSize() const { + return DataRecordHandle::getTotalSize(DataRelOffset, DataSize); + } + }; + + InternalRefArrayRef getRefs() const { + assert(H && "Expected valid handle"); + auto *BeginByte = reinterpret_cast(H) + getRefsRelOffset(); + size_t Size = getNumRefs(); + if (!Size) + return InternalRefArrayRef(); + if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B) + return ArrayRef(reinterpret_cast(BeginByte), Size); + return ArrayRef(reinterpret_cast(BeginByte), Size); + } + + ArrayRef getData() const { + assert(H && "Expected valid handle"); + return ArrayRef(reinterpret_cast(H) + getDataRelOffset(), + getDataSize()); + } + + static DataRecordHandle create(function_ref Alloc, + const Input &I); + static Expected + createWithError(function_ref(size_t Size)> Alloc, + const Input &I); + static DataRecordHandle construct(char *Mem, const Input &I); + + static DataRecordHandle get(const char *Mem) { + return DataRecordHandle( + *reinterpret_cast(Mem)); + } + static Expected + getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset); + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + + DataRecordHandle() = default; + explicit DataRecordHandle(const Header &H) : H(&H) {} + +private: + static DataRecordHandle constructImpl(char *Mem, const Input &I, + const Layout &L); + const Header *H = nullptr; +}; + +/// Proxy for any on-disk object or raw data. +struct OnDiskContent { + std::optional Record; + std::optional> Bytes; +}; + +/// Data loaded inside the memory from standalone file. +class StandaloneDataInMemory { +public: + OnDiskContent getContent() const; + + StandaloneDataInMemory(std::unique_ptr Region, + TrieRecord::StorageKind SK) + : Region(std::move(Region)), SK(SK) { +#ifndef NDEBUG + bool IsStandalone = false; + switch (SK) { + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: + IsStandalone = true; + break; + default: + break; + } + assert(IsStandalone); +#endif + } + +private: + std::unique_ptr Region; + TrieRecord::StorageKind SK; +}; + +/// Container to lookup loaded standalone objects. +template class StandaloneDataMap { + static_assert(isPowerOf2_64(NumShards), "Expected power of 2"); + +public: + uintptr_t insert(ArrayRef Hash, TrieRecord::StorageKind SK, + std::unique_ptr Region); + + const StandaloneDataInMemory *lookup(ArrayRef Hash) const; + bool count(ArrayRef Hash) const { return bool(lookup(Hash)); } + +private: + struct Shard { + /// Needs to store a std::unique_ptr for a stable address identity. + DenseMap> Map; + mutable std::mutex Mutex; + }; + Shard &getShard(ArrayRef Hash) { + return const_cast( + const_cast(this)->getShard(Hash)); + } + const Shard &getShard(ArrayRef Hash) const { + static_assert(NumShards <= 256, "Expected only 8 bits of shard"); + return Shards[Hash[0] % NumShards]; + } + + Shard Shards[NumShards]; +}; + +using StandaloneDataMapTy = StandaloneDataMap<16>; + +/// A vector of internal node references. 
+class InternalRefVector { +public: + void push_back(InternalRef Ref) { + if (NeedsFull) + return FullRefs.push_back(Ref); + if (std::optional Small = InternalRef4B::tryToShrink(Ref)) + return SmallRefs.push_back(*Small); + NeedsFull = true; + assert(FullRefs.empty()); + FullRefs.reserve(SmallRefs.size() + 1); + for (InternalRef4B Small : SmallRefs) + FullRefs.push_back(Small); + FullRefs.push_back(Ref); + SmallRefs.clear(); + } + + operator InternalRefArrayRef() const { + assert(SmallRefs.empty() || FullRefs.empty()); + return NeedsFull ? InternalRefArrayRef(FullRefs) + : InternalRefArrayRef(SmallRefs); + } + +private: + bool NeedsFull = false; + SmallVector SmallRefs; + SmallVector FullRefs; +}; + +} // namespace + +Expected DataRecordHandle::createWithError( + function_ref(size_t Size)> Alloc, const Input &I) { + Layout L(I); + if (Expected Mem = Alloc(L.getTotalSize())) + return constructImpl(*Mem, I, L); + else + return Mem.takeError(); +} + +DataRecordHandle +DataRecordHandle::create(function_ref Alloc, + const Input &I) { + Layout L(I); + return constructImpl(Alloc(L.getTotalSize()), I, L); +} + +ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) { + // Store the file offset as it is. + assert(!(Offset.get() & 0x1)); + return ObjectHandle(Offset.get()); +} + +ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) { + // Store the pointer from memory with lowest bit set. + assert(!(Ptr & 0x1)); + return ObjectHandle(Ptr | 1); +} + +/// Proxy for an on-disk index record. +struct OnDiskGraphDB::IndexProxy { + FileOffset Offset; + ArrayRef Hash; + TrieRecord &Ref; +}; + +template +uintptr_t StandaloneDataMap::insert( + ArrayRef Hash, TrieRecord::StorageKind SK, + std::unique_ptr Region) { + auto &S = getShard(Hash); + std::lock_guard Lock(S.Mutex); + auto &V = S.Map[Hash.data()]; + if (!V) + V = std::make_unique(std::move(Region), SK); + return reinterpret_cast(V.get()); +} + +template +const StandaloneDataInMemory * +StandaloneDataMap::lookup(ArrayRef Hash) const { + auto &S = getShard(Hash); + std::lock_guard Lock(S.Mutex); + auto I = S.Map.find(Hash.data()); + if (I == S.Map.end()) + return nullptr; + return &*I->second; +} + +namespace { + +/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too +/// expensive to register/unregister at this rate. +/// +/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp +/// files and has a signal handler registerd that removes them all. +class TempFile { + bool Done = false; + TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {} + +public: + /// This creates a temporary file with createUniqueFile. + static Expected create(const Twine &Model); + TempFile(TempFile &&Other) { *this = std::move(Other); } + TempFile &operator=(TempFile &&Other) { + TmpName = std::move(Other.TmpName); + FD = Other.FD; + Other.Done = true; + Other.FD = -1; + return *this; + } + + // Name of the temporary file. + std::string TmpName; + + // The open file descriptor. + int FD = -1; + + // Keep this with the given name. + Error keep(const Twine &Name); + Error discard(); + + // This checks that keep or delete was called. 
+ ~TempFile() { consumeError(discard()); } +}; + +class MappedTempFile { +public: + char *data() const { return Map.data(); } + size_t size() const { return Map.size(); } + + Error discard() { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.discard(); + } + + Error keep(const Twine &Name) { + assert(Map && "Map already destroyed"); + Map.unmap(); + return Temp.keep(Name); + } + + MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map) + : Temp(std::move(Temp)), Map(std::move(Map)) {} + +private: + TempFile Temp; + sys::fs::mapped_file_region Map; +}; +} // namespace + +Error TempFile::discard() { + Done = true; + if (FD != -1) { + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + } + FD = -1; + + // Always try to close and remove. + std::error_code RemoveEC; + if (!TmpName.empty()) { + std::error_code EC = sys::fs::remove(TmpName); + if (EC) + return errorCodeToError(EC); + } + TmpName = ""; + + return Error::success(); +} + +Error TempFile::keep(const Twine &Name) { + assert(!Done); + Done = true; + // Always try to close and rename. + std::error_code RenameEC = sys::fs::rename(TmpName, Name); + + if (!RenameEC) + TmpName = ""; + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + if (std::error_code EC = sys::fs::closeFile(File)) + return errorCodeToError(EC); + FD = -1; + + return errorCodeToError(RenameEC); +} + +Expected TempFile::create(const Twine &Model) { + int FD; + SmallString<128> ResultPath; + if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath)) + return errorCodeToError(EC); + + TempFile Ret(ResultPath, FD); + return std::move(Ret); +} + +bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) { + uint64_t ExistingPacked = pack(Existing); + uint64_t NewPacked = pack(New); + if (Storage.compare_exchange_strong(ExistingPacked, NewPacked)) + return true; + Existing = unpack(ExistingPacked); + return false; +} + +DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) { + return constructImpl(Mem, I, Layout(I)); +} + +Expected +DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool, + FileOffset Offset) { + auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return HeaderData.takeError(); + + auto Record = DataRecordHandle::get(HeaderData->data()); + if (Record.getTotalSize() + Offset.get() > Pool.size()) + return createStringError( + make_error_code(std::errc::illegal_byte_sequence), + "data record span passed the end of the data pool"); + + return Record; +} + +DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I, + const Layout &L) { + char *Next = Mem + sizeof(Header); + + // Fill in Packed and set other data, then come back to construct the header. + Header::PackTy Packed = 0; + Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift; + + // Construct DataSize. 
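+  // Sizes that fit the 1B/2B encodings are packed into the header word
+  // itself; the 4B/8B encodings are written little-endian immediately after
+  // the header.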
+ switch (L.Flags.DataSize) { + case DataSizeFlags::Uses1B: + assert(I.Data.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case DataSizeFlags::Uses2B: + assert(I.Data.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Data.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case DataSizeFlags::Uses4B: + support::endian::write32le(Next, I.Data.size()); + Next += 4; + break; + case DataSizeFlags::Uses8B: + support::endian::write64le(Next, I.Data.size()); + Next += 8; + break; + } + + // Construct NumRefs. + // + // NOTE: May be writing NumRefs even if there are zero refs in order to fix + // alignment. + switch (L.Flags.NumRefs) { + case NumRefsFlags::Uses0B: + break; + case NumRefsFlags::Uses1B: + assert(I.Refs.size() <= UINT8_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 2) * CHAR_BIT); + break; + case NumRefsFlags::Uses2B: + assert(I.Refs.size() <= UINT16_MAX); + Packed |= (Header::PackTy)I.Refs.size() + << ((sizeof(Packed) - 4) * CHAR_BIT); + break; + case NumRefsFlags::Uses4B: + support::endian::write32le(Next, I.Refs.size()); + Next += 4; + break; + case NumRefsFlags::Uses8B: + support::endian::write64le(Next, I.Refs.size()); + Next += 8; + break; + } + + // Construct Refs[]. + if (!I.Refs.empty()) { + assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B()); + ArrayRef RefsBuffer = I.Refs.getBuffer(); + llvm::copy(RefsBuffer, Next); + Next += RefsBuffer.size(); + } + + // Construct Data and the trailing null. + assert(isAddrAligned(Align(8), Next)); + llvm::copy(I.Data, Next); + Next[I.Data.size()] = 0; + + // Construct the header itself and return. + Header *H = new (Mem) Header{Packed}; + DataRecordHandle Record(*H); + assert(Record.getData() == I.Data); + assert(Record.getNumRefs() == I.Refs.size()); + assert(Record.getRefs() == I.Refs); + assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize); + assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs); + assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind); + return Record; +} + +DataRecordHandle::Layout::Layout(const Input &I) { + // Start initial relative offsets right after the Header. + uint64_t RelOffset = sizeof(Header); + + // Initialize the easy stuff. + DataSize = I.Data.size(); + NumRefs = I.Refs.size(); + + // Check refs size. + Flags.RefKind = + I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef; + + // Find the smallest slot available for DataSize. + bool Has1B = true; + bool Has2B = true; + if (DataSize <= UINT8_MAX && Has1B) { + Flags.DataSize = DataSizeFlags::Uses1B; + Has1B = false; + } else if (DataSize <= UINT16_MAX && Has2B) { + Flags.DataSize = DataSizeFlags::Uses2B; + Has2B = false; + } else if (DataSize <= UINT32_MAX) { + Flags.DataSize = DataSizeFlags::Uses4B; + RelOffset += 4; + } else { + Flags.DataSize = DataSizeFlags::Uses8B; + RelOffset += 8; + } + + // Find the smallest slot available for NumRefs. Never sets NumRefs8B here. + if (!NumRefs) { + Flags.NumRefs = NumRefsFlags::Uses0B; + } else if (NumRefs <= UINT8_MAX && Has1B) { + Flags.NumRefs = NumRefsFlags::Uses1B; + Has1B = false; + } else if (NumRefs <= UINT16_MAX && Has2B) { + Flags.NumRefs = NumRefsFlags::Uses2B; + Has2B = false; + } else { + Flags.NumRefs = NumRefsFlags::Uses4B; + RelOffset += 4; + } + + // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated + // padding rules when reading and writing. This also bumps RelOffset. 
+ // + // The value for NumRefs is strictly limited to UINT32_MAX, but it can be + // stored as 8B. This means we can *always* find a size to grow. + // + // NOTE: Only call this once. + auto GrowSizeFieldsBy4B = [&]() { + assert(isAligned(Align(4), RelOffset)); + RelOffset += 4; + + assert(Flags.NumRefs != NumRefsFlags::Uses8B && + "Expected to be able to grow NumRefs8B"); + + // First try to grow DataSize. NumRefs will not (yet) be 8B, and if + // DataSize is upgraded to 8B it'll already be aligned. + // + // Failing that, grow NumRefs. + if (Flags.DataSize < DataSizeFlags::Uses4B) + Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B. + else if (Flags.DataSize < DataSizeFlags::Uses8B) + Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B. + else if (Flags.NumRefs < NumRefsFlags::Uses4B) + Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B. + else + Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B. + }; + + assert(isAligned(Align(4), RelOffset)); + if (Flags.RefKind == RefKindFlags::InternalRef) { + // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this + // without padding. + if (!isAligned(Align(8), RelOffset)) + GrowSizeFieldsBy4B(); + + assert(isAligned(Align(8), RelOffset)); + RefsRelOffset = RelOffset; + RelOffset += 8 * NumRefs; + } else { + // The array of 4B refs doesn't need 8B alignment, but the data will need + // to be 8B-aligned. Detect this now, and, if necessary, shift everything + // by 4B by growing one of the sizes. + // If we remove the need for 8B-alignment for data there is <1% savings in + // disk storage for a clang build using MCCAS but the 8B-alignment may be + // useful in the future so keep it for now. + uint64_t RefListSize = 4 * NumRefs; + if (!isAligned(Align(8), RelOffset + RefListSize)) + GrowSizeFieldsBy4B(); + RefsRelOffset = RelOffset; + RelOffset += RefListSize; + } + + assert(isAligned(Align(8), RelOffset)); + DataRelOffset = RelOffset; +} + +uint64_t DataRecordHandle::getDataSize() const { + int64_t RelOffset = sizeof(Header); + auto *DataSizePtr = reinterpret_cast(H) + RelOffset; + switch (getLayoutFlags().DataSize) { + case DataSizeFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case DataSizeFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case DataSizeFlags::Uses4B: + return support::endian::read32le(DataSizePtr); + case DataSizeFlags::Uses8B: + return support::endian::read64le(DataSizePtr); + } +} + +void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.DataSize >= DataSizeFlags::Uses4B) + RelOffset += 4; + if (LF.DataSize >= DataSizeFlags::Uses8B) + RelOffset += 4; +} + +uint32_t DataRecordHandle::getNumRefs() const { + LayoutFlags LF = getLayoutFlags(); + int64_t RelOffset = sizeof(Header); + skipDataSize(LF, RelOffset); + auto *NumRefsPtr = reinterpret_cast(H) + RelOffset; + switch (LF.NumRefs) { + case NumRefsFlags::Uses0B: + return 0; + case NumRefsFlags::Uses1B: + return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX; + case NumRefsFlags::Uses2B: + return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) & + UINT16_MAX; + case NumRefsFlags::Uses4B: + return support::endian::read32le(NumRefsPtr); + case NumRefsFlags::Uses8B: + return support::endian::read64le(NumRefsPtr); + } +} + +void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const { + if (LF.NumRefs >= NumRefsFlags::Uses4B) + RelOffset += 
4;
+  if (LF.NumRefs >= NumRefsFlags::Uses8B)
+    RelOffset += 4;
+}
+
+int64_t DataRecordHandle::getRefsRelOffset() const {
+  LayoutFlags LF = getLayoutFlags();
+  int64_t RelOffset = sizeof(Header);
+  skipDataSize(LF, RelOffset);
+  skipNumRefs(LF, RelOffset);
+  return RelOffset;
+}
+
+int64_t DataRecordHandle::getDataRelOffset() const {
+  LayoutFlags LF = getLayoutFlags();
+  int64_t RelOffset = sizeof(Header);
+  skipDataSize(LF, RelOffset);
+  skipNumRefs(LF, RelOffset);
+  uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
+  RelOffset += RefSize * getNumRefs();
+  return RelOffset;
+}
+
+Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+  return Index.validate([&](FileOffset Offset,
+                            OnDiskTrieRawHashMap::ConstValueProxy Record)
+                            -> Error {
+    auto formatError = [&](Twine Msg) {
+      return createStringError(
+          llvm::errc::illegal_byte_sequence,
+          "bad record at 0x" +
+              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+              Msg.str());
+    };
+
+    if (Record.Data.size() != sizeof(TrieRecord))
+      return formatError("wrong data record size");
+    if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
+      return formatError("wrong data record alignment");
+
+    auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
+    TrieRecord::Data D = R->load();
+    std::unique_ptr<MemoryBuffer> FileBuffer;
+    if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
+        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
+        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
+        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
+        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
+      return formatError("invalid record kind value");
+
+    auto Ref = InternalRef::getFromOffset(Offset);
+    auto I = getIndexProxyFromRef(Ref);
+    if (!I)
+      return I.takeError();
+
+    switch (D.SK) {
+    case TrieRecord::StorageKind::Unknown:
+      // This could be an abandoned entry due to a termination before updating
+      // the record. It can be reused by a later insertion, so just skip this
+      // entry for now.
+      return Error::success();
+    case TrieRecord::StorageKind::DataPool:
+      // Check that the offset is a positive value, and large enough to hold
+      // the header for the data record.
+      if (D.Offset.get() <= 0 ||
+          (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >=
+              DataPool.size())
+        return formatError("datapool record out of bound");
+      break;
+    case TrieRecord::StorageKind::Standalone:
+    case TrieRecord::StorageKind::StandaloneLeaf:
+    case TrieRecord::StorageKind::StandaloneLeaf0:
+      SmallString<256> Path;
+      getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
+      // If we need to validate the content of the file later, load the buffer
+      // here. Otherwise, just check the existence of the file.
+ if (Deep) { + auto File = MemoryBuffer::getFile(Path, /*IsText=*/false, + /*RequiresNullTerminator=*/false); + if (!File || !*File) + return formatError("record file \'" + Path + "\' does not exist"); + + FileBuffer = std::move(*File); + } else if (!llvm::sys::fs::exists(Path)) + return formatError("record file \'" + Path + "\' does not exist"); + } + + if (!Deep) + return Error::success(); + + auto dataError = [&](Twine Msg) { + return createStringError(llvm::errc::illegal_byte_sequence, + "bad data for digest \'" + toHex(I->Hash) + + "\': " + Msg.str()); + }; + SmallVector> Refs; + ArrayRef StoredData; + + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + llvm_unreachable("already handled"); + case TrieRecord::StorageKind::DataPool: { + auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset); + if (!DataRecord) + return dataError(toString(DataRecord.takeError())); + + for (auto InternRef : DataRecord->getRefs()) { + auto Index = getIndexProxyFromRef(InternRef); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); + } + StoredData = DataRecord->getData(); + break; + } + case TrieRecord::StorageKind::Standalone: { + if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header)) + return dataError("data record is not big enough to read the header"); + auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart()); + if (DataRecord.getTotalSize() < FileBuffer->getBufferSize()) + return dataError( + "data record span passed the end of the standalone file"); + for (auto InternRef : DataRecord.getRefs()) { + auto Index = getIndexProxyFromRef(InternRef); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); + } + StoredData = DataRecord.getData(); + break; + } + case TrieRecord::StorageKind::StandaloneLeaf: + case TrieRecord::StorageKind::StandaloneLeaf0: { + StoredData = arrayRefFromStringRef(FileBuffer->getBuffer()); + if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) { + if (!FileBuffer->getBuffer().ends_with('\0')) + return dataError("standalone file is not zero terminated"); + StoredData = StoredData.drop_back(1); + } + break; + } + } + + SmallVector ComputedHash; + Hasher(Refs, StoredData, ComputedHash); + if (I->Hash != ArrayRef(ComputedHash)) + return dataError("hash mismatch, got \'" + toHex(ComputedHash) + + "\' instead"); + + return Error::success(); + }); +} + +void OnDiskGraphDB::print(raw_ostream &OS) const { + OS << "on-disk-root-path: " << RootPath << "\n"; + + struct PoolInfo { + uint64_t Offset; + }; + SmallVector Pool; + + OS << "\n"; + OS << "index:\n"; + Index.print(OS, [&](ArrayRef Data) { + assert(Data.size() == sizeof(TrieRecord)); + assert(isAligned(Align::Of(), Data.size())); + auto *R = reinterpret_cast(Data.data()); + TrieRecord::Data D = R->load(); + OS << " SK="; + switch (D.SK) { + case TrieRecord::StorageKind::Unknown: + OS << "unknown "; + break; + case TrieRecord::StorageKind::DataPool: + OS << "datapool "; + Pool.push_back({D.Offset.get()}); + break; + case TrieRecord::StorageKind::Standalone: + OS << "standalone-data "; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + OS << "standalone-leaf "; + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + OS << "standalone-leaf+0"; + break; + } + OS << " Offset=" << (void *)D.Offset.get(); + }); + if (Pool.empty()) + return; + + OS << "\n"; + OS << "pool:\n"; + llvm::sort( + Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; }); + for (PoolInfo PI : Pool) { + OS << "- addr=" << (void *)PI.Offset << " "; 
+ auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset)); + if (!D) { + OS << "error: " << toString(D.takeError()); + return; + } + + OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize() + << " size=" << D->getTotalSize() + << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n"; + } +} + +Expected +OnDiskGraphDB::indexHash(ArrayRef Hash) { + auto P = Index.insertLazy( + Hash, [](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == sizeof(TrieRecord)); + assert( + isAddrAligned(Align::Of(), TentativeValue.Data.data())); + new (TentativeValue.Data.data()) TrieRecord(); + }); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + + assert(*P && "Expected insertion"); + return getIndexProxyFromPointer(*P); +} + +OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer( + OnDiskTrieRawHashMap::ConstOnDiskPtr P) const { + assert(P); + assert(P.getOffset()); + return IndexProxy{P.getOffset(), P->Hash, + *const_cast( + reinterpret_cast(P->Data.data()))}; +} + +Expected OnDiskGraphDB::getReference(ArrayRef Hash) { + auto I = indexHash(Hash); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + return getExternalReference(*I); +} + +ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) { + return getExternalReference(makeInternalRef(I.Offset)); +} + +std::optional +OnDiskGraphDB::getExistingReference(ArrayRef Digest) { + auto tryUpstream = + [&](std::optional I) -> std::optional { + if (!UpstreamDB) + return std::nullopt; + std::optional UpstreamID = + UpstreamDB->getExistingReference(Digest); + if (LLVM_UNLIKELY(!UpstreamID)) + return std::nullopt; + auto Ref = expectedToOptional(indexHash(Digest)); + if (!Ref) + return std::nullopt; + if (!I) + I.emplace(*Ref); + return getExternalReference(*I); + }; + + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest); + if (!P) + return tryUpstream(std::nullopt); + IndexProxy I = getIndexProxyFromPointer(P); + TrieRecord::Data Obj = I.Ref.load(); + if (Obj.SK == TrieRecord::StorageKind::Unknown) + return tryUpstream(I); + return getExternalReference(makeInternalRef(I.Offset)); +} + +Expected +OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const { + auto P = Index.recoverFromFileOffset(Ref.getFileOffset()); + if (LLVM_UNLIKELY(!P)) + return P.takeError(); + return getIndexProxyFromPointer(*P); +} + +Expected> OnDiskGraphDB::getDigest(InternalRef Ref) const { + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + return I->Hash; +} + +ArrayRef OnDiskGraphDB::getDigest(const IndexProxy &I) const { + return I.Hash; +} + +static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, + ObjectHandle OH) { + // Decode ObjectHandle to locate the stored content. 
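+  // The low bit of the handle is a tag: set means a pointer to in-memory
+  // StandaloneDataInMemory, clear means a FileOffset into the data pool (see
+  // ObjectHandle::fromMemory and ObjectHandle::fromFileOffset above).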
+ uint64_t Data = OH.getOpaqueData(); + if (Data & 1) { + const auto *SDIM = + reinterpret_cast(Data & (-1ULL << 1)); + return SDIM->getContent(); + } + + auto DataHandle = + cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data))); + assert(DataHandle.getData().end()[0] == 0 && "Null termination"); + return OnDiskContent{DataHandle, std::nullopt}; +} + +ArrayRef OnDiskGraphDB::getObjectData(ObjectHandle Node) const { + OnDiskContent Content = getContentFromHandle(DataPool, Node); + if (Content.Bytes) + return *Content.Bytes; + assert(Content.Record && "Expected record or bytes"); + return Content.Record->getData(); +} + +InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const { + if (std::optional Record = + getContentFromHandle(DataPool, Node).Record) + return Record->getRefs(); + return std::nullopt; +} + +Expected> +OnDiskGraphDB::load(ObjectID ExternalRef) { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + TrieRecord::Data Object = I->Ref.load(); + + if (Object.SK == TrieRecord::StorageKind::Unknown) { + if (!UpstreamDB) + return std::nullopt; + return faultInFromUpstream(ExternalRef); + } + + if (Object.SK == TrieRecord::StorageKind::DataPool) + return ObjectHandle::fromFileOffset(Object.Offset); + + // Only TrieRecord::StorageKind::Standalone (and variants) need to be + // explicitly loaded. + // + // There's corruption if standalone objects have offsets, or if we get here + // for something that isn't standalone. + if (Object.Offset) + return createCorruptObjectError(getDigest(*I)); + switch (Object.SK) { + case TrieRecord::StorageKind::Unknown: + case TrieRecord::StorageKind::DataPool: + llvm_unreachable("unexpected storage kind"); + case TrieRecord::StorageKind::Standalone: + case TrieRecord::StorageKind::StandaloneLeaf0: + case TrieRecord::StorageKind::StandaloneLeaf: + break; + } + + // Load it from disk. + // + // Note: Creation logic guarantees that data that needs null-termination is + // suitably 0-padded. Requiring null-termination here would be too expensive + // for extremely large objects that happen to be page-aligned. 
+ SmallString<256> Path; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path); + + auto File = sys::fs::openNativeFileForRead(Path); + if (!File) + return createFileError(Path, File.takeError()); + + auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); }); + + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(*File, Status)) + return createCorruptObjectError(getDigest(*I)); + + std::error_code EC; + auto Region = std::make_unique( + *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC); + if (EC) + return createCorruptObjectError(getDigest(*I)); + + return ObjectHandle::fromMemory( + static_cast(StandaloneData) + ->insert(I->Hash, Object.SK, std::move(Region))); +} + +Expected OnDiskGraphDB::isMaterialized(ObjectID Ref) { + auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true); + if (!Presence) + return Presence.takeError(); + + switch (*Presence) { + case ObjectPresence::Missing: + return false; + case ObjectPresence::InPrimaryDB: + return true; + case ObjectPresence::OnlyInUpstreamDB: + if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult) + return FaultInResult.takeError(); + return true; + } +} + +Expected +OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, + bool CheckUpstream) const { + InternalRef Ref = getInternalRef(ExternalRef); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + + TrieRecord::Data Object = I->Ref.load(); + if (Object.SK != TrieRecord::StorageKind::Unknown) + return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) + return ObjectPresence::Missing; + std::optional UpstreamID = + UpstreamDB->getExistingReference(getDigest(*I)); + return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB + : ObjectPresence::Missing; +} + +InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { + return InternalRef::getFromOffset(IndexOffset); +} + +void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I, + SmallVectorImpl &Path) const { + Path.assign(RootPath.begin(), RootPath.end()); + sys::path::append(Path, + Prefix + Twine(I.Offset.get()) + "." 
+ CASFormatVersion); +} + +OnDiskContent StandaloneDataInMemory::getContent() const { + bool Leaf0 = false; + bool Leaf = false; + switch (SK) { + default: + llvm_unreachable("Storage kind must be standalone"); + case TrieRecord::StorageKind::Standalone: + break; + case TrieRecord::StorageKind::StandaloneLeaf0: + Leaf = Leaf0 = true; + break; + case TrieRecord::StorageKind::StandaloneLeaf: + Leaf = true; + break; + } + + if (Leaf) { + StringRef Data(Region->data(), Region->size()); + assert(Data.drop_back(Leaf0).end()[0] == 0 && + "Standalone node data missing null termination"); + return OnDiskContent{std::nullopt, + arrayRefFromStringRef(Data.drop_back(Leaf0))}; + } + + DataRecordHandle Record = DataRecordHandle::get(Region->data()); + assert(Record.getData().end()[0] == 0 && + "Standalone object record missing null termination for data"); + return OnDiskContent{Record, std::nullopt}; +} + +static Expected createTempFile(StringRef FinalPath, + uint64_t Size) { + assert(Size && "Unexpected request for an empty temp file"); + Expected File = TempFile::create(FinalPath + ".%%%%%%"); + if (!File) + return File.takeError(); + + if (Error E = preallocateFileTail(File->FD, 0, Size).takeError()) + return createFileError(File->TmpName, std::move(E)); + + if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size)) + return createFileError(File->TmpName, EC); + + std::error_code EC; + sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD), + sys::fs::mapped_file_region::readwrite, Size, + 0, EC); + if (EC) + return createFileError(File->TmpName, EC); + return MappedTempFile(std::move(*File), std::move(Map)); +} + +static size_t getPageSize() { + static int PageSize = sys::Process::getPageSizeEstimate(); + return PageSize; +} + +Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { + assert(Data.size() > TrieRecord::MaxEmbeddedSize && + "Expected a bigger file for external content..."); + + bool Leaf0 = isAligned(Align(getPageSize()), Data.size()); + TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0 + : TrieRecord::StorageKind::StandaloneLeaf; + + SmallString<256> Path; + int64_t FileSize = Data.size() + Leaf0; + getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path); + + // Write the file. Don't reuse this mapped_file_region, which is read/write. + // Let load() pull up one that's read-only. + Expected File = createTempFile(Path, FileSize); + if (!File) + return File.takeError(); + assert(File->size() == (uint64_t)FileSize); + llvm::copy(Data, File->data()); + if (Leaf0) + File->data()[Data.size()] = 0; + assert(File->data()[Data.size()] == 0); + if (Error E = File->keep(Path)) + return E; + + // Store the object reference. + TrieRecord::Data Existing; + { + TrieRecord::Data Leaf{SK, FileOffset()}; + if (I.Ref.compare_exchange_strong(Existing, Leaf)) { + recordStandaloneSizeIncrease(FileSize); + return Error::success(); + } + } + + // If there was a race, confirm that the new value has valid storage. + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(I)); + + return Error::success(); +} + +Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, + ArrayRef Data) { + auto I = getIndexProxyFromRef(getInternalRef(ID)); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); + + // Early return in case the node exists. 
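+  // (Its storage kind is already non-Unknown if any thread or earlier run
+  // stored it.)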
+ { + TrieRecord::Data Existing = I->Ref.load(); + if (Existing.SK != TrieRecord::StorageKind::Unknown) + return Error::success(); + } + + // Big leaf nodes. + if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize) + return createStandaloneLeaf(*I, Data); + + // TODO: Check whether it's worth checking the index for an already existing + // object (like storeTreeImpl() does) before building up the + // InternalRefVector. + InternalRefVector InternalRefs; + for (ObjectID Ref : Refs) + InternalRefs.push_back(getInternalRef(Ref)); + + // Create the object. + + DataRecordHandle::Input Input{InternalRefs, Data}; + + // Compute the storage kind, allocate it, and create the record. + TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown; + FileOffset PoolOffset; + SmallString<256> Path; + std::optional File; + std::optional FileSize; + auto AllocStandaloneFile = [&](size_t Size) -> Expected { + getStandalonePath(TrieRecord::getStandaloneFilePrefix( + TrieRecord::StorageKind::Standalone), + *I, Path); + if (Error E = createTempFile(Path, Size).moveInto(File)) + return std::move(E); + assert(File->size() == Size); + FileSize = Size; + SK = TrieRecord::StorageKind::Standalone; + return File->data(); + }; + auto Alloc = [&](size_t Size) -> Expected { + if (Size <= TrieRecord::MaxEmbeddedSize) { + SK = TrieRecord::StorageKind::DataPool; + auto P = DataPool.allocate(Size); + if (LLVM_UNLIKELY(!P)) { + char *NewAlloc = nullptr; + auto NewE = handleErrors( + P.takeError(), [&](std::unique_ptr E) -> Error { + if (E->convertToErrorCode() == std::errc::not_enough_memory) + return AllocStandaloneFile(Size).moveInto(NewAlloc); + return Error(std::move(E)); + }); + if (!NewE) + return NewAlloc; + return std::move(NewE); + } + PoolOffset = P->getOffset(); + LLVM_DEBUG({ + dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get() + << " size=" << Size + << " end=" << (void *)(PoolOffset.get() + Size) << "\n"; + }); + return (*P)->data(); + } + return AllocStandaloneFile(Size); + }; + + DataRecordHandle Record; + if (Error E = + DataRecordHandle::createWithError(Alloc, Input).moveInto(Record)) + return E; + assert(Record.getData().end()[0] == 0 && "Expected null-termination"); + assert(Record.getData() == Input.Data && "Expected initialization"); + assert(SK != TrieRecord::StorageKind::Unknown); + assert(bool(File) != bool(PoolOffset) && + "Expected either a mapped file or a pooled offset"); + + // Check for a race before calling MappedTempFile::keep(). + // + // Then decide what to do with the file. Better to discard than overwrite if + // another thread/process has already added this. + TrieRecord::Data Existing = I->Ref.load(); + { + TrieRecord::Data NewObject{SK, PoolOffset}; + if (File) { + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + // Keep the file! + if (Error E = File->keep(Path)) + return E; + } else { + File.reset(); + } + } + + // If we didn't already see a racing/existing write, then try storing the + // new object. If that races, confirm that the new value has valid storage. + // + // TODO: Find a way to reuse the storage from the new-but-abandoned record + // handle. + if (Existing.SK == TrieRecord::StorageKind::Unknown) { + if (I->Ref.compare_exchange_strong(Existing, NewObject)) { + if (FileSize) + recordStandaloneSizeIncrease(*FileSize); + return Error::success(); + } + } + } + + if (Existing.SK == TrieRecord::StorageKind::Unknown) + return createCorruptObjectError(getDigest(*I)); + + // Load existing object. 
+ return Error::success(); +} + +void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) { + standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed); +} + +std::atomic &OnDiskGraphDB::standaloneStorageSize() const { + MutableArrayRef UserHeader = DataPool.getUserHeader(); + assert(UserHeader.size() == sizeof(std::atomic)); + assert(isAddrAligned(Align(8), UserHeader.data())); + return *reinterpret_cast *>(UserHeader.data()); +} + +uint64_t OnDiskGraphDB::getStandaloneStorageSize() const { + return standaloneStorageSize().load(std::memory_order_relaxed); +} + +size_t OnDiskGraphDB::getStorageSize() const { + return Index.size() + DataPool.size() + getStandaloneStorageSize(); +} + +unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { + unsigned IndexPercent = Index.size() * 100ULL / Index.capacity(); + unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity(); + return std::max(IndexPercent, DataPercent); +} + +Expected> OnDiskGraphDB::open( + StringRef AbsPath, StringRef HashName, unsigned HashByteSize, + std::unique_ptr UpstreamDB, FaultInPolicy Policy) { + if (std::error_code EC = sys::fs::create_directories(AbsPath)) + return createFileError(AbsPath, EC); + + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxIndexSize = 12 * GB; + uint64_t MaxDataPoolSize = 24 * GB; + + if (useSmallMappingSize(AbsPath)) { + MaxIndexSize = 1 * GB; + MaxDataPoolSize = 2 * GB; + } + + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxIndexSize = MaxDataPoolSize = **CustomSize; + + SmallString<256> IndexPath(AbsPath); + sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion); + std::optional Index; + if (Error E = OnDiskTrieRawHashMap::create( + IndexPath, IndexTableName + "[" + HashName + "]", + HashByteSize * CHAR_BIT, + /*DataSize=*/sizeof(TrieRecord), MaxIndexSize, + /*MinFileSize=*/MB) + .moveInto(Index)) + return std::move(E); + + uint32_t UserHeaderSize = sizeof(std::atomic); + + SmallString<256> DataPoolPath(AbsPath); + sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion); + std::optional DataPool; + StringRef PolicyName = + Policy == FaultInPolicy::SingleNode ? "single" : "full"; + if (Error E = OnDiskDataAllocator::create( + DataPoolPath, + DataPoolTableName + "[" + HashName + "]" + PolicyName, + MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize, + [](void *UserHeaderPtr) { + new (UserHeaderPtr) std::atomic(0); + }) + .moveInto(DataPool)) + return std::move(E); + if (DataPool->getUserHeader().size() != UserHeaderSize) + return createStringError(llvm::errc::argument_out_of_domain, + "unexpected user header in '" + DataPoolPath + + "'"); + + return std::unique_ptr( + new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), + std::move(UpstreamDB), Policy)); +} + +OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, + std::unique_ptr UpstreamDB, + FaultInPolicy Policy) + : Index(std::move(Index)), DataPool(std::move(DataPool)), + RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), + FIPolicy(Policy) { + /// Lifetime for "big" objects not in DataPool. + /// + /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something + /// simpler on the assumption there won't be much contention since most data + /// is not big. 
If there is contention, and we've already fixed ObjectProxy + /// object handles to be cheap enough to use consistently, the fix might be + /// to use better use of them rather than optimizing this map. + /// + /// FIXME: Figure out the right number of shards, if any. + StandaloneData = new StandaloneDataMapTy(); +} + +OnDiskGraphDB::~OnDiskGraphDB() { + delete static_cast(StandaloneData); +} + +Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, + ObjectHandle UpstreamNode) { + // Copies the full CAS tree from upstream. Uses depth-first copying to protect + // against the process dying during importing and leaving the database with an + // incomplete tree. Note that if the upstream has missing nodes then the tree + // will be copied with missing nodes as well, it won't be considered an error. + + struct UpstreamCursor { + ObjectHandle Node; + size_t RefsCount; + object_refs_iterator RefI; + object_refs_iterator RefE; + }; + /// Keeps track of the state of visitation for current node and all of its + /// parents. + SmallVector CursorStack; + /// Keeps track of the currently visited nodes as they are imported into + /// primary database, from current node and its parents. When a node is + /// entered for visitation it appends its own ID, then appends referenced IDs + /// as they get imported. When a node is fully imported it removes the + /// referenced IDs from the bottom of the stack which leaves its own ID at the + /// bottom, adding to the list of referenced IDs for the parent node. + SmallVector PrimaryNodesStack; + + auto enqueueNode = [&](ObjectID PrimaryID, std::optional Node) { + PrimaryNodesStack.push_back(PrimaryID); + if (!Node) + return; + auto Refs = UpstreamDB->getObjectRefs(*Node); + CursorStack.push_back({*Node, + (size_t)std::distance(Refs.begin(), Refs.end()), + Refs.begin(), Refs.end()}); + }; + + enqueueNode(PrimaryID, UpstreamNode); + + while (!CursorStack.empty()) { + UpstreamCursor &Cur = CursorStack.back(); + if (Cur.RefI == Cur.RefE) { + // Copy the node data into the primary store. + // FIXME: Use hard-link or cloning if the file-system supports it and data + // is stored into a separate file. + + // The bottom of \p PrimaryNodesStack contains the primary ID for the + // current node plus the list of imported referenced IDs. + assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1); + ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1); + auto PrimaryRefs = ArrayRef(PrimaryNodesStack) + .slice(PrimaryNodesStack.size() - Cur.RefsCount); + auto Data = UpstreamDB->getObjectData(Cur.Node); + if (Error E = store(PrimaryID, PrimaryRefs, Data)) + return E; + // Remove the current node and its IDs from the stack. + PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount); + CursorStack.pop_back(); + continue; + } + + ObjectID UpstreamID = *(Cur.RefI++); + auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID)); + if (LLVM_UNLIKELY(!PrimaryID)) + return PrimaryID.takeError(); + if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) { + // This \p ObjectID already exists in the primary. Either it was imported + // via \p importFullTree or the client created it, in which case the + // client takes responsibility for how it was formed. 
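+      // Passing std::nullopt records the ID without descending into its
+      // referenced nodes.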
+      enqueueNode(*PrimaryID, std::nullopt);
+      continue;
+    }
+    Expected<std::optional<ObjectHandle>> UpstreamNode =
+        UpstreamDB->load(UpstreamID);
+    if (!UpstreamNode)
+      return UpstreamNode.takeError();
+    enqueueNode(*PrimaryID, *UpstreamNode);
+  }
+
+  assert(PrimaryNodesStack.size() == 1);
+  assert(PrimaryNodesStack.front() == PrimaryID);
+  return Error::success();
+}
+
+Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
+                                      ObjectHandle UpstreamNode) {
+  // Copies only a single node; it doesn't copy the referenced nodes.
+
+  // Copy the node data into the primary store.
+  // FIXME: Use hard-link or cloning if the file-system supports it and data is
+  // stored into a separate file.
+
+  auto Data = UpstreamDB->getObjectData(UpstreamNode);
+  auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
+  SmallVector<ObjectID> Refs;
+  Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
+  for (ObjectID UpstreamRef : UpstreamRefs) {
+    auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
+    if (LLVM_UNLIKELY(!Ref))
+      return Ref.takeError();
+    Refs.push_back(*Ref);
+  }
+
+  return store(PrimaryID, Refs, Data);
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
+  assert(UpstreamDB);
+
+  auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
+  if (LLVM_UNLIKELY(!UpstreamID))
+    return UpstreamID.takeError();
+
+  Expected<std::optional<ObjectHandle>> UpstreamNode =
+      UpstreamDB->load(*UpstreamID);
+  if (!UpstreamNode)
+    return UpstreamNode.takeError();
+  if (!*UpstreamNode)
+    return std::nullopt;
+
+  if (Error E = FIPolicy == FaultInPolicy::SingleNode
+                    ? importSingleNode(PrimaryID, **UpstreamNode)
+                    : importFullTree(PrimaryID, **UpstreamNode))
+    return std::move(E);
+  return load(PrimaryID);
+}
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
new file mode 100644
index 0000000000000..21860717da3bf
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -0,0 +1,113 @@
+//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements OnDiskKeyValueDB, an on-disk key-value database.
+///
+/// The KeyValue database file is named `actions.<version>` inside the CAS
+/// directory. The database stores a mapping between a fixed-sized key and a
+/// fixed-sized value, where the size of key and value can be configured when
+/// opening the database.
+/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +static constexpr StringLiteral ActionCacheFile = "actions."; + +Expected> OnDiskKeyValueDB::put(ArrayRef Key, + ArrayRef Value) { + if (LLVM_UNLIKELY(Value.size() != ValueSize)) + return createStringError(errc::invalid_argument, + "expected value size of " + itostr(ValueSize) + + ", got: " + itostr(Value.size())); + assert(Value.size() == ValueSize); + auto ActionP = Cache.insertLazy( + Key, [&](FileOffset TentativeOffset, + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { + assert(TentativeValue.Data.size() == ValueSize); + llvm::copy(Value, TentativeValue.Data.data()); + }); + if (LLVM_UNLIKELY(!ActionP)) + return ActionP.takeError(); + return (*ActionP)->Data; +} + +Expected>> +OnDiskKeyValueDB::get(ArrayRef Key) { + // Check the result cache. + OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); + if (!ActionP) + return std::nullopt; + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; +} + +Expected> +OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, + StringRef ValueName, size_t ValueSize) { + if (std::error_code EC = sys::fs::create_directories(Path)) + return createFileError(Path, EC); + + SmallString<256> CachePath(Path); + sys::path::append(CachePath, ActionCacheFile + CASFormatVersion); + constexpr uint64_t MB = 1024ull * 1024ull; + constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; + + uint64_t MaxFileSize = GB; + auto CustomSize = getOverriddenMaxMappingSize(); + if (!CustomSize) + return CustomSize.takeError(); + if (*CustomSize) + MaxFileSize = **CustomSize; + + std::optional ActionCache; + if (Error E = OnDiskTrieRawHashMap::create( + CachePath, + "llvm.actioncache[" + HashName + "->" + ValueName + "]", + KeySize * 8, + /*DataSize=*/ValueSize, MaxFileSize, /*MinFileSize=*/MB) + .moveInto(ActionCache)) + return std::move(E); + + return std::unique_ptr( + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); +} + +Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + return Cache.validate( + [&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad cache value at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != ValueSize) + return formatError("wrong cache value size"); + if (!isAddrAligned(Align(8), Record.Data.data())) + return formatError("wrong cache value alignment"); + if (CheckValue) + return CheckValue(Offset, Record.Data); + return Error::success(); + }); +} diff --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp index 29e2db48db5c0..91d0970367ac3 100644 --- a/llvm/unittests/CAS/CASTestConfig.cpp +++ b/llvm/unittests/CAS/CASTestConfig.cpp @@ -19,3 +19,17 @@ static CASTestingEnv createInMemory(int I) { INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest, ::testing::Values(createInMemory)); + +#if LLVM_ENABLE_ONDISK_CAS +namespace llvm::cas::ondisk { +extern void setMaxMappingSize(uint64_t Size); +} // namespace llvm::cas::ondisk + +void 
setMaxOnDiskCASMappingSize() { + static std::once_flag Flag; + std::call_once( + Flag, [] { llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024); }); +} +#else +void setMaxOnDiskCASMappingSize() {} +#endif /* LLVM_ENABLE_ONDISK_CAS */ diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h index 8093a0b0864f9..c08968b95b9cc 100644 --- a/llvm/unittests/CAS/CASTestConfig.h +++ b/llvm/unittests/CAS/CASTestConfig.h @@ -18,6 +18,17 @@ struct CASTestingEnv { std::unique_ptr Cache; }; +void setMaxOnDiskCASMappingSize(); + +// Test fixture for on-disk data base tests. +class OnDiskCASTest : public ::testing::Test { +protected: + void SetUp() override { + // Use a smaller database size for testing to conserve disk space. + setMaxOnDiskCASMappingSize(); + } +}; + class CASTest : public testing::TestWithParam> { protected: diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index ee40e6c9879a1..da469f7fccb5a 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -1,3 +1,19 @@ +set(ONDISK_CAS_TEST_SOURCES + OnDiskGraphDBTest.cpp + OnDiskDataAllocatorTest.cpp + OnDiskKeyValueDBTest.cpp + OnDiskTrieRawHashMapTest.cpp + ProgramTest.cpp + ) + +set(LLVM_OPTIONAL_SOURCES + ${ONDISK_CAS_TEST_SOURCES} + ) + +if (NOT LLVM_ENABLE_ONDISK_CAS) + unset(ONDISK_CAS_TEST_SOURCES) +endif() + set(LLVM_LINK_COMPONENTS Support CAS @@ -8,9 +24,8 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp - OnDiskDataAllocatorTest.cpp - OnDiskTrieRawHashMapTest.cpp - ProgramTest.cpp + + ${ONDISK_CAS_TEST_SOURCES} ) target_link_libraries(CASTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h new file mode 100644 index 0000000000000..57c8c228867fd --- /dev/null +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Helper functions to test OnDiskCASDatabases. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/BuiltinObjectHasher.h" +#include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Testing/Support/Error.h" + +namespace llvm::unittest::cas { + +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +using HasherT = BLAKE3; +using HashType = decltype(HasherT::hash(std::declval &>())); +using ValueType = std::array; + +inline HashType digest(StringRef Data, ArrayRef> RefHashes) { + return BuiltinObjectHasher::hashObject( + RefHashes, arrayRefFromStringRef(Data)); +} + +inline ObjectID digest(OnDiskGraphDB &DB, StringRef Data, + ArrayRef Refs) { + SmallVector, 8> RefHashes; + for (ObjectID Ref : Refs) + RefHashes.push_back(DB.getDigest(Ref)); + HashType Digest = digest(Data, RefHashes); + std::optional ID; + EXPECT_THAT_ERROR(DB.getReference(Digest).moveInto(ID), Succeeded()); + return *ID; +} + +inline HashType digest(StringRef Data) { + return HasherT::hash(arrayRefFromStringRef(Data)); +} + +inline ValueType valueFromString(StringRef S) { + ValueType Val; + llvm::copy(S.substr(0, sizeof(Val)), Val.data()); + return Val; +} + +inline Expected store(OnDiskGraphDB &DB, StringRef Data, + ArrayRef Refs) { + ObjectID ID = digest(DB, Data, Refs); + if (Error E = DB.store(ID, Refs, arrayRefFromStringRef(Data))) + return std::move(E); + return ID; +} + +inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS, + unsigned Indent = 0) { + std::optional Obj; + if (Error E = DB.load(ID).moveInto(Obj)) + return E; + if (!Obj) + return Error::success(); + OS.indent(Indent) << toStringRef(DB.getObjectData(*Obj)) << '\n'; + for (ObjectID Ref : DB.getObjectRefs(*Obj)) { + if (Error E = printTree(DB, Ref, OS, Indent + 2)) + return E; + } + return Error::success(); +} + +} // namespace llvm::unittest::cas diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp new file mode 100644 index 0000000000000..58f5dcc625a09 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -0,0 +1,310 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskGraphDBTest) { + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + auto digest = [&DB](StringRef Data, ArrayRef Refs) -> ObjectID { + return ::digest(*DB, Data, Refs); + }; + + auto store = [&](StringRef Data, + ArrayRef Refs) -> Expected { + return ::store(*DB, Data, Refs); + }; + + std::optional ID1; + ASSERT_THAT_ERROR(store("hello", {}).moveInto(ID1), Succeeded()); + + std::optional Obj1; + ASSERT_THAT_ERROR(DB->load(*ID1).moveInto(Obj1), Succeeded()); + ASSERT_TRUE(Obj1.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj1)), "hello"); + + ArrayRef Digest1 = DB->getDigest(*ID1); + std::optional ID2; + ASSERT_THAT_ERROR(DB->getReference(Digest1).moveInto(ID2), Succeeded()); + EXPECT_EQ(ID1, ID2); + + ObjectID ID3 = digest("world", {}); + EXPECT_FALSE(DB->containsObject(ID3)); + std::optional Obj2; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + EXPECT_FALSE(Obj2.has_value()); + + ASSERT_THAT_ERROR(DB->store(ID3, {}, arrayRefFromStringRef("world")), + Succeeded()); + EXPECT_TRUE(DB->containsObject(ID3)); + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded()); + ASSERT_TRUE(Obj2.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj2)), "world"); + + size_t LargeDataSize = 256LL * 1024LL; // 256K. + // The precise size number is not important, we mainly check that the large + // object will be properly accounted for. + EXPECT_TRUE(DB->getStorageSize() > 10 && + DB->getStorageSize() < LargeDataSize); + + SmallString<16> Buffer; + Buffer.resize(LargeDataSize); + ASSERT_THAT_ERROR(store(Buffer, {}).moveInto(ID1), Succeeded()); + size_t StorageSize = DB->getStorageSize(); + EXPECT_TRUE(StorageSize > LargeDataSize); + + // Close & re-open the DB and check that it reports the same storage size. 
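+  // (The standalone-file size counter is persisted in the data pool's user
+  // header, so it is expected to survive the re-open.)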
+ DB.reset(); + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + EXPECT_EQ(DB->getStorageSize(), StorageSize); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + { + std::optional ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "hello", {}).moveInto(ID1), + Succeeded()); + std::optional ID2; + ASSERT_THAT_ERROR(store(*UpstreamDB, "another", {}).moveInto(ID2), + Succeeded()); + std::optional ID3; + ASSERT_THAT_ERROR(store(*UpstreamDB, "world", {*ID1, *ID2}).moveInto(ID3), + Succeeded()); + } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + + ObjectID ID1 = digest(*DB, "hello", {}); + ObjectID ID2 = digest(*DB, "another", {}); + ObjectID ID3 = digest(*DB, "world", {ID1, ID2}); + ObjectID ID4 = digest(*DB, "world", {}); + + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_TRUE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + EXPECT_FALSE(DB->containsObject(ID4)); + + EXPECT_TRUE(DB->getExistingReference(digest("hello", {})).has_value()); + EXPECT_TRUE(DB->getExistingReference(DB->getDigest(ID3)).has_value()); + EXPECT_FALSE(DB->getExistingReference(digest("world", {})).has_value()); + + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "world"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + EXPECT_EQ(Refs.begin()[0], ID1); + EXPECT_EQ(Refs.begin()[1], ID2); + } + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID4).moveInto(Obj), Succeeded()); + EXPECT_FALSE(Obj.has_value()); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. 
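+  // With FaultInPolicy::SingleNode only the nodes that were actually loaded
+  // ("hello" and "world") were imported; "another" was only referenced, so it
+  // should still be missing.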
+ ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::SingleNode) + .moveInto(DB), + Succeeded()); + ID1 = digest(*DB, "hello", {}); + ID2 = digest(*DB, "another", {}); + ID3 = digest(*DB, "world", {ID1, ID2}); + EXPECT_TRUE(DB->containsObject(ID1)); + EXPECT_FALSE(DB->containsObject(ID2)); + EXPECT_TRUE(DB->containsObject(ID3)); + { + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "hello"); + auto Refs = DB->getObjectRefs(*Obj); + EXPECT_TRUE(Refs.empty()); + } +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + HashType RootHash; + { + std::optional ID11; + ASSERT_THAT_ERROR(store(*UpstreamDB, "11", {}).moveInto(ID11), Succeeded()); + std::optional ID121; + ASSERT_THAT_ERROR(store(*UpstreamDB, "121", {}).moveInto(ID121), + Succeeded()); + std::optional ID12; + ASSERT_THAT_ERROR(store(*UpstreamDB, "12", {*ID121}).moveInto(ID12), + Succeeded()); + std::optional ID1; + ASSERT_THAT_ERROR(store(*UpstreamDB, "1", {*ID11, *ID12}).moveInto(ID1), + Succeeded()); + std::optional ID21; + ASSERT_THAT_ERROR(store(*UpstreamDB, "21", {}).moveInto(ID21), Succeeded()); + std::optional ID22; + ASSERT_THAT_ERROR(store(*UpstreamDB, "22", {}).moveInto(ID22), Succeeded()); + std::optional ID2; + ASSERT_THAT_ERROR( + store(*UpstreamDB, "2", {*ID12, *ID21, *ID22}).moveInto(ID2), + Succeeded()); + std::optional IDRoot; + ASSERT_THAT_ERROR(store(*UpstreamDB, "root", {*ID1, *ID2}).moveInto(IDRoot), + Succeeded()); + ArrayRef Digest = UpstreamDB->getDigest(*IDRoot); + ASSERT_EQ(Digest.size(), RootHash.size()); + llvm::copy(Digest, RootHash.data()); + } + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + std::move(UpstreamDB), + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + { + std::optional IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::optional Obj; + ASSERT_THAT_ERROR(DB->load(*IDRoot).moveInto(Obj), Succeeded()); + ASSERT_TRUE(Obj.has_value()); + EXPECT_EQ(toStringRef(DB->getObjectData(*Obj)), "root"); + auto Refs = DB->getObjectRefs(*Obj); + ASSERT_EQ(std::distance(Refs.begin(), Refs.end()), 2); + } + + // Re-open the primary without chaining, to verify the data were copied from + // the upstream. 
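+  // With FaultInPolicy::FullTree every node reachable from the root was
+  // imported, so the whole tree should print without an upstream attached.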
+ ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), + /*UpstreamDB=*/nullptr, + OnDiskGraphDB::FaultInPolicy::FullTree) + .moveInto(DB), + Succeeded()); + + std::optional IDRoot; + ASSERT_THAT_ERROR(DB->getReference(RootHash).moveInto(IDRoot), Succeeded()); + std::string PrintedTree; + raw_string_ostream OS(PrintedTree); + ASSERT_THAT_ERROR(printTree(*DB, *IDRoot, OS), Succeeded()); + StringRef Expected = R"(root + 1 + 11 + 12 + 121 + 2 + 12 + 121 + 21 + 22 +)"; + EXPECT_EQ(PrintedTree, Expected); +} + +TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { + auto tryFaultInPolicyConflict = [](OnDiskGraphDB::FaultInPolicy Policy1, + OnDiskGraphDB::FaultInPolicy Policy2) { + unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true); + std::unique_ptr UpstreamDB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType)) + .moveInto(UpstreamDB), + Succeeded()); + + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy1) + .moveInto(DB), + Succeeded()); + DB.reset(); + ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", + sizeof(HashType), + std::move(UpstreamDB), Policy2) + .moveInto(DB), + Failed()); + }; + // Open as 'single', then as 'full'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::SingleNode, + OnDiskGraphDB::FaultInPolicy::FullTree); + // Open as 'full', then as 'single'. + tryFaultInPolicyConflict(OnDiskGraphDB::FaultInPolicy::FullTree, + OnDiskGraphDB::FaultInPolicy::SingleNode); +} + +#if defined(EXPENSIVE_CHECKS) +TEST_F(OnDiskCASTest, OnDiskGraphDBSpaceLimit) { + setMaxOnDiskCASMappingSize(); + unittest::TempDir Temp("ondiskcas", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR( + OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB), + Succeeded()); + + std::optional ID; + std::string Data(500, '0'); + auto storeSmallObject = [&]() { + SmallVector Refs; + if (ID) + Refs.push_back(*ID); + ASSERT_THAT_ERROR(store(*DB, Data, Refs).moveInto(ID), Succeeded()); + }; + + // Insert enough small elements to overflow the data pool. + for (unsigned I = 0; I < 1024 * 256; ++I) + storeSmallObject(); + + EXPECT_GE(DB->getHardStorageLimitUtilization(), 99U); +} +#endif diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp new file mode 100644 index 0000000000000..89c03b890a488 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "CASTestConfig.h" +#include "OnDiskCommonUtils.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; +using namespace llvm::unittest::cas; + +TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) { + unittest::TempDir Temp("ondiskkv", /*Unique=*/true); + std::unique_ptr DB; + ASSERT_THAT_ERROR(OnDiskKeyValueDB::open(Temp.path(), "blake3", + sizeof(HashType), "test", + sizeof(ValueType)) + .moveInto(DB), + Succeeded()); + { + std::optional> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_FALSE(Val.has_value()); + } + + ValueType ValW = valueFromString("world"); + ArrayRef Val; + ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + ASSERT_THAT_ERROR( + DB->put(digest("hello"), valueFromString("other")).moveInto(Val), + Succeeded()); + EXPECT_EQ(Val, ArrayRef(ValW)); + + { + std::optional> Val; + ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Val), Succeeded()); + EXPECT_TRUE(Val.has_value()); + EXPECT_EQ(*Val, ArrayRef(ValW)); + } + + // Validate + { + auto ValidateFunc = [](FileOffset Offset, ArrayRef Data) -> Error { + EXPECT_EQ(Data.size(), sizeof(ValueType)); + return Error::success(); + }; + ASSERT_THAT_ERROR(DB->validate(ValidateFunc), Succeeded()); + } + + // Size + { + size_t InitSize = DB->getStorageSize(); + unsigned InitPrecent = DB->getHardStorageLimitUtilization(); + + // Insert a lot of entries. + for (unsigned I = 0; I < 1024 * 100; ++I) { + std::string Index = Twine(I).str(); + ArrayRef Val; + ASSERT_THAT_ERROR( + DB->put(digest(Index), valueFromString(Index)).moveInto(Val), + Succeeded()); + } + + EXPECT_GT(DB->getStorageSize(), InitSize); + EXPECT_GT(DB->getHardStorageLimitUtilization(), InitPrecent); + } +} From 5cd9f0f655ac2ab9da4fbd049fbcba6eb0d793b9 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Mon, 20 Oct 2025 13:20:33 -0700 Subject: [PATCH 18/38] [flang] Move parse tree tool to Parser/tools.h (#163998) Move the parse tree utility function semantics::getDesignatorNameIfDataRef to Parser/tools.h and rename it to comply with the local style. --- flang/include/flang/Parser/tools.h | 2 ++ flang/include/flang/Semantics/tools.h | 6 ------ flang/lib/Lower/Bridge.cpp | 3 ++- flang/lib/Lower/OpenACC.cpp | 8 ++++---- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 ++- flang/lib/Parser/tools.cpp | 5 +++++ flang/lib/Semantics/check-acc-structure.cpp | 7 +++++-- flang/lib/Semantics/check-omp-loop.cpp | 4 ++-- flang/lib/Semantics/check-omp-structure.cpp | 2 +- flang/lib/Semantics/resolve-directives.cpp | 6 +++--- flang/lib/Semantics/resolve-names.cpp | 6 +++--- 11 files changed, 29 insertions(+), 23 deletions(-) diff --git a/flang/include/flang/Parser/tools.h b/flang/include/flang/Parser/tools.h index a90c85625d70d..d105f03dd31d3 100644 --- a/flang/include/flang/Parser/tools.h +++ b/flang/include/flang/Parser/tools.h @@ -259,5 +259,7 @@ template std::optional GetLastSource(A &x) { // Checks whether the assignment statement has a single variable on the RHS. 
bool CheckForSingleVariableOnRHS(const AssignmentStmt &); +const Name *GetDesignatorNameIfDataRef(const Designator &); + } // namespace Fortran::parser #endif // FORTRAN_PARSER_TOOLS_H_ diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index b977fb812fb11..8a7b9867c0979 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -739,12 +739,6 @@ const DerivedTypeSpec *GetDtvArgDerivedType(const Symbol &); void WarnOnDeferredLengthCharacterScalar(SemanticsContext &, const SomeExpr *, parser::CharBlock at, const char *what); -inline const parser::Name *getDesignatorNameIfDataRef( - const parser::Designator &designator) { - const auto *dataRef{std::get_if(&designator.u)}; - return dataRef ? std::get_if(&dataRef->u) : nullptr; -} - bool CouldBeDataPointerValuedFunction(const Symbol *); template diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0595ca063f407..3b711ccbe786a 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -58,6 +58,7 @@ #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Runtime/iostat-consts.h" #include "flang/Semantics/openmp-dsa.h" #include "flang/Semantics/runtime-type-info.h" @@ -3352,7 +3353,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { &var.u)) { const Fortran::parser::Designator &designator = iDesignator->value(); if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef(designator)) { + Fortran::parser::GetDesignatorNameIfDataRef(designator)) { auto val = getSymbolAddress(*name->symbol); reduceOperands.push_back(val); } diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 69718eed1d781..b3e8b697df1e9 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -30,6 +30,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/expression.h" #include "flang/Semantics/scope.h" #include "flang/Semantics/tools.h" @@ -297,7 +298,7 @@ getSymbolFromAccObject(const Fortran::parser::AccObject &accObject) { if (const auto *designator = std::get_if(&accObject.u)) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef(*designator)) + Fortran::parser::GetDesignatorNameIfDataRef(*designator)) return *name->symbol; if (const auto *arrayElement = Fortran::parser::Unwrap( @@ -2913,7 +2914,7 @@ static Op createComputeOp( if (const auto *designator = std::get_if(&accObject.u)) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef( + Fortran::parser::GetDesignatorNameIfDataRef( *designator)) { auto cond = converter.getSymbolAddress(*name->symbol); selfCond = builder.createConvert(clauseLocation, @@ -4278,8 +4279,7 @@ static void genGlobalCtors(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::Designator &designator) { if (const auto *name = - Fortran::semantics::getDesignatorNameIfDataRef( - designator)) { + Fortran::parser::GetDesignatorNameIfDataRef(designator)) { genCtors(operandLocation, *name->symbol); } }, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index f86ee01355104..9495ea61058ca 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -33,6 +33,7 @@ 
#include "flang/Parser/characters.h" #include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/openmp-utils.h" #include "flang/Semantics/tools.h" @@ -3884,7 +3885,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, assert(object && "Expecting object as argument"); auto *designator = semantics::omp::GetDesignatorFromObj(*object); assert(designator && "Expecting desginator in argument"); - auto *name = semantics::getDesignatorNameIfDataRef(*designator); + auto *name = parser::GetDesignatorNameIfDataRef(*designator); assert(name && "Expecting dataref in designator"); critName = *name; } diff --git a/flang/lib/Parser/tools.cpp b/flang/lib/Parser/tools.cpp index 264ca520f38b8..ed6d194c17dc3 100644 --- a/flang/lib/Parser/tools.cpp +++ b/flang/lib/Parser/tools.cpp @@ -179,4 +179,9 @@ bool CheckForSingleVariableOnRHS(const AssignmentStmt &assignmentStmt) { return Unwrap(std::get(assignmentStmt.t)) != nullptr; } +const Name *GetDesignatorNameIfDataRef(const Designator &designator) { + const auto *dataRef{std::get_if(&designator.u)}; + return dataRef ? std::get_if(&dataRef->u) : nullptr; +} + } // namespace Fortran::parser diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 3cd6d6ba7689a..5e87b834edf7e 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -10,6 +10,7 @@ #include "flang/Common/enum-set.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" #include "flang/Semantics/type.h" @@ -709,7 +710,8 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name = getDesignatorNameIfDataRef(designator)) { + if (const auto *name = + parser::GetDesignatorNameIfDataRef(designator)) { if (declareSymbols.contains(&name->symbol->GetUltimate())) { if (declareSymbols[&name->symbol->GetUltimate()] == clause) { context_.Warn(common::UsageWarning::OpenAccUsage, @@ -982,7 +984,8 @@ void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) { common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name = getDesignatorNameIfDataRef(designator)) { + if (const auto *name = + parser::GetDesignatorNameIfDataRef(designator)) { if (name->symbol) { if (const auto *type{name->symbol->GetType()}) { if (type->IsNumeric(TypeCategory::Integer) && diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 331397b035cf9..aaaa2d6e78280 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -485,8 +485,8 @@ void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { common::visit( common::visitors{ [&](const parser::Designator &designator) { - if (const auto *name{semantics::getDesignatorNameIfDataRef( - designator)}) { + if (const auto *name{ + parser::GetDesignatorNameIfDataRef(designator)}) { checkReductionSymbolInScan(name); } }, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index e015e948bc701..e2f8796aeb5e7 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ 
-2617,7 +2617,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPCriticalConstruct &x) { auto getNameFromArg{[](const parser::OmpArgument &arg) { if (auto *object{parser::Unwrap(arg.u)}) { if (auto *designator{omp::GetDesignatorFromObj(*object)}) { - return getDesignatorNameIfDataRef(*designator); + return parser::GetDesignatorNameIfDataRef(*designator); } } return static_cast(nullptr); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index db061bdce18ea..3bb586c51c58f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -893,7 +893,7 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { if (name->symbol) { name->symbol->set( ompFlag.value_or(Symbol::Flag::OmpMapStorage)); @@ -1759,7 +1759,7 @@ void AccAttributeVisitor::ResolveAccObject( common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { if (auto *symbol{ResolveAcc(*name, accFlag, currScope())}) { AddToContextObjectWithDSA(*symbol, accFlag); if (dataSharingAttributeFlags.test(accFlag)) { @@ -3065,7 +3065,7 @@ void OmpAttributeVisitor::ResolveOmpDesignator( unsigned version{context_.langOptions().OpenMPVersion}; llvm::omp::Directive directive{GetContext().directive}; - const auto *name{semantics::getDesignatorNameIfDataRef(designator)}; + const auto *name{parser::GetDesignatorNameIfDataRef(designator)}; if (!name) { // Array sections to be changed to substrings as needed if (AnalyzeExpr(context_, designator)) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 699de417a629f..0af1c94502bb4 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1494,7 +1494,7 @@ bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) { common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { Symbol *prev{currScope().FindSymbol(name->source)}; if (prev != name->symbol) { name->symbol = prev; @@ -1648,7 +1648,7 @@ class OmpVisitor : public virtual DeclarationVisitor { common::visitors{ [&](const parser::Designator &designator) { if (const auto *name{ - semantics::getDesignatorNameIfDataRef(designator)}) { + parser::GetDesignatorNameIfDataRef(designator)}) { specPartState_.declareTargetNames.insert(name->source); } }, @@ -2016,7 +2016,7 @@ void OmpVisitor::ResolveCriticalName(const parser::OmpArgument &arg) { if (auto *object{parser::Unwrap(arg.u)}) { if (auto *desg{omp::GetDesignatorFromObj(*object)}) { - if (auto *name{getDesignatorNameIfDataRef(*desg)}) { + if (auto *name{parser::GetDesignatorNameIfDataRef(*desg)}) { if (auto *symbol{FindInScope(globalScope, *name)}) { if (!symbol->test(Symbol::Flag::OmpCriticalLock)) { SayWithDecl(*name, *symbol, From c3d905ec4b8c02978feeb6d21a5dadb95d2df098 Mon Sep 17 00:00:00 2001 From: Levi Zim Date: Tue, 21 Oct 2025 04:26:54 +0800 Subject: [PATCH 19/38] [bazel] Add riscv64 linux platform (#163781) This fixes a build error when building tensorflow on riscv64 linux. 
--- utils/bazel/llvm-project-overlay/llvm/config.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl index 3e9c032d5b8ce..fbd6f6bfdd6ae 100644 --- a/utils/bazel/llvm-project-overlay/llvm/config.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl @@ -105,6 +105,7 @@ llvm_config_defines = os_defines + builtin_thread_pointer + select({ "@bazel_tools//src/conditions:darwin_x86_64": native_arch_defines("X86", "x86_64-unknown-darwin"), "@bazel_tools//src/conditions:linux_aarch64": native_arch_defines("AArch64", "aarch64-unknown-linux-gnu"), "@bazel_tools//src/conditions:linux_ppc64le": native_arch_defines("PowerPC", "powerpc64le-unknown-linux-gnu"), + "@bazel_tools//src/conditions:linux_riscv64": native_arch_defines("RISCV", "riscv64-unknown-linux-gnu"), "@bazel_tools//src/conditions:linux_s390x": native_arch_defines("SystemZ", "systemz-unknown-linux_gnu"), "//conditions:default": native_arch_defines("X86", "x86_64-unknown-linux-gnu"), }) + [ From 9e9d67dc9c4788d39da3c5612565d6e603015715 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Mon, 20 Oct 2025 13:46:56 -0700 Subject: [PATCH 20/38] [debugserver][NFC] Fix unused variable warning This variable is only read from. --- lldb/tools/debugserver/source/RNBRemote.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index b2d79377f1eec..11b93710444f2 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -3205,7 +3205,6 @@ rnb_err_t RNBRemote::HandlePacket_MultiMemRead(const char *p) { packet.remove_prefix(ranges_prefix.size()); std::vector> ranges; - std::size_t total_length = 0; // Ranges should have the form: ,[,,]*; auto end_of_ranges_pos = packet.find(';'); @@ -3236,7 +3235,6 @@ rnb_err_t RNBRemote::HandlePacket_MultiMemRead(const char *p) { "MultiMemRead length is too large"); ranges.emplace_back(*maybe_addr, *maybe_length); - total_length += *maybe_length; } if (ranges.empty()) From 670fb3e7683c1fc83bcfff04bb15488617ff2a34 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 20 Oct 2025 13:50:39 -0700 Subject: [PATCH 21/38] [NFC][LLVM][CodeGen] Create header file for MIRFSDiscriminator options (#163438) Extract extern variable declaration into a header per https://discourse.llvm.org/t/rfc-cs-changes-for-standalone-variables/88581 --- .../llvm/CodeGen/MIRFSDiscriminatorOptions.h | 22 +++++++++++++++++++ llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 6 ++--- llvm/lib/CodeGen/MIRSampleProfile.cpp | 4 +--- 3 files changed, 25 insertions(+), 7 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h new file mode 100644 index 0000000000000..672a6b359333a --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminatorOptions.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Command line options for MIR Flow Sensitive discriminators. 
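+// At the moment this only declares the ImprovedFSDiscriminator flag, which is
+// defined in MIRFSDiscriminator.cpp and shared with the MIR sample profile
+// loader in MIRSampleProfile.cpp.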
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H +#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H + +#include "llvm/Support/CommandLine.h" + +namespace llvm { +extern cl::opt ImprovedFSDiscriminator; +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRFSDISCRIMINATOR_OPTIONS_H diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp index d988a2ad4e793..e37f78454cd9b 100644 --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -35,13 +36,10 @@ using namespace sampleprofutil; // TODO(xur): Remove this option and related code once we make true as the // default. -namespace llvm { -cl::opt ImprovedFSDiscriminator( +cl::opt llvm::ImprovedFSDiscriminator( "improved-fs-discriminator", cl::Hidden, cl::init(false), cl::desc("New FS discriminators encoding (incompatible with the original " "encoding)")); -} // namespace llvm - char MIRAddFSDiscriminators::ID = 0; INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 9bba50e8e6924..d44f577558619 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -62,9 +63,6 @@ static cl::opt ViewBFIAfter("fs-viewbfi-after", cl::Hidden, cl::init(false), cl::desc("View BFI after MIR loader")); -namespace llvm { -extern cl::opt ImprovedFSDiscriminator; -} char MIRProfileLoaderPass::ID = 0; INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, From cd67ca2f11f79d2ce08807682b571ca04dab0996 Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Mon, 20 Oct 2025 21:54:13 +0100 Subject: [PATCH 22/38] [lldb-dap] Send an Invalidated event on thread stack change. (#163976) When the user send `thread return ` command this changes the stack length but the UI does not update. Send stack invalidated event to the client to update the stack. 
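For illustration only (the numeric values below are invented, not taken from
the test), a client that has advertised support for the invalidated event
would then receive roughly this payload after a `thread return`:

    {"event": "invalidated", "body": {"areas": ["stacks"], "threadId": 1}}

The "areas" entry tells the client which cached state to refetch, and
"threadId" scopes the invalidation to the thread whose stack changed.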
--- .../tools/lldb-dap/invalidated-event/Makefile | 3 + .../TestDAP_invalidatedEvent.py | 55 +++++++++++++++++++ .../tools/lldb-dap/invalidated-event/main.cpp | 9 +++ .../tools/lldb-dap/invalidated-event/other.h | 10 ++++ lldb/tools/lldb-dap/DAP.cpp | 19 +++++++ lldb/tools/lldb-dap/DAP.h | 1 + lldb/tools/lldb-dap/EventHelper.cpp | 7 ++- lldb/tools/lldb-dap/EventHelper.h | 5 +- .../lldb-dap/Protocol/ProtocolEvents.cpp | 2 +- lldb/unittests/DAP/ProtocolTypesTest.cpp | 19 ++++--- 10 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 lldb/test/API/tools/lldb-dap/invalidated-event/Makefile create mode 100644 lldb/test/API/tools/lldb-dap/invalidated-event/TestDAP_invalidatedEvent.py create mode 100644 lldb/test/API/tools/lldb-dap/invalidated-event/main.cpp create mode 100644 lldb/test/API/tools/lldb-dap/invalidated-event/other.h diff --git a/lldb/test/API/tools/lldb-dap/invalidated-event/Makefile b/lldb/test/API/tools/lldb-dap/invalidated-event/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/invalidated-event/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/invalidated-event/TestDAP_invalidatedEvent.py b/lldb/test/API/tools/lldb-dap/invalidated-event/TestDAP_invalidatedEvent.py new file mode 100644 index 0000000000000..8ba56b0bb27ca --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/invalidated-event/TestDAP_invalidatedEvent.py @@ -0,0 +1,55 @@ +""" +Test lldb-dap recieves invalidated-events when the area such as +stack, variables, threads has changes but the client does not +know about it. +""" + +import lldbdap_testcase +from lldbsuite.test.lldbtest import line_number +from dap_server import Event + + +class TestDAP_invalidatedEvent(lldbdap_testcase.DAPTestCaseBase): + def verify_top_frame_name(self, frame_name: str): + all_frames = self.get_stackFrames() + self.assertGreaterEqual(len(all_frames), 1, "Expected at least one frame.") + top_frame_name = all_frames[0]["name"] + self.assertRegex(top_frame_name, f"{frame_name}.*") + + def test_invalidated_stack_area_event(self): + """ + Test an invalidated event for the stack area. + The event is sent when the command `thread return ` is sent by the user. + """ + other_source = "other.h" + return_bp_line = line_number(other_source, "// thread return breakpoint") + + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + self.set_source_breakpoints(other_source, [return_bp_line]) + self.continue_to_next_stop() + + self.verify_top_frame_name("add") + thread_id = self.dap_server.get_thread_id() + self.assertIsNotNone(thread_id, "Exepected a thread id.") + + # run thread return + thread_command = "thread return 20" + eval_resp = self.dap_server.request_evaluate(thread_command, context="repl") + self.assertTrue(eval_resp["success"], f"Failed to evaluate `{thread_command}`.") + + # wait for the invalidated stack event. + stack_event = self.dap_server.wait_for_event(["invalidated"]) + self.assertIsNotNone(stack_event, "Expected an invalidated event.") + event_body: Event = stack_event["body"] + self.assertIn("stacks", event_body["areas"]) + self.assertIn("threadId", event_body.keys()) + self.assertEqual( + thread_id, + event_body["threadId"], + f"Expected the event from thread {thread_id}.", + ) + + # confirm we are back at the main frame. 
+ self.verify_top_frame_name("main") + self.continue_to_exit() diff --git a/lldb/test/API/tools/lldb-dap/invalidated-event/main.cpp b/lldb/test/API/tools/lldb-dap/invalidated-event/main.cpp new file mode 100644 index 0000000000000..c82f9532b7517 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/invalidated-event/main.cpp @@ -0,0 +1,9 @@ +#include "other.h" + +int main() { + int first = 5; + int second = 10; + const int result = add(first, second); + + return 0; +} diff --git a/lldb/test/API/tools/lldb-dap/invalidated-event/other.h b/lldb/test/API/tools/lldb-dap/invalidated-event/other.h new file mode 100644 index 0000000000000..856db446d7b5a --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/invalidated-event/other.h @@ -0,0 +1,10 @@ +#ifndef OTHER_H +#define OTHER_H + +int add(int a, int b) { + int first = a; + int second = b; // thread return breakpoint + int result = first + second; + return result; +} +#endif // OTHER_H diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index f76656e98ca01..61226cceb6db0 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -1368,6 +1368,12 @@ void DAP::EventThread() { broadcaster.AddListener(listener, eBroadcastBitStopEventThread); debugger.GetBroadcaster().AddListener( listener, lldb::eBroadcastBitError | lldb::eBroadcastBitWarning); + + // listen for thread events. + listener.StartListeningForEventClass( + debugger, lldb::SBThread::GetBroadcasterClassName(), + lldb::SBThread::eBroadcastBitStackChanged); + bool done = false; while (!done) { if (listener.WaitForEvent(1, event)) { @@ -1503,6 +1509,9 @@ void DAP::EventThread() { SendJSON(llvm::json::Value(std::move(bp_event))); } } + + } else if (lldb::SBThread::EventIsThreadEvent(event)) { + HandleThreadEvent(event); } else if (event_mask & lldb::eBroadcastBitError || event_mask & lldb::eBroadcastBitWarning) { lldb::SBStructuredData data = @@ -1522,6 +1531,16 @@ void DAP::EventThread() { } } +void DAP::HandleThreadEvent(const lldb::SBEvent &event) { + uint32_t event_type = event.GetType(); + + if (event_type & lldb::SBThread::eBroadcastBitStackChanged) { + const lldb::SBThread evt_thread = lldb::SBThread::GetThreadFromEvent(event); + SendInvalidatedEvent(*this, {InvalidatedEventBody::eAreaStacks}, + evt_thread.GetThreadID()); + } +} + std::vector DAP::SetSourceBreakpoints( const protocol::Source &source, const std::optional> &breakpoints) { diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index a90ddf59671ee..bf2c3f146a396 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -460,6 +460,7 @@ struct DAP final : public DAPTransport::MessageHandler { /// Event threads. 
/// @{ void EventThread(); + void HandleThreadEvent(const lldb::SBEvent &event); void ProgressEventThread(); std::thread event_thread; diff --git a/lldb/tools/lldb-dap/EventHelper.cpp b/lldb/tools/lldb-dap/EventHelper.cpp index 2b9ed229405a8..3042d3293b482 100644 --- a/lldb/tools/lldb-dap/EventHelper.cpp +++ b/lldb/tools/lldb-dap/EventHelper.cpp @@ -276,11 +276,16 @@ void SendProcessExitedEvent(DAP &dap, lldb::SBProcess &process) { } void SendInvalidatedEvent( - DAP &dap, llvm::ArrayRef areas) { + DAP &dap, llvm::ArrayRef areas, + lldb::tid_t tid) { if (!dap.clientFeatures.contains(protocol::eClientFeatureInvalidatedEvent)) return; protocol::InvalidatedEventBody body; body.areas = areas; + + if (tid != LLDB_INVALID_THREAD_ID) + body.threadId = tid; + dap.Send(protocol::Event{"invalidated", std::move(body)}); } diff --git a/lldb/tools/lldb-dap/EventHelper.h b/lldb/tools/lldb-dap/EventHelper.h index 48eb5af6bd0b9..be783d032a5ae 100644 --- a/lldb/tools/lldb-dap/EventHelper.h +++ b/lldb/tools/lldb-dap/EventHelper.h @@ -11,6 +11,8 @@ #include "DAPForward.h" #include "Protocol/ProtocolEvents.h" +#include "lldb/lldb-defines.h" +#include "lldb/lldb-types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Error.h" @@ -35,7 +37,8 @@ void SendContinuedEvent(DAP &dap); void SendProcessExitedEvent(DAP &dap, lldb::SBProcess &process); void SendInvalidatedEvent( - DAP &dap, llvm::ArrayRef areas); + DAP &dap, llvm::ArrayRef areas, + lldb::tid_t tid = LLDB_INVALID_THREAD_ID); void SendMemoryEvent(DAP &dap, lldb::SBValue variable); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp index b896eca817be6..df6be06637a13 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolEvents.cpp @@ -51,7 +51,7 @@ llvm::json::Value toJSON(const InvalidatedEventBody::Area &IEBA) { llvm::json::Value toJSON(const InvalidatedEventBody &IEB) { json::Object Result{{"areas", IEB.areas}}; if (IEB.threadId) - Result.insert({"threadID", IEB.threadId}); + Result.insert({"threadId", IEB.threadId}); if (IEB.stackFrameId) Result.insert({"stackFrameId", IEB.stackFrameId}); return Result; diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp index a5ae856a185b7..8170abdd25bc6 100644 --- a/lldb/unittests/DAP/ProtocolTypesTest.cpp +++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp @@ -1079,14 +1079,17 @@ TEST(ProtocolTypesTest, InvalidatedEventBody) { body.areas = {InvalidatedEventBody::eAreaStacks, InvalidatedEventBody::eAreaThreads}; body.stackFrameId = 1; - StringRef json = R"({ - "areas": [ - "stacks", - "threads" - ], - "stackFrameId": 1 -})"; - EXPECT_EQ(json, pp(body)); + body.threadId = 20; + Expected expected = json::parse(R"({ + "areas": [ + "stacks", + "threads" + ], + "stackFrameId": 1, + "threadId": 20 + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(pp(*expected), pp(body)); } TEST(ProtocolTypesTest, MemoryEventBody) { From e8b255df1bb41411c3908b205779ba28264d5c2e Mon Sep 17 00:00:00 2001 From: Fateme Hosseini Date: Mon, 20 Oct 2025 15:59:03 -0500 Subject: [PATCH 23/38] Hexagon QFP Optimizer (#163843) Co-authored-by: Rahul Utkoor Co-authored-by: Brendon Cahoon Co-authored-by: abhikran Co-authored-by: Sumanth Gundapaneni Co-authored-by: Ikhlas Ajbar Co-authored-by: Anirudh Sundar Co-authored-by: Yashas Andaluri Co-authored-by: quic-santdas --- llvm/lib/Target/Hexagon/CMakeLists.txt | 1 + llvm/lib/Target/Hexagon/Hexagon.h | 3 + 
.../Target/Hexagon/HexagonQFPOptimizer.cpp | 334 ++++++++++++++++++ .../Target/Hexagon/HexagonTargetMachine.cpp | 2 + llvm/test/CodeGen/Hexagon/qfp-conv.ll | 35 ++ llvm/test/CodeGen/Hexagon/qfp-enabled.ll | 19 + llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir | 95 +++++ llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir | 33 ++ .../CodeGen/Hexagon/qfpopt-rem-conv-add.ll | 21 ++ llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir | 79 +++++ .../CodeGen/Hexagon/vect/qfp-zeroinit.mir | 23 ++ .../CodeGen/Hexagon/vect/unique-vreg-def.ll | 32 ++ 12 files changed, 677 insertions(+) create mode 100644 llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp create mode 100644 llvm/test/CodeGen/Hexagon/qfp-conv.ll create mode 100644 llvm/test/CodeGen/Hexagon/qfp-enabled.ll create mode 100644 llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir create mode 100644 llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir create mode 100644 llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll create mode 100644 llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir create mode 100644 llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir create mode 100644 llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index d758260a8ab5d..1a5f09642ea66 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen HexagonOptAddrMode.cpp HexagonOptimizeSZextends.cpp HexagonPeephole.cpp + HexagonQFPOptimizer.cpp HexagonRDFOpt.cpp HexagonRegisterInfo.cpp HexagonSelectionDAGInfo.cpp diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h index 109aba53b6e3e..422ab20891b94 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.h +++ b/llvm/lib/Target/Hexagon/Hexagon.h @@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &); void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &); void initializeHexagonVectorPrintPass(PassRegistry &); +void initializeHexagonQFPOptimizerPass(PassRegistry &); + Pass *createHexagonLoopIdiomPass(); Pass *createHexagonVectorLoopCarriedReuseLegacyPass(); @@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass(); FunctionPass *createHexagonVectorPrint(); FunctionPass *createHexagonVExtract(); FunctionPass *createHexagonExpandCondsets(); +FunctionPass *createHexagonQFPOptimizer(); } // end namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp new file mode 100644 index 0000000000000..479ac90b7d526 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -0,0 +1,334 @@ +//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions +// optimizer ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Basic infrastructure for optimizing intermediate conversion instructions +// generated while performing vector floating point operations. +// Currently run at the starting of the code generation for Hexagon, cleans +// up redundant conversion instructions and replaces the uses of conversion +// with appropriate machine operand. Liveness is preserved after this pass. +// +// @note: The redundant conversion instructions are not eliminated in this pass. 
+// In this pass, we only replace the uses of conversion instructions with the
+// appropriate QFP operands; removing the conversion instructions that become
+// dead as a result is left to the Dead Instruction Elimination pass.
+//
+// Brief overview of how this QFP optimizer works:
+// The pass iterates over each instruction and checks whether it belongs to the
+// Hexagon floating point HVX arithmetic instruction category (Add, Sub, Mul).
+// It then finds the unique definition for each machine operand of the
+// instruction.
+//
+// Example:
+// Let MachineInstr *MI be the HVX vadd instruction
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// In the above example, DefMI1 and DefMI2 give the unique definitions
+// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), the pass skips the current instruction and moves on to
+// the next one.
+//
+// If one of the definitions is a conversion instruction, the pass replaces
+// the arithmetic instruction with its corresponding "mix" variant.
+// In the above example, if $v1 is defined by a conversion instruction
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both definitions are conversion instructions, the instruction is
+// replaced with its qf variant.
+// In the above example, if $v1 and $v2 are defined by conversion instructions
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
+// with $v4)
+//
+// Currently, this pass does not handle the case where a definition is a PHI
+// instruction.
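+//
+// If only the second operand is defined by a conversion, the operands are
+// swapped so that the qf value ends up first, because the "mix" instructions
+// expect the qf operand in the first position.
+// In the above example, if $v2 is defined by a conversion instruction
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v4, $v1 ($v2 is replaced with $v4 and moved
+// to the first position). Since vsub is not commutative, this swap is not
+// done for subtraction.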
+// +//===----------------------------------------------------------------------===// +#include +#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +#define DEBUG_TYPE "hexagon-qfp-optimizer" + +using namespace llvm; + +cl::opt + DisableQFOptimizer("disable-qfp-opt", cl::init(false), + cl::desc("Disable optimization of Qfloat operations.")); + +namespace { +const std::map QFPInstMap{ + {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix}, + {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16}, + {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix}, + {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32}, + {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix}, + {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16}, + {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix}, + {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32}, + {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf}, + {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, + {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, + {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; +} // namespace + +namespace llvm { + +FunctionPass *createHexagonQFPOptimizer(); +void initializeHexagonQFPOptimizerPass(PassRegistry &); + +} // namespace llvm + +namespace { + +struct HexagonQFPOptimizer : public MachineFunctionPass { +public: + static char ID; + + HexagonQFPOptimizer() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + const HexagonSubtarget *HST = nullptr; + const HexagonInstrInfo *HII = nullptr; + const MachineRegisterInfo *MRI = nullptr; +}; + +char HexagonQFPOptimizer::ID = 0; +} // namespace + +INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer", + HEXAGON_QFP_OPTIMIZER, false, false) + +FunctionPass *llvm::createHexagonQFPOptimizer() { + return new HexagonQFPOptimizer(); +} + +bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + // Early exit: + // - if instruction is invalid or has too few operands (QFP ops need 2 sources + // + 1 dest), + // - or does not have a transformation mapping. 
+ if (MI->getNumOperands() < 3) + return false; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; + + unsigned Op0F = 0; + unsigned Op1F = 0; + // Get the reaching defs of MI, DefMI1 and DefMI2 + MachineInstr *DefMI1 = nullptr; + MachineInstr *DefMI2 = nullptr; + + if (MI->getOperand(1).isReg()) + DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (MI->getOperand(2).isReg()) + DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg()); + if (!DefMI1 || !DefMI2) + return false; + + MachineOperand &Res = MI->getOperand(0); + MachineInstr *Inst1 = nullptr; + MachineInstr *Inst2 = nullptr; + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); + DefMI2->dump()); + + // Get the reaching defs of DefMI + if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() && + DefMI1->getOperand(1).getReg().isVirtual()) + Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg()); + + if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() && + DefMI2->getOperand(1).getReg().isVirtual()) + Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg()); + + unsigned Def1OP = DefMI1->getOpcode(); + unsigned Def2OP = DefMI2->getOpcode(); + + MachineInstrBuilder MIB; + // Case 1: Both reaching defs of MI are qf to sf/hf conversions + if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && + Def2OP == Hexagon::V6_vconv_sf_qf32) || + (Def1OP == Hexagon::V6_vconv_hf_qf16 && + Def2OP == Hexagon::V6_vconv_hf_qf16)) { + + // If the reaching defs of DefMI are W register type, we return + if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() && + MRI->getRegClass(Inst1->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) || + (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() && + MRI->getRegClass(Inst2->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass)) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &Src1 = DefMI1->getOperand(1); + MachineOperand &Src2 = DefMI2->getOperand(1); + + Op0F = getKillRegState(Src1.isKill()); + Src1.setIsKill(false); + + Op1F = getKillRegState(Src2.isKill()); + Src2.setIsKill(false); + + if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) { + auto OuterIt = QFPInstMap.find(MI->getOpcode()); + if (OuterIt == QFPInstMap.end()) + return false; + auto InnerIt = QFPInstMap.find(OuterIt->second); + if (InnerIt == QFPInstMap.end()) + return false; + InstTy = InnerIt->second; + } + + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + + // Case 2: Left operand is conversion to sf/hf + } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && + Def2OP != Hexagon::V6_vconv_sf_qf32) || + (Def1OP == Hexagon::V6_vconv_hf_qf16 && + Def2OP != Hexagon::V6_vconv_hf_qf16)) && + !DefMI2->isPHI() && + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { + + if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + MachineOperand &Src1 = DefMI1->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + + Op0F = getKillRegState(Src1.isKill()); + Src1.setIsKill(false); + Op1F = getKillRegState(Src2.isKill()); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, 
Src2.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + + // Case 2: Left operand is conversion to sf/hf + } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && + Def2OP == Hexagon::V6_vconv_sf_qf32) || + (Def1OP != Hexagon::V6_vconv_hf_qf16 && + Def2OP == Hexagon::V6_vconv_hf_qf16)) && + !DefMI1->isPHI() && + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { + // The second operand of original instruction is converted. + // In "mix" instructions, "qf" operand is always the first operand. + + // Caveat: vsub is not commutative w.r.t operands. + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) + return false; + + if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = DefMI2->getOperand(1); + + Op1F = getKillRegState(Src2.isKill()); + Src2.setIsKill(false); + Op0F = getKillRegState(Src1.isKill()); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + + return false; +} + +bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { + + bool Changed = false; + + if (DisableQFOptimizer) + return Changed; + + HST = &MF.getSubtarget(); + if (!HST->useHVXV68Ops() || !HST->usePackets() || + skipFunction(MF.getFunction())) + return false; + HII = HST->getInstrInfo(); + MRI = &MF.getRegInfo(); + + MachineFunction::iterator MBBI = MF.begin(); + LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName() + << " Optimize intermediate conversions ===\n"); + while (MBBI != MF.end()) { + MachineBasicBlock *MBB = &*MBBI; + MachineBasicBlock::iterator MII = MBBI->instr_begin(); + while (MII != MBBI->instr_end()) { + MachineInstr *MI = &*MII; + ++MII; // As MI might be removed. 
+ + if (QFPInstMap.count(MI->getOpcode()) && + MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && + MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } + } + } + ++MBBI; + } + return Changed; +} diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index f5d8b696733ba..d9824a3154093 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() { initializeHexagonPeepholePass(PR); initializeHexagonSplitConst32AndConst64Pass(PR); initializeHexagonVectorPrintPass(PR); + initializeHexagonQFPOptimizerPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, @@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonGenInsert()); if (EnableEarlyIf) addPass(createHexagonEarlyIfConversion()); + addPass(createHexagonQFPOptimizer()); } return false; diff --git a/llvm/test/CodeGen/Hexagon/qfp-conv.ll b/llvm/test/CodeGen/Hexagon/qfp-conv.ll new file mode 100644 index 0000000000000..d2d393e1a859d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-conv.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s + +; Test that the Qfloat optimization pass doesn't crash due to an invalid +; instructions. + +; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32 + +define void @test( + <32 x i32>* %optr, + <64 x i32> %in64, + <32 x i32> %va, + <32 x i32> %vb +) local_unnamed_addr #0 { +entry: + br label %for.body + +for.body: + %optr.068 = phi <32 x i32>* [ %optr, %entry ], [ %incdec.ptr6, %for.body ] + %0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> %in64) #2 + %1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2 + %2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> %va, <32 x i32> %1) #2 + %3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> %va, <32 x i32> %vb) #2 + %4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> %vb) #2 + %5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> %va, <32 x i32> %4) #2 + store <32 x i32> %5, <32 x i32>* %optr.068, align 1 + %incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1 + br label %for.body +} + +declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1 +declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1 +declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1 diff --git a/llvm/test/CodeGen/Hexagon/qfp-enabled.ll b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll new file mode 100644 index 0000000000000..a5cc5fa43167e --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll @@ -0,0 +1,19 @@ +; Tests if the flag to disable qfp optimizer pass works or not. 
+ +; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \ +; RUN: < %s -o -| FileCheck %s --check-prefix=ENABLED +; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \ +; RUN: -disable-qfp-opt < %s -o -| FileCheck %s --check-prefix=DISABLED + +define dso_local <32 x i32> @conv1_qf32(<32 x i32> noundef %input1, <32 x i32> noundef %input2) local_unnamed_addr { +entry: +; DISABLED: [[V2:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf) +; DISABLED: [[V3:v[0-9]+]].sf = [[V2]].qf32 +; DISABLED: qf32 = vadd(v0.sf,[[V3]].sf) +; ENABLED: [[V4:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf) +; ENABLED: qf32 = vadd([[V4]].qf32,v0.sf) + %0 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %input2) + %1 = tail call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %0) + %2 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %1) + ret <32 x i32> %2 +} diff --git a/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir new file mode 100644 index 0000000000000..d8dde7d70885b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir @@ -0,0 +1,95 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +# RUN: -run-pass hexagon-qfp-optimizer -run-pass machineverifier %s -o - | FileCheck %s + +# Test that the killed RegState from DefMI operands are removed +# killed RegState should be set for MI operands +# CHECK-LABEL: name: qfpAdd +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32 killed %[[REG1]], killed %[[REG2]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG3:([0-9]+)]] +# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG4:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32 killed %[[REG3]], killed %[[REG4]] + +--- +name: qfpAdd +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:intregs = COPY $r3 + %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %7:hvxvr = V6_vL32Ub_ai %3:intregs, 0 + %8:hvxvr = V6_vconv_sf_qf32 killed %4:hvxvr + %9:hvxvr = V6_vconv_sf_qf32 killed %5:hvxvr + %10:hvxvr = V6_vadd_sf %8:hvxvr, %9:hvxvr + %11:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %12:hvxvr = V6_vconv_sf_qf32 killed %7:hvxvr + %13:hvxvr = V6_vadd_sf killed %11:hvxvr, killed %12:hvxvr +... + + +# Test that the killed RegState from DefMI operands are removed +# CHECK-LABEL: name: qfpAddMix +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}} +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}} + +--- +name: qfpAddMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr + %9:hvxvr = V6_vmpy_qf32_sf %4, %5 + %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr + %11:hvxvr = V6_vadd_sf %3:hvxvr, killed %10:hvxvr +... + + +# Test that we do generate V6_vsub_qf32_mix for the below test. 
+# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as first operand. So, V6_vsub_qf32_mix must be generated. +# CHECK-LABEL: name: qfpAddSwapMix +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}} +# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]] +# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}} + +--- +name: qfpAddSwapMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr + %8:hvxvr = V6_vadd_sf %7:hvxvr, %3:hvxvr + %9:hvxvr = V6_vmpy_qf32_sf %4, %5 + %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr + %11:hvxvr = V6_vadd_sf killed %10:hvxvr, %3:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir new file mode 100644 index 0000000000000..1d78203cf5d5a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir @@ -0,0 +1,33 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + +# CHECK: V6_vshuffvdd +# CHECK: V6_vadd_sf +# CHECK: V6_vadd_qf32_mix{{.*}}vsub_lo +# CHECK: V6_vadd_qf32_mix{{.*}}vsub_hi + +--- +name: qfp_subreg_fix +alignment: 16 +tracksRegLiveness: true + +body: | + bb.0: + %10:intregs = IMPLICIT_DEF + %9:hvxvr = V6_vL32Ub_ai %10, 0 :: (load (s1024) from `ptr undef`, align 4) + %11:intregs = A2_tfrsi 15360 + %12:hvxvr = V6_lvsplath %11 + %13:hvxwr = V6_vmpy_qf32_hf %9, %12 + %15:hvxvr = V6_vconv_sf_qf32 %13.vsub_lo + %17:hvxvr = V6_vconv_sf_qf32 %13.vsub_hi + %18:intregslow8 = A2_tfrsi -4 + %19:hvxwr = V6_vshuffvdd %17, %15, %18 + %21:hvxvr = V6_vadd_sf %19.vsub_hi, %19.vsub_hi + %22:hvxvr = V6_vconv_sf_qf32 %21 + %24:hvxvr = V6_vadd_sf %19.vsub_lo, %19.vsub_lo + %25:hvxvr = V6_vconv_sf_qf32 %24 + %26:hvxvr = V6_vadd_sf %25, %19.vsub_lo + %27:hvxvr = V6_vconv_sf_qf32 %26 + %28:hvxvr = V6_vadd_sf %22, %19.vsub_hi + %29:hvxvr = V6_vconv_sf_qf32 %28 + +... 
diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll new file mode 100644 index 0000000000000..c16370c3b907d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll @@ -0,0 +1,21 @@ +; Tests if generated vadd instruction takes in qf32 +; type as first parameter instead of a sf type without +; any conversion instruction of type sf = qf32 + +; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s + +; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]]) +; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf) +; CHECK: [[V1:v[0-9]+]].qf32 = vmpy([[V1]].sf,[[V2]].sf) +; CHECK: [[V4:v[0-9]+]].qf32 = vadd([[V0]].qf32,[[V2]].sf) +; CHECK: [[V5:v[0-9]+]].qf32 = vadd([[V1]].qf32,[[V2]].sf) + +define void @_Z19compute_ripple_geluIDF16_EviPT_PKS0_(ptr %out_ptr, <64 x float> %conv14.ripple.vectorized) #0 { +entry: + %mul16.ripple.vectorized = fmul <64 x float> %conv14.ripple.vectorized, zeroinitializer + %conv17.ripple.vectorized = fptrunc <64 x float> %mul16.ripple.vectorized to <64 x half> + store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2 + ret void +} + +attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" } diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir new file mode 100644 index 0000000000000..9a9e938f35d85 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir @@ -0,0 +1,79 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + + +# Test that the operands are swapped for Add if the second operand +# is a qf32 to sf conversion. V6_vadd_qf32_mix supports first operand +# as qf32. +# CHECK-LABEL: name: qfpAddMix +# CHECK: %[[REG:([0-9]+)]]:hvxvr = V6_vmpy_qf32_sf +# CHECK: V6_vadd_qf32_mix %[[REG]] + +--- +name: qfpAddMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr +... + + +# Test that we do not generate V6_vsub_qf32_mix for the below test. +# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as second operand. As sub is not commutative, we should not +# generate the mix instruction. +# CHECK-LABEL: name: qfpSubNoMix +# CHECK-NOT: V6_vsub_qf32_mix + +--- +name: qfpSubNoMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vsub_sf %3:hvxvr, %7:hvxvr +... + + +# Test that we do generate V6_vsub_qf32_mix for the below test. +# V6_vsub_qf32_mix only allowes qf32 as first operand. In the test qf32 +# is passed as first operand. So, V6_vsub_qf32_mix must be generated. 
+# CHECK-LABEL: name: qfpSubMix +# CHECK: V6_vsub_qf32_mix + +--- +name: qfpSubMix +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %2:intregs = COPY $r2 + %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0 + %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0 + %6:hvxvr = V6_vmpy_qf32_sf %4, %5 + %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr + %8:hvxvr = V6_vsub_sf %7:hvxvr, %3:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir new file mode 100644 index 0000000000000..f0b1d3c96bbb3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir @@ -0,0 +1,23 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s + +# CHECK-LABEL: name: qfpAdd32 +# CHECK: V6_vd0 +# CHECK-NEXT: V6_vL32Ub_ai +# CHECK-NEXT: V6_vadd_sf +# CHECK-NEXT: V6_vconv_sf_qf32 +# CHECK-NEXT: V6_vS32Ub_ai +--- +name: qfpAdd32 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1 + %0:intregs = COPY $r0 + %1:intregs = COPY $r1 + %3:hvxvr = V6_vd0 + %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0 + %5:hvxvr = V6_vadd_sf %3:hvxvr, %4:hvxvr + %6:hvxvr = V6_vconv_sf_qf32 %5:hvxvr + V6_vS32Ub_ai %1:intregs, 0, %6:hvxvr +... diff --git a/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll new file mode 100644 index 0000000000000..2d46da7a039bc --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; REQUIRES: hexagon + +; This test was asserting because getVRegDef() was called on a register with +; multiple defs. +; Checks that the test does not assert and vsub is generated. +; CHECK: vsub + +target triple = "hexagon" + +@v = common dso_local local_unnamed_addr global <32 x i32> zeroinitializer, align 128 + +; Function Attrs: nounwind +define dso_local void @hvx_twoSum(<32 x i32>* nocapture noundef writeonly %s2lo) local_unnamed_addr #0 { +entry: + %0 = load <32 x i32>, <32 x i32>* @v, align 128 + %call = tail call inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef %0) #3 + %1 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32> %call, <32 x i32> %call) + store <32 x i32> %1, <32 x i32>* @v, align 128 + store <32 x i32> %1, <32 x i32>* %s2lo, align 128 + ret void +} + +declare dso_local inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef) local_unnamed_addr #1 + +; Function Attrs: nofree nosync nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32>, <32 x i32>) #2 + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" } +attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" } +attributes #2 = { nofree nosync nounwind readnone } +attributes #3 = { nounwind } From fcb1a82811490ca581b4db484a2b67be86650a84 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 20 Oct 2025 16:59:02 -0400 Subject: [PATCH 24/38] [libc++] Fix off-by-one error in compare-benchmarks script That led us to overwrite the data of the last row with the geomean. 
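The pandas behaviour behind this, as a minimal sketch (the column names and
values here are invented; the real script fills the row with computed
geomeans):

    import pandas as pd

    table = pd.DataFrame({'benchmark': ['a', 'b'], 'time': [1.0, 2.0]})
    # Old code: index.max() is an existing label, so this overwrites the
    # last benchmark row with the geomean row.
    table.loc[table.index.max()] = ['Geomean', 1.4]
    # Fixed code: a label one past the current maximum enlarges the frame,
    # appending the geomean as a new row.
    table.loc[table.index.max() + 1] = ['Geomean', 1.4]

Assigning through .loc with an existing label writes in place, while a new
label appends, which is what the script intends for the geomean row.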
--- libcxx/utils/compare-benchmarks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks index d165c739183d4..b5bd8805df9d5 100755 --- a/libcxx/utils/compare-benchmarks +++ b/libcxx/utils/compare-benchmarks @@ -72,7 +72,7 @@ def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None) geomean_0 = statistics.geometric_mean(data[f'{metric}_0'].dropna()) geomean_1 = statistics.geometric_mean(data[f'{metric}_1'].dropna()) geomean_row = ['Geomean', geomean_0, geomean_1, (geomean_1 - geomean_0), (geomean_1 - geomean_0) / geomean_0] - table.loc[table.index.max()] = geomean_row + table.loc[table.index.max() + 1] = geomean_row return tabulate.tabulate(table.set_index('benchmark'), headers=headers, floatfmt=fmt, numalign='right') From 46ab6c6fd3e6068a1f8f2935c17c09a3046180f9 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 20 Oct 2025 14:11:22 -0700 Subject: [PATCH 25/38] [FlowSensitive] [Optional] Fix absl::in_place (#163897) The mock was not accurate, absl defines in_place[_t] as an alias to std::in_place[_t]. --- .../Models/UncheckedOptionalAccessModel.cpp | 6 +++--- .../unittests/Analysis/FlowSensitive/MockHeaders.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index bb703eff4baff..0fa333eedcfdd 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -241,9 +241,9 @@ auto nulloptTypeDecl() { auto hasNulloptType() { return hasType(nulloptTypeDecl()); } auto inPlaceClass() { - return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", - "base::in_place_t", "folly::in_place_t", - "bsl::in_place_t")); + return namedDecl(hasAnyName("std::in_place_t", "absl::in_place_t", + "base::in_place_t", "folly::in_place_t", + "bsl::in_place_t")); } auto isOptionalNulloptConstructor() { diff --git a/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp b/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp index d3dee58651396..d87554203a5b7 100644 --- a/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp +++ b/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp @@ -459,6 +459,10 @@ struct is_scalar template <> struct is_scalar : public true_type {}; +struct in_place_t {}; + +constexpr in_place_t in_place; + } // namespace std #endif // STD_TYPE_TRAITS_H @@ -511,9 +515,8 @@ using remove_reference_t = typename std::remove_reference::type; template using decay_t = typename std::decay::type; -struct in_place_t {}; - -constexpr in_place_t in_place; +using std::in_place; +using std::in_place_t; } // namespace absl #endif // ABSL_TYPE_TRAITS_H @@ -589,9 +592,6 @@ static constexpr char StdOptionalHeader[] = R"( namespace std { -struct in_place_t {}; -constexpr in_place_t in_place; - struct nullopt_t { constexpr explicit nullopt_t() {} }; From dfe48e77e6f4e0459ee530f5fea820d6c7a24aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 20 Oct 2025 11:13:55 -1000 Subject: [PATCH 26/38] [flang][cuda] Update c_loc with device variable to get host address (#164317) Bypass the declare op because it is rewritten in CUFOpConversion and will only provide the device 
address. c_loc is expected to have the host address of a device address to be used in API like `cudaMemcpyToSymbol` so we need to provide the address of op directly. --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 12 ++++++++++++ flang/test/Lower/CUDA/cuda-cloc.cuf | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 flang/test/Lower/CUDA/cuda-cloc.cuf diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ec0c802fb209c..29eedfb0ce9cd 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3516,11 +3516,23 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder, return addr; } +static void clocDeviceArgRewrite(fir::ExtendedValue arg) { + // Special case for device address in c_loc. + if (auto emboxOp = mlir::dyn_cast_or_null( + fir::getBase(arg).getDefiningOp())) + if (auto declareOp = mlir::dyn_cast_or_null( + emboxOp.getMemref().getDefiningOp())) + if (declareOp.getDataAttr() && + declareOp.getDataAttr() == cuf::DataAttribute::Device) + emboxOp.getMemrefMutable().assign(declareOp.getMemref()); +} + static fir::ExtendedValue genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type resultType, llvm::ArrayRef args, bool isFunc = false, bool isDevLoc = false) { assert(args.size() == 1); + clocDeviceArgRewrite(args[0]); mlir::Value res = fir::AllocaOp::create(builder, loc, resultType); mlir::Value resAddr; if (isDevLoc) diff --git a/flang/test/Lower/CUDA/cuda-cloc.cuf b/flang/test/Lower/CUDA/cuda-cloc.cuf new file mode 100644 index 0000000000000..87a98d47f4bfe --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-cloc.cuf @@ -0,0 +1,19 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +module symbols + integer(4), device, target :: sdev(100) +end module + +subroutine sub1 + use iso_c_binding + use symbols + print*, c_loc(sdev) +end subroutine + +! CHECK-LABEL: func.func @_QPsub1() +! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMsymbolsEsdev) : !fir.ref> +! CHECK: %[[EMBOX:.*]] = fir.embox %[[ADDR]](%{{.*}}) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[__ADDRESS:.*]] = fir.coordinate_of %{{.*}}, __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[EMBOX]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[CONV:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref>) -> i64 +! CHECK: fir.store %[[CONV]] to %[[__ADDRESS]] : !fir.ref From 803883c6622685f342b546165ddce412cb057a8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 20 Oct 2025 11:19:38 -1000 Subject: [PATCH 27/38] [flang][cuda][rt] Canonicalize block size values (#164321) Set block size x and y to 1024 if the given value is higher. Set block z to 64 if the given value is higher. --- flang-rt/lib/cuda/kernel.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/flang-rt/lib/cuda/kernel.cpp b/flang-rt/lib/cuda/kernel.cpp index e299a114ed7eb..c52d039ce1075 100644 --- a/flang-rt/lib/cuda/kernel.cpp +++ b/flang-rt/lib/cuda/kernel.cpp @@ -23,9 +23,9 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, gridDim.y = gridY; gridDim.z = gridZ; dim3 blockDim; - blockDim.x = blockX; - blockDim.y = blockY; - blockDim.z = blockZ; + blockDim.x = blockX > 1024 ? 1024 : blockX; + blockDim.y = blockY > 1024 ? 1024 : blockY; + blockDim.z = blockZ > 64 ? 
64 : blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; @@ -88,9 +88,9 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX, config.gridDim.x = gridX; config.gridDim.y = gridY; config.gridDim.z = gridZ; - config.blockDim.x = blockX; - config.blockDim.y = blockY; - config.blockDim.z = blockZ; + config.blockDim.x = blockX > 1024 ? 1024 : blockX; + config.blockDim.y = blockY > 1024 ? 1024 : blockY; + config.blockDim.z = blockZ > 64 ? 64 : blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; @@ -165,9 +165,9 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX, gridDim.y = gridY; gridDim.z = gridZ; dim3 blockDim; - blockDim.x = blockX; - blockDim.y = blockY; - blockDim.z = blockZ; + blockDim.x = blockX > 1024 ? 1024 : blockX; + blockDim.y = blockY > 1024 ? 1024 : blockY; + blockDim.z = blockZ > 64 ? 64 : blockZ; unsigned nbNegGridDim{0}; if (gridX < 0) { ++nbNegGridDim; From 35b9f204490cd0101100878effe05ef5116b3099 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 20 Oct 2025 22:31:15 +0100 Subject: [PATCH 28/38] [LV] Check for TruncInsts in canTruncateToMinimalBitwidth. TruncInst must truncate at most to their destination. Return false if MinBWs contains a destination size > the trunc result type size. Fixes https://github.com/llvm/llvm-project/issues/162688. --- .../Transforms/Vectorize/LoopVectorize.cpp | 4 ++ .../truncate-to-minimal-bitwidth-cost.ll | 51 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index febdc54e666a9..1cc91735bb22d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1011,6 +1011,10 @@ class LoopVectorizationCostModel { /// \returns True if instruction \p I can be truncated to a smaller bitwidth /// for vectorization factor \p VF. bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const { + // Truncs must truncate at most to their destination type. + if (isa_and_nonnull(I) && MinBWs.contains(I) && + I->getType()->getScalarSizeInBits() < MinBWs.lookup(I)) + return false; return VF.isVector() && MinBWs.contains(I) && !isProfitableToScalarize(I, VF) && !isScalarAfterVectorization(I, VF); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index c5319c6165f89..f4c7c6f6fba1b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -246,6 +246,57 @@ exit: ret void } +; Test for https://github.com/llvm/llvm-project/issues/162688. 
+define void @test_minbws_for_trunc(i32 %n, ptr noalias %p1, ptr noalias %p2) { +; CHECK-LABEL: define void @test_minbws_for_trunc( +; CHECK-SAME: i32 [[N:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_EXT:%.*]] = sext i16 [[IV]] to i64 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[IV_EXT]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[V1_TRUNC:%.*]] = trunc i32 [[V1]] to i16 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr [1 x [1 x i16]], ptr [[P2]], i64 [[IV_EXT]] +; CHECK-NEXT: store i16 [[V1_TRUNC]], ptr [[GEP2]], align 2 +; CHECK-NEXT: [[V1_TRUNC_I8:%.*]] = trunc i32 [[V1]] to i8 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[IV_EXT]] +; CHECK-NEXT: store i8 [[V1_TRUNC_I8]], ptr [[GEP3]], align 1 +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr [1 x i64], ptr [[P2]], i64 [[IV_EXT]] +; CHECK-NEXT: store i64 0, ptr [[GEP4]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 4 +; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = sext i16 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[IV_NEXT_EXT]], 1024 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %iv.ext = sext i16 %iv to i64 + %gep1 = getelementptr i32, ptr %p1, i64 %iv.ext + %v1 = load i32, ptr %gep1, align 4 + %v1.trunc = trunc i32 %v1 to i16 + %gep2 = getelementptr [1 x [1 x i16]], ptr %p2, i64 %iv.ext + store i16 %v1.trunc, ptr %gep2, align 2 + %v1.trunc.i8 = trunc i32 %v1 to i8 + %gep3 = getelementptr i8, ptr %p2, i64 %iv.ext + store i8 %v1.trunc.i8, ptr %gep3, align 1 + %gep4 = getelementptr [1 x i64], ptr %p2, i64 %iv.ext + store i64 0, ptr %gep4, align 8 + %iv.next = add i16 %iv, 4 + %iv.next.ext = sext i16 %iv.next to i32 + %cmp = icmp ne i32 %iv.next.ext, 1024 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + attributes #0 = { "target-features"="+64bit,+v,+zvl256b" } attributes #1 = { "target-features"="+64bit,+v" } From ca4df68aaa8243f202a643f4f172fe2f5bceaf71 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 20 Oct 2025 21:51:30 +0000 Subject: [PATCH 29/38] [msan][test] Add tests for target("aarch64.svcount") (#164315) target("aarch64.svcount") is not properly supported by MSan, and will lead to a crash: ``` fatal error: error in backend: Cannot implicitly convert a scalable size to a fixed-width size in `TypeSize::operator ScalarTy()` ``` This commit adds two test cases: a full test case for tracking any future improvements to the instrumentation (and also showing the crash), and a manually reduced test case to show the crash. 
Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll --- .../AArch64/sme-aarch64-svcount-mini.ll | 14 ++++ .../AArch64/sme-aarch64-svcount.ll | 68 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll new file mode 100644 index 0000000000000..1ddcd4b56688c --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s + +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll +; Manually minimized to show MSan leads to a compiler crash + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind { + ret target("aarch64.svcount") %arg1 +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll new file mode 100644 index 0000000000000..9caa89de63748 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s + +; XFAIL: * + +; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +; +; Test simple loads, stores and return. 
+; +define target("aarch64.svcount") @test_load(ptr %ptr) nounwind { + %res = load target("aarch64.svcount"), ptr %ptr + ret target("aarch64.svcount") %res +} + +define void @test_store(ptr %ptr, target("aarch64.svcount") %val) nounwind { + store target("aarch64.svcount") %val, ptr %ptr + ret void +} + +define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind { + %ptr = alloca target("aarch64.svcount"), align 1 + store target("aarch64.svcount") %val, ptr %ptr + %res = load target("aarch64.svcount"), ptr %ptr + ret target("aarch64.svcount") %res +} + +; +; Test passing as arguments (from perspective of callee) +; + +define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind { + ret target("aarch64.svcount") %arg1 +} + +define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) nounwind { + ret target("aarch64.svcount") %arg4 +} + +; +; Test passing as arguments (from perspective of caller) +; + +declare void @take_svcount_1(target("aarch64.svcount") %arg) +define void @test_pass_1arg(target("aarch64.svcount") %arg) nounwind { + call void @take_svcount_1(target("aarch64.svcount") %arg) + ret void +} + +declare void @take_svcount_5(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) +define void @test_pass_5args(target("aarch64.svcount") %arg) nounwind { + call void @take_svcount_5(target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg) + ret void +} + +define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i1 %cmp) sanitize_memory { + %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y + ret target("aarch64.svcount") %x.y +} + +define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) sanitize_memory { + %cmp = icmp sgt i32 %k, 42 + %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y + ret target("aarch64.svcount") %x.y +} From dc718313ecb7cb382cb3c788380e388f7ce4551c Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Mon, 20 Oct 2025 23:02:23 +0100 Subject: [PATCH 30/38] [lldb-dap] use workspaceFolder in vscode configurations. 
(#164320) `workspaceRoot` was deprecated in [september 2017 release](https://code.visualstudio.com/updates/v1_17#_workspacefolder-in-launchjson-and-tasksjson) --- lldb/tools/lldb-dap/package.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 3f0f150c0d98e..05dce285dd592 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -453,7 +453,7 @@ "cwd": { "type": "string", "description": "Program working directory.", - "default": "${workspaceRoot}" + "default": "${workspaceFolder}" }, "env": { "anyOf": [ @@ -883,10 +883,10 @@ "type": "lldb-dap", "request": "launch", "name": "Debug", - "program": "${workspaceRoot}/", + "program": "${workspaceFolder}/", "args": [], "env": [], - "cwd": "${workspaceRoot}" + "cwd": "${workspaceFolder}" } ], "configurationSnippets": [ @@ -897,10 +897,10 @@ "type": "lldb-dap", "request": "launch", "name": "${2:Launch}", - "program": "^\"\\${workspaceRoot}/${1:}\"", + "program": "^\"\\${workspaceFolder}/${1:}\"", "args": [], "env": [], - "cwd": "^\"\\${workspaceRoot}\"" + "cwd": "^\"\\${workspaceFolder}\"" } }, { From 437cad9121939344c8c12efec5fcb0929e9fb444 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Mon, 20 Oct 2025 15:05:10 -0700 Subject: [PATCH 31/38] [CIR] Upstream aggregate binary assign handling (#163877) This upstreams the implementation for handling binary assignment involving aggregate types. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 5 +- clang/include/clang/CIR/Dialect/IR/CIROps.td | 7 +- clang/include/clang/CIR/MissingFeatures.h | 3 +- clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 10 ++ clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 8 +- clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp | 123 ++++++++++++++++-- clang/lib/CIR/CodeGen/CIRGenFunction.h | 16 ++- clang/lib/CIR/CodeGen/CIRGenValue.h | 9 ++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +- clang/test/CIR/CodeGen/binassign.c | 48 ++++++- clang/test/CIR/CodeGen/dtors.cpp | 2 +- clang/test/CIR/CodeGen/struct.cpp | 2 +- 12 files changed, 211 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 3ac8987864168..3288f5b12c77e 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -311,8 +311,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } /// Create a copy with inferred length. - cir::CopyOp createCopy(mlir::Value dst, mlir::Value src) { - return cir::CopyOp::create(*this, dst.getLoc(), dst, src); + cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, + bool isVolatile = false) { + return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile); } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index e0163a4fecd5f..919c194ed4453 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2724,6 +2724,8 @@ def CIR_CopyOp : CIR_Op<"copy",[ type of `src` and `dst` must match and both must implement the `DataLayoutTypeInterface`. + The `volatile` keyword indicates that the operation is volatile. 
+ Examples: ```mlir @@ -2734,10 +2736,11 @@ def CIR_CopyOp : CIR_Op<"copy",[ let arguments = (ins Arg:$dst, - Arg:$src + Arg:$src, + UnitAttr:$is_volatile ); - let assemblyFormat = [{$src `to` $dst + let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)? attr-dict `:` qualified(type($dst)) }]; let hasVerifier = 1; diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 090cf35c2d279..01da626227512 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -177,9 +177,10 @@ struct MissingFeatures { static bool atomicInfo() { return false; } static bool atomicInfoGetAtomicPointer() { return false; } static bool atomicInfoGetAtomicAddress() { return false; } - static bool atomicUseLibCall() { return false; } static bool atomicScope() { return false; } static bool atomicSyncScopeID() { return false; } + static bool atomicTypes() { return false; } + static bool atomicUseLibCall() { return false; } // Global ctor handling static bool globalCtorLexOrder() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 4a19d91dcf4fa..5667273c00daf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -740,6 +740,16 @@ struct CallStackRestore final : EHScopeStack::Cleanup { }; } // namespace +/// Push the standard destructor for the given type as +/// at least a normal cleanup. +void CIRGenFunction::pushDestroy(QualType::DestructionKind dtorKind, + Address addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + + CleanupKind cleanupKind = getCleanupKind(dtorKind); + pushDestroy(cleanupKind, addr, type, getDestroyer(dtorKind)); +} + void CIRGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer) { pushFullExprCleanup(cleanupKind, addr, type, destroyer); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 0d364e1bb1595..9732c9c499960 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1626,14 +1626,15 @@ LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) { /// Emit code to compute the specified expression which /// can have any type. The result is returned as an RValue struct. -RValue CIRGenFunction::emitAnyExpr(const Expr *e, AggValueSlot aggSlot) { +RValue CIRGenFunction::emitAnyExpr(const Expr *e, AggValueSlot aggSlot, + bool ignoreResult) { switch (CIRGenFunction::getEvaluationKind(e->getType())) { case cir::TEK_Scalar: return RValue::get(emitScalarExpr(e)); case cir::TEK_Complex: return RValue::getComplex(emitComplexExpr(e)); case cir::TEK_Aggregate: { - if (aggSlot.isIgnored()) + if (!ignoreResult && aggSlot.isIgnored()) aggSlot = createAggTemp(e->getType(), getLoc(e->getSourceRange()), getCounterAggTmpAsString()); emitAggExpr(e, aggSlot); @@ -1869,8 +1870,7 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e, /// Emit code to compute the specified expression, ignoring the result. 
void CIRGenFunction::emitIgnoredExpr(const Expr *e) { if (e->isPRValue()) { - assert(!cir::MissingFeatures::aggValueSlot()); - emitAnyExpr(e); + emitAnyExpr(e, AggValueSlot::ignored(), /*ignoreResult=*/true); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index ddee000056c7e..568cbdb06bb48 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -24,6 +24,73 @@ using namespace clang; using namespace clang::CIRGen; namespace { +// FIXME(cir): This should be a common helper between CIRGen +// and traditional CodeGen +/// Is the value of the given expression possibly a reference to or +/// into a __block variable? +static bool isBlockVarRef(const Expr *e) { + // Make sure we look through parens. + e = e->IgnoreParens(); + + // Check for a direct reference to a __block variable. + if (const DeclRefExpr *dre = dyn_cast(e)) { + const VarDecl *var = dyn_cast(dre->getDecl()); + return (var && var->hasAttr()); + } + + // More complicated stuff. + + // Binary operators. + if (const BinaryOperator *op = dyn_cast(e)) { + // For an assignment or pointer-to-member operation, just care + // about the LHS. + if (op->isAssignmentOp() || op->isPtrMemOp()) + return isBlockVarRef(op->getLHS()); + + // For a comma, just care about the RHS. + if (op->getOpcode() == BO_Comma) + return isBlockVarRef(op->getRHS()); + + // FIXME: pointer arithmetic? + return false; + + // Check both sides of a conditional operator. + } else if (const AbstractConditionalOperator *op = + dyn_cast(e)) { + return isBlockVarRef(op->getTrueExpr()) || + isBlockVarRef(op->getFalseExpr()); + + // OVEs are required to support BinaryConditionalOperators. + } else if (const OpaqueValueExpr *op = dyn_cast(e)) { + if (const Expr *src = op->getSourceExpr()) + return isBlockVarRef(src); + + // Casts are necessary to get things like (*(int*)&var) = foo(). + // We don't really care about the kind of cast here, except + // we don't want to look through l2r casts, because it's okay + // to get the *value* in a __block variable. + } else if (const CastExpr *cast = dyn_cast(e)) { + if (cast->getCastKind() == CK_LValueToRValue) + return false; + return isBlockVarRef(cast->getSubExpr()); + + // Handle unary operators. Again, just aggressively look through + // it, ignoring the operation. + } else if (const UnaryOperator *uop = dyn_cast(e)) { + return isBlockVarRef(uop->getSubExpr()); + + // Look into the base of a field access. + } else if (const MemberExpr *mem = dyn_cast(e)) { + return isBlockVarRef(mem->getBase()); + + // Look into the base of a subscript. + } else if (const ArraySubscriptExpr *sub = dyn_cast(e)) { + return isBlockVarRef(sub->getBase()); + } + + return false; +} + class AggExprEmitter : public StmtVisitor { CIRGenFunction &cgf; @@ -41,9 +108,7 @@ class AggExprEmitter : public StmtVisitor { AggValueSlot ensureSlot(mlir::Location loc, QualType t) { if (!dest.isIgnored()) return dest; - - cgf.cgm.errorNYI(loc, "Slot for ignored address"); - return dest; + return cgf.createAggTemp(t, loc, "agg.tmp.ensured"); } void ensureDest(mlir::Location loc, QualType ty) { @@ -89,6 +154,47 @@ class AggExprEmitter : public StmtVisitor { (void)cgf.emitCompoundStmt(*e->getSubStmt(), &retAlloca, dest); } + void VisitBinAssign(const BinaryOperator *e) { + // For an assignment to work, the value on the right has + // to be compatible with the value on the left. 
+ assert(cgf.getContext().hasSameUnqualifiedType(e->getLHS()->getType(), + e->getRHS()->getType()) && + "Invalid assignment"); + + if (isBlockVarRef(e->getLHS()) && + e->getRHS()->HasSideEffects(cgf.getContext())) { + cgf.cgm.errorNYI(e->getSourceRange(), + "block var reference with side effects"); + return; + } + + LValue lhs = cgf.emitLValue(e->getLHS()); + + // If we have an atomic type, evaluate into the destination and then + // do an atomic copy. + assert(!cir::MissingFeatures::atomicTypes()); + + // Codegen the RHS so that it stores directly into the LHS. + assert(!cir::MissingFeatures::aggValueSlotGC()); + AggValueSlot lhsSlot = AggValueSlot::forLValue( + lhs, AggValueSlot::IsDestructed, AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); + + // A non-volatile aggregate destination might have volatile member. + if (!lhsSlot.isVolatile() && cgf.hasVolatileMember(e->getLHS()->getType())) + lhsSlot.setVolatile(true); + + cgf.emitAggExpr(e->getRHS(), lhsSlot); + + // Copy into the destination if the assignment isn't ignored. + emitFinalDestCopy(e->getType(), lhs); + + if (!dest.isIgnored() && !dest.isExternallyDestructed() && + e->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) + cgf.pushDestroy(QualType::DK_nontrivial_c_struct, dest.getAddress(), + e->getType()); + } + void VisitDeclRefExpr(DeclRefExpr *e) { emitAggLoadOfLValue(e); } void VisitInitListExpr(InitListExpr *e); @@ -186,9 +292,6 @@ class AggExprEmitter : public StmtVisitor { cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitPointerToDataMemberBinaryOperator"); } - void VisitBinAssign(const BinaryOperator *e) { - cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitBinAssign"); - } void VisitBinComma(const BinaryOperator *e) { cgf.emitIgnoredExpr(e->getLHS()); Visit(e->getRHS()); @@ -503,7 +606,8 @@ void AggExprEmitter::emitCopy(QualType type, const AggValueSlot &dest, LValue destLV = cgf.makeAddrLValue(dest.getAddress(), type); LValue srcLV = cgf.makeAddrLValue(src.getAddress(), type); assert(!cir::MissingFeatures::aggValueSlotVolatile()); - cgf.emitAggregateCopy(destLV, srcLV, type, dest.mayOverlap()); + cgf.emitAggregateCopy(destLV, srcLV, type, dest.mayOverlap(), + dest.isVolatile() || src.isVolatile()); } void AggExprEmitter::emitInitializationToLValue(Expr *e, LValue lv) { @@ -804,7 +908,8 @@ void CIRGenFunction::emitAggExpr(const Expr *e, AggValueSlot slot) { } void CIRGenFunction::emitAggregateCopy(LValue dest, LValue src, QualType ty, - AggValueSlot::Overlap_t mayOverlap) { + AggValueSlot::Overlap_t mayOverlap, + bool isVolatile) { // TODO(cir): this function needs improvements, commented code for now since // this will be touched again soon. 
assert(!ty->isAnyComplexType() && "Unexpected copy of complex"); @@ -860,7 +965,7 @@ void CIRGenFunction::emitAggregateCopy(LValue dest, LValue src, QualType ty, cgm.errorNYI("emitAggregateCopy: GC"); [[maybe_unused]] cir::CopyOp copyOp = - builder.createCopy(destPtr.getPointer(), srcPtr.getPointer()); + builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile); assert(!cir::MissingFeatures::opTBAA()); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index b8492a880a80a..5a71126c8dc07 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -858,6 +858,13 @@ class CIRGenFunction : public CIRGenTypeCache { FunctionArgList args, clang::SourceLocation loc, clang::SourceLocation startLoc); + /// returns true if aggregate type has a volatile member. + bool hasVolatileMember(QualType t) { + if (const auto *rd = t->getAsRecordDecl()) + return rd->hasVolatileMember(); + return false; + } + /// The cleanup depth enclosing all the cleanups associated with the /// parameters. EHScopeStack::stable_iterator prologueCleanupDepth; @@ -1082,6 +1089,9 @@ class CIRGenFunction : public CIRGenTypeCache { static Destroyer destroyCXXObject; + void pushDestroy(QualType::DestructionKind dtorKind, Address addr, + QualType type); + void pushDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer); @@ -1136,14 +1146,16 @@ class CIRGenFunction : public CIRGenTypeCache { /// occupied by some other object. More efficient code can often be /// generated if not. void emitAggregateCopy(LValue dest, LValue src, QualType eltTy, - AggValueSlot::Overlap_t mayOverlap); + AggValueSlot::Overlap_t mayOverlap, + bool isVolatile = false); /// Emit code to compute the specified expression which can have any type. The /// result is returned as an RValue struct. If this is an aggregate /// expression, the aggloc/agglocvolatile arguments indicate where the result /// should be returned. RValue emitAnyExpr(const clang::Expr *e, - AggValueSlot aggSlot = AggValueSlot::ignored()); + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); /// Emits the code necessary to evaluate an arbitrary expression into the /// given memory location. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index f83a952d26065..ab245a771d72c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -380,6 +380,15 @@ class AggValueSlot { clang::Qualifiers getQualifiers() const { return quals; } + bool isVolatile() const { return quals.hasVolatile(); } + + void setVolatile(bool flag) { + if (flag) + quals.addVolatile(); + else + quals.removeVolatile(); + } + Address getAddress() const { return addr; } bool isIgnored() const { return !addr.isValid(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 0243bf120f396..dc26dac3e349b 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -182,7 +182,7 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite( rewriter, op.getLoc(), rewriter.getI32Type(), op.getLength(layout)); assert(!cir::MissingFeatures::aggValueSlotVolatile()); rewriter.replaceOpWithNewOp( - op, adaptor.getDst(), adaptor.getSrc(), length, /*isVolatile=*/false); + op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile()); return mlir::success(); } diff --git a/clang/test/CIR/CodeGen/binassign.c b/clang/test/CIR/CodeGen/binassign.c index dab987959bd5c..44c54b4a2969a 100644 --- a/clang/test/CIR/CodeGen/binassign.c +++ b/clang/test/CIR/CodeGen/binassign.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c23 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: %clang_cc1 -std=c23 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t-before-lp.cir // RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR // RUN: %clang_cc1 -std=c23 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll // RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM @@ -54,3 +54,49 @@ void binary_assign(void) { // OGCG: store float 0x40091EB860000000, ptr %[[F_PTR]] // OGCG: store i32 42, ptr %[[I_PTR]] // OGCG: ret void + +struct S { + int a; + float b; +}; + +struct SV { + int a; + volatile float b; +}; + +struct S gs; +struct SV gsv; + +void binary_assign_struct() { + // Test normal struct assignment + struct S ls; + ls = gs; + + // Test assignment of a struct with a volatile member + struct SV lsv; + lsv = gsv; +} + +// CIR: cir.func{{.*}} @binary_assign_struct() +// CIR: %[[LS:.*]] = cir.alloca ![[REC_S:.*]], !cir.ptr, ["ls"] +// CIR: %[[LSV:.*]] = cir.alloca ![[REC_SV:.*]], !cir.ptr, ["lsv"] +// CIR: %[[GS_PTR:.*]] = cir.get_global @gs : !cir.ptr +// CIR: cir.copy %[[GS_PTR]] to %[[LS]] : !cir.ptr +// CIR: %[[GSV_PTR:.*]] = cir.get_global @gsv : !cir.ptr +// CIR: cir.copy %[[GSV_PTR]] to %[[LSV]] volatile : !cir.ptr +// CIR: cir.return + +// LLVM: define {{.*}}void @binary_assign_struct() +// LLVM: %[[LS_PTR:.*]] = alloca %struct.S +// LLVM: %[[LSV_PTR:.*]] = alloca %struct.SV +// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[LS_PTR]], ptr @gs, i32 8, i1 false) +// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[LSV_PTR]], ptr @gsv, i32 8, i1 true) +// LLVM: ret void + +// OGCG: define {{.*}}void @binary_assign_struct() +// OGCG: %[[LS_PTR:.*]] = alloca %struct.S +// OGCG: %[[LSV_PTR:.*]] = alloca %struct.SV +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %[[LS_PTR]], ptr align 4 @gs, i64 8, i1 false) +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %[[LSV_PTR]], ptr align 4 @gsv, i64 8, i1 
true) +// OGCG: ret void diff --git a/clang/test/CIR/CodeGen/dtors.cpp b/clang/test/CIR/CodeGen/dtors.cpp index cb3886bf6af0a..f2c80a547f1d3 100644 --- a/clang/test/CIR/CodeGen/dtors.cpp +++ b/clang/test/CIR/CodeGen/dtors.cpp @@ -14,7 +14,7 @@ void test_temporary_dtor() { } // CIR: cir.func dso_local @_Z19test_temporary_dtorv() -// CIR: %[[ALLOCA:.*]] = cir.alloca !rec_A, !cir.ptr, ["agg.tmp0"] +// CIR: %[[ALLOCA:.*]] = cir.alloca !rec_A, !cir.ptr, ["agg.tmp.ensured"] // CIR: cir.call @_ZN1AD1Ev(%[[ALLOCA]]) nothrow : (!cir.ptr) -> () // LLVM: define dso_local void @_Z19test_temporary_dtorv(){{.*}} diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp index 263799f8a5deb..6d362c79c1c44 100644 --- a/clang/test/CIR/CodeGen/struct.cpp +++ b/clang/test/CIR/CodeGen/struct.cpp @@ -265,7 +265,7 @@ void bin_comma() { // CIR: cir.func{{.*}} @_Z9bin_commav() // CIR: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr, ["a", init] -// CIR: %[[TMP_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr, ["agg.tmp0"] +// CIR: %[[TMP_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr, ["agg.tmp.ensured"] // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !rec_CompleteS // CIR: cir.store{{.*}} %[[ZERO]], %[[TMP_ADDR]] : !rec_CompleteS, !cir.ptr // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !rec_CompleteS From ac65da0f861412afa5927196b51166269cf79ddf Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Mon, 20 Oct 2025 18:13:59 -0400 Subject: [PATCH 32/38] [mlir][spirv][test] Fork test to allow testing with assertions enabled (#164319) This way, testing with --debug flag can correctly specify that it requires assertions. This is a fix for #164098 --- .../SPIRV/function-decorations-asserts.mlir | 20 +++++++++++++++++++ .../Target/SPIRV/function-decorations.mlir | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 mlir/test/Target/SPIRV/function-decorations-asserts.mlir diff --git a/mlir/test/Target/SPIRV/function-decorations-asserts.mlir b/mlir/test/Target/SPIRV/function-decorations-asserts.mlir new file mode 100644 index 0000000000000..ebdb9fb1e75e3 --- /dev/null +++ b/mlir/test/Target/SPIRV/function-decorations-asserts.mlir @@ -0,0 +1,20 @@ +// REQUIRES: asserts +// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip --split-input-file --debug %s | FileCheck %s + +spirv.module Logical GLSL450 requires #spirv.vce { + spirv.func @linkage_attr_test_kernel() "DontInline" attributes {} { + %uchar_0 = spirv.Constant 0 : i8 + %ushort_1 = spirv.Constant 1 : i16 + %uint_0 = spirv.Constant 0 : i32 + spirv.FunctionCall @outside.func.with.linkage(%uchar_0):(i8) -> () + spirv.Return + } + // CHECK: linkage_attributes = #spirv.linkage_attributes> + spirv.func @outside.func.with.linkage(%arg0 : i8) -> () "Pure" attributes { + linkage_attributes=#spirv.linkage_attributes< + linkage_name="outside.func", + linkage_type= + > + } + spirv.func @inside.func() -> () "Pure" attributes {} {spirv.Return} +} diff --git a/mlir/test/Target/SPIRV/function-decorations.mlir b/mlir/test/Target/SPIRV/function-decorations.mlir index 6098e42f063a2..cf6edaa0a3d5b 100644 --- a/mlir/test/Target/SPIRV/function-decorations.mlir +++ b/mlir/test/Target/SPIRV/function-decorations.mlir @@ -1,5 +1,4 @@ // RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip --split-input-file %s | FileCheck %s -// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip --split-input-file --debug %s | FileCheck %s spirv.module Logical GLSL450 requires #spirv.vce 
{ spirv.func @linkage_attr_test_kernel() "DontInline" attributes {} { From 32b534b1927089bffe71b8f6560a31a567a2b87b Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Mon, 20 Oct 2025 15:30:43 -0700 Subject: [PATCH 33/38] [lldb][doc NFC] fix typeo in reason:watchpoint desc --- lldb/docs/resources/lldbgdbremote.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md index f0c5e6b04d54c..287484ea5cabf 100644 --- a/lldb/docs/resources/lldbgdbremote.md +++ b/lldb/docs/resources/lldbgdbremote.md @@ -2167,7 +2167,7 @@ following keys and values: be outside the watchpoint that was triggered, the remote stub should determine which watchpoint was triggered and report an address from within its range. - 2. Wwatchpoint hardware register index number. + 2. Watchpoint hardware register index number. 3. Actual watchpoint trap address, which may be outside the range of any watched region of memory. On MIPS, an addr outside a watched range means lldb should disable the wp, From e7f370f910701b6c67d41dab80e645227692c58b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 20 Oct 2025 15:48:36 -0700 Subject: [PATCH 34/38] [SLP] Check all copyable children for non-schedulable parent nodes If the parent node is non-schedulable and it includes several copies of the same instruction, its operand might be replaced by the copyable nodes in multiple children nodes, and if the instruction is commutative, they can be used in different operands. The compiler shall consider this opportunity, taking into account that non-copyable children are scheduled only ones for the same parent instruction. Fixes #164242 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 77 +++++++++++-------- .../non-schedulable-parent-multi-copyables.ll | 37 +++++++++ 2 files changed, 82 insertions(+), 32 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3f18bd70539a0..106cde352e0b5 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5578,7 +5578,7 @@ class BoUpSLP { // Decrement the unscheduled counter and insert to ready list if // ready. auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE, - unsigned OpIdx) { + unsigned OpIdx, bool FirstRun = false) { if (!ScheduleCopyableDataMap.empty()) { const EdgeInfo EI = {UserTE, OpIdx}; if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) { @@ -5586,6 +5586,8 @@ class BoUpSLP { return; } } + if (!FirstRun) + return; auto It = OperandsUses.find(I); assert(It != OperandsUses.end() && "Operand not found"); if (It->second > 0) { @@ -5602,37 +5604,48 @@ class BoUpSLP { break; // Need to search for the lane since the tree entry can be // reordered. - int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(), - find(Bundle->getTreeEntry()->Scalars, In)); - assert(Lane >= 0 && "Lane not set"); - if (isa(In) && - !Bundle->getTreeEntry()->ReorderIndices.empty()) - Lane = Bundle->getTreeEntry()->ReorderIndices[Lane]; - assert(Lane < static_cast( - Bundle->getTreeEntry()->Scalars.size()) && - "Couldn't find extract lane"); - - // Since vectorization tree is being built recursively this - // assertion ensures that the tree entry has all operands set before - // reaching this code. 
Couple of exceptions known at the moment are - // extracts where their second (immediate) operand is not added. - // Since immediates do not affect scheduler behavior this is - // considered okay. - assert(In && - (isa(In) || - In->getNumOperands() == - Bundle->getTreeEntry()->getNumOperands() || - Bundle->getTreeEntry()->isCopyableElement(In)) && - "Missed TreeEntry operands?"); - - for (unsigned OpIdx : - seq(Bundle->getTreeEntry()->getNumOperands())) - if (auto *I = dyn_cast( - Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) { - LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " << *I - << "\n"); - DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx); - } + auto *It = find(Bundle->getTreeEntry()->Scalars, In); + bool FirstRun = true; + do { + int Lane = + std::distance(Bundle->getTreeEntry()->Scalars.begin(), It); + assert(Lane >= 0 && "Lane not set"); + if (isa(In) && + !Bundle->getTreeEntry()->ReorderIndices.empty()) + Lane = Bundle->getTreeEntry()->ReorderIndices[Lane]; + assert(Lane < static_cast( + Bundle->getTreeEntry()->Scalars.size()) && + "Couldn't find extract lane"); + + // Since vectorization tree is being built recursively this + // assertion ensures that the tree entry has all operands set + // before reaching this code. Couple of exceptions known at the + // moment are extracts where their second (immediate) operand is + // not added. Since immediates do not affect scheduler behavior + // this is considered okay. + assert(In && + (isa(In) || + In->getNumOperands() == + Bundle->getTreeEntry()->getNumOperands() || + Bundle->getTreeEntry()->isCopyableElement(In)) && + "Missed TreeEntry operands?"); + + for (unsigned OpIdx : + seq(Bundle->getTreeEntry()->getNumOperands())) + if (auto *I = dyn_cast( + Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) { + LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " + << *I << "\n"); + DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, + FirstRun); + } + // If parent node is schedulable, it will be handle correctly. 
+ if (!Bundle->getTreeEntry()->doesNotNeedToSchedule()) + break; + It = std::find(std::next(It), + Bundle->getTreeEntry()->Scalars.end(), In); + FirstRun = false; + } while (It != Bundle->getTreeEntry()->Scalars.end()); } } else { // If BundleMember is a stand-alone instruction, no operand reordering diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll new file mode 100644 index 0000000000000..7accca311af3c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB6:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ , %[[BB]] ], [ , %[[BB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: ret void +; +bb: + br i1 false, label %bb1, label %bb6 + +bb1: + %add = add i32 0, 0 + %shl = shl i32 %add, 0 + %sub = sub i32 0, 1 + %add2 = add i32 %sub, %shl + %add3 = add i32 0, 0 + %shl4 = shl i32 %add3, 0 + %ashr = ashr i32 %shl4, 1 + %add5 = add i32 0, 0 + br label %bb6 + +bb6: + %phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ] + %phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ] + %phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ] + %phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ] + %or = or i32 %phi8, 0 + ret void +} From c8c86efbbb55e51597c1bd8feb2e947bc0de3422 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 20 Oct 2025 15:59:56 -0700 Subject: [PATCH 35/38] [ORC] Replace ORC's baked-in dependence tracking with WaitingOnGraph. (#163027) WaitingOnGraph tracks waiting-on relationships between nodes (intended to represent symbols in an ORC program) in order to identify nodes that are *Ready* (i.e. are not waiting on any other nodes) or have *Failed* (are waiting on some node that cannot be produced). WaitingOnGraph replaces ORC's baked-in data structures that were tracking the same information (EmissionDepUnit, EmissionDepUnitInfo, ...). Isolating this information in a separate data structure simplifies the code, allows us to unit test it, and simplifies performance testing. The WaitingOnGraph uses several techniques to improve performance relative to the old data structures, including symbol coalescing ("SuperNodes") and symbol keys that don't perform unnecessary reference counting (NonOwningSymbolStringPtr). This commit includes unit tests for common dependence-tracking issues that have led to ORC bugs in the past. 
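A rough usage sketch of the new data structure (not part of the patch; the integer
IDs and the exact shape of the external-state callback are illustrative assumptions,
and the authoritative API is the header added below). ORC itself instantiates the
template with JITDylib pointers and non-owning symbol-name pointers as the
container/element IDs.

```cpp
#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h"

using Graph = llvm::orc::detail::WaitingOnGraph<unsigned, unsigned>;

void example() {
  Graph G;

  // One emit batch: symbol 1 in container 0 has been emitted and is still
  // waiting on symbol 7 in container 1.
  Graph::ContainerElementsMap Defs, Deps;
  Defs[0].insert(1);
  Deps[1].insert(7);

  Graph::SuperNodeBuilder B;
  B.add(std::move(Defs), std::move(Deps));

  // simplify() strips intra-batch edges and coalesces equal dependence sets;
  // it can run outside the session lock. emit() then folds the batch into the
  // graph and reports which SuperNodes became Ready or Failed, consulting the
  // callback for symbols the graph no longer stores explicitly.
  auto Result = G.emit(Graph::simplify(B.takeSuperNodes()),
                       [](unsigned /*Container*/, unsigned /*Element*/) {
                         return Graph::ExternalState::None; // nothing external yet
                       });
  (void)Result; // Result.Ready / Result.Failed hold the state transitions.
}
```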
--- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 66 +- .../llvm/ExecutionEngine/Orc/WaitingOnGraph.h | 622 ++++++++++++ llvm/lib/ExecutionEngine/Orc/Core.cpp | 919 +++++------------- .../ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 2 +- .../x86-64/LocalDependencyPropagation.s | 3 +- .../ExecutionEngine/Orc/CMakeLists.txt | 1 + .../Orc/WaitingOnGraphTest.cpp | 553 +++++++++++ 7 files changed, 1429 insertions(+), 737 deletions(-) create mode 100644 llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h create mode 100644 llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index f407b56817fc3..8613ddd8e3b11 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -26,6 +26,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/ExecutionEngine/Orc/TaskDispatch.h" +#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ExtensibleRTTI.h" @@ -49,6 +50,9 @@ class InProgressLookupState; enum class SymbolState : uint8_t; +using WaitingOnGraph = + detail::WaitingOnGraph; + using ResourceTrackerSP = IntrusiveRefCntPtr; using JITDylibSP = IntrusiveRefCntPtr; @@ -1131,20 +1135,6 @@ class JITDylib : public ThreadSafeRefCountedBase, using UnmaterializedInfosList = std::vector>; - struct EmissionDepUnit { - EmissionDepUnit(JITDylib &JD) : JD(&JD) {} - - JITDylib *JD = nullptr; - DenseMap Symbols; - DenseMap> Dependencies; - }; - - struct EmissionDepUnitInfo { - std::shared_ptr EDU; - DenseSet IntraEmitUsers; - DenseMap> NewDeps; - }; - // Information about not-yet-ready symbol. // * DefiningEDU will point to the EmissionDepUnit that defines the symbol. // * DependantEDUs will hold pointers to any EmissionDepUnits currently @@ -1154,9 +1144,6 @@ class JITDylib : public ThreadSafeRefCountedBase, struct MaterializingInfo { friend class ExecutionSession; - std::shared_ptr DefiningEDU; - DenseSet DependantEDUs; - LLVM_ABI void addQuery(std::shared_ptr Q); LLVM_ABI void removeQuery(const AsynchronousSymbolQuery &Q); LLVM_ABI AsynchronousSymbolQueryList @@ -1778,30 +1765,26 @@ class ExecutionSession { LLVM_ABI Error OL_notifyResolved(MaterializationResponsibility &MR, const SymbolMap &Symbols); - using EDUInfosMap = - DenseMap; - - template - void propagateExtraEmitDeps(std::deque Worklist, - EDUInfosMap &EDUInfos, - HandleNewDepFn HandleNewDep); - EDUInfosMap simplifyDepGroups(MaterializationResponsibility &MR, - ArrayRef EmittedDeps); - void IL_makeEDUReady(std::shared_ptr EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries); - void IL_makeEDUEmitted(std::shared_ptr EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries); - bool IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU, JITDylib &DepJD, - NonOwningSymbolStringPtr DepSym, - EDUInfosMap &EDUInfos); - - static Error makeJDClosedError(JITDylib::EmissionDepUnit &EDU, - JITDylib &ClosedJD); - static Error makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU, - JITDylib &BadJD, SymbolNameSet BadDeps); - - Expected - IL_emit(MaterializationResponsibility &MR, EDUInfosMap EDUInfos); + // FIXME: We should be able to derive FailedSymsForQuery from each query once + // we fix how the detach operation works. 
+ struct EmitQueries { + JITDylib::AsynchronousSymbolQuerySet Updated; + JITDylib::AsynchronousSymbolQuerySet Failed; + DenseMap> + FailedSymsForQuery; + }; + + WaitingOnGraph::ExternalState + IL_getSymbolState(JITDylib *JD, NonOwningSymbolStringPtr Name); + + template + void IL_collectQueries(JITDylib::AsynchronousSymbolQuerySet &Qs, + WaitingOnGraph::ContainerElementsMap &QualifiedSymbols, + UpdateSymbolFn &&UpdateSymbol, + UpdateQueryFn &&UpdateQuery); + + Expected IL_emit(MaterializationResponsibility &MR, + WaitingOnGraph::SimplifyResult SR); LLVM_ABI Error OL_notifyEmitted(MaterializationResponsibility &MR, ArrayRef EmittedDeps); @@ -1830,6 +1813,7 @@ class ExecutionSession { std::vector ResourceManagers; std::vector JDs; + WaitingOnGraph G; // FIXME: Remove this (and runOutstandingMUs) once the linking layer works // with callbacks from asynchronous queries. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h new file mode 100644 index 0000000000000..a5b533351d4d0 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h @@ -0,0 +1,622 @@ +//===------ WaitingOnGraph.h - ORC symbol dependence graph ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines WaitingOnGraph and related utilities. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H +#define LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm::orc::detail { + +class WaitingOnGraphTest; + +/// WaitingOnGraph class template. +/// +/// This type is intended to provide efficient dependence tracking for Symbols +/// in an ORC program. +/// +/// WaitingOnGraph models a directed graph with four partitions: +/// 1. Not-yet-emitted nodes: Nodes identified as waited-on in an emit +/// operation. +/// 2. Emitted nodes: Nodes emitted and waiting on some non-empty set of +/// other nodes. +/// 3. Ready nodes: Nodes emitted and not waiting on any other nodes +/// (either because they weren't waiting on any nodes when they were +/// emitted, or because all transitively waited-on nodes have since +/// been emitted). +/// 4. Failed nodes: Nodes that have been marked as failed-to-emit, and +/// nodes that were found to transitively wait-on some failed node. +/// +/// Nodes are added to the graph by *emit* and *fail* operations. +/// +/// The *emit* operation takes a bipartite *local dependence graph* as an +/// argument and returns... +/// a. the set of nodes (both existing and newly added from the local +/// dependence graph) whose waiting-on set is the empty set, and... +/// b. the set of newly added nodes that are found to depend on failed +/// nodes. +/// +/// The *fail* operation takes a set of failed nodes and returns the set of +/// Emitted nodes that were waiting on the failed nodes. +/// +/// The concrete representation adopts several approaches for efficiency: +/// +/// 1. Only *Emitted* and *Not-yet-emitted* nodes are represented explicitly. 
+/// *Ready* and *Failed* nodes are represented by the values returned by the +/// GetExternalStateFn argument to *emit*. +/// +/// 2. Labels are (*Container*, *Element*) pairs that are intended to represent +/// ORC symbols (ORC uses types Container = JITDylib, +/// Element = NonOwningSymbolStringPtr). The internal representation of the +/// graph is optimized on the assumption that there are many more Elements +/// (symbol names) than Containers (JITDylibs) used to construct the labels. +/// (Consider for example the common case where most JIT'd code is placed in +/// a single "main" JITDylib). +/// +/// 3. The data structure stores *SuperNodes* which have multiple labels. This +/// reduces the number of nodes and edges in the graph in the common case +/// where many JIT symbols have the same set of dependencies. SuperNodes are +/// coalesced when their dependence sets become equal. +/// +/// 4. The *simplify* method can be applied to an initial *local dependence +/// graph* (as a list of SuperNodes) to eliminate any internal dependence +/// relationships that would have to be propagated internally by *emit*. +/// Access to the WaitingOnGraph is assumed to be guarded by a mutex (ORC +/// will access it from multiple threads) so this allows some pre-processing +/// to be performed outside the mutex. +template class WaitingOnGraph { + friend class WaitingOnGraphTest; + +public: + using ContainerId = ContainerIdT; + using ElementId = ElementIdT; + using ElementSet = DenseSet; + using ContainerElementsMap = DenseMap; + + class SuperNode { + friend class WaitingOnGraph; + friend class WaitingOnGraphTest; + + public: + SuperNode(ContainerElementsMap Defs, ContainerElementsMap Deps) + : Defs(std::move(Defs)), Deps(std::move(Deps)) {} + ContainerElementsMap &defs() { return Defs; } + const ContainerElementsMap &defs() const { return Defs; } + ContainerElementsMap &deps() { return Deps; } + const ContainerElementsMap &deps() const { return Deps; } + + private: + ContainerElementsMap Defs; + ContainerElementsMap Deps; + }; + +private: + using ElemToSuperNodeMap = + DenseMap>; + + using SuperNodeDepsMap = DenseMap>; + + class Coalescer { + public: + std::unique_ptr addOrCreateSuperNode(ContainerElementsMap Defs, + ContainerElementsMap Deps) { + auto H = getHash(Deps); + if (auto *ExistingSN = findCanonicalSuperNode(H, Deps)) { + for (auto &[Container, Elems] : Defs) { + auto &DstCElems = ExistingSN->Defs[Container]; + [[maybe_unused]] size_t ExpectedSize = + DstCElems.size() + Elems.size(); + DstCElems.insert(Elems.begin(), Elems.end()); + assert(DstCElems.size() == ExpectedSize); + } + return nullptr; + } + + auto NewSN = + std::make_unique(std::move(Defs), std::move(Deps)); + CanonicalSNs[H].push_back(NewSN.get()); + return NewSN; + } + + void coalesce(std::vector> &SNs, + ElemToSuperNodeMap &ElemToSN) { + for (size_t I = 0; I != SNs.size();) { + auto &SN = SNs[I]; + auto H = getHash(SN->Deps); + if (auto *CanonicalSN = findCanonicalSuperNode(H, SN->Deps)) { + for (auto &[Container, Elems] : SN->Defs) { + CanonicalSN->Defs[Container].insert(Elems.begin(), Elems.end()); + auto &ContainerElemToSN = ElemToSN[Container]; + for (auto &Elem : Elems) + ContainerElemToSN[Elem] = CanonicalSN; + } + std::swap(SN, SNs.back()); + SNs.pop_back(); + } else { + CanonicalSNs[H].push_back(SN.get()); + ++I; + } + } + } + + template void remove(Pred &&Remove) { + for (auto &[Hash, SNs] : CanonicalSNs) { + bool Found = false; + for (size_t I = 0; I != SNs.size(); ++I) { + if (Remove(SNs[I])) { + std::swap(SNs[I], 
SNs.back()); + SNs.pop_back(); + Found = true; + break; + } + } + if (Found) { + if (SNs.empty()) + CanonicalSNs.erase(Hash); + break; + } + } + } + + private: + hash_code getHash(const ContainerElementsMap &M) { + SmallVector SortedContainers; + SortedContainers.reserve(M.size()); + for (auto &[Container, Elems] : M) + SortedContainers.push_back(Container); + llvm::sort(SortedContainers); + hash_code Hash(0); + for (auto &Container : SortedContainers) { + auto &ContainerElems = M.at(Container); + SmallVector SortedElems(ContainerElems.begin(), + ContainerElems.end()); + llvm::sort(SortedElems); + Hash = hash_combine( + Hash, Container, + hash_combine_range(SortedElems.begin(), SortedElems.end())); + } + return Hash; + } + + SuperNode *findCanonicalSuperNode(hash_code H, + const ContainerElementsMap &M) { + for (auto *SN : CanonicalSNs[H]) + if (SN->Deps == M) + return SN; + return nullptr; + } + + DenseMap> CanonicalSNs; + }; + +public: + /// Build SuperNodes from (definition-set, dependence-set) pairs. + /// + /// Coalesces definition-sets with identical dependence-sets. + class SuperNodeBuilder { + public: + void add(ContainerElementsMap Defs, ContainerElementsMap Deps) { + if (Defs.empty()) + return; + // Remove any self-reference. + SmallVector ToRemove; + for (auto &[Container, Elems] : Defs) { + assert(!Elems.empty() && "Defs for container must not be empty"); + auto I = Deps.find(Container); + if (I == Deps.end()) + continue; + auto &DepsForContainer = I->second; + for (auto &Elem : Elems) + DepsForContainer.erase(Elem); + if (DepsForContainer.empty()) + ToRemove.push_back(Container); + } + for (auto &Container : ToRemove) + Deps.erase(Container); + if (auto SN = C.addOrCreateSuperNode(std::move(Defs), std::move(Deps))) + SNs.push_back(std::move(SN)); + } + std::vector> takeSuperNodes() { + return std::move(SNs); + } + + private: + Coalescer C; + std::vector> SNs; + }; + + class SimplifyResult { + friend class WaitingOnGraph; + friend class WaitingOnGraphTest; + + public: + const std::vector> &superNodes() const { + return SNs; + } + + private: + SimplifyResult(std::vector> SNs, + ElemToSuperNodeMap ElemToSN) + : SNs(std::move(SNs)), ElemToSN(std::move(ElemToSN)) {} + std::vector> SNs; + ElemToSuperNodeMap ElemToSN; + }; + + /// Preprocess a list of SuperNodes to remove all intra-SN dependencies. + static SimplifyResult simplify(std::vector> SNs) { + // Build ElemToSN map. + ElemToSuperNodeMap ElemToSN; + for (auto &SN : SNs) { + for (auto &[Container, Elements] : SN->Defs) { + auto &ContainerElemToSN = ElemToSN[Container]; + for (auto &E : Elements) + ContainerElemToSN[E] = SN.get(); + } + } + + SuperNodeDepsMap SuperNodeDeps; + hoistDeps(SuperNodeDeps, SNs, ElemToSN); + propagateSuperNodeDeps(SuperNodeDeps); + sinkDeps(SNs, SuperNodeDeps); + + // Pre-coalesce nodes. + Coalescer().coalesce(SNs, ElemToSN); + + return {std::move(SNs), std::move(ElemToSN)}; + } + + struct EmitResult { + std::vector> Ready; + std::vector> Failed; + }; + + enum class ExternalState { None, Ready, Failed }; + + /// Add the given SuperNodes to the graph, returning any SuperNodes that + /// move to the Ready or Failed states as a result. + /// The GetExternalState function is used to represent SuperNodes that have + /// already become Ready or Failed (since such nodes are not explicitly + /// represented in the graph). 
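+  ///
+  /// A minimal usage sketch (the integer instantiation and all names below
+  /// are illustrative only, not part of this API):
+  /// \code
+  ///   using TG = WaitingOnGraph<unsigned, unsigned>;
+  ///   TG G;
+  ///   TG::SuperNodeBuilder B;
+  ///   TG::ContainerElementsMap Defs({{0, {0}}}); // defines (0, 0)
+  ///   TG::ContainerElementsMap Deps({{0, {1}}}); // waits on (0, 1)
+  ///   B.add(std::move(Defs), std::move(Deps));
+  ///   auto NoExternalState = [](unsigned, unsigned) {
+  ///     return TG::ExternalState::None;
+  ///   };
+  ///   auto ER = G.emit(TG::simplify(B.takeSuperNodes()), NoExternalState);
+  ///   // ER.Ready and ER.Failed hold the SuperNodes that changed state.
+  /// \endcode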
+ template + EmitResult emit(SimplifyResult SR, GetExternalStateFn &&GetExternalState) { + auto NewSNs = std::move(SR.SNs); + auto ElemToNewSN = std::move(SR.ElemToSN); + + // First process any dependencies on nodes with external state. + auto FailedSNs = processExternalDeps(NewSNs, GetExternalState); + + // Collect the PendingSNs whose dep sets are about to be modified. + std::vector> ModifiedPendingSNs; + for (size_t I = 0; I != PendingSNs.size();) { + auto &SN = PendingSNs[I]; + bool Remove = false; + for (auto &[Container, Elems] : SN->Deps) { + auto I = ElemToNewSN.find(Container); + if (I == ElemToNewSN.end()) + continue; + for (auto Elem : Elems) { + if (I->second.contains(Elem)) { + Remove = true; + break; + } + } + if (Remove) + break; + } + if (Remove) { + ModifiedPendingSNs.push_back(std::move(SN)); + std::swap(SN, PendingSNs.back()); + PendingSNs.pop_back(); + } else + ++I; + } + + // Remove cycles from the graphs. + SuperNodeDepsMap SuperNodeDeps; + hoistDeps(SuperNodeDeps, ModifiedPendingSNs, ElemToNewSN); + + CoalesceToPendingSNs.remove( + [&](SuperNode *SN) { return SuperNodeDeps.count(SN); }); + + hoistDeps(SuperNodeDeps, NewSNs, ElemToPendingSN); + propagateSuperNodeDeps(SuperNodeDeps); + sinkDeps(NewSNs, SuperNodeDeps); + sinkDeps(ModifiedPendingSNs, SuperNodeDeps); + + // Process supernodes. Pending first, since we'll update PendingSNs when we + // incorporate NewSNs. + std::vector> ReadyNodes, FailedNodes; + processReadyOrFailed(ModifiedPendingSNs, ReadyNodes, FailedNodes, + SuperNodeDeps, ElemToPendingSN, FailedSNs); + processReadyOrFailed(NewSNs, ReadyNodes, FailedNodes, SuperNodeDeps, + ElemToNewSN, FailedSNs); + + CoalesceToPendingSNs.coalesce(ModifiedPendingSNs, ElemToPendingSN); + CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN); + + // Integrate remaining ModifiedPendingSNs and NewSNs into PendingSNs. + for (auto &SN : ModifiedPendingSNs) + PendingSNs.push_back(std::move(SN)); + + // Update ElemToPendingSN for the remaining elements. + for (auto &SN : NewSNs) { + for (auto &[Container, Elems] : SN->Defs) { + auto &Row = ElemToPendingSN[Container]; + for (auto &Elem : Elems) + Row[Elem] = SN.get(); + } + PendingSNs.push_back(std::move(SN)); + } + + return {std::move(ReadyNodes), std::move(FailedNodes)}; + } + + /// Identify the given symbols as Failed. + /// The elements of the Failed map will not be included in the returned + /// result, so clients should take whatever actions are needed to mark + /// this as failed in their external representation. 
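+  ///
+  /// Sketch (reusing the illustrative instantiation from the emit example
+  /// above): failing (0, 1) returns the pending SuperNodes whose dependence
+  /// sets contain (0, 1), but no SuperNode for (0, 1) itself:
+  /// \code
+  ///   TG::ContainerElementsMap ToFail({{0, {1}}});
+  ///   auto FailedSNs = G.fail(ToFail);
+  /// \endcode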
+ std::vector> + fail(const ContainerElementsMap &Failed) { + std::vector> FailedSNs; + + for (size_t I = 0; I != PendingSNs.size();) { + auto &PendingSN = PendingSNs[I]; + bool FailPendingSN = false; + for (auto &[Container, Elems] : PendingSN->Deps) { + if (FailPendingSN) + break; + auto I = Failed.find(Container); + if (I == Failed.end()) + continue; + for (auto &Elem : Elems) { + if (I->second.count(Elem)) { + FailPendingSN = true; + break; + } + } + } + if (FailPendingSN) { + FailedSNs.push_back(std::move(PendingSN)); + PendingSN = std::move(PendingSNs.back()); + PendingSNs.pop_back(); + } else + ++I; + } + + for (auto &SN : FailedSNs) { + CoalesceToPendingSNs.remove( + [&](SuperNode *SNC) { return SNC == SN.get(); }); + for (auto &[Container, Elems] : SN->Defs) { + assert(ElemToPendingSN.count(Container)); + auto &CElems = ElemToPendingSN[Container]; + for (auto &Elem : Elems) + CElems.erase(Elem); + if (CElems.empty()) + ElemToPendingSN.erase(Container); + } + } + + return FailedSNs; + } + + bool validate(raw_ostream &Log) { + bool AllGood = true; + auto ErrLog = [&]() -> raw_ostream & { + AllGood = false; + return Log; + }; + + size_t DefCount = 0; + for (auto &PendingSN : PendingSNs) { + if (PendingSN->Deps.empty()) + ErrLog() << "Pending SN " << PendingSN.get() << " has empty dep set.\n"; + else { + bool BadElem = false; + for (auto &[Container, Elems] : PendingSN->Deps) { + auto I = ElemToPendingSN.find(Container); + if (I == ElemToPendingSN.end()) + continue; + if (Elems.empty()) + ErrLog() << "Pending SN " << PendingSN.get() + << " has dependence map entry for " << Container + << " with empty element set.\n"; + for (auto &Elem : Elems) { + if (I->second.count(Elem)) { + ErrLog() << "Pending SN " << PendingSN.get() + << " has dependence on emitted element ( " << Container + << ", " << Elem << ")\n"; + BadElem = true; + break; + } + } + if (BadElem) + break; + } + } + + for (auto &[Container, Elems] : PendingSN->Defs) { + if (Elems.empty()) + ErrLog() << "Pending SN " << PendingSN.get() + << " has def map entry for " << Container + << " with empty element set.\n"; + DefCount += Elems.size(); + auto I = ElemToPendingSN.find(Container); + if (I == ElemToPendingSN.end()) + ErrLog() << "Pending SN " << PendingSN.get() << " has " + << Elems.size() << " defs in container " << Container + << " not covered by ElemsToPendingSN.\n"; + else { + for (auto &Elem : Elems) { + auto J = I->second.find(Elem); + if (J == I->second.end()) + ErrLog() << "Pending SN " << PendingSN.get() << " has element (" + << Container << ", " << Elem + << ") not covered by ElemsToPendingSN.\n"; + else if (J->second != PendingSN.get()) + ErrLog() << "ElemToPendingSN value invalid for (" << Container + << ", " << Elem << ")\n"; + } + } + } + } + + size_t DefCount2 = 0; + for (auto &[Container, Elems] : ElemToPendingSN) + DefCount2 += Elems.size(); + + assert(DefCount2 >= DefCount); + if (DefCount2 != DefCount) + ErrLog() << "ElemToPendingSN contains extra elements.\n"; + + return AllGood; + } + +private: + // Replace individual dependencies with supernode dependencies. + // + // For all dependencies in SNs, if the corresponding node is defined in + // ElemToSN then remove the individual dependency and add the record the + // dependency on the corresponding supernode in SuperNodeDeps. 
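+  //
+  // For example (illustrative): if SN0 has element (0, 1) in its dep set and
+  // ElemToSN maps (0, 1) to SN1, then (0, 1) is erased from SN0's deps and
+  // SN1 is inserted into SuperNodeDeps[SN0].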
+ static void hoistDeps(SuperNodeDepsMap &SuperNodeDeps, + std::vector> &SNs, + ElemToSuperNodeMap &ElemToSN) { + for (auto &SN : SNs) { + auto &SNDeps = SuperNodeDeps[SN.get()]; + for (auto &[DefContainer, DefElems] : ElemToSN) { + auto I = SN->Deps.find(DefContainer); + if (I == SN->Deps.end()) + continue; + for (auto &[DefElem, DefSN] : DefElems) + if (I->second.erase(DefElem)) + SNDeps.insert(DefSN); + if (I->second.empty()) + SN->Deps.erase(I); + } + } + } + + // Compute transitive closure of deps for each node. + static void propagateSuperNodeDeps(SuperNodeDepsMap &SuperNodeDeps) { + for (auto &[SN, Deps] : SuperNodeDeps) { + DenseSet Reachable({SN}); + SmallVector Worklist(Deps.begin(), Deps.end()); + + while (!Worklist.empty()) { + auto *DepSN = Worklist.pop_back_val(); + if (!Reachable.insert(DepSN).second) + continue; + auto I = SuperNodeDeps.find(DepSN); + if (I == SuperNodeDeps.end()) + continue; + for (auto *DepSNDep : I->second) + Worklist.push_back(DepSNDep); + } + + Deps = std::move(Reachable); + } + } + + // Sink SuperNode dependencies back to dependencies on individual nodes. + static void sinkDeps(std::vector> &SNs, + SuperNodeDepsMap &SuperNodeDeps) { + for (auto &SN : SNs) { + auto I = SuperNodeDeps.find(SN.get()); + if (I == SuperNodeDeps.end()) + continue; + + for (auto *DepSN : I->second) + for (auto &[Container, Elems] : DepSN->Deps) + SN->Deps[Container].insert(Elems.begin(), Elems.end()); + } + } + + template + static std::vector + processExternalDeps(std::vector> &SNs, + GetExternalStateFn &GetExternalState) { + std::vector FailedSNs; + for (auto &SN : SNs) { + bool SNHasError = false; + SmallVector ContainersToRemove; + for (auto &[Container, Elems] : SN->Deps) { + SmallVector ElemToRemove; + for (auto &Elem : Elems) { + switch (GetExternalState(Container, Elem)) { + case ExternalState::None: + break; + case ExternalState::Ready: + ElemToRemove.push_back(Elem); + break; + case ExternalState::Failed: + ElemToRemove.push_back(Elem); + SNHasError = true; + break; + } + } + for (auto &Elem : ElemToRemove) + Elems.erase(Elem); + if (Elems.empty()) + ContainersToRemove.push_back(Container); + } + for (auto &Container : ContainersToRemove) + SN->Deps.erase(Container); + if (SNHasError) + FailedSNs.push_back(SN.get()); + } + + return FailedSNs; + } + + void processReadyOrFailed(std::vector> &SNs, + std::vector> &Ready, + std::vector> &Failed, + SuperNodeDepsMap &SuperNodeDeps, + ElemToSuperNodeMap &ElemToSNs, + std::vector FailedSNs) { + for (size_t I = 0; I != SNs.size();) { + auto &SN = SNs[I]; + + bool SNFailed = false; + assert(SuperNodeDeps.count(SN.get())); + auto &SNSuperNodeDeps = SuperNodeDeps[SN.get()]; + for (auto *FailedSN : FailedSNs) { + if (FailedSN == SN.get() || SNSuperNodeDeps.count(FailedSN)) { + SNFailed = true; + break; + } + } + + bool SNReady = SN->Deps.empty(); + + if (SNReady || SNFailed) { + auto &NodeList = SNFailed ? 
Failed : Ready; + NodeList.push_back(std::move(SN)); + std::swap(SN, SNs.back()); + SNs.pop_back(); + } else + ++I; + } + } + + std::vector> PendingSNs; + ElemToSuperNodeMap ElemToPendingSN; + Coalescer CoalesceToPendingSNs; +}; + +} // namespace llvm::orc::detail + +#endif // LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index f47b7ecdcc7bb..62bb726b00050 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1173,39 +1173,7 @@ void JITDylib::dump(raw_ostream &OS) { << " pending queries: { "; for (const auto &Q : KV.second.pendingQueries()) OS << Q.get() << " (" << Q->getRequiredState() << ") "; - OS << "}\n Defining EDU: "; - if (KV.second.DefiningEDU) { - OS << KV.second.DefiningEDU.get() << " { "; - for (auto &[Name, Flags] : KV.second.DefiningEDU->Symbols) - OS << Name << " "; - OS << "}\n"; - OS << " Dependencies:\n"; - if (!KV.second.DefiningEDU->Dependencies.empty()) { - for (auto &[DepJD, Deps] : KV.second.DefiningEDU->Dependencies) { - OS << " " << DepJD->getName() << ": [ "; - for (auto &Dep : Deps) - OS << Dep << " "; - OS << "]\n"; - } - } else - OS << " none\n"; - } else - OS << "none\n"; - OS << " Dependant EDUs:\n"; - if (!KV.second.DependantEDUs.empty()) { - for (auto &DependantEDU : KV.second.DependantEDUs) { - OS << " " << DependantEDU << ": " - << DependantEDU->JD->getName() << " { "; - for (auto &[Name, Flags] : DependantEDU->Symbols) - OS << Name << " "; - OS << "}\n"; - } - } else - OS << " none\n"; - assert((Symbols[KV.first].getState() != SymbolState::Ready || - (KV.second.pendingQueries().empty() && !KV.second.DefiningEDU && - !KV.second.DependantEDUs.empty())) && - "Stale materializing info entry"); + OS << "}\n"; } }); } @@ -2917,359 +2885,64 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR, return MR.JD.resolve(MR, Symbols); } -template -void ExecutionSession::propagateExtraEmitDeps( - std::deque Worklist, EDUInfosMap &EDUInfos, - HandleNewDepFn HandleNewDep) { - - // Iterate to a fixed-point to propagate extra-emit dependencies through the - // EDU graph. - while (!Worklist.empty()) { - auto &EDU = *Worklist.front(); - Worklist.pop_front(); - - assert(EDUInfos.count(&EDU) && "No info entry for EDU"); - auto &EDUInfo = EDUInfos[&EDU]; - - // Propagate new dependencies to users. - for (auto *UserEDU : EDUInfo.IntraEmitUsers) { - - // UserEDUInfo only present if UserEDU has its own users. - JITDylib::EmissionDepUnitInfo *UserEDUInfo = nullptr; - { - auto UserEDUInfoItr = EDUInfos.find(UserEDU); - if (UserEDUInfoItr != EDUInfos.end()) - UserEDUInfo = &UserEDUInfoItr->second; - } - - for (auto &[DepJD, Deps] : EDUInfo.NewDeps) { - auto &UserEDUDepsForJD = UserEDU->Dependencies[DepJD]; - DenseSet *UserEDUNewDepsForJD = nullptr; - for (auto Dep : Deps) { - if (UserEDUDepsForJD.insert(Dep).second) { - HandleNewDep(*UserEDU, *DepJD, Dep); - if (UserEDUInfo) { - if (!UserEDUNewDepsForJD) { - // If UserEDU has no new deps then it's not in the worklist - // yet, so add it. - if (UserEDUInfo->NewDeps.empty()) - Worklist.push_back(UserEDU); - UserEDUNewDepsForJD = &UserEDUInfo->NewDeps[DepJD]; - } - // Add (DepJD, Dep) to NewDeps. 
- UserEDUNewDepsForJD->insert(Dep); - } - } +WaitingOnGraph::ExternalState +ExecutionSession::IL_getSymbolState(JITDylib *JD, + NonOwningSymbolStringPtr Name) { + if (JD->State != JITDylib::Open) + return WaitingOnGraph::ExternalState::Failed; + + auto I = JD->Symbols.find_as(Name); + + // FIXME: Can we eliminate this possibility if we support query binding? + if (I == JD->Symbols.end()) + return WaitingOnGraph::ExternalState::Failed; + + if (I->second.getFlags().hasError()) + return WaitingOnGraph::ExternalState::Failed; + + if (I->second.getState() == SymbolState::Ready) + return WaitingOnGraph::ExternalState::Ready; + + return WaitingOnGraph::ExternalState::None; +} + +template +void ExecutionSession::IL_collectQueries( + JITDylib::AsynchronousSymbolQuerySet &Qs, + WaitingOnGraph::ContainerElementsMap &QualifiedSymbols, + UpdateSymbolFn &&UpdateSymbol, UpdateQueryFn &&UpdateQuery) { + + for (auto &[JD, Symbols] : QualifiedSymbols) { + // IL_emit and JITDylib removal are synchronized by the session lock. + // Since JITDylib removal removes any contained nodes from the + // WaitingOnGraph, we should be able to assert that all nodes in the + // WaitingOnGraph have not been removed. + assert(JD->State == JITDylib::Open && + "WaitingOnGraph includes definition in defunct JITDylib"); + for (auto &Symbol : Symbols) { + // Update symbol table. + auto I = JD->Symbols.find_as(Symbol); + assert(I != JD->Symbols.end() && + "Failed Symbol missing from JD symbol table"); + auto &Entry = I->second; + UpdateSymbol(Entry); + + // Collect queries. + auto J = JD->MaterializingInfos.find_as(Symbol); + if (J != JD->MaterializingInfos.end()) { + for (auto &Q : J->second.takeAllPendingQueries()) { + UpdateQuery(*Q, *JD, Symbol, Entry); + Qs.insert(std::move(Q)); } + JD->MaterializingInfos.erase(J); } } - - EDUInfo.NewDeps.clear(); - } -} - -// Note: This method modifies the emitted set. -ExecutionSession::EDUInfosMap ExecutionSession::simplifyDepGroups( - MaterializationResponsibility &MR, - ArrayRef EmittedDeps) { - - auto &TargetJD = MR.getTargetJITDylib(); - - // 1. Build initial EmissionDepUnit -> EmissionDepUnitInfo and - // Symbol -> EmissionDepUnit mappings. - DenseMap EDUInfos; - EDUInfos.reserve(EmittedDeps.size()); - DenseMap EDUForSymbol; - for (auto &DG : EmittedDeps) { - assert(!DG.Symbols.empty() && "DepGroup does not cover any symbols"); - - // Skip empty EDUs. - if (DG.Dependencies.empty()) - continue; - - auto TmpEDU = std::make_shared(TargetJD); - auto &EDUInfo = EDUInfos[TmpEDU.get()]; - EDUInfo.EDU = std::move(TmpEDU); - for (const auto &Symbol : DG.Symbols) { - NonOwningSymbolStringPtr NonOwningSymbol(Symbol); - assert(!EDUForSymbol.count(NonOwningSymbol) && - "Symbol should not appear in more than one SymbolDependenceGroup"); - assert(MR.getSymbols().count(Symbol) && - "Symbol in DepGroups not in the emitted set"); - auto NewlyEmittedItr = MR.getSymbols().find(Symbol); - EDUInfo.EDU->Symbols[NonOwningSymbol] = NewlyEmittedItr->second; - EDUForSymbol[NonOwningSymbol] = EDUInfo.EDU.get(); - } - } - - // 2. Build a "residual" EDU to cover all symbols that have no dependencies. 
- { - DenseMap ResidualSymbolFlags; - for (auto &[Sym, Flags] : MR.getSymbols()) { - if (!EDUForSymbol.count(NonOwningSymbolStringPtr(Sym))) - ResidualSymbolFlags[NonOwningSymbolStringPtr(Sym)] = Flags; - } - if (!ResidualSymbolFlags.empty()) { - auto ResidualEDU = std::make_shared(TargetJD); - ResidualEDU->Symbols = std::move(ResidualSymbolFlags); - auto &ResidualEDUInfo = EDUInfos[ResidualEDU.get()]; - ResidualEDUInfo.EDU = std::move(ResidualEDU); - - // If the residual EDU is the only one then bail out early. - if (EDUInfos.size() == 1) - return EDUInfos; - - // Otherwise add the residual EDU to the EDUForSymbol map. - for (auto &[Sym, Flags] : ResidualEDUInfo.EDU->Symbols) - EDUForSymbol[Sym] = ResidualEDUInfo.EDU.get(); - } - } - -#ifndef NDEBUG - assert(EDUForSymbol.size() == MR.getSymbols().size() && - "MR symbols not fully covered by EDUs?"); - for (auto &[Sym, Flags] : MR.getSymbols()) { - assert(EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)) && - "Sym in MR not covered by EDU"); - } -#endif // NDEBUG - - // 3. Use the DepGroups array to build a graph of dependencies between - // EmissionDepUnits in this finalization. We want to remove these - // intra-finalization uses, propagating dependencies on symbols outside - // this finalization. Add EDUs to the worklist. - for (auto &DG : EmittedDeps) { - - // Skip SymbolDependenceGroups with no dependencies. - if (DG.Dependencies.empty()) - continue; - - assert(EDUForSymbol.count(NonOwningSymbolStringPtr(*DG.Symbols.begin())) && - "No EDU for DG"); - auto &EDU = - *EDUForSymbol.find(NonOwningSymbolStringPtr(*DG.Symbols.begin())) - ->second; - - for (auto &[DepJD, Deps] : DG.Dependencies) { - DenseSet NewDepsForJD; - - assert(!Deps.empty() && "Dependence set for DepJD is empty"); - - if (DepJD != &TargetJD) { - // DepJD is some other JITDylib.There can't be any intra-finalization - // edges here, so just skip. - for (auto &Dep : Deps) - NewDepsForJD.insert(NonOwningSymbolStringPtr(Dep)); - } else { - // DepJD is the Target JITDylib. Check for intra-finaliztaion edges, - // skipping any and recording the intra-finalization use instead. - for (auto &Dep : Deps) { - NonOwningSymbolStringPtr NonOwningDep(Dep); - auto I = EDUForSymbol.find(NonOwningDep); - if (I == EDUForSymbol.end()) { - if (!MR.getSymbols().count(Dep)) - NewDepsForJD.insert(NonOwningDep); - continue; - } - - if (I->second != &EDU) - EDUInfos[I->second].IntraEmitUsers.insert(&EDU); - } - } - - if (!NewDepsForJD.empty()) - EDU.Dependencies[DepJD] = std::move(NewDepsForJD); - } - } - - // 4. Build the worklist. - std::deque Worklist; - for (auto &[EDU, EDUInfo] : EDUInfos) { - // If this EDU has extra-finalization dependencies and intra-finalization - // users then add it to the worklist. - if (!EDU->Dependencies.empty()) { - auto I = EDUInfos.find(EDU); - if (I != EDUInfos.end()) { - auto &EDUInfo = I->second; - if (!EDUInfo.IntraEmitUsers.empty()) { - EDUInfo.NewDeps = EDU->Dependencies; - Worklist.push_back(EDU); - } - } - } - } - - // 4. Propagate dependencies through the EDU graph. - propagateExtraEmitDeps( - Worklist, EDUInfos, - [](JITDylib::EmissionDepUnit &, JITDylib &, NonOwningSymbolStringPtr) {}); - - return EDUInfos; -} - -void ExecutionSession::IL_makeEDUReady( - std::shared_ptr EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries) { - - // The symbols for this EDU are ready. 
- auto &JD = *EDU->JD; - - for (auto &[Sym, Flags] : EDU->Symbols) { - assert(JD.Symbols.count(SymbolStringPtr(Sym)) && - "JD does not have an entry for Sym"); - auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; - - assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() && - Entry.getState() == SymbolState::Materializing) || - Entry.getState() == SymbolState::Resolved || - Entry.getState() == SymbolState::Emitted) && - "Emitting from state other than Resolved"); - - Entry.setState(SymbolState::Ready); - - auto MII = JD.MaterializingInfos.find(SymbolStringPtr(Sym)); - - // Check for pending queries. - if (MII == JD.MaterializingInfos.end()) - continue; - auto &MI = MII->second; - - for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) { - Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol()); - if (Q->isComplete()) - Queries.insert(Q); - Q->removeQueryDependence(JD, SymbolStringPtr(Sym)); - } - - JD.MaterializingInfos.erase(MII); - } - - JD.shrinkMaterializationInfoMemory(); -} - -void ExecutionSession::IL_makeEDUEmitted( - std::shared_ptr EDU, - JITDylib::AsynchronousSymbolQuerySet &Queries) { - - // The symbols for this EDU are emitted, but not ready. - auto &JD = *EDU->JD; - - for (auto &[Sym, Flags] : EDU->Symbols) { - assert(JD.Symbols.count(SymbolStringPtr(Sym)) && - "JD does not have an entry for Sym"); - auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; - - assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() && - Entry.getState() == SymbolState::Materializing) || - Entry.getState() == SymbolState::Resolved || - Entry.getState() == SymbolState::Emitted) && - "Emitting from state other than Resolved"); - - if (Entry.getState() == SymbolState::Emitted) { - // This was already emitted, so we can skip the rest of this loop. -#ifndef NDEBUG - for (auto &[Sym, Flags] : EDU->Symbols) { - assert(JD.Symbols.count(SymbolStringPtr(Sym)) && - "JD does not have an entry for Sym"); - auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; - assert(Entry.getState() == SymbolState::Emitted && - "Symbols for EDU in inconsistent state"); - assert(JD.MaterializingInfos.count(SymbolStringPtr(Sym)) && - "Emitted symbol has no MI"); - auto MI = JD.MaterializingInfos[SymbolStringPtr(Sym)]; - assert(MI.takeQueriesMeeting(SymbolState::Emitted).empty() && - "Already-emitted symbol has waiting-on-emitted queries"); - } -#endif // NDEBUG - break; - } - - Entry.setState(SymbolState::Emitted); - auto &MI = JD.MaterializingInfos[SymbolStringPtr(Sym)]; - MI.DefiningEDU = EDU; - - for (auto &Q : MI.takeQueriesMeeting(SymbolState::Emitted)) { - Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol()); - if (Q->isComplete()) - Queries.insert(Q); - } - } - - for (auto &[DepJD, Deps] : EDU->Dependencies) { - for (auto &Dep : Deps) - DepJD->MaterializingInfos[SymbolStringPtr(Dep)].DependantEDUs.insert( - EDU.get()); } } -/// Removes the given dependence from EDU. If EDU's dependence set becomes -/// empty then this function adds an entry for it to the EDUInfos map. -/// Returns true if a new EDUInfosMap entry is added. 
-bool ExecutionSession::IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU, - JITDylib &DepJD, - NonOwningSymbolStringPtr DepSym, - EDUInfosMap &EDUInfos) { - assert(EDU.Dependencies.count(&DepJD) && - "JD does not appear in Dependencies of DependantEDU"); - assert(EDU.Dependencies[&DepJD].count(DepSym) && - "Symbol does not appear in Dependencies of DependantEDU"); - auto &JDDeps = EDU.Dependencies[&DepJD]; - JDDeps.erase(DepSym); - if (JDDeps.empty()) { - EDU.Dependencies.erase(&DepJD); - if (EDU.Dependencies.empty()) { - // If the dependencies set has become empty then EDU _may_ be ready - // (we won't know for sure until we've propagated the extra-emit deps). - // Create an EDUInfo for it (if it doesn't have one already) so that - // it'll be visited after propagation. - auto &DepEDUInfo = EDUInfos[&EDU]; - if (!DepEDUInfo.EDU) { - assert(EDU.JD->Symbols.count( - SymbolStringPtr(EDU.Symbols.begin()->first)) && - "Missing symbol entry for first symbol in EDU"); - auto DepEDUFirstMI = EDU.JD->MaterializingInfos.find( - SymbolStringPtr(EDU.Symbols.begin()->first)); - assert(DepEDUFirstMI != EDU.JD->MaterializingInfos.end() && - "Missing MI for first symbol in DependantEDU"); - DepEDUInfo.EDU = DepEDUFirstMI->second.DefiningEDU; - return true; - } - } - } - return false; -} - -Error ExecutionSession::makeJDClosedError(JITDylib::EmissionDepUnit &EDU, - JITDylib &ClosedJD) { - SymbolNameSet FailedSymbols; - for (auto &[Sym, Flags] : EDU.Symbols) - FailedSymbols.insert(SymbolStringPtr(Sym)); - SymbolDependenceMap BadDeps; - for (auto &Dep : EDU.Dependencies[&ClosedJD]) - BadDeps[&ClosedJD].insert(SymbolStringPtr(Dep)); - return make_error( - ClosedJD.getExecutionSession().getSymbolStringPool(), EDU.JD, - std::move(FailedSymbols), std::move(BadDeps), - ClosedJD.getName() + " is closed"); -} - -Error ExecutionSession::makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU, - JITDylib &BadJD, - SymbolNameSet BadDeps) { - SymbolNameSet FailedSymbols; - for (auto &[Sym, Flags] : EDU.Symbols) - FailedSymbols.insert(SymbolStringPtr(Sym)); - SymbolDependenceMap BadDepsMap; - BadDepsMap[&BadJD] = std::move(BadDeps); - return make_error( - BadJD.getExecutionSession().getSymbolStringPool(), &BadJD, - std::move(FailedSymbols), std::move(BadDepsMap), - "dependencies removed or in error state"); -} - -Expected +Expected ExecutionSession::IL_emit(MaterializationResponsibility &MR, - EDUInfosMap EDUInfos) { + WaitingOnGraph::SimplifyResult SR) { if (MR.RT->isDefunct()) return make_error(MR.RT); @@ -3279,169 +2952,50 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, return make_error("JITDylib " + TargetJD.getName() + " is defunct", inconvertibleErrorCode()); + #ifdef EXPENSIVE_CHECKS verifySessionState("entering ExecutionSession::IL_emit"); #endif - // Walk all EDUs: - // 1. Verifying that dependencies are available (not removed or in the error - // state. - // 2. Removing any dependencies that are already Ready. - // 3. Lifting any EDUs for Emitted symbols into the EDUInfos map. - // 4. Finding any dependant EDUs and lifting them into the EDUInfos map. 
- std::deque Worklist; - for (auto &[EDU, _] : EDUInfos) - Worklist.push_back(EDU); - - for (auto *EDU : Worklist) { - auto *EDUInfo = &EDUInfos[EDU]; - - SmallVector DepJDsToRemove; - for (auto &[DepJD, Deps] : EDU->Dependencies) { - if (DepJD->State != JITDylib::Open) - return makeJDClosedError(*EDU, *DepJD); - - SymbolNameSet BadDeps; - SmallVector DepsToRemove; - for (auto &Dep : Deps) { - auto DepEntryItr = DepJD->Symbols.find(SymbolStringPtr(Dep)); - - // If this dep has been removed or moved to the error state then add it - // to the bad deps set. We aggregate these bad deps for more - // comprehensive error messages. - if (DepEntryItr == DepJD->Symbols.end() || - DepEntryItr->second.getFlags().hasError()) { - BadDeps.insert(SymbolStringPtr(Dep)); - continue; - } - - // If this dep isn't emitted yet then just add it to the NewDeps set to - // be propagated. - auto &DepEntry = DepEntryItr->second; - if (DepEntry.getState() < SymbolState::Emitted) { - EDUInfo->NewDeps[DepJD].insert(Dep); - continue; - } - - // This dep has been emitted, so add it to the list to be removed from - // EDU. - DepsToRemove.push_back(Dep); - - // If Dep is Ready then there's nothing further to do. - if (DepEntry.getState() == SymbolState::Ready) { - assert(!DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) && - "Unexpected MaterializationInfo attached to ready symbol"); - continue; - } - - // If we get here then Dep is Emitted. We need to look up its defining - // EDU and add this EDU to the defining EDU's list of users (this means - // creating an EDUInfos entry if the defining EDU doesn't have one - // already). - assert(DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) && - "Expected MaterializationInfo for emitted dependency"); - auto &DepMI = DepJD->MaterializingInfos[SymbolStringPtr(Dep)]; - assert(DepMI.DefiningEDU && - "Emitted symbol does not have a defining EDU"); - assert(DepMI.DependantEDUs.empty() && - "Already-emitted symbol has dependant EDUs?"); - auto &DepEDUInfo = EDUInfos[DepMI.DefiningEDU.get()]; - if (!DepEDUInfo.EDU) { - // No EDUInfo yet -- build initial entry, and reset the EDUInfo - // pointer, which we will have invalidated. - EDUInfo = &EDUInfos[EDU]; - DepEDUInfo.EDU = DepMI.DefiningEDU; - for (auto &[DepDepJD, DepDeps] : DepEDUInfo.EDU->Dependencies) { - if (DepDepJD == &TargetJD) { - for (auto &DepDep : DepDeps) - if (!MR.getSymbols().count(SymbolStringPtr(DepDep))) - DepEDUInfo.NewDeps[DepDepJD].insert(DepDep); - } else - DepEDUInfo.NewDeps[DepDepJD] = DepDeps; - } - } - DepEDUInfo.IntraEmitUsers.insert(EDU); - } - - // Some dependencies were removed or in an error state -- error out. - if (!BadDeps.empty()) - return makeUnsatisfiedDepsError(*EDU, *DepJD, std::move(BadDeps)); - - // Remove the emitted / ready deps from DepJD. - for (auto &Dep : DepsToRemove) - Deps.erase(Dep); + auto ER = G.emit(std::move(SR), + [this](JITDylib *JD, NonOwningSymbolStringPtr Name) { + return IL_getSymbolState(JD, Name); + }); - // If there are no further deps in DepJD then flag it for removal too. - if (Deps.empty()) - DepJDsToRemove.push_back(DepJD); - } - - // Remove any JDs whose dependence sets have become empty. - for (auto &DepJD : DepJDsToRemove) { - assert(EDU->Dependencies.count(DepJD) && - "Trying to remove non-existent dep entries"); - EDU->Dependencies.erase(DepJD); - } - - // Now look for users of this EDU. 
-    for (auto &[Sym, Flags] : EDU->Symbols) {
-      assert(TargetJD.Symbols.count(SymbolStringPtr(Sym)) &&
-             "Sym not present in symbol table");
-      assert((TargetJD.Symbols[SymbolStringPtr(Sym)].getState() ==
-                  SymbolState::Resolved ||
-              TargetJD.Symbols[SymbolStringPtr(Sym)]
-                  .getFlags()
-                  .hasMaterializationSideEffectsOnly()) &&
-             "Emitting symbol not in the resolved state");
-      assert(!TargetJD.Symbols[SymbolStringPtr(Sym)].getFlags().hasError() &&
-             "Symbol is already in an error state");
-
-      auto MII = TargetJD.MaterializingInfos.find(SymbolStringPtr(Sym));
-      if (MII == TargetJD.MaterializingInfos.end() ||
-          MII->second.DependantEDUs.empty())
-        continue;
+  EmitQueries EQ;
 
-      for (auto &DependantEDU : MII->second.DependantEDUs) {
-        if (IL_removeEDUDependence(*DependantEDU, TargetJD, Sym, EDUInfos))
-          EDUInfo = &EDUInfos[EDU];
-        EDUInfo->IntraEmitUsers.insert(DependantEDU);
-      }
-      MII->second.DependantEDUs.clear();
-    }
-  }
-
-  Worklist.clear();
-  for (auto &[EDU, EDUInfo] : EDUInfos) {
-    if (!EDUInfo.IntraEmitUsers.empty() && !EDU->Dependencies.empty()) {
-      if (EDUInfo.NewDeps.empty())
-        EDUInfo.NewDeps = EDU->Dependencies;
-      Worklist.push_back(EDU);
-    }
-  }
-
-  propagateExtraEmitDeps(
-      Worklist, EDUInfos,
-      [](JITDylib::EmissionDepUnit &EDU, JITDylib &JD,
-         NonOwningSymbolStringPtr Sym) {
-        JD.MaterializingInfos[SymbolStringPtr(Sym)].DependantEDUs.insert(&EDU);
-      });
+  // Handle failed queries.
+  for (auto &SN : ER.Failed)
+    IL_collectQueries(
+        EQ.Failed, SN->defs(),
+        [](JITDylib::SymbolTableEntry &E) {
+          E.setFlags(E.getFlags() | JITSymbolFlags::HasError);
+        },
+        [&](AsynchronousSymbolQuery &Q, JITDylib &JD,
+            NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) {
+          auto &FS = EQ.FailedSymsForQuery[&Q];
+          if (!FS)
+            FS = std::make_shared<SymbolDependenceMap>();
+          (*FS)[&JD].insert(SymbolStringPtr(Name));
+        });
 
-  JITDylib::AsynchronousSymbolQuerySet CompletedQueries;
+  for (auto &FQ : EQ.Failed)
+    FQ->detach();
 
-  // Extract completed queries and lodge not-yet-ready EDUs in the
-  // session.
- for (auto &[EDU, EDUInfo] : EDUInfos) { - if (EDU->Dependencies.empty()) - IL_makeEDUReady(std::move(EDUInfo.EDU), CompletedQueries); - else - IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries); - } + for (auto &SN : ER.Ready) + IL_collectQueries( + EQ.Updated, SN->defs(), + [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); }, + [](AsynchronousSymbolQuery &Q, JITDylib &JD, + NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { + Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol()); + }); #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_emit"); #endif - return std::move(CompletedQueries); + return std::move(EQ); } Error ExecutionSession::OL_notifyEmitted( @@ -3471,40 +3025,127 @@ Error ExecutionSession::OL_notifyEmitted( } #endif // NDEBUG - auto EDUInfos = simplifyDepGroups(MR, DepGroups); + std::vector> SNs; + WaitingOnGraph::ContainerElementsMap Residual; + { + auto &JDResidual = Residual[&MR.getTargetJITDylib()]; + for (auto &[Name, Flags] : MR.getSymbols()) + JDResidual.insert(NonOwningSymbolStringPtr(Name)); + + for (auto &SDG : DepGroups) { + WaitingOnGraph::ContainerElementsMap Defs; + assert(!SDG.Symbols.empty()); + auto &JDDefs = Defs[&MR.getTargetJITDylib()]; + for (auto &Def : SDG.Symbols) { + JDDefs.insert(NonOwningSymbolStringPtr(Def)); + JDResidual.erase(NonOwningSymbolStringPtr(Def)); + } + WaitingOnGraph::ContainerElementsMap Deps; + if (!SDG.Dependencies.empty()) { + for (auto &[JD, Syms] : SDG.Dependencies) { + auto &JDDeps = Deps[JD]; + for (auto &Dep : Syms) + JDDeps.insert(NonOwningSymbolStringPtr(Dep)); + } + } + SNs.push_back(std::make_unique( + std::move(Defs), std::move(Deps))); + } + if (!JDResidual.empty()) + SNs.push_back(std::make_unique( + std::move(Residual), WaitingOnGraph::ContainerElementsMap())); + } + + auto SR = WaitingOnGraph::simplify(std::move(SNs)); LLVM_DEBUG({ dbgs() << " Simplified dependencies:\n"; - for (auto &[EDU, EDUInfo] : EDUInfos) { - dbgs() << " Symbols: { "; - for (auto &[Sym, Flags] : EDU->Symbols) - dbgs() << Sym << " "; - dbgs() << "}, Dependencies: { "; - for (auto &[DepJD, Deps] : EDU->Dependencies) { - dbgs() << "(" << DepJD->getName() << ", { "; - for (auto &Dep : Deps) - dbgs() << Dep << " "; - dbgs() << "}) "; + for (auto &SN : SR.superNodes()) { + + auto SortedLibs = [](WaitingOnGraph::ContainerElementsMap &C) { + std::vector JDs; + for (auto &[JD, _] : C) + JDs.push_back(JD); + llvm::sort(JDs, [](const JITDylib *LHS, const JITDylib *RHS) { + return LHS->getName() < RHS->getName(); + }); + return JDs; + }; + + auto SortedNames = [](WaitingOnGraph::ElementSet &Elems) { + std::vector Names(Elems.begin(), Elems.end()); + llvm::sort(Names, [](const NonOwningSymbolStringPtr &LHS, + const NonOwningSymbolStringPtr &RHS) { + return *LHS < *RHS; + }); + return Names; + }; + + dbgs() << " Defs: {"; + for (auto *JD : SortedLibs(SN->defs())) { + dbgs() << " (" << JD->getName() << ", ["; + for (auto &Sym : SortedNames(SN->defs()[JD])) + dbgs() << " " << Sym; + dbgs() << " ])"; } - dbgs() << "}\n"; + dbgs() << " }, Deps: {"; + for (auto *JD : SortedLibs(SN->deps())) { + dbgs() << " (" << JD->getName() << ", ["; + for (auto &Sym : SortedNames(SN->deps()[JD])) + dbgs() << " " << Sym; + dbgs() << " ])"; + } + dbgs() << " }\n"; } }); - - auto CompletedQueries = - runSessionLocked([&]() { return IL_emit(MR, EDUInfos); }); + auto EmitQueries = + runSessionLocked([&]() { return IL_emit(MR, std::move(SR)); }); // On error bail out. 
- if (!CompletedQueries) - return CompletedQueries.takeError(); + if (!EmitQueries) + return EmitQueries.takeError(); - MR.SymbolFlags.clear(); + // Otherwise notify failed queries, and any updated queries that have been + // completed. - // Otherwise notify all the completed queries. - for (auto &Q : *CompletedQueries) { - assert(Q->isComplete() && "Q is not complete"); - Q->handleComplete(*this); + // FIXME: Get rid of error return from notifyEmitted. + SymbolDependenceMap BadDeps; + { + for (auto &FQ : EmitQueries->Failed) { + FQ->detach(); + assert(EmitQueries->FailedSymsForQuery.count(FQ.get()) && + "Missing failed symbols for query"); + auto FailedSyms = std::move(EmitQueries->FailedSymsForQuery[FQ.get()]); + for (auto &[JD, Syms] : *FailedSyms) { + auto &BadDepsForJD = BadDeps[JD]; + for (auto &Sym : Syms) + BadDepsForJD.insert(Sym); + } + FQ->handleFailed(make_error(getSymbolStringPool(), + std::move(FailedSyms))); + } + } + + for (auto &UQ : EmitQueries->Updated) + if (UQ->isComplete()) + UQ->handleComplete(*this); + + // If there are any bad dependencies then return an error. + if (!BadDeps.empty()) { + SymbolNameSet BadNames; + // Note: The name set calculated here is bogus: it includes all symbols in + // the MR, not just the ones that failed. We want to remove the error + // return path from notifyEmitted anyway, so this is just a brief + // placeholder to maintain (roughly) the current error behavior. + for (auto &[Name, Flags] : MR.getSymbols()) + BadNames.insert(Name); + MR.SymbolFlags.clear(); + return make_error( + getSymbolStringPool(), &MR.getTargetJITDylib(), std::move(BadNames), + std::move(BadDeps), "dependencies removed or in error state"); } + MR.SymbolFlags.clear(); return Error::success(); } @@ -3535,158 +3176,48 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, #endif JITDylib::AsynchronousSymbolQuerySet FailedQueries; - auto FailedSymbolsMap = std::make_shared(); - auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) { - JITDylib::AsynchronousSymbolQueryList ToDetach; - for (auto &Q : MI.pendingQueries()) { - // Add the query to the list to be failed and detach it. - FailedQueries.insert(Q); - ToDetach.push_back(Q); + auto Fail = [&](JITDylib *FailJD, NonOwningSymbolStringPtr FailSym) { + auto I = FailJD->Symbols.find_as(FailSym); + assert(I != FailJD->Symbols.end()); + I->second.setFlags(I->second.getFlags() | JITSymbolFlags::HasError); + auto J = FailJD->MaterializingInfos.find_as(FailSym); + if (J != FailJD->MaterializingInfos.end()) { + for (auto &Q : J->second.takeAllPendingQueries()) + FailedQueries.insert(std::move(Q)); + FailJD->MaterializingInfos.erase(J); } - for (auto &Q : ToDetach) - Q->detach(); - assert(!MI.hasQueriesPending() && "Queries still pending after detach"); }; - for (auto &Name : SymbolsToFail) { - (*FailedSymbolsMap)[&JD].insert(Name); - - // Look up the symbol to fail. - auto SymI = JD.Symbols.find(Name); - - // FIXME: Revisit this. We should be able to assert sequencing between - // ResourceTracker removal and symbol failure. - // - // It's possible that this symbol has already been removed, e.g. if a - // materialization failure happens concurrently with a ResourceTracker or - // JITDylib removal. In that case we can safely skip this symbol and - // continue. - if (SymI == JD.Symbols.end()) - continue; - auto &Sym = SymI->second; - - // If the symbol is already in the error state then we must have visited - // it earlier. 
- if (Sym.getFlags().hasError()) { - assert(!JD.MaterializingInfos.count(Name) && - "Symbol in error state still has MaterializingInfo"); - continue; + auto FailedSymbolsMap = std::make_shared(); + + { + auto &FailedSymsForJD = (*FailedSymbolsMap)[&JD]; + for (auto &Sym : SymbolsToFail) { + FailedSymsForJD.insert(Sym); + Fail(&JD, NonOwningSymbolStringPtr(Sym)); } + } - // Move the symbol into the error state. - Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError); - - // FIXME: Come up with a sane mapping of state to - // presence-of-MaterializingInfo so that we can assert presence / absence - // here, rather than testing it. - auto MII = JD.MaterializingInfos.find(Name); - if (MII == JD.MaterializingInfos.end()) - continue; - - auto &MI = MII->second; - - // Collect queries to be failed for this MII. - ExtractFailedQueries(MI); - - if (MI.DefiningEDU) { - // If there is a DefiningEDU for this symbol then remove this - // symbol from it. - assert(MI.DependantEDUs.empty() && - "Symbol with DefiningEDU should not have DependantEDUs"); - assert(Sym.getState() >= SymbolState::Emitted && - "Symbol has EDU, should have been emitted"); - assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) && - "Symbol does not appear in its DefiningEDU"); - MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name)); - - // Remove this EDU from the dependants lists of its dependencies. - for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) { - for (auto DepSym : DepSyms) { - assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && - "DepSym not in DepJD"); - assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && - "DepSym has not MaterializingInfo"); - auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; - assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) && - "DefiningEDU missing from DependantEDUs list of dependency"); - SymMI.DependantEDUs.erase(MI.DefiningEDU.get()); - } - } + WaitingOnGraph::ContainerElementsMap ToFail; + auto &JDToFail = ToFail[&JD]; + for (auto &Sym : SymbolsToFail) + JDToFail.insert(NonOwningSymbolStringPtr(Sym)); - MI.DefiningEDU = nullptr; - } else { - // Otherwise if there are any EDUs waiting on this symbol then move - // those symbols to the error state too, and deregister them from the - // symbols that they depend on. - // Note: We use a copy of DependantEDUs here since we'll be removing - // from the original set as we go. - for (auto &DependantEDU : MI.DependantEDUs) { - - // Remove DependantEDU from all of its users DependantEDUs lists. - for (auto &[DepJD, DepSyms] : DependantEDU->Dependencies) { - for (auto DepSym : DepSyms) { - // Skip self-reference to avoid invalidating the MI.DependantEDUs - // map. We'll clear this later. - if (DepJD == &JD && DepSym == Name) - continue; - assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && - "DepSym not in DepJD?"); - assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && - "DependantEDU not registered with symbol it depends on"); - auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; - assert(SymMI.DependantEDUs.count(DependantEDU) && - "DependantEDU missing from DependantEDUs list"); - SymMI.DependantEDUs.erase(DependantEDU); - } - } - - // Move any symbols defined by DependantEDU into the error state and - // fail any queries waiting on them. 
- auto &DepJD = *DependantEDU->JD; - auto DepEDUSymbols = std::move(DependantEDU->Symbols); - for (auto &[DepName, Flags] : DepEDUSymbols) { - auto DepSymItr = DepJD.Symbols.find(SymbolStringPtr(DepName)); - assert(DepSymItr != DepJD.Symbols.end() && - "Symbol not present in table"); - auto &DepSym = DepSymItr->second; - - assert(DepSym.getState() >= SymbolState::Emitted && - "Symbol has EDU, should have been emitted"); - assert(!DepSym.getFlags().hasError() && - "Symbol is already in the error state?"); - DepSym.setFlags(DepSym.getFlags() | JITSymbolFlags::HasError); - (*FailedSymbolsMap)[&DepJD].insert(SymbolStringPtr(DepName)); - - // This symbol has a defining EDU so its MaterializingInfo object must - // exist. - auto DepMIItr = - DepJD.MaterializingInfos.find(SymbolStringPtr(DepName)); - assert(DepMIItr != DepJD.MaterializingInfos.end() && - "Symbol has defining EDU but not MaterializingInfo"); - auto &DepMI = DepMIItr->second; - assert(DepMI.DefiningEDU.get() == DependantEDU && - "Bad EDU dependence edge"); - assert(DepMI.DependantEDUs.empty() && - "Symbol was emitted, should not have any DependantEDUs"); - ExtractFailedQueries(DepMI); - DepJD.MaterializingInfos.erase(SymbolStringPtr(DepName)); - } + auto FailedSNs = G.fail(ToFail); - DepJD.shrinkMaterializationInfoMemory(); + for (auto &SN : FailedSNs) { + for (auto &[FailJD, Defs] : SN->defs()) { + auto &FailedSymsForFailJD = (*FailedSymbolsMap)[FailJD]; + for (auto &Def : Defs) { + FailedSymsForFailJD.insert(SymbolStringPtr(Def)); + Fail(FailJD, Def); } - - MI.DependantEDUs.clear(); } - - assert(!MI.DefiningEDU && "DefiningEDU should have been reset"); - assert(MI.DependantEDUs.empty() && - "DependantEDUs should have been removed above"); - assert(!MI.hasQueriesPending() && - "Can not delete MaterializingInfo with queries pending"); - JD.MaterializingInfos.erase(Name); } - JD.shrinkMaterializationInfoMemory(); + // Detach all failed queries. 
+ for (auto &Q : FailedQueries) + Q->detach(); #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_failSymbols"); @@ -3721,9 +3252,11 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { return IL_failSymbols(MR.getTargetJITDylib(), SymbolsToFail); }); - for (auto &Q : FailedQueries) + for (auto &Q : FailedQueries) { + Q->detach(); Q->handleFailed( make_error(getSymbolStringPool(), FailedSymbols)); + } } Error ExecutionSession::OL_replace(MaterializationResponsibility &MR, diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp index dec1df7da2f4a..893523ced8651 100644 --- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp @@ -448,7 +448,7 @@ Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) { if (const char *ErrMsg = WFR.getOutOfBandError()) return make_error(ErrMsg, inconvertibleErrorCode()); - detail::SPSSerializableError Info; + orc::shared::detail::SPSSerializableError Info; SPSInputBuffer IB(WFR.data(), WFR.size()); if (!SPSArgList::deserialize(IB, Info)) return make_error("Could not deserialize hangup info", diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s b/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s index 83d71cdf6fc83..529395822f5f7 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s @@ -16,8 +16,7 @@ # CHECK-DAG: Symbols: { _foo }, Dependencies: { (main, { _external_func }) } # CHECK-DAG: Symbols: { _baz }, Dependencies: { (main, { _foo }) } # CHECK: Simplified dependencies: -# CHECK-DAG: Symbols: { _foo }, Dependencies: { (main, { _external_func }) } -# CHECK-DAG: Symbols: { _baz }, Dependencies: { (main, { _external_func }) } +# CHECK-DAG: Defs: { (main, [ _baz _foo ]) }, Deps: { (main, [ _external_func ]) } .section __TEXT,__text,regular,pure_instructions diff --git a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt index a2bbb10039c9a..7e3ebc88cea63 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_unittest(OrcJITTests SymbolStringPoolTest.cpp TaskDispatchTest.cpp ThreadSafeModuleTest.cpp + WaitingOnGraphTest.cpp WrapperFunctionUtilsTest.cpp JITLinkRedirectionManagerTest.cpp ReOptimizeLayerTest.cpp diff --git a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp new file mode 100644 index 0000000000000..b988a78a3783a --- /dev/null +++ b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp @@ -0,0 +1,553 @@ +//===--------- WaitingOnGraphTest.cpp - Test WaitingOnGraph APIs ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h" +#include "gtest/gtest.h" + +namespace llvm::orc::detail { + +class WaitingOnGraphTest : public testing::Test { +public: + using TestGraph = WaitingOnGraph; + +protected: + using SuperNode = TestGraph::SuperNode; + using SuperNodeBuilder = TestGraph::SuperNodeBuilder; + using ContainerElementsMap = TestGraph::ContainerElementsMap; + using ElemToSuperNodeMap = TestGraph::ElemToSuperNodeMap; + using SimplifyResult = TestGraph::SimplifyResult; + using EmitResult = TestGraph::EmitResult; + + static const ContainerElementsMap &getDefs(SuperNode &SN) { return SN.Defs; } + + static const ContainerElementsMap &getDeps(SuperNode &SN) { return SN.Deps; } + + static std::vector> &getSNs(SimplifyResult &SR) { + return SR.SNs; + } + + static ElemToSuperNodeMap &getElemToSN(SimplifyResult &SR) { + return SR.ElemToSN; + } + + static std::vector> &getPendingSNs(TestGraph &G) { + return G.PendingSNs; + } + + static ContainerElementsMap merge(ContainerElementsMap M1, + const ContainerElementsMap &M2) { + ContainerElementsMap Result = std::move(M1); + for (auto &[Container, Elems] : M2) + Result[Container].insert(Elems.begin(), Elems.end()); + return Result; + } + + ContainerElementsMap + collapseDefs(std::vector> &SNs, + bool DepsMustMatch = true) { + if (SNs.empty()) + return ContainerElementsMap(); + + ContainerElementsMap Result = SNs[0]->defs(); + const ContainerElementsMap &Deps = SNs[0]->deps(); + + for (size_t I = 1; I != SNs.size(); ++I) { + assert(!DepsMustMatch || SNs[I]->deps() == Deps); + Result = merge(std::move(Result), SNs[I]->defs()); + } + + return Result; + } + + EmitResult integrate(EmitResult ER) { + for (auto &SN : ER.Ready) + for (auto &[Container, Elems] : SN->defs()) + Ready[Container].insert(Elems.begin(), Elems.end()); + for (auto &SN : ER.Failed) + for (auto &[Container, Elems] : SN->defs()) + Failed[Container].insert(Elems.begin(), Elems.end()); + return ER; + } + + EmitResult emit(SimplifyResult SR) { + return integrate(G.emit(std::move(SR), GetExternalState)); + } + + TestGraph G; + ContainerElementsMap Ready; + ContainerElementsMap Failed; + + class ExternalStateGetter { + public: + ExternalStateGetter(WaitingOnGraphTest &T) : T(T) {} + TestGraph::ExternalState operator()(TestGraph::ContainerId C, + TestGraph::ElementId E) { + { + auto I = T.Failed.find(C); + if (I != T.Failed.end()) + if (I->second.count(E)) + return TestGraph::ExternalState::Failed; + } + + { + auto I = T.Ready.find(C); + if (I != T.Ready.end()) + if (I->second.count(E)) + return TestGraph::ExternalState::Ready; + } + + return TestGraph::ExternalState::None; + } + + private: + WaitingOnGraphTest &T; + }; + + ExternalStateGetter GetExternalState{*this}; +}; + +} // namespace llvm::orc::detail + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::orc::detail; + +TEST_F(WaitingOnGraphTest, ConstructAndDestroyEmpty) { + // Nothing to do here -- we're just testing construction and destruction + // of the WaitingOnGraphTest::G member. +} + +TEST_F(WaitingOnGraphTest, Build_TrivialSingleSuperNode) { + // Add one set of trivial defs and empty deps to the builder, make sure that + // they're passed through to the resulting super-node. 
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs({{0, {0}}});
+  ContainerElementsMap Deps;
+  B.add(Defs, Deps);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_EQ(SNs.size(), 1U);
+  EXPECT_EQ(getDefs(*SNs[0]), Defs);
+  EXPECT_EQ(getDeps(*SNs[0]), Deps);
+}
+
+TEST_F(WaitingOnGraphTest, Build_EmptyDefs) {
+  // Adding empty def sets is ok, but should not result in creation of a
+  // SuperNode.
+  SuperNodeBuilder B;
+  ContainerElementsMap Empty;
+  B.add(Empty, Empty);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_TRUE(SNs.empty());
+}
+
+TEST_F(WaitingOnGraphTest, Build_NonTrivialSingleSuperNode) {
+  // Add one non-trivial set of defs and deps. Make sure that they're passed
+  // through to the resulting super-node.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs({{0, {0, 1, 2}}});
+  ContainerElementsMap Deps({{1, {3, 4, 5}}});
+  B.add(Defs, Deps);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_EQ(SNs.size(), 1U);
+  EXPECT_EQ(getDefs(*SNs[0]), Defs);
+  EXPECT_EQ(getDeps(*SNs[0]), Deps);
+}
+
+TEST_F(WaitingOnGraphTest, Build_CoalesceEmptyDeps) {
+  // Add two trivial defs both with empty deps to the builder. Check that
+  // they're coalesced into a single super-node.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs1({{0, {0}}});
+  ContainerElementsMap Defs2({{0, {1}}});
+  ContainerElementsMap Deps;
+  B.add(Defs1, Deps);
+  B.add(Defs2, Deps);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_EQ(SNs.size(), 1U);
+  EXPECT_EQ(getDefs(*SNs[0]), merge(Defs1, Defs2));
+  EXPECT_EQ(getDeps(*SNs[0]), Deps);
+}
+
+TEST_F(WaitingOnGraphTest, Build_CoalesceNonEmptyDeps) {
+  // Add two trivial def sets with identical non-empty deps to the builder.
+  // Check that the two coalesce into a single super-node.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs1({{0, {0}}});
+  ContainerElementsMap Defs2({{0, {1}}});
+  ContainerElementsMap Deps({{1, {1}}});
+  B.add(Defs1, Deps);
+  B.add(Defs2, Deps);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_EQ(SNs.size(), 1U);
+  EXPECT_EQ(getDefs(*SNs[0]), merge(Defs1, Defs2));
+  EXPECT_EQ(getDeps(*SNs[0]), Deps);
+}
+
+TEST_F(WaitingOnGraphTest, Build_CoalesceInterleaved) {
+  // Add multiple sets of defs, some with the same dep sets. Check that nodes
+  // are still coalesced as expected.
+  SuperNodeBuilder B;
+
+  ContainerElementsMap DefsA1({{0, {0}}});
+  ContainerElementsMap DefsA2({{0, {1}}});
+  ContainerElementsMap DefsB1({{1, {0}}});
+  ContainerElementsMap DefsB2({{1, {1}}});
+  ContainerElementsMap DepsA({{2, {0}}, {3, {0}}});
+  ContainerElementsMap DepsB({{4, {0}}, {5, {0}}});
+  B.add(DefsA1, DepsA);
+  B.add(DefsB1, DepsB);
+  B.add(DefsA2, DepsA);
+  B.add(DefsB2, DepsB);
+  auto SNs = B.takeSuperNodes();
+  EXPECT_EQ(SNs.size(), 2U);
+  EXPECT_EQ(getDefs(*SNs[0]), merge(DefsA1, DefsA2));
+  EXPECT_EQ(getDeps(*SNs[0]), DepsA);
+  EXPECT_EQ(getDefs(*SNs[1]), merge(DefsB1, DefsB2));
+  EXPECT_EQ(getDeps(*SNs[1]), DepsB);
+}
+
+TEST_F(WaitingOnGraphTest, Build_SelfDepRemoval) {
+  // Add a def set whose deps include one of its own defs. Check that the
+  // self-dependence is removed from the resulting super-node.
+ SuperNodeBuilder B; + ContainerElementsMap Defs({{0, {0, 1}}}); + ContainerElementsMap Deps({{0, {1}}}); + ContainerElementsMap Empty; + B.add(Defs, Deps); + auto SNs = B.takeSuperNodes(); + EXPECT_EQ(SNs.size(), 1U); + EXPECT_EQ(getDefs(*SNs[0]), Defs); + EXPECT_EQ(getDeps(*SNs[0]), Empty); +} + +TEST_F(WaitingOnGraphTest, Simplification_EmptySimplification) { + auto SR = TestGraph::simplify({}); + auto &SNs = getSNs(SR); + EXPECT_EQ(SNs.size(), 0U); + EXPECT_EQ(getElemToSN(SR), ElemToSuperNodeMap()); +} + +TEST_F(WaitingOnGraphTest, Simplification_TrivialSingleSuperNode) { + // Test trivial call to simplify. + SuperNodeBuilder B; + ContainerElementsMap Defs({{0, {0}}}); + ContainerElementsMap Deps({{0, {0}}}); + B.add(Defs, Deps); + auto SR = TestGraph::simplify(B.takeSuperNodes()); + ContainerElementsMap Empty; + + // Check SNs. + auto &SNs = getSNs(SR); + EXPECT_EQ(SNs.size(), 1U); + EXPECT_EQ(getDefs(*SNs.at(0)), Defs); + EXPECT_EQ(getDeps(*SNs.at(0)), Empty); + + // Check ElemToSNs. + ElemToSuperNodeMap ExpectedElemToSNs; + ExpectedElemToSNs[0][0] = SNs[0].get(); + EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); +} + +TEST_F(WaitingOnGraphTest, Simplification_SimplifySingleContainerSimpleCycle) { + // Test trivial simplification call with two nodes and one internal + // dependence cycle within a single container: + // N0: (0, 0) -> (0, 1) + // N1: (0, 1) -> (0, 0) + // We expect intra-simplify cycle elimination to clear both dependence sets, + // and coalescing to join them into one supernode covering both defs. + SuperNodeBuilder B; + ContainerElementsMap Defs0({{0, {0}}}); + ContainerElementsMap Deps0({{0, {1}}}); + B.add(Defs0, Deps0); + ContainerElementsMap Defs1({{0, {1}}}); + ContainerElementsMap Deps1({{0, {0}}}); + B.add(Defs1, Deps1); + auto SR = TestGraph::simplify(B.takeSuperNodes()); + + // Check SNs. + auto &SNs = getSNs(SR); + ContainerElementsMap Empty; + EXPECT_EQ(SNs.size(), 1U); + EXPECT_EQ(getDefs(*SNs.at(0)), merge(Defs0, Defs1)); + EXPECT_EQ(getDeps(*SNs.at(0)), Empty); + + // Check ElemToSNs. + ElemToSuperNodeMap ExpectedElemToSNs; + ExpectedElemToSNs[0][0] = SNs[0].get(); + ExpectedElemToSNs[0][1] = SNs[0].get(); + + EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); +} + +TEST_F(WaitingOnGraphTest, + Simplification_SimplifySingleContainerNElementCycle) { + // Test trivial simplification call with M nodes and one internal + // dependence cycle within a single container: + // N0: (0, 0) -> (0, 1) + // N1: (0, 1) -> (0, 2) + // ... + // NM: (0, M) -> (0, 0) + // We expect intra-simplify cycle elimination to clear all dependence sets, + // and coalescing to join them into one supernode covering all defs. + SuperNodeBuilder B; + constexpr size_t M = 10; + for (size_t I = 0; I != M; ++I) { + ContainerElementsMap Defs({{0, {I}}}); + ContainerElementsMap Deps({{0, {(I + 1) % M}}}); + B.add(Defs, Deps); + } + auto InitSNs = B.takeSuperNodes(); + EXPECT_EQ(InitSNs.size(), M); + + auto SR = TestGraph::simplify(std::move(InitSNs)); + + // Check SNs. + auto &SNs = getSNs(SR); + ContainerElementsMap ExpectedDefs; + for (size_t I = 0; I != M; ++I) + ExpectedDefs[0].insert(I); + ContainerElementsMap Empty; + EXPECT_EQ(SNs.size(), 1U); + EXPECT_EQ(getDefs(*SNs.at(0)), ExpectedDefs); + EXPECT_EQ(getDeps(*SNs.at(0)), Empty); + + // Check ElemToSNs. 
+  ElemToSuperNodeMap ExpectedElemToSNs;
+  for (size_t I = 0; I != M; ++I)
+    ExpectedElemToSNs[0][I] = SNs[0].get();
+
+  EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs);
+}
+
+TEST_F(WaitingOnGraphTest, Simplification_SimplifyIntraSimplifyPropagateDeps) {
+  // Test a simplification call with two nodes and one intra-emit dependence
+  // within a single container:
+  //   N0: (0, 0) -> (0, {1, 2})
+  //   N1: (0, 1) -> (0, {3})
+  // We expect intra-simplify dependence propagation to replace N0's
+  // dependence on (0, 1) with N1's dependence on (0, 3), leaving N0 depending
+  // on (0, {2, 3}).
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Deps0({{0, {1, 2}}});
+  B.add(Defs0, Deps0);
+  ContainerElementsMap Defs1({{0, {1}}});
+  ContainerElementsMap Deps1({{0, {3}}});
+  B.add(Defs1, Deps1);
+  auto SR = TestGraph::simplify(B.takeSuperNodes());
+
+  // Check SNs.
+  auto &SNs = getSNs(SR);
+  EXPECT_EQ(SNs.size(), 2U);
+
+  EXPECT_EQ(getDefs(*SNs.at(0)), ContainerElementsMap({{0, {0}}}));
+  EXPECT_EQ(getDeps(*SNs.at(0)), ContainerElementsMap({{0, {2, 3}}}));
+
+  EXPECT_EQ(getDefs(*SNs.at(1)), ContainerElementsMap({{0, {1}}}));
+  EXPECT_EQ(getDeps(*SNs.at(1)), ContainerElementsMap({{0, {3}}}));
+
+  // Check ElemToSNs.
+  ElemToSuperNodeMap ExpectedElemToSNs;
+  ExpectedElemToSNs[0][0] = SNs[0].get();
+  ExpectedElemToSNs[0][1] = SNs[1].get();
+
+  EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_EmptyEmit) {
+  // Check that empty emits work as expected.
+  auto ER = G.emit(TestGraph::simplify({}), GetExternalState);
+
+  EXPECT_EQ(ER.Ready.size(), 0U);
+  EXPECT_EQ(ER.Failed.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_TrivialSingleNode) {
+  // Check that emitting a single node behaves as expected.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs({{0, {0}}});
+  B.add(Defs, ContainerElementsMap());
+  auto ER = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(collapseDefs(ER.Ready), Defs);
+  EXPECT_EQ(ER.Failed.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_TrivialSequence) {
+  // Perform a sequence of two emits where the second emit depends on the
+  // first. Check that nodes become ready after each emit.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Empty;
+  B.add(Defs0, Empty);
+  auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(collapseDefs(ER0.Ready), Defs0);
+  EXPECT_EQ(ER0.Failed.size(), 0U);
+
+  ContainerElementsMap Defs1({{0, {1}}});
+  ContainerElementsMap Deps1({{0, {0}}});
+  B.add(Defs1, Deps1);
+  auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(collapseDefs(ER1.Ready), Defs1);
+  EXPECT_EQ(ER1.Failed.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_TrivialReverseSequence) {
+  // Perform a sequence of two emits where the first emit depends on the
+  // second. Check that both nodes become ready after the second emit.
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Deps0({{0, {1}}});
+  B.add(Defs0, Deps0);
+  auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER0.Ready.size(), 0U);
+  EXPECT_EQ(ER0.Failed.size(), 0U);
+
+  ContainerElementsMap Defs1({{0, {1}}});
+  ContainerElementsMap Empty;
+  B.add(Defs1, Empty);
+  auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(collapseDefs(ER1.Ready), merge(Defs0, Defs1));
+  EXPECT_EQ(ER1.Failed.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_Coalescing) {
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Deps0({{1, {0}}});
+  B.add(Defs0, Deps0);
+  auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER0.Ready.size(), 0U);
+  EXPECT_EQ(ER0.Failed.size(), 0U);
+
+  ContainerElementsMap Defs1({{0, {1}}});
+  ContainerElementsMap Deps1({{1, {0}}});
+  B.add(Defs1, Deps1);
+  auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER1.Ready.size(), 0U);
+  EXPECT_EQ(ER1.Failed.size(), 0U);
+
+  // Check that after emitting two nodes with the same dep set we have only one
+  // pending supernode whose defs are the union of the defs in the two emits.
+  auto &PendingSNs = getPendingSNs(G);
+  EXPECT_EQ(PendingSNs.size(), 1U);
+  EXPECT_EQ(getDefs(*PendingSNs.at(0)), merge(Defs0, Defs1));
+
+  ContainerElementsMap Defs2({{1, {0}}});
+  ContainerElementsMap Empty;
+  B.add(Defs2, Empty);
+  auto ER2 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(collapseDefs(ER2.Ready), merge(merge(Defs0, Defs1), Defs2));
+  EXPECT_EQ(ER2.Failed.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Emit_ZigZag) {
+  // Perform a sequence of four emits, where the first three contain a zig-zag
+  // pattern:
+  //   1. (0, 0) -> (0, 1)
+  //   2. (0, 2) -> (0, 3)
+  //      ^ -- At this point we expect two pending supernodes.
+  //   3. (0, 1) -> (0, 2)
+  //      ^ -- Resolution of (0, 1) should cause all three emitted nodes to
+  //           coalesce into one supernode defining (0, {0, 1, 2}).
+  //   4. (0, 3)
+  //      ^ -- Should cause all four nodes to become ready.
+
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Deps0({{0, {1}}});
+  B.add(Defs0, Deps0);
+  auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER0.Ready.size(), 0U);
+  EXPECT_EQ(ER0.Failed.size(), 0U);
+
+  ContainerElementsMap Defs1({{0, {2}}});
+  ContainerElementsMap Deps1({{0, {3}}});
+  B.add(Defs1, Deps1);
+  auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER1.Ready.size(), 0U);
+  EXPECT_EQ(ER1.Failed.size(), 0U);
+
+  // Check that after the first two emits (which have different dep sets) we
+  // have two separate pending supernodes.
+  auto &PendingSNs = getPendingSNs(G);
+  EXPECT_EQ(PendingSNs.size(), 2U);
+  EXPECT_EQ(getDefs(*PendingSNs.at(0)), Defs0);
+  EXPECT_EQ(getDeps(*PendingSNs.at(0)), Deps0);
+  EXPECT_EQ(getDefs(*PendingSNs.at(1)), Defs1);
+  EXPECT_EQ(getDeps(*PendingSNs.at(1)), Deps1);
+
+  ContainerElementsMap Defs2({{0, {1}}});
+  ContainerElementsMap Deps2({{0, {2}}});
+  B.add(Defs2, Deps2);
+  auto ER2 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER2.Ready.size(), 0U);
+  EXPECT_EQ(ER2.Failed.size(), 0U);
+
+  // Check that after emitting the third node we've coalesced all three.
+  EXPECT_EQ(PendingSNs.size(), 1U);
+  EXPECT_EQ(getDefs(*PendingSNs.at(0)), merge(merge(Defs0, Defs1), Defs2));
+  EXPECT_EQ(getDeps(*PendingSNs.at(0)), Deps1);
+
+  ContainerElementsMap Defs3({{0, {3}}});
+  ContainerElementsMap Empty;
+  B.add(Defs3, Empty);
+  auto ER3 = emit(TestGraph::simplify(B.takeSuperNodes()));
+
+  EXPECT_EQ(collapseDefs(ER3.Ready),
+            merge(merge(merge(Defs0, Defs1), Defs2), Defs3));
+  EXPECT_EQ(ER3.Failed.size(), 0U);
+  EXPECT_TRUE(PendingSNs.empty());
+}
+
+TEST_F(WaitingOnGraphTest, Fail_Empty) {
+  // Check that failing an empty set is a no-op.
+  auto FR = G.fail(ContainerElementsMap());
+  EXPECT_EQ(FR.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Fail_Single) {
+  // Check that failing a set with no existing dependencies works.
+  auto FR = G.fail({{0, {0}}});
+  EXPECT_EQ(FR.size(), 0U);
+}
+
+TEST_F(WaitingOnGraphTest, Fail_EmitDependenceOnFailure) {
+  // Check that emitted nodes that directly depend on failed nodes also fail.
+  Failed = {{0, {0}}};
+
+  SuperNodeBuilder B;
+  ContainerElementsMap Defs({{0, {1}}});
+  ContainerElementsMap Deps({{0, {0}}});
+  B.add(Defs, Deps);
+  auto ER = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER.Ready.size(), 0U);
+  EXPECT_EQ(collapseDefs(ER.Failed, false), Defs);
+}
+
+TEST_F(WaitingOnGraphTest, Fail_ZigZag) {
+  // Check that if an emit introduces a transitive dependence on a failed
+  // node, then all nodes that depend on the failed node are also failed.
+  SuperNodeBuilder B;
+
+  ContainerElementsMap Defs0({{0, {0}}});
+  ContainerElementsMap Deps0({{0, {1}}});
+  B.add(Defs0, Deps0);
+  auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER0.Ready.size(), 0U);
+  EXPECT_EQ(ER0.Failed.size(), 0U);
+
+  Failed = {{0, {2}}};
+
+  ContainerElementsMap Defs1({{0, {1}}});
+  ContainerElementsMap Deps1({{0, {2}}});
+  B.add(Defs1, Deps1);
+  auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes()));
+  EXPECT_EQ(ER1.Ready.size(), 0U);
+  EXPECT_EQ(collapseDefs(ER1.Failed, false), merge(Defs0, Defs1));
+}

From 13ca8723d1bfc9ae0b8983e936e9575e440cbee1 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Mon, 20 Oct 2025 16:20:37 -0700
Subject: [PATCH 36/38] Revert "[ORC] Replace ORC's baked-in dependence
 tracking ... (#163027)"

Reverts commit c8c86efbbb55e51597c1bd8feb2e947bc0de3422 while I investigate
bot failures, e.g. https://lab.llvm.org/buildbot/#/builders/187/builds/12743.
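The WaitingOnGraph API exercised by the tests above follows a fixed client
sequence: batch (defs, deps) pairs with SuperNodeBuilder, pre-process them
with simplify() outside the session lock, then apply the result with emit()
under the lock. A minimal sketch of that sequence, assuming an instantiation
over integer container/element ids; the WOG alias, the driver function, and
the no-op external-state callback below are illustrative only, not part of
this patch series:

#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h"

using WOG = llvm::orc::detail::WaitingOnGraph<unsigned, unsigned>;

void exampleEmitSequence(WOG &G) {
  // Element (0, 0) is emitted waiting on element (0, 1).
  WOG::SuperNodeBuilder B0;
  WOG::ContainerElementsMap Defs0({{0, {0}}});
  WOG::ContainerElementsMap Deps0({{0, {1}}});
  B0.add(Defs0, Deps0);

  // Nothing is externally Ready or Failed in this sketch.
  auto NoExternalState = [](unsigned, unsigned) {
    return WOG::ExternalState::None;
  };

  // simplify() strips intra-emit edges and can run outside the lock;
  // emit() then integrates the result into the graph.
  auto ER0 = G.emit(WOG::simplify(B0.takeSuperNodes()), NoExternalState);
  // (0, 0) is still waiting on (0, 1): ER0.Ready and ER0.Failed are empty.

  // Emitting (0, 1) with no outstanding deps releases both elements.
  WOG::SuperNodeBuilder B1;
  WOG::ContainerElementsMap Defs1({{0, {1}}});
  B1.add(Defs1, WOG::ContainerElementsMap());
  auto ER1 = G.emit(WOG::simplify(B1.takeSuperNodes()), NoExternalState);
  // ER1.Ready now covers both (0, 0) and (0, 1).
}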
--- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 66 +- .../llvm/ExecutionEngine/Orc/WaitingOnGraph.h | 622 ------------ llvm/lib/ExecutionEngine/Orc/Core.cpp | 919 +++++++++++++----- .../ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 2 +- .../x86-64/LocalDependencyPropagation.s | 3 +- .../ExecutionEngine/Orc/CMakeLists.txt | 1 - .../Orc/WaitingOnGraphTest.cpp | 553 ----------- 7 files changed, 737 insertions(+), 1429 deletions(-) delete mode 100644 llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h delete mode 100644 llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 8613ddd8e3b11..f407b56817fc3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -26,7 +26,6 @@ #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/ExecutionEngine/Orc/TaskDispatch.h" -#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ExtensibleRTTI.h" @@ -50,9 +49,6 @@ class InProgressLookupState; enum class SymbolState : uint8_t; -using WaitingOnGraph = - detail::WaitingOnGraph; - using ResourceTrackerSP = IntrusiveRefCntPtr; using JITDylibSP = IntrusiveRefCntPtr; @@ -1135,6 +1131,20 @@ class JITDylib : public ThreadSafeRefCountedBase, using UnmaterializedInfosList = std::vector>; + struct EmissionDepUnit { + EmissionDepUnit(JITDylib &JD) : JD(&JD) {} + + JITDylib *JD = nullptr; + DenseMap Symbols; + DenseMap> Dependencies; + }; + + struct EmissionDepUnitInfo { + std::shared_ptr EDU; + DenseSet IntraEmitUsers; + DenseMap> NewDeps; + }; + // Information about not-yet-ready symbol. // * DefiningEDU will point to the EmissionDepUnit that defines the symbol. // * DependantEDUs will hold pointers to any EmissionDepUnits currently @@ -1144,6 +1154,9 @@ class JITDylib : public ThreadSafeRefCountedBase, struct MaterializingInfo { friend class ExecutionSession; + std::shared_ptr DefiningEDU; + DenseSet DependantEDUs; + LLVM_ABI void addQuery(std::shared_ptr Q); LLVM_ABI void removeQuery(const AsynchronousSymbolQuery &Q); LLVM_ABI AsynchronousSymbolQueryList @@ -1765,26 +1778,30 @@ class ExecutionSession { LLVM_ABI Error OL_notifyResolved(MaterializationResponsibility &MR, const SymbolMap &Symbols); - // FIXME: We should be able to derive FailedSymsForQuery from each query once - // we fix how the detach operation works. 
- struct EmitQueries { - JITDylib::AsynchronousSymbolQuerySet Updated; - JITDylib::AsynchronousSymbolQuerySet Failed; - DenseMap> - FailedSymsForQuery; - }; - - WaitingOnGraph::ExternalState - IL_getSymbolState(JITDylib *JD, NonOwningSymbolStringPtr Name); - - template - void IL_collectQueries(JITDylib::AsynchronousSymbolQuerySet &Qs, - WaitingOnGraph::ContainerElementsMap &QualifiedSymbols, - UpdateSymbolFn &&UpdateSymbol, - UpdateQueryFn &&UpdateQuery); - - Expected IL_emit(MaterializationResponsibility &MR, - WaitingOnGraph::SimplifyResult SR); + using EDUInfosMap = + DenseMap; + + template + void propagateExtraEmitDeps(std::deque Worklist, + EDUInfosMap &EDUInfos, + HandleNewDepFn HandleNewDep); + EDUInfosMap simplifyDepGroups(MaterializationResponsibility &MR, + ArrayRef EmittedDeps); + void IL_makeEDUReady(std::shared_ptr EDU, + JITDylib::AsynchronousSymbolQuerySet &Queries); + void IL_makeEDUEmitted(std::shared_ptr EDU, + JITDylib::AsynchronousSymbolQuerySet &Queries); + bool IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU, JITDylib &DepJD, + NonOwningSymbolStringPtr DepSym, + EDUInfosMap &EDUInfos); + + static Error makeJDClosedError(JITDylib::EmissionDepUnit &EDU, + JITDylib &ClosedJD); + static Error makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU, + JITDylib &BadJD, SymbolNameSet BadDeps); + + Expected + IL_emit(MaterializationResponsibility &MR, EDUInfosMap EDUInfos); LLVM_ABI Error OL_notifyEmitted(MaterializationResponsibility &MR, ArrayRef EmittedDeps); @@ -1813,7 +1830,6 @@ class ExecutionSession { std::vector ResourceManagers; std::vector JDs; - WaitingOnGraph G; // FIXME: Remove this (and runOutstandingMUs) once the linking layer works // with callbacks from asynchronous queries. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h b/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h deleted file mode 100644 index a5b533351d4d0..0000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/WaitingOnGraph.h +++ /dev/null @@ -1,622 +0,0 @@ -//===------ WaitingOnGraph.h - ORC symbol dependence graph ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines WaitingOnGraph and related utilities. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H -#define LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/raw_ostream.h" - -#include - -namespace llvm::orc::detail { - -class WaitingOnGraphTest; - -/// WaitingOnGraph class template. -/// -/// This type is intended to provide efficient dependence tracking for Symbols -/// in an ORC program. -/// -/// WaitingOnGraph models a directed graph with four partitions: -/// 1. Not-yet-emitted nodes: Nodes identified as waited-on in an emit -/// operation. -/// 2. Emitted nodes: Nodes emitted and waiting on some non-empty set of -/// other nodes. -/// 3. Ready nodes: Nodes emitted and not waiting on any other nodes -/// (either because they weren't waiting on any nodes when they were -/// emitted, or because all transitively waited-on nodes have since -/// been emitted). -/// 4. 
Failed nodes: Nodes that have been marked as failed-to-emit, and -/// nodes that were found to transitively wait-on some failed node. -/// -/// Nodes are added to the graph by *emit* and *fail* operations. -/// -/// The *emit* operation takes a bipartite *local dependence graph* as an -/// argument and returns... -/// a. the set of nodes (both existing and newly added from the local -/// dependence graph) whose waiting-on set is the empty set, and... -/// b. the set of newly added nodes that are found to depend on failed -/// nodes. -/// -/// The *fail* operation takes a set of failed nodes and returns the set of -/// Emitted nodes that were waiting on the failed nodes. -/// -/// The concrete representation adopts several approaches for efficiency: -/// -/// 1. Only *Emitted* and *Not-yet-emitted* nodes are represented explicitly. -/// *Ready* and *Failed* nodes are represented by the values returned by the -/// GetExternalStateFn argument to *emit*. -/// -/// 2. Labels are (*Container*, *Element*) pairs that are intended to represent -/// ORC symbols (ORC uses types Container = JITDylib, -/// Element = NonOwningSymbolStringPtr). The internal representation of the -/// graph is optimized on the assumption that there are many more Elements -/// (symbol names) than Containers (JITDylibs) used to construct the labels. -/// (Consider for example the common case where most JIT'd code is placed in -/// a single "main" JITDylib). -/// -/// 3. The data structure stores *SuperNodes* which have multiple labels. This -/// reduces the number of nodes and edges in the graph in the common case -/// where many JIT symbols have the same set of dependencies. SuperNodes are -/// coalesced when their dependence sets become equal. -/// -/// 4. The *simplify* method can be applied to an initial *local dependence -/// graph* (as a list of SuperNodes) to eliminate any internal dependence -/// relationships that would have to be propagated internally by *emit*. -/// Access to the WaitingOnGraph is assumed to be guarded by a mutex (ORC -/// will access it from multiple threads) so this allows some pre-processing -/// to be performed outside the mutex. 
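To make the division of labour described above concrete, the sketch below
shows client-side bookkeeping driving emit and fail. The getState callback,
the failExample driver, and the two DenseSets standing in for the client's
own symbol table are assumptions for illustration, not part of this header:

#include "llvm/ADT/DenseSet.h"
#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h"

using WOG = llvm::orc::detail::WaitingOnGraph<unsigned, unsigned>;

// Client-side state: the graph itself never stores Ready or Failed nodes.
static llvm::DenseSet<unsigned> ReadyElems, FailedElems;

static WOG::ExternalState getState(unsigned /*Container*/, unsigned Elem) {
  if (FailedElems.count(Elem))
    return WOG::ExternalState::Failed;
  if (ReadyElems.count(Elem))
    return WOG::ExternalState::Ready;
  return WOG::ExternalState::None;
}

static void failExample(WOG &G) {
  // (0, 0) is emitted waiting on (0, 1), which is not yet known to the
  // client, so the new node stays pending inside the graph.
  WOG::SuperNodeBuilder B;
  WOG::ContainerElementsMap Defs({{0, {0}}});
  WOG::ContainerElementsMap Deps({{0, {1}}});
  B.add(Defs, Deps);
  auto ER = G.emit(WOG::simplify(B.takeSuperNodes()), getState);
  // ER.Ready and ER.Failed are both empty at this point.

  // The client later learns that (0, 1) failed: fail() hands back the
  // pending supernodes that were waiting on it, and the client records
  // their defs as failed so future emits see them via getState.
  for (auto &SN : G.fail(WOG::ContainerElementsMap({{0, {1}}})))
    for (auto &[Container, Elems] : SN->defs())
      FailedElems.insert(Elems.begin(), Elems.end());
}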
-template class WaitingOnGraph { - friend class WaitingOnGraphTest; - -public: - using ContainerId = ContainerIdT; - using ElementId = ElementIdT; - using ElementSet = DenseSet; - using ContainerElementsMap = DenseMap; - - class SuperNode { - friend class WaitingOnGraph; - friend class WaitingOnGraphTest; - - public: - SuperNode(ContainerElementsMap Defs, ContainerElementsMap Deps) - : Defs(std::move(Defs)), Deps(std::move(Deps)) {} - ContainerElementsMap &defs() { return Defs; } - const ContainerElementsMap &defs() const { return Defs; } - ContainerElementsMap &deps() { return Deps; } - const ContainerElementsMap &deps() const { return Deps; } - - private: - ContainerElementsMap Defs; - ContainerElementsMap Deps; - }; - -private: - using ElemToSuperNodeMap = - DenseMap>; - - using SuperNodeDepsMap = DenseMap>; - - class Coalescer { - public: - std::unique_ptr addOrCreateSuperNode(ContainerElementsMap Defs, - ContainerElementsMap Deps) { - auto H = getHash(Deps); - if (auto *ExistingSN = findCanonicalSuperNode(H, Deps)) { - for (auto &[Container, Elems] : Defs) { - auto &DstCElems = ExistingSN->Defs[Container]; - [[maybe_unused]] size_t ExpectedSize = - DstCElems.size() + Elems.size(); - DstCElems.insert(Elems.begin(), Elems.end()); - assert(DstCElems.size() == ExpectedSize); - } - return nullptr; - } - - auto NewSN = - std::make_unique(std::move(Defs), std::move(Deps)); - CanonicalSNs[H].push_back(NewSN.get()); - return NewSN; - } - - void coalesce(std::vector> &SNs, - ElemToSuperNodeMap &ElemToSN) { - for (size_t I = 0; I != SNs.size();) { - auto &SN = SNs[I]; - auto H = getHash(SN->Deps); - if (auto *CanonicalSN = findCanonicalSuperNode(H, SN->Deps)) { - for (auto &[Container, Elems] : SN->Defs) { - CanonicalSN->Defs[Container].insert(Elems.begin(), Elems.end()); - auto &ContainerElemToSN = ElemToSN[Container]; - for (auto &Elem : Elems) - ContainerElemToSN[Elem] = CanonicalSN; - } - std::swap(SN, SNs.back()); - SNs.pop_back(); - } else { - CanonicalSNs[H].push_back(SN.get()); - ++I; - } - } - } - - template void remove(Pred &&Remove) { - for (auto &[Hash, SNs] : CanonicalSNs) { - bool Found = false; - for (size_t I = 0; I != SNs.size(); ++I) { - if (Remove(SNs[I])) { - std::swap(SNs[I], SNs.back()); - SNs.pop_back(); - Found = true; - break; - } - } - if (Found) { - if (SNs.empty()) - CanonicalSNs.erase(Hash); - break; - } - } - } - - private: - hash_code getHash(const ContainerElementsMap &M) { - SmallVector SortedContainers; - SortedContainers.reserve(M.size()); - for (auto &[Container, Elems] : M) - SortedContainers.push_back(Container); - llvm::sort(SortedContainers); - hash_code Hash(0); - for (auto &Container : SortedContainers) { - auto &ContainerElems = M.at(Container); - SmallVector SortedElems(ContainerElems.begin(), - ContainerElems.end()); - llvm::sort(SortedElems); - Hash = hash_combine( - Hash, Container, - hash_combine_range(SortedElems.begin(), SortedElems.end())); - } - return Hash; - } - - SuperNode *findCanonicalSuperNode(hash_code H, - const ContainerElementsMap &M) { - for (auto *SN : CanonicalSNs[H]) - if (SN->Deps == M) - return SN; - return nullptr; - } - - DenseMap> CanonicalSNs; - }; - -public: - /// Build SuperNodes from (definition-set, dependence-set) pairs. - /// - /// Coalesces definition-sets with identical dependence-sets. - class SuperNodeBuilder { - public: - void add(ContainerElementsMap Defs, ContainerElementsMap Deps) { - if (Defs.empty()) - return; - // Remove any self-reference. 
- SmallVector ToRemove; - for (auto &[Container, Elems] : Defs) { - assert(!Elems.empty() && "Defs for container must not be empty"); - auto I = Deps.find(Container); - if (I == Deps.end()) - continue; - auto &DepsForContainer = I->second; - for (auto &Elem : Elems) - DepsForContainer.erase(Elem); - if (DepsForContainer.empty()) - ToRemove.push_back(Container); - } - for (auto &Container : ToRemove) - Deps.erase(Container); - if (auto SN = C.addOrCreateSuperNode(std::move(Defs), std::move(Deps))) - SNs.push_back(std::move(SN)); - } - std::vector> takeSuperNodes() { - return std::move(SNs); - } - - private: - Coalescer C; - std::vector> SNs; - }; - - class SimplifyResult { - friend class WaitingOnGraph; - friend class WaitingOnGraphTest; - - public: - const std::vector> &superNodes() const { - return SNs; - } - - private: - SimplifyResult(std::vector> SNs, - ElemToSuperNodeMap ElemToSN) - : SNs(std::move(SNs)), ElemToSN(std::move(ElemToSN)) {} - std::vector> SNs; - ElemToSuperNodeMap ElemToSN; - }; - - /// Preprocess a list of SuperNodes to remove all intra-SN dependencies. - static SimplifyResult simplify(std::vector> SNs) { - // Build ElemToSN map. - ElemToSuperNodeMap ElemToSN; - for (auto &SN : SNs) { - for (auto &[Container, Elements] : SN->Defs) { - auto &ContainerElemToSN = ElemToSN[Container]; - for (auto &E : Elements) - ContainerElemToSN[E] = SN.get(); - } - } - - SuperNodeDepsMap SuperNodeDeps; - hoistDeps(SuperNodeDeps, SNs, ElemToSN); - propagateSuperNodeDeps(SuperNodeDeps); - sinkDeps(SNs, SuperNodeDeps); - - // Pre-coalesce nodes. - Coalescer().coalesce(SNs, ElemToSN); - - return {std::move(SNs), std::move(ElemToSN)}; - } - - struct EmitResult { - std::vector> Ready; - std::vector> Failed; - }; - - enum class ExternalState { None, Ready, Failed }; - - /// Add the given SuperNodes to the graph, returning any SuperNodes that - /// move to the Ready or Failed states as a result. - /// The GetExternalState function is used to represent SuperNodes that have - /// already become Ready or Failed (since such nodes are not explicitly - /// represented in the graph). - template - EmitResult emit(SimplifyResult SR, GetExternalStateFn &&GetExternalState) { - auto NewSNs = std::move(SR.SNs); - auto ElemToNewSN = std::move(SR.ElemToSN); - - // First process any dependencies on nodes with external state. - auto FailedSNs = processExternalDeps(NewSNs, GetExternalState); - - // Collect the PendingSNs whose dep sets are about to be modified. - std::vector> ModifiedPendingSNs; - for (size_t I = 0; I != PendingSNs.size();) { - auto &SN = PendingSNs[I]; - bool Remove = false; - for (auto &[Container, Elems] : SN->Deps) { - auto I = ElemToNewSN.find(Container); - if (I == ElemToNewSN.end()) - continue; - for (auto Elem : Elems) { - if (I->second.contains(Elem)) { - Remove = true; - break; - } - } - if (Remove) - break; - } - if (Remove) { - ModifiedPendingSNs.push_back(std::move(SN)); - std::swap(SN, PendingSNs.back()); - PendingSNs.pop_back(); - } else - ++I; - } - - // Remove cycles from the graphs. - SuperNodeDepsMap SuperNodeDeps; - hoistDeps(SuperNodeDeps, ModifiedPendingSNs, ElemToNewSN); - - CoalesceToPendingSNs.remove( - [&](SuperNode *SN) { return SuperNodeDeps.count(SN); }); - - hoistDeps(SuperNodeDeps, NewSNs, ElemToPendingSN); - propagateSuperNodeDeps(SuperNodeDeps); - sinkDeps(NewSNs, SuperNodeDeps); - sinkDeps(ModifiedPendingSNs, SuperNodeDeps); - - // Process supernodes. Pending first, since we'll update PendingSNs when we - // incorporate NewSNs. 
- std::vector> ReadyNodes, FailedNodes; - processReadyOrFailed(ModifiedPendingSNs, ReadyNodes, FailedNodes, - SuperNodeDeps, ElemToPendingSN, FailedSNs); - processReadyOrFailed(NewSNs, ReadyNodes, FailedNodes, SuperNodeDeps, - ElemToNewSN, FailedSNs); - - CoalesceToPendingSNs.coalesce(ModifiedPendingSNs, ElemToPendingSN); - CoalesceToPendingSNs.coalesce(NewSNs, ElemToPendingSN); - - // Integrate remaining ModifiedPendingSNs and NewSNs into PendingSNs. - for (auto &SN : ModifiedPendingSNs) - PendingSNs.push_back(std::move(SN)); - - // Update ElemToPendingSN for the remaining elements. - for (auto &SN : NewSNs) { - for (auto &[Container, Elems] : SN->Defs) { - auto &Row = ElemToPendingSN[Container]; - for (auto &Elem : Elems) - Row[Elem] = SN.get(); - } - PendingSNs.push_back(std::move(SN)); - } - - return {std::move(ReadyNodes), std::move(FailedNodes)}; - } - - /// Identify the given symbols as Failed. - /// The elements of the Failed map will not be included in the returned - /// result, so clients should take whatever actions are needed to mark - /// this as failed in their external representation. - std::vector> - fail(const ContainerElementsMap &Failed) { - std::vector> FailedSNs; - - for (size_t I = 0; I != PendingSNs.size();) { - auto &PendingSN = PendingSNs[I]; - bool FailPendingSN = false; - for (auto &[Container, Elems] : PendingSN->Deps) { - if (FailPendingSN) - break; - auto I = Failed.find(Container); - if (I == Failed.end()) - continue; - for (auto &Elem : Elems) { - if (I->second.count(Elem)) { - FailPendingSN = true; - break; - } - } - } - if (FailPendingSN) { - FailedSNs.push_back(std::move(PendingSN)); - PendingSN = std::move(PendingSNs.back()); - PendingSNs.pop_back(); - } else - ++I; - } - - for (auto &SN : FailedSNs) { - CoalesceToPendingSNs.remove( - [&](SuperNode *SNC) { return SNC == SN.get(); }); - for (auto &[Container, Elems] : SN->Defs) { - assert(ElemToPendingSN.count(Container)); - auto &CElems = ElemToPendingSN[Container]; - for (auto &Elem : Elems) - CElems.erase(Elem); - if (CElems.empty()) - ElemToPendingSN.erase(Container); - } - } - - return FailedSNs; - } - - bool validate(raw_ostream &Log) { - bool AllGood = true; - auto ErrLog = [&]() -> raw_ostream & { - AllGood = false; - return Log; - }; - - size_t DefCount = 0; - for (auto &PendingSN : PendingSNs) { - if (PendingSN->Deps.empty()) - ErrLog() << "Pending SN " << PendingSN.get() << " has empty dep set.\n"; - else { - bool BadElem = false; - for (auto &[Container, Elems] : PendingSN->Deps) { - auto I = ElemToPendingSN.find(Container); - if (I == ElemToPendingSN.end()) - continue; - if (Elems.empty()) - ErrLog() << "Pending SN " << PendingSN.get() - << " has dependence map entry for " << Container - << " with empty element set.\n"; - for (auto &Elem : Elems) { - if (I->second.count(Elem)) { - ErrLog() << "Pending SN " << PendingSN.get() - << " has dependence on emitted element ( " << Container - << ", " << Elem << ")\n"; - BadElem = true; - break; - } - } - if (BadElem) - break; - } - } - - for (auto &[Container, Elems] : PendingSN->Defs) { - if (Elems.empty()) - ErrLog() << "Pending SN " << PendingSN.get() - << " has def map entry for " << Container - << " with empty element set.\n"; - DefCount += Elems.size(); - auto I = ElemToPendingSN.find(Container); - if (I == ElemToPendingSN.end()) - ErrLog() << "Pending SN " << PendingSN.get() << " has " - << Elems.size() << " defs in container " << Container - << " not covered by ElemsToPendingSN.\n"; - else { - for (auto &Elem : Elems) { - auto J = 
I->second.find(Elem); - if (J == I->second.end()) - ErrLog() << "Pending SN " << PendingSN.get() << " has element (" - << Container << ", " << Elem - << ") not covered by ElemsToPendingSN.\n"; - else if (J->second != PendingSN.get()) - ErrLog() << "ElemToPendingSN value invalid for (" << Container - << ", " << Elem << ")\n"; - } - } - } - } - - size_t DefCount2 = 0; - for (auto &[Container, Elems] : ElemToPendingSN) - DefCount2 += Elems.size(); - - assert(DefCount2 >= DefCount); - if (DefCount2 != DefCount) - ErrLog() << "ElemToPendingSN contains extra elements.\n"; - - return AllGood; - } - -private: - // Replace individual dependencies with supernode dependencies. - // - // For all dependencies in SNs, if the corresponding node is defined in - // ElemToSN then remove the individual dependency and add the record the - // dependency on the corresponding supernode in SuperNodeDeps. - static void hoistDeps(SuperNodeDepsMap &SuperNodeDeps, - std::vector> &SNs, - ElemToSuperNodeMap &ElemToSN) { - for (auto &SN : SNs) { - auto &SNDeps = SuperNodeDeps[SN.get()]; - for (auto &[DefContainer, DefElems] : ElemToSN) { - auto I = SN->Deps.find(DefContainer); - if (I == SN->Deps.end()) - continue; - for (auto &[DefElem, DefSN] : DefElems) - if (I->second.erase(DefElem)) - SNDeps.insert(DefSN); - if (I->second.empty()) - SN->Deps.erase(I); - } - } - } - - // Compute transitive closure of deps for each node. - static void propagateSuperNodeDeps(SuperNodeDepsMap &SuperNodeDeps) { - for (auto &[SN, Deps] : SuperNodeDeps) { - DenseSet Reachable({SN}); - SmallVector Worklist(Deps.begin(), Deps.end()); - - while (!Worklist.empty()) { - auto *DepSN = Worklist.pop_back_val(); - if (!Reachable.insert(DepSN).second) - continue; - auto I = SuperNodeDeps.find(DepSN); - if (I == SuperNodeDeps.end()) - continue; - for (auto *DepSNDep : I->second) - Worklist.push_back(DepSNDep); - } - - Deps = std::move(Reachable); - } - } - - // Sink SuperNode dependencies back to dependencies on individual nodes. 
- static void sinkDeps(std::vector> &SNs, - SuperNodeDepsMap &SuperNodeDeps) { - for (auto &SN : SNs) { - auto I = SuperNodeDeps.find(SN.get()); - if (I == SuperNodeDeps.end()) - continue; - - for (auto *DepSN : I->second) - for (auto &[Container, Elems] : DepSN->Deps) - SN->Deps[Container].insert(Elems.begin(), Elems.end()); - } - } - - template - static std::vector - processExternalDeps(std::vector> &SNs, - GetExternalStateFn &GetExternalState) { - std::vector FailedSNs; - for (auto &SN : SNs) { - bool SNHasError = false; - SmallVector ContainersToRemove; - for (auto &[Container, Elems] : SN->Deps) { - SmallVector ElemToRemove; - for (auto &Elem : Elems) { - switch (GetExternalState(Container, Elem)) { - case ExternalState::None: - break; - case ExternalState::Ready: - ElemToRemove.push_back(Elem); - break; - case ExternalState::Failed: - ElemToRemove.push_back(Elem); - SNHasError = true; - break; - } - } - for (auto &Elem : ElemToRemove) - Elems.erase(Elem); - if (Elems.empty()) - ContainersToRemove.push_back(Container); - } - for (auto &Container : ContainersToRemove) - SN->Deps.erase(Container); - if (SNHasError) - FailedSNs.push_back(SN.get()); - } - - return FailedSNs; - } - - void processReadyOrFailed(std::vector> &SNs, - std::vector> &Ready, - std::vector> &Failed, - SuperNodeDepsMap &SuperNodeDeps, - ElemToSuperNodeMap &ElemToSNs, - std::vector FailedSNs) { - for (size_t I = 0; I != SNs.size();) { - auto &SN = SNs[I]; - - bool SNFailed = false; - assert(SuperNodeDeps.count(SN.get())); - auto &SNSuperNodeDeps = SuperNodeDeps[SN.get()]; - for (auto *FailedSN : FailedSNs) { - if (FailedSN == SN.get() || SNSuperNodeDeps.count(FailedSN)) { - SNFailed = true; - break; - } - } - - bool SNReady = SN->Deps.empty(); - - if (SNReady || SNFailed) { - auto &NodeList = SNFailed ? 
Failed : Ready; - NodeList.push_back(std::move(SN)); - std::swap(SN, SNs.back()); - SNs.pop_back(); - } else - ++I; - } - } - - std::vector> PendingSNs; - ElemToSuperNodeMap ElemToPendingSN; - Coalescer CoalesceToPendingSNs; -}; - -} // namespace llvm::orc::detail - -#endif // LLVM_EXECUTIONENGINE_ORC_WAITINGONGRAPH_H diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 62bb726b00050..f47b7ecdcc7bb 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1173,7 +1173,39 @@ void JITDylib::dump(raw_ostream &OS) { << " pending queries: { "; for (const auto &Q : KV.second.pendingQueries()) OS << Q.get() << " (" << Q->getRequiredState() << ") "; - OS << "}\n"; + OS << "}\n Defining EDU: "; + if (KV.second.DefiningEDU) { + OS << KV.second.DefiningEDU.get() << " { "; + for (auto &[Name, Flags] : KV.second.DefiningEDU->Symbols) + OS << Name << " "; + OS << "}\n"; + OS << " Dependencies:\n"; + if (!KV.second.DefiningEDU->Dependencies.empty()) { + for (auto &[DepJD, Deps] : KV.second.DefiningEDU->Dependencies) { + OS << " " << DepJD->getName() << ": [ "; + for (auto &Dep : Deps) + OS << Dep << " "; + OS << "]\n"; + } + } else + OS << " none\n"; + } else + OS << "none\n"; + OS << " Dependant EDUs:\n"; + if (!KV.second.DependantEDUs.empty()) { + for (auto &DependantEDU : KV.second.DependantEDUs) { + OS << " " << DependantEDU << ": " + << DependantEDU->JD->getName() << " { "; + for (auto &[Name, Flags] : DependantEDU->Symbols) + OS << Name << " "; + OS << "}\n"; + } + } else + OS << " none\n"; + assert((Symbols[KV.first].getState() != SymbolState::Ready || + (KV.second.pendingQueries().empty() && !KV.second.DefiningEDU && + !KV.second.DependantEDUs.empty())) && + "Stale materializing info entry"); } }); } @@ -2885,64 +2917,359 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR, return MR.JD.resolve(MR, Symbols); } -WaitingOnGraph::ExternalState -ExecutionSession::IL_getSymbolState(JITDylib *JD, - NonOwningSymbolStringPtr Name) { - if (JD->State != JITDylib::Open) - return WaitingOnGraph::ExternalState::Failed; - - auto I = JD->Symbols.find_as(Name); - - // FIXME: Can we eliminate this possibility if we support query binding? - if (I == JD->Symbols.end()) - return WaitingOnGraph::ExternalState::Failed; - - if (I->second.getFlags().hasError()) - return WaitingOnGraph::ExternalState::Failed; - - if (I->second.getState() == SymbolState::Ready) - return WaitingOnGraph::ExternalState::Ready; - - return WaitingOnGraph::ExternalState::None; -} - -template -void ExecutionSession::IL_collectQueries( - JITDylib::AsynchronousSymbolQuerySet &Qs, - WaitingOnGraph::ContainerElementsMap &QualifiedSymbols, - UpdateSymbolFn &&UpdateSymbol, UpdateQueryFn &&UpdateQuery) { - - for (auto &[JD, Symbols] : QualifiedSymbols) { - // IL_emit and JITDylib removal are synchronized by the session lock. - // Since JITDylib removal removes any contained nodes from the - // WaitingOnGraph, we should be able to assert that all nodes in the - // WaitingOnGraph have not been removed. - assert(JD->State == JITDylib::Open && - "WaitingOnGraph includes definition in defunct JITDylib"); - for (auto &Symbol : Symbols) { - // Update symbol table. - auto I = JD->Symbols.find_as(Symbol); - assert(I != JD->Symbols.end() && - "Failed Symbol missing from JD symbol table"); - auto &Entry = I->second; - UpdateSymbol(Entry); - - // Collect queries. 
- auto J = JD->MaterializingInfos.find_as(Symbol); - if (J != JD->MaterializingInfos.end()) { - for (auto &Q : J->second.takeAllPendingQueries()) { - UpdateQuery(*Q, *JD, Symbol, Entry); - Qs.insert(std::move(Q)); +template +void ExecutionSession::propagateExtraEmitDeps( + std::deque Worklist, EDUInfosMap &EDUInfos, + HandleNewDepFn HandleNewDep) { + + // Iterate to a fixed-point to propagate extra-emit dependencies through the + // EDU graph. + while (!Worklist.empty()) { + auto &EDU = *Worklist.front(); + Worklist.pop_front(); + + assert(EDUInfos.count(&EDU) && "No info entry for EDU"); + auto &EDUInfo = EDUInfos[&EDU]; + + // Propagate new dependencies to users. + for (auto *UserEDU : EDUInfo.IntraEmitUsers) { + + // UserEDUInfo only present if UserEDU has its own users. + JITDylib::EmissionDepUnitInfo *UserEDUInfo = nullptr; + { + auto UserEDUInfoItr = EDUInfos.find(UserEDU); + if (UserEDUInfoItr != EDUInfos.end()) + UserEDUInfo = &UserEDUInfoItr->second; + } + + for (auto &[DepJD, Deps] : EDUInfo.NewDeps) { + auto &UserEDUDepsForJD = UserEDU->Dependencies[DepJD]; + DenseSet *UserEDUNewDepsForJD = nullptr; + for (auto Dep : Deps) { + if (UserEDUDepsForJD.insert(Dep).second) { + HandleNewDep(*UserEDU, *DepJD, Dep); + if (UserEDUInfo) { + if (!UserEDUNewDepsForJD) { + // If UserEDU has no new deps then it's not in the worklist + // yet, so add it. + if (UserEDUInfo->NewDeps.empty()) + Worklist.push_back(UserEDU); + UserEDUNewDepsForJD = &UserEDUInfo->NewDeps[DepJD]; + } + // Add (DepJD, Dep) to NewDeps. + UserEDUNewDepsForJD->insert(Dep); + } + } } - JD->MaterializingInfos.erase(J); } } + + EDUInfo.NewDeps.clear(); + } +} + +// Note: This method modifies the emitted set. +ExecutionSession::EDUInfosMap ExecutionSession::simplifyDepGroups( + MaterializationResponsibility &MR, + ArrayRef EmittedDeps) { + + auto &TargetJD = MR.getTargetJITDylib(); + + // 1. Build initial EmissionDepUnit -> EmissionDepUnitInfo and + // Symbol -> EmissionDepUnit mappings. + DenseMap EDUInfos; + EDUInfos.reserve(EmittedDeps.size()); + DenseMap EDUForSymbol; + for (auto &DG : EmittedDeps) { + assert(!DG.Symbols.empty() && "DepGroup does not cover any symbols"); + + // Skip empty EDUs. + if (DG.Dependencies.empty()) + continue; + + auto TmpEDU = std::make_shared(TargetJD); + auto &EDUInfo = EDUInfos[TmpEDU.get()]; + EDUInfo.EDU = std::move(TmpEDU); + for (const auto &Symbol : DG.Symbols) { + NonOwningSymbolStringPtr NonOwningSymbol(Symbol); + assert(!EDUForSymbol.count(NonOwningSymbol) && + "Symbol should not appear in more than one SymbolDependenceGroup"); + assert(MR.getSymbols().count(Symbol) && + "Symbol in DepGroups not in the emitted set"); + auto NewlyEmittedItr = MR.getSymbols().find(Symbol); + EDUInfo.EDU->Symbols[NonOwningSymbol] = NewlyEmittedItr->second; + EDUForSymbol[NonOwningSymbol] = EDUInfo.EDU.get(); + } + } + + // 2. Build a "residual" EDU to cover all symbols that have no dependencies. + { + DenseMap ResidualSymbolFlags; + for (auto &[Sym, Flags] : MR.getSymbols()) { + if (!EDUForSymbol.count(NonOwningSymbolStringPtr(Sym))) + ResidualSymbolFlags[NonOwningSymbolStringPtr(Sym)] = Flags; + } + if (!ResidualSymbolFlags.empty()) { + auto ResidualEDU = std::make_shared(TargetJD); + ResidualEDU->Symbols = std::move(ResidualSymbolFlags); + auto &ResidualEDUInfo = EDUInfos[ResidualEDU.get()]; + ResidualEDUInfo.EDU = std::move(ResidualEDU); + + // If the residual EDU is the only one then bail out early. 
+ if (EDUInfos.size() == 1) + return EDUInfos; + + // Otherwise add the residual EDU to the EDUForSymbol map. + for (auto &[Sym, Flags] : ResidualEDUInfo.EDU->Symbols) + EDUForSymbol[Sym] = ResidualEDUInfo.EDU.get(); + } + } + +#ifndef NDEBUG + assert(EDUForSymbol.size() == MR.getSymbols().size() && + "MR symbols not fully covered by EDUs?"); + for (auto &[Sym, Flags] : MR.getSymbols()) { + assert(EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)) && + "Sym in MR not covered by EDU"); + } +#endif // NDEBUG + + // 3. Use the DepGroups array to build a graph of dependencies between + // EmissionDepUnits in this finalization. We want to remove these + // intra-finalization uses, propagating dependencies on symbols outside + // this finalization. Add EDUs to the worklist. + for (auto &DG : EmittedDeps) { + + // Skip SymbolDependenceGroups with no dependencies. + if (DG.Dependencies.empty()) + continue; + + assert(EDUForSymbol.count(NonOwningSymbolStringPtr(*DG.Symbols.begin())) && + "No EDU for DG"); + auto &EDU = + *EDUForSymbol.find(NonOwningSymbolStringPtr(*DG.Symbols.begin())) + ->second; + + for (auto &[DepJD, Deps] : DG.Dependencies) { + DenseSet NewDepsForJD; + + assert(!Deps.empty() && "Dependence set for DepJD is empty"); + + if (DepJD != &TargetJD) { + // DepJD is some other JITDylib.There can't be any intra-finalization + // edges here, so just skip. + for (auto &Dep : Deps) + NewDepsForJD.insert(NonOwningSymbolStringPtr(Dep)); + } else { + // DepJD is the Target JITDylib. Check for intra-finaliztaion edges, + // skipping any and recording the intra-finalization use instead. + for (auto &Dep : Deps) { + NonOwningSymbolStringPtr NonOwningDep(Dep); + auto I = EDUForSymbol.find(NonOwningDep); + if (I == EDUForSymbol.end()) { + if (!MR.getSymbols().count(Dep)) + NewDepsForJD.insert(NonOwningDep); + continue; + } + + if (I->second != &EDU) + EDUInfos[I->second].IntraEmitUsers.insert(&EDU); + } + } + + if (!NewDepsForJD.empty()) + EDU.Dependencies[DepJD] = std::move(NewDepsForJD); + } + } + + // 4. Build the worklist. + std::deque Worklist; + for (auto &[EDU, EDUInfo] : EDUInfos) { + // If this EDU has extra-finalization dependencies and intra-finalization + // users then add it to the worklist. + if (!EDU->Dependencies.empty()) { + auto I = EDUInfos.find(EDU); + if (I != EDUInfos.end()) { + auto &EDUInfo = I->second; + if (!EDUInfo.IntraEmitUsers.empty()) { + EDUInfo.NewDeps = EDU->Dependencies; + Worklist.push_back(EDU); + } + } + } + } + + // 4. Propagate dependencies through the EDU graph. + propagateExtraEmitDeps( + Worklist, EDUInfos, + [](JITDylib::EmissionDepUnit &, JITDylib &, NonOwningSymbolStringPtr) {}); + + return EDUInfos; +} + +void ExecutionSession::IL_makeEDUReady( + std::shared_ptr EDU, + JITDylib::AsynchronousSymbolQuerySet &Queries) { + + // The symbols for this EDU are ready. + auto &JD = *EDU->JD; + + for (auto &[Sym, Flags] : EDU->Symbols) { + assert(JD.Symbols.count(SymbolStringPtr(Sym)) && + "JD does not have an entry for Sym"); + auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; + + assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() && + Entry.getState() == SymbolState::Materializing) || + Entry.getState() == SymbolState::Resolved || + Entry.getState() == SymbolState::Emitted) && + "Emitting from state other than Resolved"); + + Entry.setState(SymbolState::Ready); + + auto MII = JD.MaterializingInfos.find(SymbolStringPtr(Sym)); + + // Check for pending queries. 
+ if (MII == JD.MaterializingInfos.end()) + continue; + auto &MI = MII->second; + + for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) { + Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol()); + if (Q->isComplete()) + Queries.insert(Q); + Q->removeQueryDependence(JD, SymbolStringPtr(Sym)); + } + + JD.MaterializingInfos.erase(MII); + } + + JD.shrinkMaterializationInfoMemory(); +} + +void ExecutionSession::IL_makeEDUEmitted( + std::shared_ptr EDU, + JITDylib::AsynchronousSymbolQuerySet &Queries) { + + // The symbols for this EDU are emitted, but not ready. + auto &JD = *EDU->JD; + + for (auto &[Sym, Flags] : EDU->Symbols) { + assert(JD.Symbols.count(SymbolStringPtr(Sym)) && + "JD does not have an entry for Sym"); + auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; + + assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() && + Entry.getState() == SymbolState::Materializing) || + Entry.getState() == SymbolState::Resolved || + Entry.getState() == SymbolState::Emitted) && + "Emitting from state other than Resolved"); + + if (Entry.getState() == SymbolState::Emitted) { + // This was already emitted, so we can skip the rest of this loop. +#ifndef NDEBUG + for (auto &[Sym, Flags] : EDU->Symbols) { + assert(JD.Symbols.count(SymbolStringPtr(Sym)) && + "JD does not have an entry for Sym"); + auto &Entry = JD.Symbols[SymbolStringPtr(Sym)]; + assert(Entry.getState() == SymbolState::Emitted && + "Symbols for EDU in inconsistent state"); + assert(JD.MaterializingInfos.count(SymbolStringPtr(Sym)) && + "Emitted symbol has no MI"); + auto MI = JD.MaterializingInfos[SymbolStringPtr(Sym)]; + assert(MI.takeQueriesMeeting(SymbolState::Emitted).empty() && + "Already-emitted symbol has waiting-on-emitted queries"); + } +#endif // NDEBUG + break; + } + + Entry.setState(SymbolState::Emitted); + auto &MI = JD.MaterializingInfos[SymbolStringPtr(Sym)]; + MI.DefiningEDU = EDU; + + for (auto &Q : MI.takeQueriesMeeting(SymbolState::Emitted)) { + Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol()); + if (Q->isComplete()) + Queries.insert(Q); + } + } + + for (auto &[DepJD, Deps] : EDU->Dependencies) { + for (auto &Dep : Deps) + DepJD->MaterializingInfos[SymbolStringPtr(Dep)].DependantEDUs.insert( + EDU.get()); } } -Expected +/// Removes the given dependence from EDU. If EDU's dependence set becomes +/// empty then this function adds an entry for it to the EDUInfos map. +/// Returns true if a new EDUInfosMap entry is added. +bool ExecutionSession::IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU, + JITDylib &DepJD, + NonOwningSymbolStringPtr DepSym, + EDUInfosMap &EDUInfos) { + assert(EDU.Dependencies.count(&DepJD) && + "JD does not appear in Dependencies of DependantEDU"); + assert(EDU.Dependencies[&DepJD].count(DepSym) && + "Symbol does not appear in Dependencies of DependantEDU"); + auto &JDDeps = EDU.Dependencies[&DepJD]; + JDDeps.erase(DepSym); + if (JDDeps.empty()) { + EDU.Dependencies.erase(&DepJD); + if (EDU.Dependencies.empty()) { + // If the dependencies set has become empty then EDU _may_ be ready + // (we won't know for sure until we've propagated the extra-emit deps). + // Create an EDUInfo for it (if it doesn't have one already) so that + // it'll be visited after propagation. 
+ auto &DepEDUInfo = EDUInfos[&EDU]; + if (!DepEDUInfo.EDU) { + assert(EDU.JD->Symbols.count( + SymbolStringPtr(EDU.Symbols.begin()->first)) && + "Missing symbol entry for first symbol in EDU"); + auto DepEDUFirstMI = EDU.JD->MaterializingInfos.find( + SymbolStringPtr(EDU.Symbols.begin()->first)); + assert(DepEDUFirstMI != EDU.JD->MaterializingInfos.end() && + "Missing MI for first symbol in DependantEDU"); + DepEDUInfo.EDU = DepEDUFirstMI->second.DefiningEDU; + return true; + } + } + } + return false; +} + +Error ExecutionSession::makeJDClosedError(JITDylib::EmissionDepUnit &EDU, + JITDylib &ClosedJD) { + SymbolNameSet FailedSymbols; + for (auto &[Sym, Flags] : EDU.Symbols) + FailedSymbols.insert(SymbolStringPtr(Sym)); + SymbolDependenceMap BadDeps; + for (auto &Dep : EDU.Dependencies[&ClosedJD]) + BadDeps[&ClosedJD].insert(SymbolStringPtr(Dep)); + return make_error( + ClosedJD.getExecutionSession().getSymbolStringPool(), EDU.JD, + std::move(FailedSymbols), std::move(BadDeps), + ClosedJD.getName() + " is closed"); +} + +Error ExecutionSession::makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU, + JITDylib &BadJD, + SymbolNameSet BadDeps) { + SymbolNameSet FailedSymbols; + for (auto &[Sym, Flags] : EDU.Symbols) + FailedSymbols.insert(SymbolStringPtr(Sym)); + SymbolDependenceMap BadDepsMap; + BadDepsMap[&BadJD] = std::move(BadDeps); + return make_error( + BadJD.getExecutionSession().getSymbolStringPool(), &BadJD, + std::move(FailedSymbols), std::move(BadDepsMap), + "dependencies removed or in error state"); +} + +Expected ExecutionSession::IL_emit(MaterializationResponsibility &MR, - WaitingOnGraph::SimplifyResult SR) { + EDUInfosMap EDUInfos) { if (MR.RT->isDefunct()) return make_error(MR.RT); @@ -2952,50 +3279,169 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, return make_error("JITDylib " + TargetJD.getName() + " is defunct", inconvertibleErrorCode()); - #ifdef EXPENSIVE_CHECKS verifySessionState("entering ExecutionSession::IL_emit"); #endif - auto ER = G.emit(std::move(SR), - [this](JITDylib *JD, NonOwningSymbolStringPtr Name) { - return IL_getSymbolState(JD, Name); - }); + // Walk all EDUs: + // 1. Verifying that dependencies are available (not removed or in the error + // state. + // 2. Removing any dependencies that are already Ready. + // 3. Lifting any EDUs for Emitted symbols into the EDUInfos map. + // 4. Finding any dependant EDUs and lifting them into the EDUInfos map. + std::deque Worklist; + for (auto &[EDU, _] : EDUInfos) + Worklist.push_back(EDU); + + for (auto *EDU : Worklist) { + auto *EDUInfo = &EDUInfos[EDU]; + + SmallVector DepJDsToRemove; + for (auto &[DepJD, Deps] : EDU->Dependencies) { + if (DepJD->State != JITDylib::Open) + return makeJDClosedError(*EDU, *DepJD); + + SymbolNameSet BadDeps; + SmallVector DepsToRemove; + for (auto &Dep : Deps) { + auto DepEntryItr = DepJD->Symbols.find(SymbolStringPtr(Dep)); + + // If this dep has been removed or moved to the error state then add it + // to the bad deps set. We aggregate these bad deps for more + // comprehensive error messages. + if (DepEntryItr == DepJD->Symbols.end() || + DepEntryItr->second.getFlags().hasError()) { + BadDeps.insert(SymbolStringPtr(Dep)); + continue; + } - EmitQueries EQ; + // If this dep isn't emitted yet then just add it to the NewDeps set to + // be propagated. + auto &DepEntry = DepEntryItr->second; + if (DepEntry.getState() < SymbolState::Emitted) { + EDUInfo->NewDeps[DepJD].insert(Dep); + continue; + } - // Handle failed queries. 
- for (auto &SN : ER.Failed) - IL_collectQueries( - EQ.Failed, SN->defs(), - [](JITDylib::SymbolTableEntry &E) { - E.setFlags(E.getFlags() = JITSymbolFlags::HasError); - }, - [&](AsynchronousSymbolQuery &Q, JITDylib &JD, - NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { - auto &FS = EQ.FailedSymsForQuery[&Q]; - if (!FS) - FS = std::make_shared(); - (*FS)[&JD].insert(SymbolStringPtr(Name)); - }); + // This dep has been emitted, so add it to the list to be removed from + // EDU. + DepsToRemove.push_back(Dep); - for (auto &FQ : EQ.Failed) - FQ->detach(); + // If Dep is Ready then there's nothing further to do. + if (DepEntry.getState() == SymbolState::Ready) { + assert(!DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) && + "Unexpected MaterializationInfo attached to ready symbol"); + continue; + } - for (auto &SN : ER.Ready) - IL_collectQueries( - EQ.Updated, SN->defs(), - [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); }, - [](AsynchronousSymbolQuery &Q, JITDylib &JD, - NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { - Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol()); - }); + // If we get here then Dep is Emitted. We need to look up its defining + // EDU and add this EDU to the defining EDU's list of users (this means + // creating an EDUInfos entry if the defining EDU doesn't have one + // already). + assert(DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) && + "Expected MaterializationInfo for emitted dependency"); + auto &DepMI = DepJD->MaterializingInfos[SymbolStringPtr(Dep)]; + assert(DepMI.DefiningEDU && + "Emitted symbol does not have a defining EDU"); + assert(DepMI.DependantEDUs.empty() && + "Already-emitted symbol has dependant EDUs?"); + auto &DepEDUInfo = EDUInfos[DepMI.DefiningEDU.get()]; + if (!DepEDUInfo.EDU) { + // No EDUInfo yet -- build initial entry, and reset the EDUInfo + // pointer, which we will have invalidated. + EDUInfo = &EDUInfos[EDU]; + DepEDUInfo.EDU = DepMI.DefiningEDU; + for (auto &[DepDepJD, DepDeps] : DepEDUInfo.EDU->Dependencies) { + if (DepDepJD == &TargetJD) { + for (auto &DepDep : DepDeps) + if (!MR.getSymbols().count(SymbolStringPtr(DepDep))) + DepEDUInfo.NewDeps[DepDepJD].insert(DepDep); + } else + DepEDUInfo.NewDeps[DepDepJD] = DepDeps; + } + } + DepEDUInfo.IntraEmitUsers.insert(EDU); + } + + // Some dependencies were removed or in an error state -- error out. + if (!BadDeps.empty()) + return makeUnsatisfiedDepsError(*EDU, *DepJD, std::move(BadDeps)); + + // Remove the emitted / ready deps from DepJD. + for (auto &Dep : DepsToRemove) + Deps.erase(Dep); + + // If there are no further deps in DepJD then flag it for removal too. + if (Deps.empty()) + DepJDsToRemove.push_back(DepJD); + } + + // Remove any JDs whose dependence sets have become empty. + for (auto &DepJD : DepJDsToRemove) { + assert(EDU->Dependencies.count(DepJD) && + "Trying to remove non-existent dep entries"); + EDU->Dependencies.erase(DepJD); + } + + // Now look for users of this EDU. 
+ for (auto &[Sym, Flags] : EDU->Symbols) { + assert(TargetJD.Symbols.count(SymbolStringPtr(Sym)) && + "Sym not present in symbol table"); + assert((TargetJD.Symbols[SymbolStringPtr(Sym)].getState() == + SymbolState::Resolved || + TargetJD.Symbols[SymbolStringPtr(Sym)] + .getFlags() + .hasMaterializationSideEffectsOnly()) && + "Emitting symbol not in the resolved state"); + assert(!TargetJD.Symbols[SymbolStringPtr(Sym)].getFlags().hasError() && + "Symbol is already in an error state"); + + auto MII = TargetJD.MaterializingInfos.find(SymbolStringPtr(Sym)); + if (MII == TargetJD.MaterializingInfos.end() || + MII->second.DependantEDUs.empty()) + continue; + + for (auto &DependantEDU : MII->second.DependantEDUs) { + if (IL_removeEDUDependence(*DependantEDU, TargetJD, Sym, EDUInfos)) + EDUInfo = &EDUInfos[EDU]; + EDUInfo->IntraEmitUsers.insert(DependantEDU); + } + MII->second.DependantEDUs.clear(); + } + } + + Worklist.clear(); + for (auto &[EDU, EDUInfo] : EDUInfos) { + if (!EDUInfo.IntraEmitUsers.empty() && !EDU->Dependencies.empty()) { + if (EDUInfo.NewDeps.empty()) + EDUInfo.NewDeps = EDU->Dependencies; + Worklist.push_back(EDU); + } + } + + propagateExtraEmitDeps( + Worklist, EDUInfos, + [](JITDylib::EmissionDepUnit &EDU, JITDylib &JD, + NonOwningSymbolStringPtr Sym) { + JD.MaterializingInfos[SymbolStringPtr(Sym)].DependantEDUs.insert(&EDU); + }); + + JITDylib::AsynchronousSymbolQuerySet CompletedQueries; + + // Extract completed queries and lodge not-yet-ready EDUs in the + // session. + for (auto &[EDU, EDUInfo] : EDUInfos) { + if (EDU->Dependencies.empty()) + IL_makeEDUReady(std::move(EDUInfo.EDU), CompletedQueries); + else + IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries); + } #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_emit"); #endif - return std::move(EQ); + return std::move(CompletedQueries); } Error ExecutionSession::OL_notifyEmitted( @@ -3025,127 +3471,40 @@ Error ExecutionSession::OL_notifyEmitted( } #endif // NDEBUG - std::vector> SNs; - WaitingOnGraph::ContainerElementsMap Residual; - { - auto &JDResidual = Residual[&MR.getTargetJITDylib()]; - for (auto &[Name, Flags] : MR.getSymbols()) - JDResidual.insert(NonOwningSymbolStringPtr(Name)); - - for (auto &SDG : DepGroups) { - WaitingOnGraph::ContainerElementsMap Defs; - assert(!SDG.Symbols.empty()); - auto &JDDefs = Defs[&MR.getTargetJITDylib()]; - for (auto &Def : SDG.Symbols) { - JDDefs.insert(NonOwningSymbolStringPtr(Def)); - JDResidual.erase(NonOwningSymbolStringPtr(Def)); - } - WaitingOnGraph::ContainerElementsMap Deps; - if (!SDG.Dependencies.empty()) { - for (auto &[JD, Syms] : SDG.Dependencies) { - auto &JDDeps = Deps[JD]; - for (auto &Dep : Syms) - JDDeps.insert(NonOwningSymbolStringPtr(Dep)); - } - } - SNs.push_back(std::make_unique( - std::move(Defs), std::move(Deps))); - } - if (!JDResidual.empty()) - SNs.push_back(std::make_unique( - std::move(Residual), WaitingOnGraph::ContainerElementsMap())); - } - - auto SR = WaitingOnGraph::simplify(std::move(SNs)); + auto EDUInfos = simplifyDepGroups(MR, DepGroups); LLVM_DEBUG({ dbgs() << " Simplified dependencies:\n"; - for (auto &SN : SR.superNodes()) { - - auto SortedLibs = [](WaitingOnGraph::ContainerElementsMap &C) { - std::vector JDs; - for (auto &[JD, _] : C) - JDs.push_back(JD); - llvm::sort(JDs, [](const JITDylib *LHS, const JITDylib *RHS) { - return LHS->getName() < RHS->getName(); - }); - return JDs; - }; - - auto SortedNames = [](WaitingOnGraph::ElementSet &Elems) { - std::vector Names(Elems.begin(), Elems.end()); - 
llvm::sort(Names, [](const NonOwningSymbolStringPtr &LHS, - const NonOwningSymbolStringPtr &RHS) { - return *LHS < *RHS; - }); - return Names; - }; - - dbgs() << " Defs: {"; - for (auto *JD : SortedLibs(SN->defs())) { - dbgs() << " (" << JD->getName() << ", ["; - for (auto &Sym : SortedNames(SN->defs()[JD])) - dbgs() << " " << Sym; - dbgs() << " ])"; + for (auto &[EDU, EDUInfo] : EDUInfos) { + dbgs() << " Symbols: { "; + for (auto &[Sym, Flags] : EDU->Symbols) + dbgs() << Sym << " "; + dbgs() << "}, Dependencies: { "; + for (auto &[DepJD, Deps] : EDU->Dependencies) { + dbgs() << "(" << DepJD->getName() << ", { "; + for (auto &Dep : Deps) + dbgs() << Dep << " "; + dbgs() << "}) "; } - dbgs() << " }, Deps: {"; - for (auto *JD : SortedLibs(SN->deps())) { - dbgs() << " (" << JD->getName() << ", ["; - for (auto &Sym : SortedNames(SN->deps()[JD])) - dbgs() << " " << Sym; - dbgs() << " ])"; - } - dbgs() << " }\n"; + dbgs() << "}\n"; } }); - auto EmitQueries = - runSessionLocked([&]() { return IL_emit(MR, std::move(SR)); }); - // On error bail out. - if (!EmitQueries) - return EmitQueries.takeError(); + auto CompletedQueries = + runSessionLocked([&]() { return IL_emit(MR, EDUInfos); }); - // Otherwise notify failed queries, and any updated queries that have been - // completed. + // On error bail out. + if (!CompletedQueries) + return CompletedQueries.takeError(); - // FIXME: Get rid of error return from notifyEmitted. - SymbolDependenceMap BadDeps; - { - for (auto &FQ : EmitQueries->Failed) { - FQ->detach(); - assert(EmitQueries->FailedSymsForQuery.count(FQ.get()) && - "Missing failed symbols for query"); - auto FailedSyms = std::move(EmitQueries->FailedSymsForQuery[FQ.get()]); - for (auto &[JD, Syms] : *FailedSyms) { - auto &BadDepsForJD = BadDeps[JD]; - for (auto &Sym : Syms) - BadDepsForJD.insert(Sym); - } - FQ->handleFailed(make_error(getSymbolStringPool(), - std::move(FailedSyms))); - } - } + MR.SymbolFlags.clear(); - for (auto &UQ : EmitQueries->Updated) - if (UQ->isComplete()) - UQ->handleComplete(*this); - - // If there are any bad dependencies then return an error. - if (!BadDeps.empty()) { - SymbolNameSet BadNames; - // Note: The name set calculated here is bogus: it includes all symbols in - // the MR, not just the ones that failed. We want to remove the error - // return path from notifyEmitted anyway, so this is just a brief - // placeholder to maintain (roughly) the current error behavior. - for (auto &[Name, Flags] : MR.getSymbols()) - BadNames.insert(Name); - MR.SymbolFlags.clear(); - return make_error( - getSymbolStringPool(), &MR.getTargetJITDylib(), std::move(BadNames), - std::move(BadDeps), "dependencies removed or in error state"); + // Otherwise notify all the completed queries. 
+ for (auto &Q : *CompletedQueries) { + assert(Q->isComplete() && "Q is not complete"); + Q->handleComplete(*this); } - MR.SymbolFlags.clear(); return Error::success(); } @@ -3176,48 +3535,158 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, #endif JITDylib::AsynchronousSymbolQuerySet FailedQueries; - auto Fail = [&](JITDylib *FailJD, NonOwningSymbolStringPtr FailSym) { - auto I = FailJD->Symbols.find_as(FailSym); - assert(I != FailJD->Symbols.end()); - I->second.setFlags(I->second.getFlags() | JITSymbolFlags::HasError); - auto J = FailJD->MaterializingInfos.find_as(FailSym); - if (J != FailJD->MaterializingInfos.end()) { - for (auto &Q : J->second.takeAllPendingQueries()) - FailedQueries.insert(std::move(Q)); - FailJD->MaterializingInfos.erase(J); + auto FailedSymbolsMap = std::make_shared(); + auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) { + JITDylib::AsynchronousSymbolQueryList ToDetach; + for (auto &Q : MI.pendingQueries()) { + // Add the query to the list to be failed and detach it. + FailedQueries.insert(Q); + ToDetach.push_back(Q); } + for (auto &Q : ToDetach) + Q->detach(); + assert(!MI.hasQueriesPending() && "Queries still pending after detach"); }; - auto FailedSymbolsMap = std::make_shared(); - - { - auto &FailedSymsForJD = (*FailedSymbolsMap)[&JD]; - for (auto &Sym : SymbolsToFail) { - FailedSymsForJD.insert(Sym); - Fail(&JD, NonOwningSymbolStringPtr(Sym)); + for (auto &Name : SymbolsToFail) { + (*FailedSymbolsMap)[&JD].insert(Name); + + // Look up the symbol to fail. + auto SymI = JD.Symbols.find(Name); + + // FIXME: Revisit this. We should be able to assert sequencing between + // ResourceTracker removal and symbol failure. + // + // It's possible that this symbol has already been removed, e.g. if a + // materialization failure happens concurrently with a ResourceTracker or + // JITDylib removal. In that case we can safely skip this symbol and + // continue. + if (SymI == JD.Symbols.end()) + continue; + auto &Sym = SymI->second; + + // If the symbol is already in the error state then we must have visited + // it earlier. + if (Sym.getFlags().hasError()) { + assert(!JD.MaterializingInfos.count(Name) && + "Symbol in error state still has MaterializingInfo"); + continue; } - } - WaitingOnGraph::ContainerElementsMap ToFail; - auto &JDToFail = ToFail[&JD]; - for (auto &Sym : SymbolsToFail) - JDToFail.insert(NonOwningSymbolStringPtr(Sym)); + // Move the symbol into the error state. + Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError); + + // FIXME: Come up with a sane mapping of state to + // presence-of-MaterializingInfo so that we can assert presence / absence + // here, rather than testing it. + auto MII = JD.MaterializingInfos.find(Name); + if (MII == JD.MaterializingInfos.end()) + continue; + + auto &MI = MII->second; + + // Collect queries to be failed for this MII. + ExtractFailedQueries(MI); + + if (MI.DefiningEDU) { + // If there is a DefiningEDU for this symbol then remove this + // symbol from it. + assert(MI.DependantEDUs.empty() && + "Symbol with DefiningEDU should not have DependantEDUs"); + assert(Sym.getState() >= SymbolState::Emitted && + "Symbol has EDU, should have been emitted"); + assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) && + "Symbol does not appear in its DefiningEDU"); + MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name)); + + // Remove this EDU from the dependants lists of its dependencies. 
+ for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) { + for (auto DepSym : DepSyms) { + assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && + "DepSym not in DepJD"); + assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && + "DepSym has not MaterializingInfo"); + auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; + assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) && + "DefiningEDU missing from DependantEDUs list of dependency"); + SymMI.DependantEDUs.erase(MI.DefiningEDU.get()); + } + } - auto FailedSNs = G.fail(ToFail); + MI.DefiningEDU = nullptr; + } else { + // Otherwise if there are any EDUs waiting on this symbol then move + // those symbols to the error state too, and deregister them from the + // symbols that they depend on. + // Note: We use a copy of DependantEDUs here since we'll be removing + // from the original set as we go. + for (auto &DependantEDU : MI.DependantEDUs) { + + // Remove DependantEDU from all of its users DependantEDUs lists. + for (auto &[DepJD, DepSyms] : DependantEDU->Dependencies) { + for (auto DepSym : DepSyms) { + // Skip self-reference to avoid invalidating the MI.DependantEDUs + // map. We'll clear this later. + if (DepJD == &JD && DepSym == Name) + continue; + assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && + "DepSym not in DepJD?"); + assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && + "DependantEDU not registered with symbol it depends on"); + auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; + assert(SymMI.DependantEDUs.count(DependantEDU) && + "DependantEDU missing from DependantEDUs list"); + SymMI.DependantEDUs.erase(DependantEDU); + } + } + + // Move any symbols defined by DependantEDU into the error state and + // fail any queries waiting on them. + auto &DepJD = *DependantEDU->JD; + auto DepEDUSymbols = std::move(DependantEDU->Symbols); + for (auto &[DepName, Flags] : DepEDUSymbols) { + auto DepSymItr = DepJD.Symbols.find(SymbolStringPtr(DepName)); + assert(DepSymItr != DepJD.Symbols.end() && + "Symbol not present in table"); + auto &DepSym = DepSymItr->second; + + assert(DepSym.getState() >= SymbolState::Emitted && + "Symbol has EDU, should have been emitted"); + assert(!DepSym.getFlags().hasError() && + "Symbol is already in the error state?"); + DepSym.setFlags(DepSym.getFlags() | JITSymbolFlags::HasError); + (*FailedSymbolsMap)[&DepJD].insert(SymbolStringPtr(DepName)); + + // This symbol has a defining EDU so its MaterializingInfo object must + // exist. 
+ auto DepMIItr = + DepJD.MaterializingInfos.find(SymbolStringPtr(DepName)); + assert(DepMIItr != DepJD.MaterializingInfos.end() && + "Symbol has defining EDU but not MaterializingInfo"); + auto &DepMI = DepMIItr->second; + assert(DepMI.DefiningEDU.get() == DependantEDU && + "Bad EDU dependence edge"); + assert(DepMI.DependantEDUs.empty() && + "Symbol was emitted, should not have any DependantEDUs"); + ExtractFailedQueries(DepMI); + DepJD.MaterializingInfos.erase(SymbolStringPtr(DepName)); + } - for (auto &SN : FailedSNs) { - for (auto &[FailJD, Defs] : SN->defs()) { - auto &FailedSymsForFailJD = (*FailedSymbolsMap)[FailJD]; - for (auto &Def : Defs) { - FailedSymsForFailJD.insert(SymbolStringPtr(Def)); - Fail(FailJD, Def); + DepJD.shrinkMaterializationInfoMemory(); } + + MI.DependantEDUs.clear(); } + + assert(!MI.DefiningEDU && "DefiningEDU should have been reset"); + assert(MI.DependantEDUs.empty() && + "DependantEDUs should have been removed above"); + assert(!MI.hasQueriesPending() && + "Can not delete MaterializingInfo with queries pending"); + JD.MaterializingInfos.erase(Name); } - // Detach all failed queries. - for (auto &Q : FailedQueries) - Q->detach(); + JD.shrinkMaterializationInfoMemory(); #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_failSymbols"); @@ -3252,11 +3721,9 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { return IL_failSymbols(MR.getTargetJITDylib(), SymbolsToFail); }); - for (auto &Q : FailedQueries) { - Q->detach(); + for (auto &Q : FailedQueries) Q->handleFailed( make_error(getSymbolStringPool(), FailedSymbols)); - } } Error ExecutionSession::OL_replace(MaterializationResponsibility &MR, diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp index 893523ced8651..dec1df7da2f4a 100644 --- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp @@ -448,7 +448,7 @@ Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) { if (const char *ErrMsg = WFR.getOutOfBandError()) return make_error(ErrMsg, inconvertibleErrorCode()); - orc::shared::detail::SPSSerializableError Info; + detail::SPSSerializableError Info; SPSInputBuffer IB(WFR.data(), WFR.size()); if (!SPSArgList::deserialize(IB, Info)) return make_error("Could not deserialize hangup info", diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s b/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s index 529395822f5f7..83d71cdf6fc83 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/LocalDependencyPropagation.s @@ -16,7 +16,8 @@ # CHECK-DAG: Symbols: { _foo }, Dependencies: { (main, { _external_func }) } # CHECK-DAG: Symbols: { _baz }, Dependencies: { (main, { _foo }) } # CHECK: Simplified dependencies: -# CHECK-DAG: Defs: { (main, [ _baz _foo ]) }, Deps: { (main, [ _external_func ]) } +# CHECK-DAG: Symbols: { _foo }, Dependencies: { (main, { _external_func }) } +# CHECK-DAG: Symbols: { _baz }, Dependencies: { (main, { _external_func }) } .section __TEXT,__text,regular,pure_instructions diff --git a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt index 7e3ebc88cea63..a2bbb10039c9a 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt @@ -42,7 +42,6 @@ add_llvm_unittest(OrcJITTests 
SymbolStringPoolTest.cpp TaskDispatchTest.cpp ThreadSafeModuleTest.cpp - WaitingOnGraphTest.cpp WrapperFunctionUtilsTest.cpp JITLinkRedirectionManagerTest.cpp ReOptimizeLayerTest.cpp diff --git a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp deleted file mode 100644 index b988a78a3783a..0000000000000 --- a/llvm/unittests/ExecutionEngine/Orc/WaitingOnGraphTest.cpp +++ /dev/null @@ -1,553 +0,0 @@ -//===--------- WaitingOnGraphTest.cpp - Test WaitingOnGraph APIs ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/Orc/WaitingOnGraph.h" -#include "gtest/gtest.h" - -namespace llvm::orc::detail { - -class WaitingOnGraphTest : public testing::Test { -public: - using TestGraph = WaitingOnGraph; - -protected: - using SuperNode = TestGraph::SuperNode; - using SuperNodeBuilder = TestGraph::SuperNodeBuilder; - using ContainerElementsMap = TestGraph::ContainerElementsMap; - using ElemToSuperNodeMap = TestGraph::ElemToSuperNodeMap; - using SimplifyResult = TestGraph::SimplifyResult; - using EmitResult = TestGraph::EmitResult; - - static const ContainerElementsMap &getDefs(SuperNode &SN) { return SN.Defs; } - - static const ContainerElementsMap &getDeps(SuperNode &SN) { return SN.Deps; } - - static std::vector> &getSNs(SimplifyResult &SR) { - return SR.SNs; - } - - static ElemToSuperNodeMap &getElemToSN(SimplifyResult &SR) { - return SR.ElemToSN; - } - - static std::vector> &getPendingSNs(TestGraph &G) { - return G.PendingSNs; - } - - static ContainerElementsMap merge(ContainerElementsMap M1, - const ContainerElementsMap &M2) { - ContainerElementsMap Result = std::move(M1); - for (auto &[Container, Elems] : M2) - Result[Container].insert(Elems.begin(), Elems.end()); - return Result; - } - - ContainerElementsMap - collapseDefs(std::vector> &SNs, - bool DepsMustMatch = true) { - if (SNs.empty()) - return ContainerElementsMap(); - - ContainerElementsMap Result = SNs[0]->defs(); - const ContainerElementsMap &Deps = SNs[0]->deps(); - - for (size_t I = 1; I != SNs.size(); ++I) { - assert(!DepsMustMatch || SNs[I]->deps() == Deps); - Result = merge(std::move(Result), SNs[I]->defs()); - } - - return Result; - } - - EmitResult integrate(EmitResult ER) { - for (auto &SN : ER.Ready) - for (auto &[Container, Elems] : SN->defs()) - Ready[Container].insert(Elems.begin(), Elems.end()); - for (auto &SN : ER.Failed) - for (auto &[Container, Elems] : SN->defs()) - Failed[Container].insert(Elems.begin(), Elems.end()); - return ER; - } - - EmitResult emit(SimplifyResult SR) { - return integrate(G.emit(std::move(SR), GetExternalState)); - } - - TestGraph G; - ContainerElementsMap Ready; - ContainerElementsMap Failed; - - class ExternalStateGetter { - public: - ExternalStateGetter(WaitingOnGraphTest &T) : T(T) {} - TestGraph::ExternalState operator()(TestGraph::ContainerId C, - TestGraph::ElementId E) { - { - auto I = T.Failed.find(C); - if (I != T.Failed.end()) - if (I->second.count(E)) - return TestGraph::ExternalState::Failed; - } - - { - auto I = T.Ready.find(C); - if (I != T.Ready.end()) - if (I->second.count(E)) - return TestGraph::ExternalState::Ready; - } - - return TestGraph::ExternalState::None; - } - - private: - WaitingOnGraphTest &T; - }; - - 
ExternalStateGetter GetExternalState{*this}; -}; - -} // namespace llvm::orc::detail - -using namespace llvm; -using namespace llvm::orc; -using namespace llvm::orc::detail; - -TEST_F(WaitingOnGraphTest, ConstructAndDestroyEmpty) { - // Nothing to do here -- we're just testing construction and destruction - // of the WaitingOnGraphTest::G member. -} - -TEST_F(WaitingOnGraphTest, Build_TrivialSingleSuperNode) { - // Add one set of trivial defs and empty deps to the builder, make sure that - // they're passed through to the resulting super-node. - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {0}}}); - ContainerElementsMap Deps; - B.add(Defs, Deps); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs[0]), Defs); - EXPECT_EQ(getDeps(*SNs[0]), Deps); -} - -TEST_F(WaitingOnGraphTest, Build_EmptyDefs) { - // Adding empty def sets is ok, but should not result in creation of a - // SuperNode. - SuperNodeBuilder B; - ContainerElementsMap Empty; - B.add(Empty, Empty); - auto SNs = B.takeSuperNodes(); - EXPECT_TRUE(SNs.empty()); -} - -TEST_F(WaitingOnGraphTest, Build_NonTrivialSingleSuperNode) { - // Add one non-trivwial set of defs and deps. Make sure that they're passed - // through to the resulting super-node. - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {0, 1, 2}}}); - ContainerElementsMap Deps({{1, {3, 4, 5}}}); - B.add(Defs, Deps); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs[0]), Defs); - EXPECT_EQ(getDeps(*SNs[0]), Deps); -} - -TEST_F(WaitingOnGraphTest, Build_CoalesceEmptyDeps) { - // Add two trivial defs both with empty deps to the builder. Check that - // they're coalesced into a single super-node. - SuperNodeBuilder B; - ContainerElementsMap Defs1({{0, {0}}}); - ContainerElementsMap Defs2({{0, {1}}}); - ContainerElementsMap Deps; - B.add(Defs1, Deps); - B.add(Defs2, Deps); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs[0]), merge(Defs1, Defs2)); - EXPECT_EQ(getDeps(*SNs[0]), Deps); -} - -TEST_F(WaitingOnGraphTest, Build_CoalesceNonEmptyDeps) { - // Add two sets trivial of trivial defs with empty deps to the builder. Check - // that the two coalesce into a single super node. - SuperNodeBuilder B; - ContainerElementsMap Defs1({{0, {0}}}); - ContainerElementsMap Defs2({{0, {1}}}); - ContainerElementsMap Deps({{1, {1}}}); - B.add(Defs1, Deps); - B.add(Defs2, Deps); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs[0]), merge(Defs1, Defs2)); - EXPECT_EQ(getDeps(*SNs[0]), Deps); -} - -TEST_F(WaitingOnGraphTest, Build_CoalesceInterleaved) { - // Add multiple sets of defs, some with the same dep sets. Check that nodes - // are still coalesced as expected. - SuperNodeBuilder B; - - ContainerElementsMap DefsA1({{0, {0}}}); - ContainerElementsMap DefsA2({{0, {1}}}); - ContainerElementsMap DefsB1({{1, {0}}}); - ContainerElementsMap DefsB2({{1, {1}}}); - ContainerElementsMap DepsA({{2, {0}}, {3, {0}}}); - ContainerElementsMap DepsB({{4, {0}}, {5, {0}}}); - B.add(DefsA1, DepsA); - B.add(DefsB1, DepsB); - B.add(DefsA2, DepsA); - B.add(DefsB2, DepsB); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 2U); - EXPECT_EQ(getDefs(*SNs[0]), merge(DefsA1, DefsA2)); - EXPECT_EQ(getDeps(*SNs[0]), DepsA); - EXPECT_EQ(getDefs(*SNs[1]), merge(DefsB1, DefsB2)); - EXPECT_EQ(getDeps(*SNs[1]), DepsB); -} - -TEST_F(WaitingOnGraphTest, Build_SelfDepRemoval) { - // Add multiple sets of defs, some with the same dep sets. 
Check that nodes - // are still coalesced as expected. - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {0, 1}}}); - ContainerElementsMap Deps({{0, {1}}}); - ContainerElementsMap Empty; - B.add(Defs, Deps); - auto SNs = B.takeSuperNodes(); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs[0]), Defs); - EXPECT_EQ(getDeps(*SNs[0]), Empty); -} - -TEST_F(WaitingOnGraphTest, Simplification_EmptySimplification) { - auto SR = TestGraph::simplify({}); - auto &SNs = getSNs(SR); - EXPECT_EQ(SNs.size(), 0U); - EXPECT_EQ(getElemToSN(SR), ElemToSuperNodeMap()); -} - -TEST_F(WaitingOnGraphTest, Simplification_TrivialSingleSuperNode) { - // Test trivial call to simplify. - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {0}}}); - ContainerElementsMap Deps({{0, {0}}}); - B.add(Defs, Deps); - auto SR = TestGraph::simplify(B.takeSuperNodes()); - ContainerElementsMap Empty; - - // Check SNs. - auto &SNs = getSNs(SR); - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs.at(0)), Defs); - EXPECT_EQ(getDeps(*SNs.at(0)), Empty); - - // Check ElemToSNs. - ElemToSuperNodeMap ExpectedElemToSNs; - ExpectedElemToSNs[0][0] = SNs[0].get(); - EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); -} - -TEST_F(WaitingOnGraphTest, Simplification_SimplifySingleContainerSimpleCycle) { - // Test trivial simplification call with two nodes and one internal - // dependence cycle within a single container: - // N0: (0, 0) -> (0, 1) - // N1: (0, 1) -> (0, 0) - // We expect intra-simplify cycle elimination to clear both dependence sets, - // and coalescing to join them into one supernode covering both defs. - SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{0, {1}}}); - B.add(Defs0, Deps0); - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Deps1({{0, {0}}}); - B.add(Defs1, Deps1); - auto SR = TestGraph::simplify(B.takeSuperNodes()); - - // Check SNs. - auto &SNs = getSNs(SR); - ContainerElementsMap Empty; - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs.at(0)), merge(Defs0, Defs1)); - EXPECT_EQ(getDeps(*SNs.at(0)), Empty); - - // Check ElemToSNs. - ElemToSuperNodeMap ExpectedElemToSNs; - ExpectedElemToSNs[0][0] = SNs[0].get(); - ExpectedElemToSNs[0][1] = SNs[0].get(); - - EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); -} - -TEST_F(WaitingOnGraphTest, - Simplification_SimplifySingleContainerNElementCycle) { - // Test trivial simplification call with M nodes and one internal - // dependence cycle within a single container: - // N0: (0, 0) -> (0, 1) - // N1: (0, 1) -> (0, 2) - // ... - // NM: (0, M) -> (0, 0) - // We expect intra-simplify cycle elimination to clear all dependence sets, - // and coalescing to join them into one supernode covering all defs. - SuperNodeBuilder B; - constexpr size_t M = 10; - for (size_t I = 0; I != M; ++I) { - ContainerElementsMap Defs({{0, {I}}}); - ContainerElementsMap Deps({{0, {(I + 1) % M}}}); - B.add(Defs, Deps); - } - auto InitSNs = B.takeSuperNodes(); - EXPECT_EQ(InitSNs.size(), M); - - auto SR = TestGraph::simplify(std::move(InitSNs)); - - // Check SNs. - auto &SNs = getSNs(SR); - ContainerElementsMap ExpectedDefs; - for (size_t I = 0; I != M; ++I) - ExpectedDefs[0].insert(I); - ContainerElementsMap Empty; - EXPECT_EQ(SNs.size(), 1U); - EXPECT_EQ(getDefs(*SNs.at(0)), ExpectedDefs); - EXPECT_EQ(getDeps(*SNs.at(0)), Empty); - - // Check ElemToSNs. 
- ElemToSuperNodeMap ExpectedElemToSNs; - for (size_t I = 0; I != M; ++I) - ExpectedElemToSNs[0][I] = SNs[0].get(); - - EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); -} - -TEST_F(WaitingOnGraphTest, Simplification_SimplifyIntraSimplifyPropagateDeps) { - // Test trivial simplification call with two nodes and one internal - // dependence cycle within a single container: - // N0: (0, 0) -> (0, {1, 2}) - // N1: (0, 1) -> (0, {3}) - // We expect intra-simplify cycle elimination to replace the dependence of - // (0, 0) on (0, 1) with a dependence on (0, 3) instead. - SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{0, {1, 2}}}); - B.add(Defs0, Deps0); - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Deps1({{0, {3}}}); - B.add(Defs1, Deps1); - auto SR = TestGraph::simplify(B.takeSuperNodes()); - - // Check SNs. - auto &SNs = getSNs(SR); - EXPECT_EQ(SNs.size(), 2U); - - // ContainerElemenstMap ExpectedDefs0({{0, {0}}}); - // ContainerElemenstMap ExpectedDeps0({{0, {1, 3}}}); - EXPECT_EQ(getDefs(*SNs.at(0)), ContainerElementsMap({{0, {0}}})); - EXPECT_EQ(getDeps(*SNs.at(0)), ContainerElementsMap({{0, {2, 3}}})); - - EXPECT_EQ(getDefs(*SNs.at(1)), ContainerElementsMap({{0, {1}}})); - EXPECT_EQ(getDeps(*SNs.at(1)), ContainerElementsMap({{0, {3}}})); - - // Check ElemToSNs. - ElemToSuperNodeMap ExpectedElemToSNs; - ExpectedElemToSNs[0][0] = SNs[0].get(); - ExpectedElemToSNs[0][1] = SNs[1].get(); - - EXPECT_EQ(getElemToSN(SR), ExpectedElemToSNs); -} - -TEST_F(WaitingOnGraphTest, Emit_EmptyEmit) { - // Check that empty emits work as expected. - auto ER = G.emit(TestGraph::simplify({}), GetExternalState); - - EXPECT_EQ(ER.Ready.size(), 0U); - EXPECT_EQ(ER.Failed.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Emit_TrivialSingleNode) { - // Check that emitting a single node behaves as expected. - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {0}}}); - B.add(Defs, ContainerElementsMap()); - auto ER = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(collapseDefs(ER.Ready), Defs); - EXPECT_EQ(ER.Failed.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Emit_TrivialSequence) { - // Perform a sequence of two emits where the second emit depends on the - // first. Check that nodes become ready after each emit. - SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Empty; - B.add(Defs0, Empty); - auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(collapseDefs(ER0.Ready), Defs0); - EXPECT_EQ(ER0.Failed.size(), 0U); - - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Deps1({{0, {0}}}); - B.add(Defs1, Deps1); - auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(collapseDefs(ER1.Ready), Defs1); - EXPECT_EQ(ER1.Failed.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Emit_TrivialReverseSequence) { - // Perform a sequence of two emits where the first emit depends on the - // second. Check that both nodes become ready after the second emit. 
- SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{0, {1}}}); - B.add(Defs0, Deps0); - auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER0.Ready.size(), 0U); - EXPECT_EQ(ER0.Failed.size(), 0U); - - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Empty; - B.add(Defs1, Empty); - auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(collapseDefs(ER1.Ready), merge(Defs0, Defs1)); - EXPECT_EQ(ER1.Failed.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Emit_Coalescing) { - SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{1, {0}}}); - B.add(Defs0, Deps0); - auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER0.Ready.size(), 0U); - EXPECT_EQ(ER0.Failed.size(), 0U); - - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Deps1({{1, {0}}}); - B.add(Defs1, Deps1); - auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER1.Ready.size(), 0U); - EXPECT_EQ(ER1.Failed.size(), 0U); - - // Check that after emitting two nodes with the same dep set we have only one - // pending supernode whose defs are the union of the defs in the two emits. - auto &PendingSNs = getPendingSNs(G); - EXPECT_EQ(PendingSNs.size(), 1U); - EXPECT_EQ(getDefs(*PendingSNs.at(0)), merge(Defs0, Defs1)); - - ContainerElementsMap Defs2({{1, {0}}}); - ContainerElementsMap Empty; - B.add(Defs2, Empty); - auto ER2 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(collapseDefs(ER2.Ready), merge(merge(Defs0, Defs1), Defs2)); - EXPECT_EQ(ER2.Failed.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Emit_ZigZag) { - // Perform a sequence of four emits, where the first three contain a zig-zag - // pattern: - // 1. (0, 0) -> (0, 1) - // 2. (0, 2) -> (0, 3) - // ^ -- At this point we expect two pending supernodes. - // 3. (0, 1) -> (0, 2) - // ^ -- Resolution of (0, 1) should cause all three emitted nodes to coalsce - // into one supernode defining (0, {1, 2, 3}). - // 4. (0, 3) - // ^ -- Should cause all four nodes to become ready. - - SuperNodeBuilder B; - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{0, {1}}}); - B.add(Defs0, Deps0); - auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER0.Ready.size(), 0U); - EXPECT_EQ(ER0.Failed.size(), 0U); - - ContainerElementsMap Defs1({{0, {2}}}); - ContainerElementsMap Deps1({{0, {3}}}); - B.add(Defs1, Deps1); - auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER1.Ready.size(), 0U); - EXPECT_EQ(ER1.Failed.size(), 0U); - - // Check that after emitting two nodes with the same dep set we have only one - // pending supernode whose defs are the union of the defs in the two emits. - auto &PendingSNs = getPendingSNs(G); - EXPECT_EQ(PendingSNs.size(), 2U); - EXPECT_EQ(getDefs(*PendingSNs.at(0)), Defs0); - EXPECT_EQ(getDeps(*PendingSNs.at(0)), Deps0); - EXPECT_EQ(getDefs(*PendingSNs.at(1)), Defs1); - EXPECT_EQ(getDeps(*PendingSNs.at(1)), Deps1); - - ContainerElementsMap Defs2({{0, {1}}}); - ContainerElementsMap Deps2({{0, {2}}}); - B.add(Defs2, Deps2); - auto ER2 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER2.Ready.size(), 0U); - EXPECT_EQ(ER2.Failed.size(), 0U); - - // Check that after emitting the third node we've coalesced all three. 
- EXPECT_EQ(PendingSNs.size(), 1U); - EXPECT_EQ(getDefs(*PendingSNs.at(0)), merge(merge(Defs0, Defs1), Defs2)); - EXPECT_EQ(getDeps(*PendingSNs.at(0)), Deps1); - - ContainerElementsMap Defs3({{0, {3}}}); - ContainerElementsMap Empty; - B.add(Defs3, Empty); - auto ER3 = emit(TestGraph::simplify(B.takeSuperNodes())); - - EXPECT_EQ(collapseDefs(ER3.Ready), - merge(merge(merge(Defs0, Defs1), Defs2), Defs3)); - EXPECT_EQ(ER2.Failed.size(), 0U); - EXPECT_TRUE(PendingSNs.empty()); -} - -TEST_F(WaitingOnGraphTest, Fail_Empty) { - // Check that failing an empty set is a no-op. - auto FR = G.fail(ContainerElementsMap()); - EXPECT_EQ(FR.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Fail_Single) { - // Check that failing a set with no existing dependencies works. - auto FR = G.fail({{0, {0}}}); - EXPECT_EQ(FR.size(), 0U); -} - -TEST_F(WaitingOnGraphTest, Fail_EmitDependenceOnFailure) { - // Check that emitted nodes that directly depend on failed nodes also fail. - Failed = {{0, {0}}}; - - SuperNodeBuilder B; - ContainerElementsMap Defs({{0, {1}}}); - ContainerElementsMap Deps({{0, {0}}}); - B.add(Defs, Deps); - auto ER = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER.Ready.size(), 0U); - EXPECT_EQ(collapseDefs(ER.Failed, false), Defs); -} - -TEST_F(WaitingOnGraphTest, Fail_ZigZag) { - // Check that if an emit introduces a transitive dependence of a failed - // node, then all nodes that depend on the failed node are also failed. - SuperNodeBuilder B; - - ContainerElementsMap Defs0({{0, {0}}}); - ContainerElementsMap Deps0({{0, {1}}}); - B.add(Defs0, Deps0); - auto ER0 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER0.Ready.size(), 0U); - EXPECT_EQ(ER0.Failed.size(), 0U); - - Failed = {{0, {2}}}; - - ContainerElementsMap Defs1({{0, {1}}}); - ContainerElementsMap Deps1({{0, {2}}}); - B.add(Defs1, Deps1); - auto ER1 = emit(TestGraph::simplify(B.takeSuperNodes())); - EXPECT_EQ(ER1.Ready.size(), 0U); - EXPECT_EQ(collapseDefs(ER1.Failed, false), merge(Defs0, Defs1)); -} From cc88a3b8df21a04b688fccbdf239bd8329283100 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 20 Oct 2025 16:33:35 -0700 Subject: [PATCH 37/38] [bazel] Add dep for MLIR 5a112de (#164331) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index a4724b91f0750..b5f14ff6f9b71 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1280,6 +1280,7 @@ cc_binary( deps = [ ":CAPIIR", ":CAPILLVM", + ":CAPITarget", ":MLIRBindingsPythonNanobindHeadersAndDeps", "@nanobind", ], From c9124a1b0853899bdd22d267124551ec4d720a23 Mon Sep 17 00:00:00 2001 From: jimingham Date: Mon, 20 Oct 2025 16:46:25 -0700 Subject: [PATCH 38/38] Fix a potential use-after-free in StopInfoBreakpoint. (#163471) StopInfoBreakpoint keeps a BreakpointLocationCollection for all the breakpoint locations at the BreakpointSite that was hit. It is also lives through the time a given thread is stopped, so there are plenty of opportunities for one of the owning breakpoints to get deleted. But BreakpointLocations don't keep their owner Breakpoints alive, so if the BreakpointLocationCollection can live past when some code gets a chance to delete an owner breakpoint, and then you ask that location for some breakpoint information, it will access freed memory. 
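As a minimal, self-contained sketch of the ownership pattern (illustrative only,
plain standard C++ rather than the LLDB types themselves): a location-like
object that holds only a reference to its owner dangles once the owner's last
shared pointer is dropped, unless the collection also stashes an owning
pointer, which is the approach taken here.

    #include <cassert>
    #include <map>
    #include <memory>
    #include <vector>

    struct Owner : std::enable_shared_from_this<Owner> {
      int id;
      explicit Owner(int id) : id(id) {}
    };

    // Non-owning, like a BreakpointLocation's reference to its Breakpoint.
    struct Location {
      Owner &owner;
    };

    struct PreservingCollection {
      std::vector<std::shared_ptr<Location>> locs;
      // Extra owning references keyed by owner id keep owners alive as long
      // as one of their locations is still in the collection.
      std::map<int, std::shared_ptr<Owner>> preserved;

      void add(std::shared_ptr<Location> loc) {
        preserved.emplace(loc->owner.id, loc->owner.shared_from_this());
        locs.push_back(std::move(loc));
      }
    };

    int main() {
      auto owner = std::make_shared<Owner>(1);
      PreservingCollection c;
      c.add(std::make_shared<Location>(Location{*owner}));
      owner.reset();                    // without 'preserved' this destroys Owner
      assert(c.locs[0]->owner.id == 1); // still safe: collection kept Owner alive
      return 0;
    }

The preserving BreakpointLocationCollection below does the analogous thing with
Breakpoint shared pointers keyed by (breakpoint id, location id).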
This wasn't a problem before PR #158128 because the StopInfoBreakpoint just kept the BreakpointSite that was hit, and when you asked it questions, it relooked up that list. That was not great, however, because if you hit breakpoints 5 & 6, deleted 5 and then asked which breakpoints got hit, you would just get 6. For that and other reasons that PR changed to storing a BreakpointLocationCollection of the breakpoints that were hit. That's better from a UI perspective but caused this potential problem. I fix it by adding a variant of the BreakpointLocationCollection that also holds onto a shared pointer to the Breakpoints that own the locations that were hit, thus keeping them alive till the StopInfoBreakpoint goes away. This fixed the ASAN assertion. I also added a test that works harder to cause trouble by deleting breakpoints during a stop. --- .../Breakpoint/BreakpointLocationCollection.h | 16 ++++- .../BreakpointLocationCollection.cpp | 25 ++++++- lldb/source/Target/StopInfo.cpp | 13 +++- .../callback_deletes_breakpoints/Makefile | 4 ++ .../TestCallbackDeletesBreakpoints.py | 67 +++++++++++++++++++ .../callback_deletes_breakpoints/main.c | 12 ++++ 6 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/Makefile create mode 100644 lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/TestCallbackDeletesBreakpoints.py create mode 100644 lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/main.c diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocationCollection.h b/lldb/include/lldb/Breakpoint/BreakpointLocationCollection.h index 1df4e074680f5..124cb55eaf723 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocationCollection.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocationCollection.h @@ -9,6 +9,7 @@ #ifndef LLDB_BREAKPOINT_BREAKPOINTLOCATIONCOLLECTION_H #define LLDB_BREAKPOINT_BREAKPOINTLOCATIONCOLLECTION_H +#include #include #include @@ -19,7 +20,15 @@ namespace lldb_private { class BreakpointLocationCollection { public: - BreakpointLocationCollection(); + /// Breakpoint locations don't keep their breakpoint owners alive, so neither + /// will a collection of breakpoint locations. However, if you need to + /// use this collection in a context where some of the breakpoints whose + /// locations are in the collection might get deleted during its lifespan, + /// then you need to make sure the breakpoints don't get deleted out from + /// under you. To do that, pass true for preserving, and so long as there is + /// a location for a given breakpoint in the collection, the breakpoint will + /// not get destroyed. 
+ BreakpointLocationCollection(bool preserving = false); ~BreakpointLocationCollection(); @@ -164,6 +173,10 @@ class BreakpointLocationCollection { collection m_break_loc_collection; mutable std::mutex m_collection_mutex; + /// These are used if we're preserving breakpoints in this list: + const bool m_preserving_bkpts = false; + std::map, lldb::BreakpointSP> + m_preserved_bps; public: typedef llvm::iterator_range @@ -172,7 +185,6 @@ class BreakpointLocationCollection { return BreakpointLocationCollectionIterable(m_break_loc_collection); } }; - } // namespace lldb_private #endif // LLDB_BREAKPOINT_BREAKPOINTLOCATIONCOLLECTION_H diff --git a/lldb/source/Breakpoint/BreakpointLocationCollection.cpp b/lldb/source/Breakpoint/BreakpointLocationCollection.cpp index 1d052c5fc9bb6..97715836ec104 100644 --- a/lldb/source/Breakpoint/BreakpointLocationCollection.cpp +++ b/lldb/source/Breakpoint/BreakpointLocationCollection.cpp @@ -17,7 +17,8 @@ using namespace lldb; using namespace lldb_private; // BreakpointLocationCollection constructor -BreakpointLocationCollection::BreakpointLocationCollection() = default; +BreakpointLocationCollection::BreakpointLocationCollection(bool preserving) + : m_preserving_bkpts(preserving) {} // Destructor BreakpointLocationCollection::~BreakpointLocationCollection() = default; @@ -26,8 +27,19 @@ void BreakpointLocationCollection::Add(const BreakpointLocationSP &bp_loc) { std::lock_guard guard(m_collection_mutex); BreakpointLocationSP old_bp_loc = FindByIDPair(bp_loc->GetBreakpoint().GetID(), bp_loc->GetID()); - if (!old_bp_loc.get()) + if (!old_bp_loc.get()) { m_break_loc_collection.push_back(bp_loc); + if (m_preserving_bkpts) { + lldb::break_id_t bp_loc_id = bp_loc->GetID(); + Breakpoint &bkpt = bp_loc->GetBreakpoint(); + lldb::break_id_t bp_id = bkpt.GetID(); + std::pair key = + std::make_pair(bp_id, bp_loc_id); + auto entry = m_preserved_bps.find(key); + if (entry == m_preserved_bps.end()) + m_preserved_bps.emplace(key, bkpt.shared_from_this()); + } + } } bool BreakpointLocationCollection::Remove(lldb::break_id_t bp_id, @@ -35,6 +47,15 @@ bool BreakpointLocationCollection::Remove(lldb::break_id_t bp_id, std::lock_guard guard(m_collection_mutex); collection::iterator pos = GetIDPairIterator(bp_id, bp_loc_id); // Predicate if (pos != m_break_loc_collection.end()) { + if (m_preserving_bkpts) { + std::pair key = + std::make_pair(bp_id, bp_loc_id); + auto entry = m_preserved_bps.find(key); + if (entry == m_preserved_bps.end()) + assert(0 && "Breakpoint added to collection but not preserving map."); + else + m_preserved_bps.erase(entry); + } m_break_loc_collection.erase(pos); return true; } diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 7fa1fc5d71f13..e9e534a57973e 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -87,11 +87,15 @@ bool StopInfo::HasTargetRunSinceMe() { namespace lldb_private { class StopInfoBreakpoint : public StopInfo { public: + // We use a "breakpoint preserving BreakpointLocationCollection because we + // may need to hand out the "breakpoint hit" list as any point, potentially + // after the breakpoint has been deleted. But we still need to refer to them. 
StopInfoBreakpoint(Thread &thread, break_id_t break_id) : StopInfo(thread, break_id), m_should_stop(false), m_should_stop_is_valid(false), m_should_perform_action(true), m_address(LLDB_INVALID_ADDRESS), m_break_id(LLDB_INVALID_BREAK_ID), - m_was_all_internal(false), m_was_one_shot(false) { + m_was_all_internal(false), m_was_one_shot(false), + m_async_stopped_locs(true) { StoreBPInfo(); } @@ -99,7 +103,8 @@ class StopInfoBreakpoint : public StopInfo { : StopInfo(thread, break_id), m_should_stop(should_stop), m_should_stop_is_valid(true), m_should_perform_action(true), m_address(LLDB_INVALID_ADDRESS), m_break_id(LLDB_INVALID_BREAK_ID), - m_was_all_internal(false), m_was_one_shot(false) { + m_was_all_internal(false), m_was_one_shot(false), + m_async_stopped_locs(true) { StoreBPInfo(); } @@ -699,6 +704,10 @@ class StopInfoBreakpoint : public StopInfo { lldb::break_id_t m_break_id; bool m_was_all_internal; bool m_was_one_shot; + /// The StopInfoBreakpoint lives after the stop, and could get queried + /// at any time so we need to make sure that it keeps the breakpoints for + /// each of the locations it records alive while it is around. That's what + /// The BreakpointPreservingLocationCollection does. BreakpointLocationCollection m_async_stopped_locs; }; diff --git a/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/Makefile b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/Makefile new file mode 100644 index 0000000000000..695335e068c0c --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/Makefile @@ -0,0 +1,4 @@ +C_SOURCES := main.c +CFLAGS_EXTRAS := -std=c99 + +include Makefile.rules diff --git a/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/TestCallbackDeletesBreakpoints.py b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/TestCallbackDeletesBreakpoints.py new file mode 100644 index 0000000000000..2b8fc662ad42e --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/TestCallbackDeletesBreakpoints.py @@ -0,0 +1,67 @@ +""" +Make sure that deleting breakpoints in another breakpoint +callback doesn't cause problems. +""" + + +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +class TestBreakpointDeletionInCallback(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_breakpoint_deletion_in_callback(self): + self.build() + self.main_source_file = lldb.SBFileSpec("main.c") + self.delete_others_test() + + def delete_others_test(self): + """You might use the test implementation in several ways, say so here.""" + + # This function starts a process, "a.out" by default, sets a source + # breakpoint, runs to it, and returns the thread, process & target. + # It optionally takes an SBLaunchOption argument if you want to pass + # arguments or environment variables. 
+ (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", self.main_source_file + ) + + # Now set a breakpoint on "I did something" several times + # + bkpt_numbers = [] + for idx in range(0, 5): + bkpt_numbers.append( + lldbutil.run_break_set_by_source_regexp(self, "// Deletable location") + ) + + # And add commands to the third one to delete two others: + deleter = target.FindBreakpointByID(bkpt_numbers[2]) + self.assertTrue(deleter.IsValid(), "Deleter is a good breakpoint") + commands = lldb.SBStringList() + deleted_ids = [bkpt_numbers[0], bkpt_numbers[3]] + for idx in deleted_ids: + commands.AppendString(f"break delete {idx}") + + deleter.SetCommandLineCommands(commands) + + thread_list = lldbutil.continue_to_breakpoint(process, deleter) + self.assertEqual(len(thread_list), 1) + stop_data = thread.stop_reason_data + # There are 5 breakpoints so 10 break_id, break_loc_id. + self.assertEqual(len(stop_data), 10) + # We should have been able to get break ID's and locations for all the + # breakpoints that we originally hit, but some won't be around anymore: + for idx in range(0, 5): + bkpt_id = stop_data[idx * 2] + print(f"{idx}: {bkpt_id}") + self.assertIn(bkpt_id, bkpt_numbers, "Found breakpoints are right") + loc_id = stop_data[idx * 2 + 1] + self.assertEqual(loc_id, 1, "All breakpoints have one location") + bkpt = target.FindBreakpointByID(bkpt_id) + if bkpt_id in deleted_ids: + # Looking these up should be an error: + self.assertFalse(bkpt.IsValid(), "Deleted breakpoints are deleted") + else: + self.assertTrue(bkpt.IsValid(), "The rest are still valid") diff --git a/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/main.c b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/main.c new file mode 100644 index 0000000000000..2ffb897b2f92d --- /dev/null +++ b/lldb/test/API/functionalities/breakpoint/callback_deletes_breakpoints/main.c @@ -0,0 +1,12 @@ +#include + +int do_something(int input) { + return input % 5; // Deletable location +} + +int main() { + printf("Set a breakpoint here.\n"); + do_something(100); + do_something(200); + return 0; +}