From 1a7daf824ce690c5c564d952d5075b6d94514a18 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 12 Apr 2022 11:37:42 -0700 Subject: [PATCH] [Bitcode] materialize Functions early when BlockAddress taken IRLinker builds a work list of functions to materialize, then moves them from a source module to a destination module one at a time. This is a problem for blockaddress Constants, since they need not refer to the function they are used in; IPSCCP is quite good at sinking these constants deep into other functions when passed as arguments. This would lead to curious errors during LTO: ld.lld: error: Never resolved function from blockaddress ... based on the ordering of function definitions in IR. The problem was that IRLinker would basically do: for function f in worklist: materialize f splice f from source module to destination module in one pass, with Functions being lazily added to the running worklist. This confuses BitcodeReader, which cannot disambiguate whether a blockaddress is referring to a function which has not yet been parsed ("materialized") or is simply empty because its body was spliced out. This causes BitcodeReader to insert Functions into its BasicBlockFwdRefs list incorrectly, as it will never re-materialize an already materialized (but spliced out) function. Because of the possibility that blockaddress Constants may appear in Functions other than the ones they reference, this patch adds a new bitcode function code FUNC_CODE_BLOCKADDR_USERS that is a simple list of Functions that contain BlockAddress Constants that refer back to this Function, rather then the Function they are scoped in. We then materialize those functions when materializing `f` from the example loop above. This might over-materialize Functions should the user of BitcodeReader ultimately decide not to link those Functions, but we can at least now we can avoid this ordering related issue with blockaddresses. Fixes: https://github.com/llvm/llvm-project/issues/52787 Fixes: https://github.com/ClangBuiltLinux/linux/issues/1215 Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D120781 (cherry picked from commit 23ec5782c3cc0d77b5e44285e5124cd4a3ffeeef) --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 17 +-- llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 1 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 37 ++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 24 +++- llvm/test/Bitcode/blockaddress-users.ll | 38 ++++++ llvm/test/Linker/blockaddress.ll | 125 ++++++++++++++++++++ 6 files changed, 233 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Bitcode/blockaddress-users.ll create mode 100644 llvm/test/Linker/blockaddress.ll diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 6d0f51ce9c6d0..1545a26591b1e 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -582,14 +582,15 @@ enum FunctionCodes { 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. - FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] - FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] - FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, - // fnty, fnid, args...] - FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] - FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, - // operation, align, vol, - // ordering, synchscope] + FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] + FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval] + FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs, + // fnty, fnid, args...] + FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval] + FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val, + // operation, align, vol, + // ordering, synchscope] + FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...] }; enum UseListCodes { diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index ffef352999815..6c5c380b4e63c 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -267,6 +267,7 @@ static Optional GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) + STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS) } case bitc::VALUE_SYMTAB_BLOCK_ID: switch (CodeID) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 720ab560f988c..5cb08bc0458aa 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -544,6 +544,13 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { DenseMap> BasicBlockFwdRefs; std::deque BasicBlockFwdRefQueue; + /// These are Functions that contain BlockAddresses which refer a different + /// Function. When parsing the different Function, queue Functions that refer + /// to the different Function. Those Functions must be materialized in order + /// to resolve their BlockAddress constants before the different Function + /// gets moved into another Module. + std::vector BackwardRefFunctions; + /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the /// instruction number, for a more compact encoding. Some instruction @@ -859,6 +866,11 @@ Error BitcodeReader::materializeForwardReferencedFunctions() { } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); + for (Function *F : BackwardRefFunctions) + if (Error Err = materialize(F)) + return Err; + BackwardRefFunctions.clear(); + // Reset state. WillMaterializeAllForwardRefs = false; return Error::success(); @@ -4091,6 +4103,31 @@ Error BitcodeReader::parseFunctionBody(Function *F) { continue; } + case bitc::FUNC_CODE_BLOCKADDR_USERS: // BLOCKADDR_USERS: [vals...] + // The record should not be emitted if it's an empty list. + if (Record.empty()) + return error("Invalid record"); + // When we have the RARE case of a BlockAddress Constant that is not + // scoped to the Function it refers to, we need to conservatively + // materialize the referred to Function, regardless of whether or not + // that Function will ultimately be linked, otherwise users of + // BitcodeReader might start splicing out Function bodies such that we + // might no longer be able to materialize the BlockAddress since the + // BasicBlock (and entire body of the Function) the BlockAddress refers + // to may have been moved. In the case that the user of BitcodeReader + // decides ultimately not to link the Function body, materializing here + // could be considered wasteful, but it's better than a deserialization + // failure as described. This keeps BitcodeReader unaware of complex + // linkage policy decisions such as those use by LTO, leaving those + // decisions "one layer up." + for (uint64_t ValID : Record) + if (auto *F = dyn_cast(ValueList[ValID])) + BackwardRefFunctions.push_back(F); + else + return error("Invalid record"); + + continue; + case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 4bba0b3566751..cf9bbe2944c07 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -3347,8 +3348,10 @@ void ModuleBitcodeWriter::writeFunction( bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + SmallPtrSet BlockAddressUsers; + // Finally, emit all the instructions, in order. - for (const BasicBlock &BB : F) + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { writeInstruction(I, InstID, Vals); @@ -3380,6 +3383,25 @@ void ModuleBitcodeWriter::writeFunction( LastDL = DL; } + if (BlockAddress *BA = BlockAddress::lookup(&BB)) { + for (User *U : BA->users()) { + if (auto *I = dyn_cast(U)) { + Function *P = I->getParent()->getParent(); + if (P != &F) + BlockAddressUsers.insert(P); + } + } + } + } + + if (!BlockAddressUsers.empty()) { + SmallVector Record; + Record.reserve(BlockAddressUsers.size()); + for (Function *F : BlockAddressUsers) + Record.push_back(VE.getValueID(F)); + Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Record); + } + // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) writeFunctionLevelValueSymbolTable(*Symtab); diff --git a/llvm/test/Bitcode/blockaddress-users.ll b/llvm/test/Bitcode/blockaddress-users.ll new file mode 100644 index 0000000000000..75c2a13d7dfff --- /dev/null +++ b/llvm/test/Bitcode/blockaddress-users.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s +; RUN: llvm-dis %t.bc + +; There's a curious case where blockaddress constants may refer to functions +; outside of the function they're used in. There's a special bitcode function +; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case. + +; The intent of this test is two-fold: +; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn, +; @repro, since @fun and @fun2 both refer to @repro via blockaddress +; constants. +; 2. Ensure we can round-trip serializing+desearlizing such case. + +; CHECK: