Skip to content

Commit 769a905

Browse files
authored
TableGen: Emit statically generated hash table for runtime libcalls (#150192)
a961210 reverted a change to use a binary search on the string name table because it was too slow. This replaces it with a static string hash table based on the known set of libcall names. Microbenchmarking shows this is similarly fast to using DenseMap. It's possibly slightly slower than using StringSet, though these aren't an exact comparison. This also saves on the one-time construction of the map, so it could be better in practice. This search isn't a simple set check, since it finds the range of possible matches with the same name. There's also an additional check for whether the current target supports the name. The runtime-constructed set doesn't require this, since it only adds the symbols live for the target. Followed the algorithm from this post: http://0x80.pl/notesen/2023-04-30-lookup-in-strings.html I'm also thinking the 2 special-case global symbols should just be added to RuntimeLibcalls. There are also other global references emitted in the backend that aren't tracked; we probably should just use this as a centralized database for all compiler-selected symbols.
1 parent 07d3a73 commit 769a905

File tree

9 files changed

+492
-82
lines changed

9 files changed

+492
-82
lines changed

llvm/benchmarks/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,20 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
1111
add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
1212
add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED)
1313

14+
# Extract the list of symbols in a random utility as sample data.
15+
set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt")
16+
set(SYMBOL_TEST_DATA_SOURCE_BINARY $<TARGET_FILE:llc>)
17+
18+
add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE}
19+
COMMAND $<TARGET_FILE:llvm-nm> --no-demangle --no-sort
20+
--format=just-symbols
21+
${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE}
22+
DEPENDS "$<TARGET_FILE:llvm-nm>" "$<TARGET_FILE:llc>")
23+
24+
add_custom_target(generate-runtime-libcalls-sample-symbol-list
25+
DEPENDS ${SYMBOL_TEST_DATA_FILE})
26+
add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED)
27+
28+
add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list)
29+
target_compile_definitions(RuntimeLibcallsBench PRIVATE
30+
-DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}")

llvm/benchmarks/RuntimeLibcalls.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/IR/RuntimeLibcalls.h"
10+
#include "benchmark/benchmark.h"
11+
#include "llvm/IR/DataLayout.h"
12+
#include "llvm/Support/Error.h"
13+
#include "llvm/Support/LineIterator.h"
14+
#include "llvm/Support/MemoryBuffer.h"
15+
#include "llvm/TargetParser/Triple.h"
16+
#include <random>
17+
#include <string>
18+
using namespace llvm;
19+
20+
static constexpr unsigned MaxFuncNameSize = 53;
21+
22+
static std::vector<StringRef> getLibcallNameStringRefs() {
23+
std::vector<StringRef> Names(RTLIB::NumLibcallImpls);
24+
// Keep the strlens on the StringRef construction out of the benchmark loop.
25+
for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) {
26+
const char *Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC);
27+
Names[LC] = StringRef(Name);
28+
}
29+
30+
return Names;
31+
}
32+
33+
static std::vector<std::string> getRandomFuncNames() {
34+
std::mt19937_64 Rng;
35+
std::uniform_int_distribution<> StringLengthDistribution(1, MaxFuncNameSize);
36+
std::uniform_int_distribution<> CharDistribution(1, 255);
37+
int NumTestFuncs = 1 << 10;
38+
std::vector<std::string> TestFuncNames(NumTestFuncs);
39+
40+
for (std::string &TestFuncName : TestFuncNames) {
41+
for (int I = 0, E = StringLengthDistribution(Rng); I != E; ++I)
42+
TestFuncName += static_cast<char>(CharDistribution(Rng));
43+
}
44+
45+
return TestFuncNames;
46+
}
47+
48+
/// Read one symbol name per non-blank line from \p InputFile, stripping the
/// host's global symbol prefix from the front of each name when present.
/// Reports a fatal usage error if the file cannot be opened.
static std::vector<std::string> readSymbolsFromFile(StringRef InputFile) {
  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true);
  if (!BufOrError)
    reportFatalUsageError("failed to open \'" + Twine(InputFile) +
                          "\': " + BufOrError.getError().message());

  // llvm-nm appears to have no flag to drop the global symbol prefix, so
  // hackily derive it from a minimal data layout for the host triple.
  llvm::Triple HostTriple(LLVM_HOST_TRIPLE);
  std::string LayoutString = "e";
  LayoutString += DataLayout::getManglingComponent(HostTriple);
  DataLayout HostLayout(LayoutString);
  char GlobalPrefix = HostLayout.getGlobalPrefix();
  const StringRef PrefixRef(&GlobalPrefix, 1);

  std::vector<std::string> Symbols;
  line_iterator Cursor(**BufOrError, /*SkipBlanks=*/true);
  while (!Cursor.is_at_eof()) {
    StringRef Symbol = *Cursor;
    Symbol.consume_front(PrefixRef);
    Symbols.push_back(Symbol.str());
    ++Cursor;
  }
  return Symbols;
}
74+
75+
/// Benchmark lookupLibcallImplName when every queried name is a known libcall
/// implementation name.
static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) {
  // Built once, outside the timed loop.
  std::vector<StringRef> KnownNames = getLibcallNameStringRefs();

  for (auto _ : State) {
    for (StringRef Known : KnownNames)
      benchmark::DoNotOptimize(
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Known).empty());
  }
}
85+
86+
/// Benchmark lookupLibcallImplName on the pseudo-random names produced by
/// getRandomFuncNames.
static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) {
  // Built once, outside the timed loop.
  std::vector<std::string> RandomNames = getRandomFuncNames();

  for (auto _ : State) {
    for (const std::string &Candidate : RandomNames)
      benchmark::DoNotOptimize(
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(
              StringRef(Candidate))
              .empty());
  }
}
97+
98+
// This isn't fully representative, it doesn't include any anonymous functions.
99+
// nm -n --no-demangle --format=just-symbols sample-binary > sample.txt
100+
/// Benchmark lookupLibcallImplName on the symbol names listed in the file
/// named by SYMBOL_TEST_DATA_FILE.
static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) {
  // The file is read once, outside the timed loop.
  std::vector<std::string> SampleNames =
      readSymbolsFromFile(SYMBOL_TEST_DATA_FILE);

  for (auto _ : State) {
    for (const std::string &Sample : SampleNames)
      benchmark::DoNotOptimize(
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Sample))
              .empty());
  }
}
111+
112+
BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls);
113+
BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls);
114+
BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData);
115+
116+
BENCHMARK_MAIN();

llvm/include/llvm/IR/RuntimeLibcalls.h

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,41 @@ struct RuntimeLibcallsInfo {
132132
return ImplToLibcall[Impl];
133133
}
134134

135+
/// Look up \p Name among all recognized runtime library call names,
/// regardless of whether the call is available for the current compilation.
/// The result is the range of every LibcallImpl whose name matches; several
/// implementations may share one name and differ only in how the target
/// context interprets them.
///
/// Generated by tablegen.
LLVM_ABI static inline iota_range<RTLIB::LibcallImpl>
lookupLibcallImplName(StringRef Name) {
  // The early exit on the string name is inlined here because it appears to
  // be worthwhile when querying a real set of symbols.
#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
#include "llvm/IR/RuntimeLibcalls.inc"
#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
}
150+
135151
/// Check if this is valid libcall for the current module, otherwise
136152
/// RTLIB::Unsupported.
137-
LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const;
153+
LLVM_ABI RTLIB::LibcallImpl
getSupportedLibcallImpl(StringRef FuncName) const {
  // Several implementations can share a name; return the first one whose
  // libcall has a non-Unsupported entry in LibcallImpls.
  for (RTLIB::LibcallImpl Candidate : lookupLibcallImplName(FuncName)) {
    // FIXME: This should not depend on looking up ImplToLibcall, only the
    // list of libcalls for the module.
    const RTLIB::LibcallImpl Selected = LibcallImpls[ImplToLibcall[Candidate]];
    if (Selected == RTLIB::Unsupported)
      continue;
    return Selected;
  }

  return RTLIB::Unsupported;
}
138165

139166
private:
167+
LLVM_ABI static iota_range<RTLIB::LibcallImpl>
168+
lookupLibcallImplNameImpl(StringRef Name);
169+
140170
/// Stores the implementation choice for each libcall.
141171
RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = {
142172
RTLIB::Unsupported};
@@ -157,13 +187,11 @@ struct RuntimeLibcallsInfo {
157187
/// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
158188
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];
159189

160-
/// Check if a function name is a recognized runtime call of any kind. This
161-
/// does not consider if this call is available for any current compilation,
162-
/// just that it is a known call somewhere. This returns the set of all
163-
/// LibcallImpls which match the name; multiple implementations with the same
164-
/// name may exist but differ in interpretation based on the target context.
165-
LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator>
166-
getRecognizedLibcallImpls(StringRef FuncName);
190+
/// Utility function for the tablegen-generated lookup function. Return a range of
191+
/// enum values that apply for the function name at \p NameOffsetEntry with
192+
/// the value \p StrOffset.
193+
static inline iota_range<RTLIB::LibcallImpl>
194+
libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset);
167195

168196
static bool darwinHasSinCosStret(const Triple &TT) {
169197
if (!TT.isOSDarwin())

llvm/lib/IR/RuntimeLibcalls.cpp

Lines changed: 18 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "llvm/IR/RuntimeLibcalls.h"
1010
#include "llvm/ADT/StringTable.h"
1111
#include "llvm/Support/Debug.h"
12+
#include "llvm/Support/xxhash.h"
1213
#include "llvm/TargetParser/ARMTargetParser.h"
1314

1415
#define DEBUG_TYPE "runtime-libcalls-info"
@@ -18,9 +19,11 @@ using namespace RTLIB;
1819

1920
#define GET_INIT_RUNTIME_LIBCALL_NAMES
2021
#define GET_SET_TARGET_RUNTIME_LIBCALL_SETS
22+
#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
2123
#include "llvm/IR/RuntimeLibcalls.inc"
2224
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
2325
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
26+
#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
2427

2528
/// Set default libcall names. If a target wants to opt-out of a libcall it
2629
/// should be placed here.
@@ -58,49 +61,23 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
5861
}
5962
}
6063

61-
RTLIB::LibcallImpl
62-
RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const {
63-
const ArrayRef<uint16_t> RuntimeLibcallNameOffsets(
64-
RuntimeLibcallNameOffsetTable);
65-
66-
iterator_range<ArrayRef<uint16_t>::const_iterator> Range =
67-
getRecognizedLibcallImpls(FuncName);
68-
69-
for (auto I = Range.begin(); I != Range.end(); ++I) {
70-
RTLIB::LibcallImpl Impl =
71-
static_cast<RTLIB::LibcallImpl>(I - RuntimeLibcallNameOffsets.begin());
72-
73-
// FIXME: This should not depend on looking up ImplToLibcall, only the list
74-
// of libcalls for the module.
75-
RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]];
76-
if (Recognized != RTLIB::Unsupported)
77-
return Recognized;
64+
LLVM_ATTRIBUTE_ALWAYS_INLINE
65+
iota_range<RTLIB::LibcallImpl>
66+
RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry,
67+
uint16_t StrOffset) {
68+
int NumAliases = 1;
69+
for (uint16_t Entry : ArrayRef(RuntimeLibcallNameOffsetTable)
70+
.drop_front(NameOffsetEntry + 1)) {
71+
if (Entry != StrOffset)
72+
break;
73+
++NumAliases;
7874
}
7975

80-
return RTLIB::Unsupported;
81-
}
82-
83-
iterator_range<ArrayRef<uint16_t>::const_iterator>
84-
RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) {
85-
StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName);
86-
if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName)
87-
return iterator_range(ArrayRef<uint16_t>());
88-
89-
uint16_t IndexVal = It.offset().value();
90-
const ArrayRef<uint16_t> TableRef(RuntimeLibcallNameOffsetTable);
91-
92-
ArrayRef<uint16_t>::const_iterator E = TableRef.end();
93-
ArrayRef<uint16_t>::const_iterator EntriesBegin =
94-
std::lower_bound(TableRef.begin(), E, IndexVal);
95-
ArrayRef<uint16_t>::const_iterator EntriesEnd = EntriesBegin;
96-
97-
while (EntriesEnd != E && *EntriesEnd == IndexVal)
98-
++EntriesEnd;
99-
100-
assert(EntriesBegin != E &&
101-
"libcall found in name table but not offset table");
102-
103-
return make_range(EntriesBegin, EntriesEnd);
76+
RTLIB::LibcallImpl ImplStart = static_cast<RTLIB::LibcallImpl>(
77+
&RuntimeLibcallNameOffsetTable[NameOffsetEntry] -
78+
&RuntimeLibcallNameOffsetTable[0]);
79+
return enum_seq(ImplStart,
80+
static_cast<RTLIB::LibcallImpl>(ImplStart + NumAliases));
10481
}
10582

10683
bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) {

llvm/lib/Object/IRSymtab.cpp

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,18 @@ static cl::opt<bool> DisableBitcodeVersionUpgrade(
4646
"disable-bitcode-version-upgrade", cl::Hidden,
4747
cl::desc("Disable automatic bitcode upgrade for version mismatch"));
4848

49-
static const char *PreservedSymbols[] = {
49+
static constexpr StringLiteral PreservedSymbols[] = {
5050
// These are global variables, so put them here instead of in
5151
// RuntimeLibcalls.td.
5252
// TODO: Are there similar such variables?
5353
"__ssp_canary_word",
5454
"__stack_chk_guard",
5555
};
5656

57+
/// Return true if \p Name is one of the global variable names listed in
/// PreservedSymbols.
static bool isPreservedGlobalVarName(StringRef Name) {
  // Walk the whole table instead of naming individual indices, so an entry
  // appended to PreservedSymbols is picked up here automatically.
  for (StringRef Preserved : PreservedSymbols) {
    if (Preserved == Name)
      return true;
  }
  return false;
}
60+
5761
namespace {
5862

5963
const char *getExpectedProducerName() {
@@ -81,12 +85,16 @@ struct Builder {
8185
// The StringTableBuilder does not create a copy of any strings added to it,
8286
// so this provides somewhere to store any strings that we create.
8387
Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
84-
BumpPtrAllocator &Alloc)
85-
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
88+
BumpPtrAllocator &Alloc, const Triple &TT)
89+
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT),
90+
Libcalls(TT) {}
8691

8792
DenseMap<const Comdat *, int> ComdatMap;
8893
Mangler Mang;
89-
Triple TT;
94+
const Triple &TT;
95+
96+
// FIXME: This shouldn't be here.
97+
RTLIB::RuntimeLibcallsInfo Libcalls;
9098

9199
std::vector<storage::Comdat> Comdats;
92100
std::vector<storage::Module> Mods;
@@ -98,6 +106,10 @@ struct Builder {
98106

99107
std::vector<storage::Str> DependentLibraries;
100108

109+
/// Return true if \p Name is a runtime libcall that Libcalls reports as
/// supported, i.e. getSupportedLibcallImpl does not return
/// RTLIB::Unsupported. Const-qualified because the query does not modify the
/// builder.
bool isPreservedLibFuncName(StringRef Name) const {
  return Libcalls.getSupportedLibcallImpl(Name) != RTLIB::Unsupported;
}
112+
101113
void setStr(storage::Str &S, StringRef Value) {
102114
S.Offset = StrtabBuilder.add(Value);
103115
S.Size = Value.size();
@@ -213,19 +225,6 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
213225
return P.first->second;
214226
}
215227

216-
static StringSet<> buildPreservedSymbolsSet(const Triple &TT) {
217-
StringSet<> PreservedSymbolSet;
218-
PreservedSymbolSet.insert(std::begin(PreservedSymbols),
219-
std::end(PreservedSymbols));
220-
// FIXME: Do we need to pass in ABI fields from TargetOptions?
221-
RTLIB::RuntimeLibcallsInfo Libcalls(TT);
222-
for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
223-
if (Impl != RTLIB::Unsupported)
224-
PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl));
225-
}
226-
return PreservedSymbolSet;
227-
}
228-
229228
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
230229
const SmallPtrSet<GlobalValue *, 4> &Used,
231230
ModuleSymbolTable::Symbol Msym) {
@@ -279,13 +278,11 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
279278
return Error::success();
280279
}
281280

282-
setStr(Sym.IRName, GV->getName());
283-
284-
static const StringSet<> PreservedSymbolsSet =
285-
buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
286-
bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
281+
StringRef GVName = GV->getName();
282+
setStr(Sym.IRName, GVName);
287283

288-
if (Used.count(GV) || IsPreservedSymbol)
284+
if (Used.count(GV) || isPreservedLibFuncName(GVName) ||
285+
isPreservedGlobalVarName(GVName))
289286
Sym.Flags |= 1 << storage::Symbol::FB_used;
290287
if (GV->isThreadLocal())
291288
Sym.Flags |= 1 << storage::Symbol::FB_tls;
@@ -352,7 +349,6 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
352349
setStr(Hdr.Producer, kExpectedProducerName);
353350
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str());
354351
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
355-
TT = IRMods[0]->getTargetTriple();
356352

357353
for (auto *M : IRMods)
358354
if (Error Err = addModule(M))
@@ -378,7 +374,8 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
378374
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
379375
StringTableBuilder &StrtabBuilder,
380376
BumpPtrAllocator &Alloc) {
381-
return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
377+
const Triple &TT = Mods[0]->getTargetTriple();
378+
return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods);
382379
}
383380

384381
// Upgrade a vector of bitcode modules created by an old version of LLVM by

0 commit comments

Comments
 (0)