Skip to content

Commit 5819523

Browse files
committed
TableGen: Emit perfect hash function for runtime libcalls
a961210 reverted a change to use a binary search on the string name table because it was too slow. This replaces it with a static string hash table based on the known set of libcall names. Microbenchmarking shows this is similarly fast to using DenseMap. It's possibly slightly slower than using StringSet, though these aren't an exact comparison. This also saves on the one-time construction of the map, so it could be better in practice. This search isn't a simple set check, since it finds the range of possible matches with the same name. There's also an additional check for whether the current target supports the name. The runtime-constructed set doesn't require this, since it only adds the symbols live for the target. The implementation follows the algorithm from this post: http://0x80.pl/notesen/2023-04-30-lookup-in-strings.html I'm also thinking the 2 special-case global symbols should just be added to RuntimeLibcalls. There are also other global references emitted in the backend that aren't tracked; we probably should just use this as a centralized database for all compiler-selected symbols.
1 parent 22c9236 commit 5819523

File tree

9 files changed

+493
-81
lines changed

9 files changed

+493
-81
lines changed

llvm/benchmarks/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,20 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
1111
add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
1212
add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED)
1313

14+
# Extract the list of symbols from an arbitrary utility (llc) to use as sample data.
15+
set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt")
16+
set(SYMBOL_TEST_DATA_SOURCE_BINARY $<TARGET_FILE:llc>)
17+
18+
add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE}
19+
COMMAND $<TARGET_FILE:llvm-nm> --no-demangle --no-sort
20+
--format=just-symbols
21+
${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE}
22+
DEPENDS "$<TARGET_FILE:llvm-nm>" "$<TARGET_FILE:llc>")
23+
24+
add_custom_target(generate-runtime-libcalls-sample-symbol-list
25+
DEPENDS ${SYMBOL_TEST_DATA_FILE})
26+
add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED)
27+
28+
add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list)
29+
target_compile_definitions(RuntimeLibcallsBench PRIVATE
30+
-DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}")

llvm/benchmarks/RuntimeLibcalls.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/IR/RuntimeLibcalls.h"
10+
#include "benchmark/benchmark.h"
11+
#include "llvm/IR/DataLayout.h"
12+
#include "llvm/Support/Error.h"
13+
#include "llvm/Support/LineIterator.h"
14+
#include "llvm/Support/MemoryBuffer.h"
15+
#include "llvm/TargetParser/Triple.h"
16+
#include <random>
17+
#include <string>
18+
using namespace llvm;
19+
20+
static constexpr unsigned MaxFuncNameSize = 53;
21+
22+
static std::vector<StringRef> getLibcallNameStringRefs() {
23+
std::vector<StringRef> Names(RTLIB::NumLibcallImpls);
24+
// Keep the strlens on the StringRef construction out of the benchmark loop.
25+
for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) {
26+
const char *Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC);
27+
Names[LC] = StringRef(Name);
28+
}
29+
30+
return Names;
31+
}
32+
33+
static std::vector<std::string> getRandomFuncNames() {
34+
std::mt19937_64 Rng;
35+
std::uniform_int_distribution<> StringLengthDistribution(1, MaxFuncNameSize);
36+
std::uniform_int_distribution<> CharDistribution(1, 255);
37+
int NumTestFuncs = 1 << 10;
38+
std::vector<std::string> TestFuncNames(NumTestFuncs);
39+
40+
for (std::string &TestFuncName : TestFuncNames) {
41+
for (int I = 0, E = StringLengthDistribution(Rng); I != E; ++I)
42+
TestFuncName += static_cast<char>(CharDistribution(Rng));
43+
}
44+
45+
return TestFuncNames;
46+
}
47+
48+
/// Read one symbol name per non-blank line of \p InputFile, removing the
/// host's global symbol prefix from the front of each name when the host has
/// one. Reports a fatal usage error if the file cannot be opened.
static std::vector<std::string> readSymbolsFromFile(StringRef InputFile) {
  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true);
  if (!BufOrError) {
    reportFatalUsageError("failed to open \'" + Twine(InputFile) +
                          "\': " + BufOrError.getError().message());
  }

  // Hackily figure out if there's a prefix on the symbol names - llvm-nm
  // appears to not have a flag to skip this. A throwaway data layout built
  // from the host triple's mangling component tells us the prefix character.
  llvm::Triple HostTriple(LLVM_HOST_TRIPLE);
  std::string DummyDatalayout = "e";
  DummyDatalayout += DataLayout::getManglingComponent(HostTriple);

  DataLayout DL(DummyDatalayout);
  const char GlobalPrefix = DL.getGlobalPrefix();

  // getGlobalPrefix() yields '\0' for mangling modes without a prefix. Only
  // strip when there is a real prefix character, instead of trying to consume
  // a one-character string that contains a NUL byte.
  const bool HasGlobalPrefix = GlobalPrefix != '\0';

  std::vector<std::string> Lines;
  for (line_iterator LineIt(**BufOrError, /*SkipBlanks=*/true);
       !LineIt.is_at_eof(); ++LineIt) {
    StringRef SymbolName = *LineIt;
    if (HasGlobalPrefix)
      SymbolName.consume_front(StringRef(&GlobalPrefix, 1));

    Lines.push_back(SymbolName.str());
  }
  return Lines;
}
74+
75+
/// Benchmark looking up every known libcall implementation name, i.e. the
/// case where each query is a name taken from the libcall table itself.
static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) {
  const std::vector<StringRef> KnownNames = getLibcallNameStringRefs();

  for (auto _ : State) {
    for (StringRef Known : KnownNames) {
      // Feed the result to DoNotOptimize so the lookup is not eliminated.
      const bool NoMatch =
          RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Known).empty();
      benchmark::DoNotOptimize(NoMatch);
    }
  }
}
85+
86+
/// Benchmark looking up pseudo-randomly generated strings produced by
/// getRandomFuncNames().
static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) {
  const std::vector<std::string> RandomNames = getRandomFuncNames();

  for (auto _ : State) {
    for (const std::string &Candidate : RandomNames) {
      // Feed the result to DoNotOptimize so the lookup is not eliminated.
      const bool NoMatch = RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(
                               StringRef(Candidate))
                               .empty();
      benchmark::DoNotOptimize(NoMatch);
    }
  }
}
97+
98+
// This isn't fully representative, it doesn't include any anonymous functions.
99+
// nm -n --no-demangle --format=just-symbols sample-binary > sample.txt
100+
static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) {
101+
std::vector<std::string> TestFuncNames =
102+
readSymbolsFromFile(SYMBOL_TEST_DATA_FILE);
103+
for (auto _ : State) {
104+
for (const std::string &Name : TestFuncNames) {
105+
benchmark::DoNotOptimize(
106+
RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name))
107+
.empty());
108+
}
109+
}
110+
}
111+
112+
BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls);
113+
BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls);
114+
BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData);
115+
116+
BENCHMARK_MAIN();

llvm/include/llvm/IR/RuntimeLibcalls.h

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,41 @@ struct RuntimeLibcallsInfo {
132132
return ImplToLibcall[Impl];
133133
}
134134

135+
/// Find every LibcallImpl whose symbol name is \p Name.
///
/// This answers "is this a known runtime call anywhere?"; it does not take
/// into account whether the call is available for the current compilation.
/// More than one implementation can share a name and differ only in how the
/// target context interprets it, so the result is a range of enum values
/// rather than a single entry.
///
/// The function body is generated by tablegen.
LLVM_ABI static inline iota_range<RTLIB::LibcallImpl>
lookupLibcallImplName(StringRef Name) {
  // The generated body is included here, in the header, because inlining the
  // early exit on the string name appears to be worthwhile when querying a
  // real set of symbols.
#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
#include "llvm/IR/RuntimeLibcalls.inc"
#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY
}
150+
135151
/// Check if this is valid libcall for the current module, otherwise
136152
/// RTLIB::Unsupported.
137-
RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const;
153+
LLVM_ABI RTLIB::LibcallImpl
getSupportedLibcallImpl(StringRef FuncName) const {
  // Walk every implementation that shares this name and stop at the first
  // one whose libcall kind has an implementation selected in LibcallImpls.
  for (RTLIB::LibcallImpl Candidate : lookupLibcallImplName(FuncName)) {
    // FIXME: This should not depend on looking up ImplToLibcall, only the
    // list of libcalls for the module.
    const RTLIB::Libcall Kind = ImplToLibcall[Candidate];
    const RTLIB::LibcallImpl Selected = LibcallImpls[Kind];
    if (Selected == RTLIB::Unsupported)
      continue;

    // NOTE(review): this returns the implementation recorded for the libcall
    // kind, which is not checked against Candidate - confirm that is intended.
    return Selected;
  }

  return RTLIB::Unsupported;
}
138165

139166
private:
167+
LLVM_ABI static iota_range<RTLIB::LibcallImpl>
168+
lookupLibcallImplNameImpl(StringRef Name);
169+
140170
static const RTLIB::LibcallImpl
141171
DefaultLibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1];
142172

@@ -160,13 +190,11 @@ struct RuntimeLibcallsInfo {
160190
/// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
161191
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];
162192

163-
/// Check if a function name is a recognized runtime call of any kind. This
164-
/// does not consider if this call is available for any current compilation,
165-
/// just that it is a known call somewhere. This returns the set of all
166-
/// LibcallImpls which match the name; multiple implementations with the same
167-
/// name may exist but differ in interpretation based on the target context.
168-
LLVM_ABI static iterator_range<ArrayRef<uint16_t>::const_iterator>
169-
getRecognizedLibcallImpls(StringRef FuncName);
193+
/// Utility function for tablegenerated lookup function. Return a range of
194+
/// enum values that apply for the function name at \p NameOffsetEntry with
195+
/// the value \p StrOffset.
196+
static inline iota_range<RTLIB::LibcallImpl>
197+
libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset);
170198

171199
static bool darwinHasSinCosStret(const Triple &TT) {
172200
if (!TT.isOSDarwin())

llvm/lib/IR/RuntimeLibcalls.cpp

Lines changed: 17 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@
88

99
#include "llvm/IR/RuntimeLibcalls.h"
1010
#include "llvm/ADT/StringTable.h"
11+
#include "llvm/Support/xxhash.h"
1112

1213
using namespace llvm;
1314
using namespace RTLIB;
1415

1516
#define GET_INIT_RUNTIME_LIBCALL_NAMES
1617
#define GET_SET_TARGET_RUNTIME_LIBCALL_SETS
18+
#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
1719
#include "llvm/IR/RuntimeLibcalls.inc"
1820
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
1921
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
22+
#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
2023

2124
static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
2225
FloatABI::ABIType FloatABIType,
@@ -135,49 +138,22 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
135138
}
136139
}
137140

138-
RTLIB::LibcallImpl
139-
RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const {
140-
const ArrayRef<uint16_t> RuntimeLibcallNameOffsets(
141-
RuntimeLibcallNameOffsetTable);
142-
143-
iterator_range<ArrayRef<uint16_t>::const_iterator> Range =
144-
getRecognizedLibcallImpls(FuncName);
145-
146-
for (auto I = Range.begin(); I != Range.end(); ++I) {
147-
RTLIB::LibcallImpl Impl =
148-
static_cast<RTLIB::LibcallImpl>(I - RuntimeLibcallNameOffsets.begin());
149-
150-
// FIXME: This should not depend on looking up ImplToLibcall, only the list
151-
// of libcalls for the module.
152-
RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]];
153-
if (Recognized != RTLIB::Unsupported)
154-
return Recognized;
141+
LLVM_ATTRIBUTE_ALWAYS_INLINE
142+
iota_range<RTLIB::LibcallImpl>
143+
RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry,
144+
uint16_t StrOffset) {
145+
int NumAliases = 1;
146+
for (int E = std::size(RuntimeLibcallNameOffsetTable);
147+
NameOffsetEntry + NumAliases != E &&
148+
RuntimeLibcallNameOffsetTable[NameOffsetEntry + NumAliases] == StrOffset;
149+
++NumAliases) {
155150
}
156151

157-
return RTLIB::Unsupported;
158-
}
159-
160-
iterator_range<ArrayRef<uint16_t>::const_iterator>
161-
RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) {
162-
StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName);
163-
if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName)
164-
return iterator_range(ArrayRef<uint16_t>());
165-
166-
uint16_t IndexVal = It.offset().value();
167-
const ArrayRef<uint16_t> TableRef(RuntimeLibcallNameOffsetTable);
168-
169-
ArrayRef<uint16_t>::const_iterator E = TableRef.end();
170-
ArrayRef<uint16_t>::const_iterator EntriesBegin =
171-
std::lower_bound(TableRef.begin(), E, IndexVal);
172-
ArrayRef<uint16_t>::const_iterator EntriesEnd = EntriesBegin;
173-
174-
while (EntriesEnd != E && *EntriesEnd == IndexVal)
175-
++EntriesEnd;
176-
177-
assert(EntriesBegin != E &&
178-
"libcall found in name table but not offset table");
179-
180-
return make_range(EntriesBegin, EntriesEnd);
152+
RTLIB::LibcallImpl ImplStart = static_cast<RTLIB::LibcallImpl>(
153+
&RuntimeLibcallNameOffsetTable[NameOffsetEntry] -
154+
&RuntimeLibcallNameOffsetTable[0]);
155+
return enum_seq(ImplStart,
156+
static_cast<RTLIB::LibcallImpl>(ImplStart + NumAliases));
181157
}
182158

183159
bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) {

llvm/lib/Object/IRSymtab.cpp

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ static const char *PreservedSymbols[] = {
5454
"__stack_chk_guard",
5555
};
5656

57+
/// Return true if \p Name matches one of the global names listed in
/// PreservedSymbols.
static bool isPreservedGlobalVarName(StringRef Name) {
  // Scan the whole table instead of hard-coding indices 0 and 1, so entries
  // added to PreservedSymbols later are honored automatically.
  for (const char *Preserved : PreservedSymbols)
    if (StringRef(Preserved) == Name)
      return true;
  return false;
}
61+
5762
namespace {
5863

5964
const char *getExpectedProducerName() {
@@ -81,12 +86,16 @@ struct Builder {
8186
// The StringTableBuilder does not create a copy of any strings added to it,
8287
// so this provides somewhere to store any strings that we create.
8388
Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
84-
BumpPtrAllocator &Alloc)
85-
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
89+
BumpPtrAllocator &Alloc, const Triple &TT)
90+
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT),
91+
Libcalls(TT) {}
8692

8793
DenseMap<const Comdat *, int> ComdatMap;
8894
Mangler Mang;
89-
Triple TT;
95+
const Triple &TT;
96+
97+
// FIXME: This shouldn't be here.
98+
RTLIB::RuntimeLibcallsInfo Libcalls;
9099

91100
std::vector<storage::Comdat> Comdats;
92101
std::vector<storage::Module> Mods;
@@ -98,6 +107,10 @@ struct Builder {
98107

99108
std::vector<storage::Str> DependentLibraries;
100109

110+
/// Return true if \p Name resolves to a supported runtime libcall
/// implementation according to Libcalls.
bool isPreservedLibFuncName(StringRef Name) {
  const RTLIB::LibcallImpl Impl = Libcalls.getSupportedLibcallImpl(Name);
  return Impl != RTLIB::Unsupported;
}
113+
101114
void setStr(storage::Str &S, StringRef Value) {
102115
S.Offset = StrtabBuilder.add(Value);
103116
S.Size = Value.size();
@@ -213,19 +226,6 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
213226
return P.first->second;
214227
}
215228

216-
static StringSet<> buildPreservedSymbolsSet(const Triple &TT) {
217-
StringSet<> PreservedSymbolSet;
218-
PreservedSymbolSet.insert(std::begin(PreservedSymbols),
219-
std::end(PreservedSymbols));
220-
// FIXME: Do we need to pass in ABI fields from TargetOptions?
221-
RTLIB::RuntimeLibcallsInfo Libcalls(TT);
222-
for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
223-
if (Impl != RTLIB::Unsupported)
224-
PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl));
225-
}
226-
return PreservedSymbolSet;
227-
}
228-
229229
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
230230
const SmallPtrSet<GlobalValue *, 4> &Used,
231231
ModuleSymbolTable::Symbol Msym) {
@@ -279,13 +279,11 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
279279
return Error::success();
280280
}
281281

282-
setStr(Sym.IRName, GV->getName());
283-
284-
static const StringSet<> PreservedSymbolsSet =
285-
buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
286-
bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
282+
StringRef GVName = GV->getName();
283+
setStr(Sym.IRName, GVName);
287284

288-
if (Used.count(GV) || IsPreservedSymbol)
285+
if (Used.count(GV) || isPreservedLibFuncName(GVName) ||
286+
isPreservedGlobalVarName(GVName))
289287
Sym.Flags |= 1 << storage::Symbol::FB_used;
290288
if (GV->isThreadLocal())
291289
Sym.Flags |= 1 << storage::Symbol::FB_tls;
@@ -352,7 +350,6 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
352350
setStr(Hdr.Producer, kExpectedProducerName);
353351
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str());
354352
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
355-
TT = IRMods[0]->getTargetTriple();
356353

357354
for (auto *M : IRMods)
358355
if (Error Err = addModule(M))
@@ -378,7 +375,8 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
378375
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
379376
StringTableBuilder &StrtabBuilder,
380377
BumpPtrAllocator &Alloc) {
381-
return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
378+
const Triple &TT = Mods[0]->getTargetTriple();
379+
return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods);
382380
}
383381

384382
// Upgrade a vector of bitcode modules created by an old version of LLVM by

0 commit comments

Comments
 (0)