Skip to content

Commit 6e2b170

Browse files
committed
RuntimeLibcalls: Fix building hash table with duplicate entries
We were sizing the table appropriately for the number of LibcallImpls, but many of those have identical names, which pushed up the collision count unnecessarily. Deduplicating the names before sizing the table ends up decreasing the table size slightly and makes lookups a bit faster: BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and BM_LookupRuntimeLibcallByNameSampleData by ~5%. As a secondary change, align the table size up to the next power of 2. This makes the table larger than before, but improves the sample data benchmark by an additional 5%.
1 parent f0c374c commit 6e2b170

File tree

2 files changed

+35
-45
lines changed

2 files changed

+35
-45
lines changed

llvm/test/TableGen/RuntimeLibcallEmitter.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi
176176

177177
// CHECK: iota_range<RTLIB::LibcallImpl> RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) {
178178
// CHECK: static constexpr uint16_t HashTableNameToEnum[16] = {
179-
// CHECK: 2, // 0x000000705301b8, ___memset
179+
// CHECK: 2,
180180
// CHECK: 0,
181-
// CHECK: 6, // 0x0000001417a2af, calloc
181+
// CHECK: 6,
182182
// CHECK: 0,
183183
// CHECK: };
184184

llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp

Lines changed: 33 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -287,13 +287,6 @@ class RuntimeLibcallEmitter {
287287
void run(raw_ostream &OS);
288288
};
289289

290-
/// Helper struct for the name hash table.
291-
struct LookupEntry {
292-
StringRef FuncName;
293-
uint64_t Hash = 0;
294-
unsigned TableValue = 0;
295-
};
296-
297290
} // End anonymous namespace.
298291

299292
void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
@@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) {
339332
/// Return the table size, maximum number of collisions for the set of hashes
340333
static std::pair<int, int>
341334
computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
342-
const int SizeOverhead = 10;
343-
const int NumHashes = Hashes.size();
335+
// Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
336+
const int SizeOverhead = 4;
344337

345338
// Index derived from hash -> number of collisions.
346339
DenseMap<uint64_t, int> Table;
347340

341+
unsigned NumHashes = Hashes.size();
342+
348343
for (int MaxCollisions = 1;; ++MaxCollisions) {
349-
for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
344+
for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * NumHashes;
345+
N <<= 1) {
350346
Table.clear();
351347

352348
bool NeedResize = false;
@@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
365361
}
366362
}
367363

368-
static std::vector<LookupEntry>
364+
static std::vector<unsigned>
369365
constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
370-
ArrayRef<uint64_t> Hashes, int Size, int Collisions,
371-
StringToOffsetTable &OffsetTable) {
372-
DenseSet<StringRef> Seen;
373-
std::vector<LookupEntry> Lookup(Size * Collisions);
374-
375-
for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
376-
StringRef ImplName = LibCallImpl.getLibcallFuncName();
377-
378-
// We do not want to add repeated entries for cases with the same name, only
379-
// an entry for the first, with the name collision enum values immediately
380-
// following.
381-
if (!Seen.insert(ImplName).second)
382-
continue;
383-
384-
uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
366+
ArrayRef<uint64_t> Hashes,
367+
ArrayRef<unsigned> TableValues, int Size,
368+
int Collisions, StringToOffsetTable &OffsetTable) {
369+
std::vector<unsigned> Lookup(Size * Collisions);
385370

371+
for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
386372
uint64_t Idx = (HashValue % static_cast<uint64_t>(Size)) *
387373
static_cast<uint64_t>(Collisions);
388374

389375
bool Found = false;
390376
for (int J = 0; J < Collisions; ++J) {
391-
LookupEntry &Entry = Lookup[Idx + J];
392-
if (Entry.TableValue == 0) {
393-
Entry.FuncName = ImplName;
394-
Entry.TableValue = LibCallImpl.getEnumVal();
395-
Entry.Hash = HashValue;
377+
unsigned &Entry = Lookup[Idx + J];
378+
if (Entry == 0) {
379+
Entry = TableValue;
396380
Found = true;
397381
break;
398382
}
399383
}
400384

401385
if (!Found)
402-
reportFatalInternalError("failure to hash " + ImplName);
386+
reportFatalInternalError("failure to hash");
403387
}
404388

405389
return Lookup;
@@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
409393
void RuntimeLibcallEmitter::emitNameMatchHashTable(
410394
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
411395
std::vector<uint64_t> Hashes(RuntimeLibcallImplDefList.size());
396+
std::vector<unsigned> TableValues(RuntimeLibcallImplDefList.size());
397+
DenseSet<StringRef> SeenFuncNames;
412398

413399
size_t MaxFuncNameSize = 0;
414400
size_t Index = 0;
401+
415402
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
416403
StringRef ImplName = LibCallImpl.getLibcallFuncName();
417-
MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
418-
Hashes[Index++] = hash(ImplName);
404+
if (SeenFuncNames.insert(ImplName).second) {
405+
MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
406+
TableValues[Index] = LibCallImpl.getEnumVal();
407+
Hashes[Index++] = hash(ImplName);
408+
}
419409
}
420410

411+
// Trim excess elements from non-unique entries.
412+
Hashes.resize(SeenFuncNames.size());
413+
TableValues.resize(SeenFuncNames.size());
414+
421415
LLVM_DEBUG({
422416
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
423417
StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +441,9 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
447441
"#endif\n";
448442

449443
auto [Size, Collisions] = computePerfectHashParameters(Hashes);
450-
std::vector<LookupEntry> Lookup = constructPerfectHashTable(
451-
RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable);
444+
std::vector<unsigned> Lookup =
445+
constructPerfectHashTable(RuntimeLibcallImplDefList, Hashes, TableValues,
446+
Size, Collisions, OffsetTable);
452447

453448
LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = "
454449
<< Size << ", maximum collisions = " << Collisions << '\n');
@@ -463,13 +458,8 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
463458
OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size()
464459
<< "] = {\n";
465460

466-
for (auto [FuncName, Hash, TableVal] : Lookup) {
467-
OS << " " << TableVal << ',';
468-
if (TableVal != 0)
469-
OS << " // " << format_hex(Hash, 16) << ", " << FuncName;
470-
471-
OS << '\n';
472-
}
461+
for (unsigned TableVal : Lookup)
462+
OS << " " << TableVal << ",\n";
473463

474464
OS << " };\n\n";
475465

0 commit comments

Comments
 (0)