From 6ef31ee6c9ea859742eda3108f794b1cf97b3ad2 Mon Sep 17 00:00:00 2001 From: Nicolas De Carli Date: Mon, 9 Jun 2025 14:33:31 -0700 Subject: [PATCH 1/2] Add rapidhash family of functions Summary: We've seen high cpu usage on these two hashing algorithms: std::_Hash_bytes and multifeed/common/Hash.h::hashBytesImpl For the record, std::_Hash_bytes compiles to ~60 instructions on aarch64 and ~100 instructions on AMD64: https://godbolt.org/z/xeoqf1aaE hashBytesImpl compiles to slightly over 100 instructions on aarch64 and slightly over 160 instructions on AMD64: https://godbolt.org/z/bTroqGE7o The diff adds three new hash functions: rapidhash, rapidhashMicro and rapidhashNano RapidhashNano is designed for situations where keeping a small code size is a top priority. Clang-19 compiles it to less than 100 instructions without stack usage, both on x86-64 and aarch64. The fastest for sizes up to 48 bytes, but may be considerably slower for larger inputs. RapidhashMicro is designed for situations where cache misses make a noticeable performance detriment. Clang-19 compiles it to ~140 instructions without stack usage, both on x86-64 and aarch64. Faster for sizes up to 512 bytes, just 15%-20% slower for inputs above 1kb. rapidhash provides formidable speed across all input sizes Clang-19 compiles it to ~185 instructions, both on x86-64 and aarch64. Benchmark results on BGM: P1826606121, and Grace: P1826591223 On AMD64, RapidhashNano should be strictly better than both std::_Hash_bytes and hashBytesImpl On aarch64, std::_Hash_bytes compiles to fewer instructions. RapidhashNano should still be faster in most situations, given its much higher throughput. It should also be strictly better than hashBytesImpl In many situations, RapidhashMicro should be a better choice, due to its higher throughput. This diff allows us to analyze workloads on a case by case basis. rapidhash seems to be the fastest high-quality hash function for aarch64 systems. 
It may still find usage on large-input cases. Folly's benchmark results have been updated to include runs from Bergamo and Neoverse-V2 Differential Revision: D66326393 --- .../src/folly/external/rapidhash/rapidhash.h | 574 ++++++++++++++++++ third-party/folly/src/folly/hash/rapidhash.h | 47 ++ .../src/folly/hash/test/HashBenchmark.cpp | 568 ++++++++++++++--- .../src/folly/hash/test/RapidHashTest.cpp | 176 ++++++ 4 files changed, 1270 insertions(+), 95 deletions(-) create mode 100644 third-party/folly/src/folly/external/rapidhash/rapidhash.h create mode 100644 third-party/folly/src/folly/hash/rapidhash.h create mode 100644 third-party/folly/src/folly/hash/test/RapidHashTest.cpp diff --git a/third-party/folly/src/folly/external/rapidhash/rapidhash.h b/third-party/folly/src/folly/external/rapidhash/rapidhash.h new file mode 100644 index 00000000000000..d4139e3bfb743e --- /dev/null +++ b/third-party/folly/src/folly/external/rapidhash/rapidhash.h @@ -0,0 +1,574 @@ +/* + * rapidhash V3 - Very fast, high quality, platform-independent hashing +algorithm. + * + * Based on 'wyhash', by Wang Yi + * + * Copyright (C) 2025 Nicolas De Carli + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in +all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * You can contact the author at: + * - rapidhash source repository: https://github.com/Nicoshev/rapidhash + */ + +#pragma once + +/* + * Includes. + */ +#include +#include +#if defined(_MSC_VER) +#include +#if defined(_M_X64) && !defined(_M_ARM64EC) +#pragma intrinsic(_umul128) +#endif +#endif + +#include +#include +#include + +namespace folly { +namespace external { +namespace rapidhash_detail { + +/* + * C++ macros. + */ +#if __cplusplus >= 201402L && !defined(_MSC_VER) +#define FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR FOLLY_ALWAYS_INLINE constexpr +#else +#define FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR FOLLY_ALWAYS_INLINE +#endif + +/* + * Unrolling macros; they change the code generated for the main hash function. + * + * FOLLY_EXTERNAL_RAPIDHASH_COMPACT: Legacy variant, each loop iteration processes 112 bytes. + * FOLLY_EXTERNAL_RAPIDHASH_UNROLLED: Unrolled variant, each loop iteration processes 224 bytes. + * + * Most modern CPUs should benefit from having FOLLY_EXTERNAL_RAPIDHASH_UNROLLED. + * + * These macros do not alter the output hash. + */ +#ifndef FOLLY_EXTERNAL_RAPIDHASH_COMPACT +#define FOLLY_EXTERNAL_RAPIDHASH_UNROLLED +#elif defined(FOLLY_EXTERNAL_RAPIDHASH_UNROLLED) +#error "cannot define FOLLY_EXTERNAL_RAPIDHASH_COMPACT and FOLLY_EXTERNAL_RAPIDHASH_UNROLLED simultaneously." +#endif + +/* + * Default secret parameters. + */ + constexpr uint64_t rapidhash_secret[8] = { + 0x2d358dccaa6c78a5ull, + 0x8bb84b93962eacc9ull, + 0x4b33a62ed433d4a3ull, + 0x4d5a2da51de1aa47ull, + 0xa0761d6478bd642full, + 0xe7037ed1a0b428dbull, + 0x90ed1765281c388cull, + 0xaaaaaaaaaaaaaaaaull}; + +/* + * 64*64 -> 128bit multiply function. + * + * @param A Address of 64-bit number. + * @param B Address of 64-bit number. 
+ * + * Calculates 128-bit C = *A * *B. + * + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR void rapidhash_mum(uint64_t* A, uint64_t* B) + noexcept { +#if defined(__SIZEOF_INT128__) + __uint128_t r = *A; + r *= *B; + *A = static_cast(r); + *B = static_cast(r >> 64); +#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64)) +#if defined(_M_X64) + *A = _umul128(*A, *B, B); +#else + uint64_t c = __umulh(*A, *B); + *A = *A * *B; + *B = c; +#endif +#else + uint64_t ha = *A >> 32, hb = *B >> 32, la = (uint32_t)*A, lb = (uint32_t)*B; + uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, + t = rl + (rm0 << 32), c = t < rl; + uint64_t lo = t + (rm1 << 32); + c += lo < t; + uint64_t hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + *A = lo; + *B = hi; +#endif +} + +/* + * Multiply and xor mix function. + * + * @param A 64-bit number. + * @param B 64-bit number. + * + * Calculates 128-bit C = A * B. + * Returns 64-bit xor between high and low 64 bits of C. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhash_mix(uint64_t A, uint64_t B) noexcept { + rapidhash_mum(&A, &B); + return A ^ B; +} + +/* + * Read functions. 
+ */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR std::uint64_t rapidhash_read32_cx(const char* s) { + static_assert(kIsLittleEndian); + std::uint64_t ret = 0; + ret |= std::uint64_t(static_cast(s[0])) << (0 * 8); + ret |= std::uint64_t(static_cast(s[1])) << (1 * 8); + ret |= std::uint64_t(static_cast(s[2])) << (2 * 8); + ret |= std::uint64_t(static_cast(s[3])) << (3 * 8); + return ret; +} + +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhash_read32(const char* p) noexcept { + if (folly::is_constant_evaluated_or(false) && kIsLittleEndian) { + return rapidhash_read32_cx(p); + } else { + return folly::Endian::little(loadUnaligned(p)); + } +} + +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR std::uint64_t rapidhash_read64_cx( + const char* s, std::size_t l) { + static_assert(kIsLittleEndian); + + std::uint64_t ret = 0; + for (std::size_t i = 0; i < l; ++i) { + ret |= std::uint64_t(static_cast(s[i])) << (i * 8); + } + return ret; +} + +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhash_read64(const char* p) noexcept { + if (folly::is_constant_evaluated_or(false) && kIsLittleEndian) { + return rapidhash_read64_cx(p, 8); + } else { + return folly::Endian::little(loadUnaligned(p)); + } +} + +/* + * rapidhash main function. + * + * @param p Buffer to be hashed. + * @param len Length of the buffer p, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * @param secret Array of eight 64-bit secrets used to alter hash result + * predictably. + * + * Returns a 64-bit hash. 
+ */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhash_internal( + const char* p, size_t len, uint64_t seed, const uint64_t* secret) + noexcept { + seed ^= rapidhash_mix(seed ^ secret[2], secret[1]); + uint64_t a = 0, b = 0; + size_t i = len; + if (FOLLY_LIKELY(len <= 16)) { + if (len >= 4) { + seed ^= len; + if (len >= 8) { + const char* plast = p + len - 8; + a = rapidhash_read64(p); + b = rapidhash_read64(plast); + } else { + const char* plast = p + len - 4; + a = rapidhash_read32(p); + b = rapidhash_read32(plast); + } + } else if (len > 0) { + a = (static_cast(p[0]) << 45) | static_cast(p[len - 1]); + b = static_cast(p[len >> 1]); + } else + a = b = 0; + } else { + uint64_t see1 = seed, see2 = seed; + uint64_t see3 = seed, see4 = seed; + uint64_t see5 = seed, see6 = seed; +#ifdef FOLLY_EXTERNAL_RAPIDHASH_COMPACT + if (i > 112) { + do { + seed = + rapidhash_mix(rapidhash_read64(p) ^ secret[0], rapidhash_read64(p + 8) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[1], rapidhash_read64(p + 24) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[2], rapidhash_read64(p + 40) ^ see2); + see3 = rapidhash_mix( + rapidhash_read64(p + 48) ^ secret[3], rapidhash_read64(p + 56) ^ see3); + see4 = rapidhash_mix( + rapidhash_read64(p + 64) ^ secret[4], rapidhash_read64(p + 72) ^ see4); + see5 = rapidhash_mix( + rapidhash_read64(p + 80) ^ secret[5], rapidhash_read64(p + 88) ^ see5); + see6 = rapidhash_mix( + rapidhash_read64(p + 96) ^ secret[6], rapidhash_read64(p + 104) ^ see6); + p += 112; + i -= 112; + } while (i > 112); + seed ^= see1; + see2 ^= see3; + see4 ^= see5; + seed ^= see6; + see2 ^= see4; + seed ^= see2; + } +#else + if (i > 224) { + do { + seed = + rapidhash_mix(rapidhash_read64(p) ^ secret[0], rapidhash_read64(p + 8) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[1], rapidhash_read64(p + 24) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[2], rapidhash_read64(p + 40) ^ 
see2); + see3 = rapidhash_mix( + rapidhash_read64(p + 48) ^ secret[3], rapidhash_read64(p + 56) ^ see3); + see4 = rapidhash_mix( + rapidhash_read64(p + 64) ^ secret[4], rapidhash_read64(p + 72) ^ see4); + see5 = rapidhash_mix( + rapidhash_read64(p + 80) ^ secret[5], rapidhash_read64(p + 88) ^ see5); + see6 = rapidhash_mix( + rapidhash_read64(p + 96) ^ secret[6], rapidhash_read64(p + 104) ^ see6); + seed = rapidhash_mix( + rapidhash_read64(p + 112) ^ secret[0], rapidhash_read64(p + 120) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 128) ^ secret[1], rapidhash_read64(p + 136) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 144) ^ secret[2], rapidhash_read64(p + 152) ^ see2); + see3 = rapidhash_mix( + rapidhash_read64(p + 160) ^ secret[3], rapidhash_read64(p + 168) ^ see3); + see4 = rapidhash_mix( + rapidhash_read64(p + 176) ^ secret[4], rapidhash_read64(p + 184) ^ see4); + see5 = rapidhash_mix( + rapidhash_read64(p + 192) ^ secret[5], rapidhash_read64(p + 200) ^ see5); + see6 = rapidhash_mix( + rapidhash_read64(p + 208) ^ secret[6], rapidhash_read64(p + 216) ^ see6); + p += 224; + i -= 224; + } while (i > 224); + } + if (i > 112) { + seed = rapidhash_mix(rapidhash_read64(p) ^ secret[0], rapidhash_read64(p + 8) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[1], rapidhash_read64(p + 24) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[2], rapidhash_read64(p + 40) ^ see2); + see3 = rapidhash_mix( + rapidhash_read64(p + 48) ^ secret[3], rapidhash_read64(p + 56) ^ see3); + see4 = rapidhash_mix( + rapidhash_read64(p + 64) ^ secret[4], rapidhash_read64(p + 72) ^ see4); + see5 = rapidhash_mix( + rapidhash_read64(p + 80) ^ secret[5], rapidhash_read64(p + 88) ^ see5); + see6 = rapidhash_mix( + rapidhash_read64(p + 96) ^ secret[6], rapidhash_read64(p + 104) ^ see6); + p += 112; + i -= 112; + } + seed ^= see1; + see2 ^= see3; + see4 ^= see5; + seed ^= see6; + see2 ^= see4; + seed ^= see2; +#endif + if (i > 16) { + seed 
= rapidhash_mix(rapidhash_read64(p) ^ secret[2], rapidhash_read64(p + 8) ^ seed); + if (i > 32) { + seed = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[2], rapidhash_read64(p + 24) ^ seed); + if (i > 48) { + seed = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[1], rapidhash_read64(p + 40) ^ seed); + if (i > 64) { + seed = rapidhash_mix( + rapidhash_read64(p + 48) ^ secret[1], rapidhash_read64(p + 56) ^ seed); + if (i > 80) { + seed = rapidhash_mix( + rapidhash_read64(p + 64) ^ secret[2], + rapidhash_read64(p + 72) ^ seed); + if (i > 96) { + seed = rapidhash_mix( + rapidhash_read64(p + 80) ^ secret[1], + rapidhash_read64(p + 88) ^ seed); + } + } + } + } + } + } + a = rapidhash_read64(p + i - 16) ^ i; + b = rapidhash_read64(p + i - 8); + } + a ^= secret[1]; + b ^= seed; + rapidhash_mum(&a, &b); + return rapidhash_mix(a ^ secret[7], b ^ secret[1] ^ i); +} + +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhashMicro_internal( + const char* p, size_t len, uint64_t seed, const uint64_t* secret) + noexcept { + seed ^= rapidhash_mix(seed ^ secret[2], secret[1]); + uint64_t a = 0, b = 0; + size_t i = len; + if (FOLLY_LIKELY(len <= 16)) { + if (len >= 4) { + seed ^= len; + if (len >= 8) { + const char* plast = p + len - 8; + a = rapidhash_read64(p); + b = rapidhash_read64(plast); + } else { + const char* plast = p + len - 4; + a = rapidhash_read32(p); + b = rapidhash_read32(plast); + } + } else if (len > 0) { + a = (static_cast(p[0]) << 45) | static_cast(p[len - 1]); + b = static_cast(p[len >> 1]); + } else + a = b = 0; + } else { + if (i > 80) { + uint64_t see1 = seed, see2 = seed; + uint64_t see3 = seed, see4 = seed; + do { + seed = + rapidhash_mix(rapidhash_read64(p) ^ secret[0], rapidhash_read64(p + 8) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[1], rapidhash_read64(p + 24) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[2], rapidhash_read64(p + 40) ^ see2); + see3 = rapidhash_mix( + rapidhash_read64(p + 
48) ^ secret[3], rapidhash_read64(p + 56) ^ see3); + see4 = rapidhash_mix( + rapidhash_read64(p + 64) ^ secret[4], rapidhash_read64(p + 72) ^ see4); + p += 80; + i -= 80; + } while (i > 80); + seed ^= see1; + see2 ^= see3; + seed ^= see4; + seed ^= see2; + } + if (i > 16) { + seed = rapidhash_mix(rapidhash_read64(p) ^ secret[2], rapidhash_read64(p + 8) ^ seed); + if (i > 32) { + seed = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[2], rapidhash_read64(p + 24) ^ seed); + if (i > 48) { + seed = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[1], rapidhash_read64(p + 40) ^ seed); + if (i > 64) { + seed = rapidhash_mix( + rapidhash_read64(p + 48) ^ secret[1], rapidhash_read64(p + 56) ^ seed); + } + } + } + } + a = rapidhash_read64(p + i - 16) ^ i; + b = rapidhash_read64(p + i - 8); + } + a ^= secret[1]; + b ^= seed; + rapidhash_mum(&a, &b); + return rapidhash_mix(a ^ secret[7], b ^ secret[1] ^ i); +} + +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhashNano_internal( + const char* p, size_t len, uint64_t seed, const uint64_t* secret) + noexcept { + seed ^= rapidhash_mix(seed ^ secret[2], secret[1]); + uint64_t a = 0, b = 0; + size_t i = len; + if (FOLLY_LIKELY(len <= 16)) { + if (len >= 4) { + seed ^= len; + if (len >= 8) { + const char* plast = p + len - 8; + a = rapidhash_read64(p); + b = rapidhash_read64(plast); + } else { + const char* plast = p + len - 4; + a = rapidhash_read32(p); + b = rapidhash_read32(plast); + } + } else if (len > 0) { + a = (static_cast(p[0]) << 45) | static_cast(p[len - 1]); + b = static_cast(p[len >> 1]); + } else + a = b = 0; + } else { + if (i > 48) { + uint64_t see1 = seed, see2 = seed; + do { + seed = + rapidhash_mix(rapidhash_read64(p) ^ secret[0], rapidhash_read64(p + 8) ^ seed); + see1 = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[1], rapidhash_read64(p + 24) ^ see1); + see2 = rapidhash_mix( + rapidhash_read64(p + 32) ^ secret[2], rapidhash_read64(p + 40) ^ see2); + p += 48; + i -= 48; + } while (i > 48); + 
seed ^= see1; + seed ^= see2; + } + if (i > 16) { + seed = rapidhash_mix(rapidhash_read64(p) ^ secret[2], rapidhash_read64(p + 8) ^ seed); + if (i > 32) { + seed = rapidhash_mix( + rapidhash_read64(p + 16) ^ secret[2], rapidhash_read64(p + 24) ^ seed); + } + } + a = rapidhash_read64(p + i - 16) ^ i; + b = rapidhash_read64(p + i - 8); + } + a ^= secret[1]; + b ^= seed; + rapidhash_mum(&a, &b); + return rapidhash_mix(a ^ secret[7], b ^ secret[1] ^ i); +} + +} // namespace rapidhash_detail + +/* + * rapidhash seeded hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * + * Calls rapidhash_internal using provided parameters and default secrets. + * + * Returns a 64-bit hash. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhash_with_seed( + const char* key, size_t len, uint64_t seed) noexcept { + return rapidhash_detail::rapidhash_internal(key, len, seed, rapidhash_detail::rapidhash_secret); +} + +/* + * rapidhash general purpose hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * + * Calls rapidhash_with_seed using provided parameters and the default seed. + * + * Returns a 64-bit hash. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhash(const char* key, size_t len) noexcept { + return rapidhash_with_seed(key, len, 0); +} + +/* + * rapidhashMicro seeded hash function. + * + * Designed for HPC and server applications, where cache misses cause a + * noticeable performance detriment. Clang-18+ compiles it to ~140 instructions + * without stack usage, both on x86-64 and aarch64. Faster for sizes up to 512 + * bytes, just 15%-20% slower for inputs above 1kb. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * + * Calls rapidhashMicro_internal using provided parameters and default secrets. 
+ * + * Returns a 64-bit hash. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhashMicro_with_seed( + const char* key, size_t len, uint64_t seed) noexcept { + return rapidhash_detail::rapidhashMicro_internal(key, len, seed, rapidhash_detail::rapidhash_secret); +} + +/* + * rapidhashMicro hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * + * Calls rapidhashMicro_with_seed using provided parameters and the default seed. + * + * Returns a 64-bit hash. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhashMicro(const char* key, size_t len) noexcept { + return rapidhashMicro_with_seed(key, len, 0); +} + +/* + * rapidhashNano seeded hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * + * Calls rapidhashNano_internal using provided parameters and default secrets. + * + * Returns a 64-bit hash. + */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t rapidhashNano_with_seed( + const char* key, size_t len, uint64_t seed) noexcept { + return rapidhash_detail::rapidhashNano_internal(key, len, seed, rapidhash_detail::rapidhash_secret); +} + +/* + * rapidhashNano hash function. + * + * Designed for mobile and embedded applications, where keeping a small code + * size is a top priority. Clang-18+ compiles it to fewer than 100 instructions + * without stack usage, both on x86-64 and aarch64. The fastest for sizes up to + * 48 bytes, but may be considerably slower for larger inputs. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * + * Calls rapidhashNano_with_seed using provided parameters and the default seed. + * + * Returns a 64-bit hash. 
+ */ +FOLLY_EXTERNAL_RAPIDHASH_INLINE_CONSTEXPR uint64_t +rapidhashNano(const char* key, size_t len) noexcept { + return rapidhashNano_with_seed(key, len, 0); +} + +} // namespace external +} // namespace folly diff --git a/third-party/folly/src/folly/hash/rapidhash.h b/third-party/folly/src/folly/hash/rapidhash.h new file mode 100644 index 00000000000000..8f9e991f280f26 --- /dev/null +++ b/third-party/folly/src/folly/hash/rapidhash.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace folly { +namespace hash { + +// The values returned by Hash, Hash32, and Hash64 are only guaranteed to +// be the same within the same process. Fingerprint32 and Fingerprint64 +// are fixed algorithms that always give the same result. 
+ +// uint64_t rapidhash(const char* key, size_t len) +using external::rapidhash; + +// uint64_t rapidhash_with_seed(const char* key, size_t len, uint64_t seed) +using external::rapidhash_with_seed; + +// uint64_t rapidhashMicro(const char* key, size_t len) +using external::rapidhashMicro; + +// uint64_t rapidhashMicro_with_seed(const char* key, size_t len, uint64_t seed) +using external::rapidhashMicro_with_seed; + +// uint64_t rapidhashNano(const char* key, size_t len) +using external::rapidhashNano; + +// uint64_t rapidhashNano_with_seed(const char* key, size_t len, uint64_t seed) +using external::rapidhashNano_with_seed; + +} // namespace hash +} // namespace folly diff --git a/third-party/folly/src/folly/hash/test/HashBenchmark.cpp b/third-party/folly/src/folly/hash/test/HashBenchmark.cpp index c8ae211007b67d..4817c2ac92c123 100644 --- a/third-party/folly/src/folly/hash/test/HashBenchmark.cpp +++ b/third-party/folly/src/folly/hash/test/HashBenchmark.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -78,7 +79,7 @@ void addHashBenchmark(const std::string& name) { }); } - for (size_t i = 0; i < 16; ++i) { + for (size_t i = 0; i < 21; ++i) { auto k = size_t(1) << i; names.emplace_back(fmt::format("{}: k=2^{}", name, i)); folly::addBenchmark(__FILE__, names.back().c_str(), [=](unsigned iters) { @@ -111,6 +112,26 @@ struct MurmurHash { } }; +struct RapidHash { + uint64_t operator()(const uint8_t* data, size_t size) const { + return folly::hash::rapidhash(reinterpret_cast(data), size); + } +}; + +struct RapidHashMicro { + uint64_t operator()(const uint8_t* data, size_t size) const { + return folly::hash::rapidhashMicro( + reinterpret_cast(data), size); + } +}; + +struct RapidHashNano { + uint64_t operator()(const uint8_t* data, size_t size) const { + return folly::hash::rapidhashNano( + reinterpret_cast(data), size); + } +}; + } // namespace detail int main(int argc, char** argv) { @@ -125,6 +146,9 @@ int main(int argc, char** argv) { 
BENCHMARK_HASH(SpookyHashV2); BENCHMARK_HASH(FNV64); BENCHMARK_HASH(MurmurHash); + BENCHMARK_HASH(RapidHash); + BENCHMARK_HASH(RapidHashMicro); + BENCHMARK_HASH(RapidHashNano); #undef BENCHMARK_HASH @@ -134,105 +158,459 @@ int main(int argc, char** argv) { } #if 0 -Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz +AMD EPYC 9000 series CPU $ hash_benchmark --bm_min_usec=100000 ============================================================================ fbcode/folly/hash/test/HashBenchmark.cpp relative time/iter iters/s ============================================================================ -SpookyHashV2: k=1 7.36ns 135.94M -SpookyHashV2: k=2 7.47ns 133.91M -SpookyHashV2: k=3 7.74ns 129.16M -SpookyHashV2: k=4 7.14ns 140.06M -SpookyHashV2: k=5 7.52ns 133.05M -SpookyHashV2: k=6 7.84ns 127.58M -SpookyHashV2: k=7 8.11ns 123.34M -SpookyHashV2: k=8 7.24ns 138.09M -SpookyHashV2: k=9 7.32ns 136.57M -SpookyHashV2: k=10 7.62ns 131.27M -SpookyHashV2: k=11 7.84ns 127.58M -SpookyHashV2: k=12 8.07ns 123.90M -SpookyHashV2: k=13 7.85ns 127.40M -SpookyHashV2: k=14 8.02ns 124.76M -SpookyHashV2: k=15 8.24ns 121.42M -SpookyHashV2: k=2^0 7.18ns 139.28M -SpookyHashV2: k=2^1 7.65ns 130.68M -SpookyHashV2: k=2^2 7.24ns 138.16M -SpookyHashV2: k=2^3 7.40ns 135.15M -SpookyHashV2: k=2^4 13.91ns 71.91M -SpookyHashV2: k=2^5 14.06ns 71.11M -SpookyHashV2: k=2^6 21.83ns 45.81M -SpookyHashV2: k=2^7 37.35ns 26.77M -SpookyHashV2: k=2^8 47.34ns 21.12M -SpookyHashV2: k=2^9 65.66ns 15.23M -SpookyHashV2: k=2^10 99.14ns 10.09M -SpookyHashV2: k=2^11 172.31ns 5.80M -SpookyHashV2: k=2^12 314.87ns 3.18M -SpookyHashV2: k=2^13 596.77ns 1.68M -SpookyHashV2: k=2^14 1.16us 860.42K -SpookyHashV2: k=2^15 2.33us 428.39K +SpookyHashV2: k=1 5.80ns 172.36M +SpookyHashV2: k=2 6.18ns 161.74M +SpookyHashV2: k=3 6.36ns 157.12M +SpookyHashV2: k=4 5.68ns 175.90M +SpookyHashV2: k=5 6.18ns 161.70M +SpookyHashV2: k=6 6.38ns 156.71M +SpookyHashV2: k=7 6.59ns 151.76M +SpookyHashV2: k=8 5.70ns 175.40M +SpookyHashV2: k=9 5.85ns 170.82M 
+SpookyHashV2: k=10 6.23ns 160.51M +SpookyHashV2: k=11 6.37ns 156.97M +SpookyHashV2: k=12 5.87ns 170.32M +SpookyHashV2: k=13 6.23ns 160.47M +SpookyHashV2: k=14 6.36ns 157.19M +SpookyHashV2: k=15 6.65ns 150.32M +SpookyHashV2: k=2^0 5.80ns 172.43M +SpookyHashV2: k=2^1 6.18ns 161.81M +SpookyHashV2: k=2^2 5.68ns 175.97M +SpookyHashV2: k=2^3 5.70ns 175.51M +SpookyHashV2: k=2^4 11.98ns 83.48M +SpookyHashV2: k=2^5 11.36ns 88.06M +SpookyHashV2: k=2^6 18.85ns 53.06M +SpookyHashV2: k=2^7 35.96ns 27.81M +SpookyHashV2: k=2^8 48.41ns 20.66M +SpookyHashV2: k=2^9 69.53ns 14.38M +SpookyHashV2: k=2^10 104.62ns 9.56M +SpookyHashV2: k=2^11 184.95ns 5.41M +SpookyHashV2: k=2^12 330.11ns 3.03M +SpookyHashV2: k=2^13 634.08ns 1.58M +SpookyHashV2: k=2^14 1.23us 813.28K +SpookyHashV2: k=2^15 2.44us 410.27K +SpookyHashV2: k=2^16 4.83us 206.89K +SpookyHashV2: k=2^17 9.63us 103.86K +SpookyHashV2: k=2^18 19.21us 52.05K +SpookyHashV2: k=2^19 38.45us 26.01K +SpookyHashV2: k=2^20 77.18us 12.96K +---------------------------------------------------------------------------- +FNV64: k=1 1.12ns 893.79M +FNV64: k=2 1.64ns 608.67M +FNV64: k=3 2.22ns 450.32M +FNV64: k=4 2.66ns 376.63M +FNV64: k=5 3.22ns 310.52M +FNV64: k=6 3.91ns 255.89M +FNV64: k=7 4.75ns 210.39M +FNV64: k=8 4.75ns 210.61M +FNV64: k=9 5.62ns 177.82M +FNV64: k=10 6.02ns 166.23M +FNV64: k=11 6.86ns 145.80M +FNV64: k=12 9.04ns 110.62M +FNV64: k=13 9.97ns 100.32M +FNV64: k=14 10.55ns 94.77M +FNV64: k=15 11.19ns 89.34M +FNV64: k=2^0 1.12ns 894.46M +FNV64: k=2^1 1.64ns 608.43M +FNV64: k=2^2 2.66ns 376.59M +FNV64: k=2^3 4.75ns 210.71M +FNV64: k=2^4 11.56ns 86.50M +FNV64: k=2^5 22.44ns 44.56M +FNV64: k=2^6 61.72ns 16.20M +FNV64: k=2^7 149.54ns 6.69M +FNV64: k=2^8 324.43ns 3.08M +FNV64: k=2^9 675.13ns 1.48M +FNV64: k=2^10 1.38us 726.87K +FNV64: k=2^11 2.78us 359.99K +FNV64: k=2^12 5.59us 179.04K +FNV64: k=2^13 11.19us 89.34K +FNV64: k=2^14 22.41us 44.62K +FNV64: k=2^15 44.84us 22.30K +FNV64: k=2^16 89.69us 11.15K +FNV64: k=2^17 179.43us 5.57K 
+FNV64: k=2^18 358.91us 2.79K +FNV64: k=2^19 717.66us 1.39K +FNV64: k=2^20 1.44ms 696.51 +---------------------------------------------------------------------------- +MurmurHash: k=1 1.60ns 623.43M +MurmurHash: k=2 1.88ns 530.71M +MurmurHash: k=3 1.96ns 509.79M +MurmurHash: k=4 1.66ns 602.64M +MurmurHash: k=5 2.05ns 486.80M +MurmurHash: k=6 1.89ns 530.06M +MurmurHash: k=7 2.22ns 450.01M +MurmurHash: k=8 2.65ns 378.05M +MurmurHash: k=9 3.28ns 304.85M +MurmurHash: k=10 3.63ns 275.54M +MurmurHash: k=11 3.56ns 280.71M +MurmurHash: k=12 3.20ns 312.74M +MurmurHash: k=13 3.55ns 281.31M +MurmurHash: k=14 3.77ns 265.45M +MurmurHash: k=15 3.98ns 251.25M +MurmurHash: k=2^0 1.60ns 623.43M +MurmurHash: k=2^1 1.88ns 530.67M +MurmurHash: k=2^2 1.66ns 602.61M +MurmurHash: k=2^3 2.64ns 378.10M +MurmurHash: k=2^4 3.83ns 260.84M +MurmurHash: k=2^5 6.04ns 165.59M +MurmurHash: k=2^6 11.38ns 87.85M +MurmurHash: k=2^7 19.87ns 50.33M +MurmurHash: k=2^8 37.13ns 26.93M +MurmurHash: k=2^9 78.16ns 12.79M +MurmurHash: k=2^10 165.82ns 6.03M +MurmurHash: k=2^11 340.98ns 2.93M +MurmurHash: k=2^12 691.62ns 1.45M +MurmurHash: k=2^13 1.39us 718.14K +MurmurHash: k=2^14 2.80us 357.25K +MurmurHash: k=2^15 5.61us 178.20K +MurmurHash: k=2^16 11.22us 89.09K +MurmurHash: k=2^17 22.44us 44.56K +MurmurHash: k=2^18 44.87us 22.29K +MurmurHash: k=2^19 89.77us 11.14K +MurmurHash: k=2^20 179.86us 5.56K +---------------------------------------------------------------------------- +RapidHash: k=1 3.43ns 291.75M +RapidHash: k=2 3.43ns 291.76M +RapidHash: k=3 3.43ns 291.76M +RapidHash: k=4 3.08ns 324.23M +RapidHash: k=5 3.08ns 324.21M +RapidHash: k=6 3.09ns 324.13M +RapidHash: k=7 3.09ns 323.89M +RapidHash: k=8 3.09ns 323.80M +RapidHash: k=9 3.09ns 323.82M +RapidHash: k=10 3.09ns 323.81M +RapidHash: k=11 3.09ns 324.05M +RapidHash: k=12 3.09ns 324.14M +RapidHash: k=13 3.09ns 324.05M +RapidHash: k=14 3.09ns 324.07M +RapidHash: k=15 3.09ns 324.08M +RapidHash: k=2^0 3.43ns 291.77M +RapidHash: k=2^1 3.43ns 291.83M 
+RapidHash: k=2^2 3.08ns 324.24M +RapidHash: k=2^3 3.09ns 323.97M +RapidHash: k=2^4 3.09ns 324.06M +RapidHash: k=2^5 4.80ns 208.47M +RapidHash: k=2^6 5.55ns 180.23M +RapidHash: k=2^7 8.65ns 115.62M +RapidHash: k=2^8 15.47ns 64.63M +RapidHash: k=2^9 27.17ns 36.80M +RapidHash: k=2^10 50.30ns 19.88M +RapidHash: k=2^11 96.09ns 10.41M +RapidHash: k=2^12 188.00ns 5.32M +RapidHash: k=2^13 355.37ns 2.81M +RapidHash: k=2^14 709.39ns 1.41M +RapidHash: k=2^15 1.42us 706.36K +RapidHash: k=2^16 2.82us 354.93K +RapidHash: k=2^17 5.64us 177.36K +RapidHash: k=2^18 11.25us 88.90K +RapidHash: k=2^19 22.50us 44.44K +RapidHash: k=2^20 45.54us 21.96K +---------------------------------------------------------------------------- +RapidHashMicro: k=1 2.74ns 364.96M +RapidHashMicro: k=2 2.74ns 364.96M +RapidHashMicro: k=3 2.74ns 364.92M +RapidHashMicro: k=4 2.41ns 415.65M +RapidHashMicro: k=5 2.41ns 415.68M +RapidHashMicro: k=6 2.41ns 415.59M +RapidHashMicro: k=7 2.41ns 415.59M +RapidHashMicro: k=8 2.40ns 415.93M +RapidHashMicro: k=9 2.40ns 415.93M +RapidHashMicro: k=10 2.41ns 415.79M +RapidHashMicro: k=11 2.41ns 415.79M +RapidHashMicro: k=12 2.40ns 415.89M +RapidHashMicro: k=13 2.40ns 415.93M +RapidHashMicro: k=14 2.40ns 415.85M +RapidHashMicro: k=15 2.40ns 415.90M +RapidHashMicro: k=2^0 2.74ns 364.83M +RapidHashMicro: k=2^1 2.74ns 365.38M +RapidHashMicro: k=2^2 2.41ns 415.76M +RapidHashMicro: k=2^3 2.40ns 416.03M +RapidHashMicro: k=2^4 2.40ns 415.85M +RapidHashMicro: k=2^5 3.77ns 264.93M +RapidHashMicro: k=2^6 4.78ns 209.14M +RapidHashMicro: k=2^7 8.66ns 115.44M +RapidHashMicro: k=2^8 15.20ns 65.79M +RapidHashMicro: k=2^9 25.15ns 39.77M +RapidHashMicro: k=2^10 46.68ns 21.42M +RapidHashMicro: k=2^11 88.21ns 11.34M +RapidHashMicro: k=2^12 171.90ns 5.82M +RapidHashMicro: k=2^13 338.56ns 2.95M +RapidHashMicro: k=2^14 672.92ns 1.49M +RapidHashMicro: k=2^15 1.34us 746.37K +RapidHashMicro: k=2^16 2.68us 372.97K +RapidHashMicro: k=2^17 5.34us 187.15K +RapidHashMicro: k=2^18 10.67us 93.75K 
+RapidHashMicro: k=2^19 21.37us 46.79K +RapidHashMicro: k=2^20 43.10us 23.20K +---------------------------------------------------------------------------- +RapidHashNano: k=1 2.74ns 364.98M +RapidHashNano: k=2 2.74ns 364.94M +RapidHashNano: k=3 2.74ns 364.92M +RapidHashNano: k=4 2.41ns 415.68M +RapidHashNano: k=5 2.41ns 415.78M +RapidHashNano: k=6 2.41ns 415.67M +RapidHashNano: k=7 2.41ns 415.69M +RapidHashNano: k=8 2.40ns 415.98M +RapidHashNano: k=9 2.40ns 415.99M +RapidHashNano: k=10 2.40ns 415.94M +RapidHashNano: k=11 2.40ns 415.93M +RapidHashNano: k=12 2.40ns 415.91M +RapidHashNano: k=13 2.40ns 415.91M +RapidHashNano: k=14 2.40ns 415.86M +RapidHashNano: k=15 2.40ns 415.97M +RapidHashNano: k=2^0 2.74ns 365.00M +RapidHashNano: k=2^1 2.74ns 364.93M +RapidHashNano: k=2^2 2.41ns 415.60M +RapidHashNano: k=2^3 2.40ns 416.09M +RapidHashNano: k=2^4 2.40ns 415.87M +RapidHashNano: k=2^5 3.44ns 290.83M +RapidHashNano: k=2^6 5.05ns 198.00M +RapidHashNano: k=2^7 8.36ns 119.63M +RapidHashNano: k=2^8 14.69ns 68.09M +RapidHashNano: k=2^9 25.86ns 38.67M +RapidHashNano: k=2^10 49.95ns 20.02M +RapidHashNano: k=2^11 97.24ns 10.28M +RapidHashNano: k=2^12 189.93ns 5.27M +RapidHashNano: k=2^13 375.24ns 2.66M +RapidHashNano: k=2^14 744.88ns 1.34M +RapidHashNano: k=2^15 1.52us 657.09K +RapidHashNano: k=2^16 3.02us 331.24K +RapidHashNano: k=2^17 5.99us 166.97K +RapidHashNano: k=2^18 11.97us 83.55K +RapidHashNano: k=2^19 24.84us 40.25K +RapidHashNano: k=2^20 49.12us 20.36K +---------------------------------------------------------------------------- + +ARM Neoverse-V2 CPU +============================================================================ +fbcode/folly/hash/test/HashBenchmark.cpp relative time/iter iters/s +============================================================================ +SpookyHashV2: k=1 4.86ns 205.71M +SpookyHashV2: k=2 4.97ns 201.06M +SpookyHashV2: k=3 5.15ns 194.04M +SpookyHashV2: k=4 4.83ns 207.19M +SpookyHashV2: k=5 4.99ns 200.50M +SpookyHashV2: k=6 5.13ns 
194.97M +SpookyHashV2: k=7 5.39ns 185.54M +SpookyHashV2: k=8 4.81ns 207.87M +SpookyHashV2: k=9 4.99ns 200.58M +SpookyHashV2: k=10 5.15ns 194.27M +SpookyHashV2: k=11 5.35ns 186.89M +SpookyHashV2: k=12 4.97ns 201.36M +SpookyHashV2: k=13 5.07ns 197.39M +SpookyHashV2: k=14 5.26ns 190.25M +SpookyHashV2: k=15 5.54ns 180.64M +SpookyHashV2: k=2^0 4.84ns 206.49M +SpookyHashV2: k=2^1 5.00ns 200.15M +SpookyHashV2: k=2^2 4.81ns 207.98M +SpookyHashV2: k=2^3 4.86ns 205.85M +SpookyHashV2: k=2^4 10.02ns 99.80M +SpookyHashV2: k=2^5 10.51ns 95.19M +SpookyHashV2: k=2^6 16.95ns 59.01M +SpookyHashV2: k=2^7 32.35ns 30.91M +SpookyHashV2: k=2^8 36.11ns 27.69M +SpookyHashV2: k=2^9 52.96ns 18.88M +SpookyHashV2: k=2^10 80.89ns 12.36M +SpookyHashV2: k=2^11 146.00ns 6.85M +SpookyHashV2: k=2^12 267.59ns 3.74M +SpookyHashV2: k=2^13 520.53ns 1.92M +SpookyHashV2: k=2^14 1.02us 978.70K +SpookyHashV2: k=2^15 2.03us 491.99K +SpookyHashV2: k=2^16 4.03us 247.85K +SpookyHashV2: k=2^17 8.07us 123.93K +SpookyHashV2: k=2^18 15.99us 62.54K +SpookyHashV2: k=2^19 31.84us 31.41K +SpookyHashV2: k=2^20 64.26us 15.56K +---------------------------------------------------------------------------- +FNV64: k=1 900.40ps 1.11G +FNV64: k=2 1.22ns 816.61M +FNV64: k=3 1.59ns 629.80M +FNV64: k=4 1.97ns 507.80M +FNV64: k=5 2.37ns 422.18M +FNV64: k=6 2.79ns 358.69M +FNV64: k=7 3.18ns 314.06M +FNV64: k=8 3.63ns 275.29M +FNV64: k=9 4.07ns 245.44M +FNV64: k=10 4.55ns 219.75M +FNV64: k=11 5.06ns 197.64M +FNV64: k=12 5.57ns 179.52M +FNV64: k=13 6.09ns 164.21M +FNV64: k=14 6.56ns 152.46M +FNV64: k=15 7.08ns 141.23M +FNV64: k=2^0 897.97ps 1.11G +FNV64: k=2^1 1.24ns 807.58M +FNV64: k=2^2 1.94ns 514.85M +FNV64: k=2^3 3.59ns 278.28M +FNV64: k=2^4 7.75ns 128.98M +FNV64: k=2^5 17.59ns 56.86M +FNV64: k=2^6 40.59ns 24.64M +FNV64: k=2^7 98.97ns 10.10M +FNV64: k=2^8 218.46ns 4.58M +FNV64: k=2^9 458.28ns 2.18M +FNV64: k=2^10 933.57ns 1.07M +FNV64: k=2^11 1.90us 526.35K +FNV64: k=2^12 3.80us 263.17K +FNV64: k=2^13 7.62us 131.16K +FNV64: 
k=2^14 15.32us 65.27K +FNV64: k=2^15 30.47us 32.82K +FNV64: k=2^16 61.54us 16.25K +FNV64: k=2^17 122.80us 8.14K +FNV64: k=2^18 244.89us 4.08K +FNV64: k=2^19 490.45us 2.04K +FNV64: k=2^20 974.42us 1.03K ---------------------------------------------------------------------------- -FNV64: k=1 1.67ns 597.73M -FNV64: k=2 2.16ns 463.65M -FNV64: k=3 2.98ns 335.84M -FNV64: k=4 3.34ns 299.81M -FNV64: k=5 3.94ns 253.49M -FNV64: k=6 4.50ns 222.04M -FNV64: k=7 5.10ns 196.27M -FNV64: k=8 4.29ns 233.13M -FNV64: k=9 5.16ns 193.88M -FNV64: k=10 5.70ns 175.35M -FNV64: k=11 6.28ns 159.25M -FNV64: k=12 7.32ns 136.54M -FNV64: k=13 8.01ns 124.80M -FNV64: k=14 8.80ns 113.60M -FNV64: k=15 9.42ns 106.17M -FNV64: k=2^0 1.66ns 600.75M -FNV64: k=2^1 2.21ns 452.85M -FNV64: k=2^2 3.40ns 294.24M -FNV64: k=2^3 4.33ns 231.13M -FNV64: k=2^4 9.42ns 106.15M -FNV64: k=2^5 22.39ns 44.66M -FNV64: k=2^6 50.47ns 19.81M -FNV64: k=2^7 127.87ns 7.82M -FNV64: k=2^8 279.80ns 3.57M -FNV64: k=2^9 589.47ns 1.70M -FNV64: k=2^10 1.22us 817.45K -FNV64: k=2^11 2.46us 406.98K -FNV64: k=2^12 4.92us 203.27K -FNV64: k=2^13 9.84us 101.61K -FNV64: k=2^14 19.66us 50.85K -FNV64: k=2^15 39.65us 25.22K +MurmurHash: k=1 800.93ps 1.25G +MurmurHash: k=2 839.94ps 1.19G +MurmurHash: k=3 943.71ps 1.06G +MurmurHash: k=4 940.77ps 1.06G +MurmurHash: k=5 934.52ps 1.07G +MurmurHash: k=6 930.07ps 1.08G +MurmurHash: k=7 1.06ns 945.85M +MurmurHash: k=8 1.36ns 736.43M +MurmurHash: k=9 1.69ns 591.59M +MurmurHash: k=10 1.94ns 516.57M +MurmurHash: k=11 2.11ns 475.03M +MurmurHash: k=12 1.74ns 574.93M +MurmurHash: k=13 1.82ns 548.98M +MurmurHash: k=14 2.09ns 479.18M +MurmurHash: k=15 2.17ns 460.00M +MurmurHash: k=2^0 797.67ps 1.25G +MurmurHash: k=2^1 847.22ps 1.18G +MurmurHash: k=2^2 933.75ps 1.07G +MurmurHash: k=2^3 1.35ns 738.51M +MurmurHash: k=2^4 2.02ns 495.93M +MurmurHash: k=2^5 3.26ns 306.59M +MurmurHash: k=2^6 6.00ns 166.56M +MurmurHash: k=2^7 11.28ns 88.63M +MurmurHash: k=2^8 22.85ns 43.76M +MurmurHash: k=2^9 48.04ns 20.82M +MurmurHash: 
k=2^10 109.83ns 9.11M +MurmurHash: k=2^11 230.43ns 4.34M +MurmurHash: k=2^12 466.45ns 2.14M +MurmurHash: k=2^13 947.20ns 1.06M +MurmurHash: k=2^14 1.90us 525.52K +MurmurHash: k=2^15 3.84us 260.44K +MurmurHash: k=2^16 7.68us 130.25K +MurmurHash: k=2^17 15.28us 65.44K +MurmurHash: k=2^18 30.80us 32.47K +MurmurHash: k=2^19 60.93us 16.41K +MurmurHash: k=2^20 123.62us 8.09K ---------------------------------------------------------------------------- -MurmurHash: k=1 1.92ns 520.45M -MurmurHash: k=2 2.22ns 451.21M -MurmurHash: k=3 2.28ns 437.75M -MurmurHash: k=4 1.98ns 504.77M -MurmurHash: k=5 2.18ns 458.61M -MurmurHash: k=6 2.46ns 406.96M -MurmurHash: k=7 2.52ns 396.89M -MurmurHash: k=8 2.84ns 352.24M -MurmurHash: k=9 3.63ns 275.50M -MurmurHash: k=10 3.88ns 257.82M -MurmurHash: k=11 4.03ns 248.11M -MurmurHash: k=12 3.72ns 268.52M -MurmurHash: k=13 3.91ns 255.67M -MurmurHash: k=14 4.20ns 238.10M -MurmurHash: k=15 4.41ns 226.70M -MurmurHash: k=2^0 1.87ns 533.86M -MurmurHash: k=2^1 2.17ns 460.14M -MurmurHash: k=2^2 1.96ns 510.29M -MurmurHash: k=2^3 2.78ns 359.29M -MurmurHash: k=2^4 3.84ns 260.18M -MurmurHash: k=2^5 5.22ns 191.49M -MurmurHash: k=2^6 8.99ns 111.18M -MurmurHash: k=2^7 17.05ns 58.63M -MurmurHash: k=2^8 32.43ns 30.84M -MurmurHash: k=2^9 70.59ns 14.17M -MurmurHash: k=2^10 147.21ns 6.79M -MurmurHash: k=2^11 301.94ns 3.31M -MurmurHash: k=2^12 614.43ns 1.63M -MurmurHash: k=2^13 1.23us 810.19K -MurmurHash: k=2^14 2.47us 405.39K -MurmurHash: k=2^15 4.94us 202.32K +RapidHash: k=1 1.83ns 547.33M +RapidHash: k=2 1.83ns 545.42M +RapidHash: k=3 1.83ns 547.41M +RapidHash: k=4 1.74ns 576.05M +RapidHash: k=5 1.75ns 572.67M +RapidHash: k=6 1.75ns 571.89M +RapidHash: k=7 1.74ns 574.81M +RapidHash: k=8 1.75ns 572.97M +RapidHash: k=9 1.75ns 569.93M +RapidHash: k=10 1.75ns 571.37M +RapidHash: k=11 1.75ns 572.62M +RapidHash: k=12 1.75ns 570.81M +RapidHash: k=13 1.74ns 575.19M +RapidHash: k=14 1.75ns 571.86M +RapidHash: k=15 1.76ns 569.45M +RapidHash: k=2^0 1.83ns 546.84M 
+RapidHash: k=2^1 1.84ns 542.67M +RapidHash: k=2^2 1.75ns 571.55M +RapidHash: k=2^3 1.75ns 570.59M +RapidHash: k=2^4 1.74ns 573.73M +RapidHash: k=2^5 6.06ns 164.97M +RapidHash: k=2^6 7.32ns 136.53M +RapidHash: k=2^7 8.56ns 116.83M +RapidHash: k=2^8 11.95ns 83.67M +RapidHash: k=2^9 21.28ns 47.00M +RapidHash: k=2^10 32.94ns 30.36M +RapidHash: k=2^11 60.44ns 16.55M +RapidHash: k=2^12 118.04ns 8.47M +RapidHash: k=2^13 228.19ns 4.38M +RapidHash: k=2^14 450.62ns 2.22M +RapidHash: k=2^15 892.95ns 1.12M +RapidHash: k=2^16 1.78us 563.35K +RapidHash: k=2^17 3.64us 274.81K +RapidHash: k=2^18 7.28us 137.45K +RapidHash: k=2^19 14.72us 67.94K +RapidHash: k=2^20 29.68us 33.69K ---------------------------------------------------------------------------- +RapidHashMicro: k=1 1.81ns 553.77M +RapidHashMicro: k=2 1.80ns 555.24M +RapidHashMicro: k=3 1.80ns 554.39M +RapidHashMicro: k=4 1.75ns 572.25M +RapidHashMicro: k=5 1.74ns 574.47M +RapidHashMicro: k=6 1.75ns 571.09M +RapidHashMicro: k=7 1.73ns 577.13M +RapidHashMicro: k=8 1.74ns 575.04M +RapidHashMicro: k=9 1.73ns 577.82M +RapidHashMicro: k=10 1.74ns 574.97M +RapidHashMicro: k=11 1.73ns 577.59M +RapidHashMicro: k=12 1.73ns 579.19M +RapidHashMicro: k=13 1.72ns 580.85M +RapidHashMicro: k=14 1.75ns 570.49M +RapidHashMicro: k=15 1.74ns 575.08M +RapidHashMicro: k=2^0 1.81ns 551.19M +RapidHashMicro: k=2^1 1.81ns 551.41M +RapidHashMicro: k=2^2 1.73ns 578.13M +RapidHashMicro: k=2^3 1.72ns 582.30M +RapidHashMicro: k=2^4 1.74ns 574.35M +RapidHashMicro: k=2^5 2.80ns 357.71M +RapidHashMicro: k=2^6 4.01ns 249.14M +RapidHashMicro: k=2^7 8.09ns 123.54M +RapidHashMicro: k=2^8 11.92ns 83.86M +RapidHashMicro: k=2^9 19.72ns 50.70M +RapidHashMicro: k=2^10 36.74ns 27.22M +RapidHashMicro: k=2^11 66.32ns 15.08M +RapidHashMicro: k=2^12 127.50ns 7.84M +RapidHashMicro: k=2^13 250.60ns 3.99M +RapidHashMicro: k=2^14 499.14ns 2.00M +RapidHashMicro: k=2^15 992.53ns 1.01M +RapidHashMicro: k=2^16 1.98us 504.40K +RapidHashMicro: k=2^17 4.01us 249.55K 
+RapidHashMicro: k=2^18 8.02us 124.75K +RapidHashMicro: k=2^19 16.20us 61.74K +RapidHashMicro: k=2^20 32.38us 30.88K +---------------------------------------------------------------------------- +RapidHashNano: k=1 1.83ns 547.40M +RapidHashNano: k=2 1.83ns 547.34M +RapidHashNano: k=3 1.83ns 547.45M +RapidHashNano: k=4 1.73ns 578.94M +RapidHashNano: k=5 1.73ns 577.38M +RapidHashNano: k=6 1.73ns 578.66M +RapidHashNano: k=7 1.72ns 579.98M +RapidHashNano: k=8 1.75ns 570.16M +RapidHashNano: k=9 1.73ns 577.08M +RapidHashNano: k=10 1.74ns 575.20M +RapidHashNano: k=11 1.73ns 577.25M +RapidHashNano: k=12 1.73ns 577.37M +RapidHashNano: k=13 1.75ns 573.06M +RapidHashNano: k=14 1.73ns 578.76M +RapidHashNano: k=15 1.73ns 579.37M +RapidHashNano: k=2^0 1.83ns 546.93M +RapidHashNano: k=2^1 1.82ns 550.47M +RapidHashNano: k=2^2 1.71ns 584.00M +RapidHashNano: k=2^3 1.73ns 577.64M +RapidHashNano: k=2^4 1.74ns 576.14M +RapidHashNano: k=2^5 2.76ns 362.17M +RapidHashNano: k=2^6 4.68ns 213.54M +RapidHashNano: k=2^7 6.92ns 144.61M +RapidHashNano: k=2^8 11.37ns 87.97M +RapidHashNano: k=2^9 20.72ns 48.27M +RapidHashNano: k=2^10 39.52ns 25.30M +RapidHashNano: k=2^11 78.77ns 12.70M +RapidHashNano: k=2^12 154.82ns 6.46M +RapidHashNano: k=2^13 308.12ns 3.25M +RapidHashNano: k=2^14 617.76ns 1.62M +RapidHashNano: k=2^15 1.23us 812.54K +RapidHashNano: k=2^16 2.46us 406.14K +RapidHashNano: k=2^17 4.91us 203.54K +RapidHashNano: k=2^18 9.82us 101.82K +RapidHashNano: k=2^19 19.82us 50.46K +RapidHashNano: k=2^20 39.76us 25.15K +---------------------------------------------------------------------------- + #endif diff --git a/third-party/folly/src/folly/hash/test/RapidHashTest.cpp b/third-party/folly/src/folly/hash/test/RapidHashTest.cpp new file mode 100644 index 00000000000000..d801eb06a7e5ed --- /dev/null +++ b/third-party/folly/src/folly/hash/test/RapidHashTest.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +using namespace std::literals; + +constexpr auto cases = std::array{ + std::pair{""sv, UINT64_C(232177599295442350)}, + std::pair{"0"sv, UINT64_C(14193856657648385672)}, + std::pair{"01"sv, UINT64_C(15549595023848265440)}, + std::pair{"012"sv, UINT64_C(14036073547449753364)}, + std::pair{"0123"sv, UINT64_C(2155398448399527240)}, + std::pair{"01234"sv, UINT64_C(11595122963875691922)}, + std::pair{"012345"sv, UINT64_C(12910097366968805346)}, + std::pair{"0123456"sv, UINT64_C(2988730266698498992)}, + std::pair{"01234567"sv, UINT64_C(7570412248888932898)}, + std::pair{"0123456789ABCDEF"sv, UINT64_C(4286119474277594607)}, + std::pair{"0123456789ABCDEF01234567"sv, UINT64_C(6602676763163752414)}, + std::pair{"0123456789ABCDEF0"sv, UINT64_C(12163985545246830987)}, + std::pair{"0123456789ABCDEF01"sv, UINT64_C(17633497820352341844)}, + std::pair{"0123456789ABCDEF012"sv, UINT64_C(5134914024862322698)}, + std::pair{"0123456789ABCDEF0123"sv, UINT64_C(15456488218748233591)}, + std::pair{"0123456789ABCDEF01234"sv, UINT64_C(8219044676438946980)}, + std::pair{"0123456789ABCDEF012345"sv, UINT64_C(2949818754802360919)}, + std::pair{"0123456789ABCDEF0123456"sv, UINT64_C(10100507821488338105)}, + std::pair{ + "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"sv, + UINT64_C(5986613784938156867)}, + // Sequences with bytes represented as negative chars. 
+ std::pair{"\x80"sv, UINT64_C(7470186259668200490)}, + std::pair{"\x80\x81"sv, UINT64_C(11863878210592514807)}, + std::pair{ + "\x80\x81\x82\x83\x84\x85\x86\x87"sv, UINT64_C(4054026010566036770)}, + std::pair{ + "\x61\x80\x81\x82\x83\x84\x85\x86\x87\x62"sv, + UINT64_C(7452325226268640525)}, +}; + +TEST(RapidHash, Runtime) { + for (auto [in, out] : cases) { + EXPECT_EQ(out, folly::hash::rapidhash(in.data(), in.size())) << in; + } +} + +TEST(RapidHashMicro, Runtime) { + for (auto [in, out] : cases) { + EXPECT_EQ(out, folly::hash::rapidhashMicro(in.data(), in.size())) << in; + } +} + +TEST(RapidHashNano, Runtime) { + for (auto [in, out] : cases) { + EXPECT_EQ(out, folly::hash::rapidhashNano(in.data(), in.size())) << in; + } +} + +TEST(RapidHash, Constexpr) { +#define TEST_CASE(i) \ + { \ + constexpr auto testCase_##i = cases[i]; \ + constexpr uint64_t h_##i = folly::hash::rapidhash( \ + testCase_##i.first.data(), testCase_##i.first.size()); \ + static_assert(h_##i == testCase_##i.second); \ + } + + TEST_CASE(0); + TEST_CASE(1); + TEST_CASE(2); + TEST_CASE(3); + TEST_CASE(4); + TEST_CASE(5); + TEST_CASE(6); + TEST_CASE(7); + TEST_CASE(8); + TEST_CASE(9); + TEST_CASE(10); + TEST_CASE(11); + TEST_CASE(12); + TEST_CASE(13); + TEST_CASE(14); + TEST_CASE(15); + TEST_CASE(16); + TEST_CASE(17); + TEST_CASE(18); + TEST_CASE(19); + TEST_CASE(20); + TEST_CASE(21); + TEST_CASE(22); +} + +TEST(RapidHashMicro, Constexpr) { +#define TEST_CASE_MICRO(i) \ + { \ + constexpr auto testCase_##i = cases[i]; \ + constexpr uint64_t h_##i = folly::hash::rapidhashMicro( \ + testCase_##i.first.data(), testCase_##i.first.size()); \ + static_assert(h_##i == testCase_##i.second); \ + } + + TEST_CASE_MICRO(0); + TEST_CASE_MICRO(1); + TEST_CASE_MICRO(2); + TEST_CASE_MICRO(3); + TEST_CASE_MICRO(4); + TEST_CASE_MICRO(5); + TEST_CASE_MICRO(6); + TEST_CASE_MICRO(7); + TEST_CASE_MICRO(8); + TEST_CASE_MICRO(9); + TEST_CASE_MICRO(10); + TEST_CASE_MICRO(11); + TEST_CASE_MICRO(12); + TEST_CASE_MICRO(13); + 
TEST_CASE_MICRO(14); + TEST_CASE_MICRO(15); + TEST_CASE_MICRO(16); + TEST_CASE_MICRO(17); + TEST_CASE_MICRO(18); + TEST_CASE_MICRO(19); + TEST_CASE_MICRO(20); + TEST_CASE_MICRO(21); + TEST_CASE_MICRO(22); +} + +TEST(RapidHashNano, Constexpr) { +#define TEST_CASE_NANO(i) \ + { \ + constexpr auto testCase_##i = cases[i]; \ + constexpr uint64_t h_##i = folly::hash::rapidhashNano( \ + testCase_##i.first.data(), testCase_##i.first.size()); \ + static_assert(h_##i == testCase_##i.second); \ + } + + TEST_CASE_NANO(0); + TEST_CASE_NANO(1); + TEST_CASE_NANO(2); + TEST_CASE_NANO(3); + TEST_CASE_NANO(4); + TEST_CASE_NANO(5); + TEST_CASE_NANO(6); + TEST_CASE_NANO(7); + TEST_CASE_NANO(8); + TEST_CASE_NANO(9); + TEST_CASE_NANO(10); + TEST_CASE_NANO(11); + TEST_CASE_NANO(12); + TEST_CASE_NANO(13); + TEST_CASE_NANO(14); + TEST_CASE_NANO(15); + TEST_CASE_NANO(16); + TEST_CASE_NANO(17); + TEST_CASE_NANO(18); + TEST_CASE_NANO(19); + TEST_CASE_NANO(20); + TEST_CASE_NANO(21); + TEST_CASE_NANO(22); +} From 26465d98f37797279c4bea3b66db687e49e1cf0c Mon Sep 17 00:00:00 2001 From: Nicolas De Carli Date: Mon, 9 Jun 2025 14:41:11 -0700 Subject: [PATCH 2/2] Use rapidhashNano on folly::hasher (#9617) Summary: Pull Request resolved: https://github.com/facebook/hhvm/pull/9617 X-link: https://github.com/facebook/folly/pull/2448 Replacing SpookyHashV2 with rapidhashNano folly::hasher::operator() accounts for almost 3M$ in $cpu_t1_equiv_per_year_q2_2025 https://fburl.com/strobelight/izute4k3 Given that integral hashing is the identity function, most of the registered cycles should come from strings/byteRanges See D66326393 and D75697257 for a detailed discussion around benchmarks and canaries Differential Revision: D76052916 --- third-party/folly/src/folly/Range.h | 7 ++++--- third-party/folly/src/folly/hash/Hash.h | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/third-party/folly/src/folly/Range.h b/third-party/folly/src/folly/Range.h index 0f7702b0800b13..7df8bef1e70c6d 
100644 --- a/third-party/folly/src/folly/Range.h +++ b/third-party/folly/src/folly/Range.h @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -794,7 +795,7 @@ class Range { } // Do NOT use this function, which was left behind for backwards - // compatibility. Use SpookyHashV2 instead -- it is faster, and produces + // compatibility. Use rapidhashNano instead -- it is faster, and produces // a 64-bit hash, which means dramatically fewer collisions in large maps. // (The above advice does not apply if you are targeting a 32-bit system.) // @@ -1697,8 +1698,8 @@ struct hasher< // can contain pointers and padding. Also, floating point numbers // may be == without being bit-identical. size_t is less than 64 // bits on some platforms. - return static_cast( - hash::SpookyHashV2::Hash64(r.begin(), r.size() * sizeof(T), 0)); + return static_cast(folly::hash::rapidhashNano( + reinterpret_cast(r.begin()), r.size() * sizeof(T))); } }; diff --git a/third-party/folly/src/folly/hash/Hash.h b/third-party/folly/src/folly/hash/Hash.h index a18ac42c982928..e22adce3e5ffdc 100644 --- a/third-party/folly/src/folly/hash/Hash.h +++ b/third-party/folly/src/folly/hash/Hash.h @@ -46,6 +46,7 @@ #include #include #include +#include #include namespace folly { @@ -999,8 +1000,7 @@ struct hasher { using folly_is_avalanching = std::true_type; size_t operator()(const std::string& key) const { - return static_cast( - hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); + return static_cast(hash::rapidhashNano(key.data(), key.size())); } }; template @@ -1011,8 +1011,7 @@ struct hasher { using folly_is_avalanching = std::true_type; size_t operator()(const std::string_view& key) const { - return static_cast( - hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); + return static_cast(hash::rapidhashNano(key.data(), key.size())); } }; template