diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc index 3f785bd23fce3..4ef315eedb313 100644 --- a/compiler-rt/include/profile/MemProfData.inc +++ b/compiler-rt/include/profile/MemProfData.inc @@ -33,11 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 4ULL +#define MEMPROF_RAW_VERSION 5ULL // Currently supported versions. -#define MEMPROF_RAW_SUPPORTED_VERSIONS \ - { 3ULL, 4ULL } +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) { } __attribute__((__packed__)); #endif +constexpr int MantissaBits = 12; +constexpr int ExponentBits = 4; +constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1; +constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1; + +// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format. +inline uint16_t encodeHistogramCount(uint64_t Count) { + if (Count == 0) + return 0; + + const uint64_t MaxRepresentableValue = static_cast(MaxMantissa) + << MaxExponent; + if (Count > MaxRepresentableValue) + Count = MaxRepresentableValue; + + if (Count <= MaxMantissa) + return Count; + + uint64_t M = Count; + uint16_t E = 0; + while (M > MaxMantissa) { + M = (M + 1) >> 1; + E++; + } + return (E << MantissaBits) | static_cast(M); +} + +// Decodes a 16-bit scaled integer and returns the +// decoded 64-bit unsigned integer. 
+inline uint64_t decodeHistogramCount(uint16_t EncodedValue) { + const uint16_t E = EncodedValue >> MantissaBits; + const uint16_t M = EncodedValue & MaxMantissa; + return static_cast(M) << E; +} + } // namespace memprof } // namespace llvm diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp index a897648584828..f909d78f5f36a 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -19,6 +19,7 @@ using ::__sanitizer::Vector; using ::llvm::memprof::MemInfoBlock; using SegmentEntry = ::llvm::memprof::SegmentEntry; using Header = ::llvm::memprof::Header; +using ::llvm::memprof::encodeHistogramCount; namespace { template char *WriteBytes(const T &Pod, char *Buffer) { @@ -169,13 +170,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector &StackIds, // FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a // serialization schema will fix this issue. See also FIXME in // deserialization. 
- Ptr = WriteBytes((*h)->mib, Ptr); - for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) { - u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j]; + auto &MIB = (*h)->mib; + Ptr = WriteBytes(MIB, Ptr); + for (u64 j = 0; j < MIB.AccessHistogramSize; ++j) { + u16 HistogramEntry = + encodeHistogramCount(((u64 *)(MIB.AccessHistogram))[j]); Ptr = WriteBytes(HistogramEntry, Ptr); } - if ((*h)->mib.AccessHistogramSize > 0) { - InternalFree((void *)((*h)->mib.AccessHistogram)); + if (MIB.AccessHistogramSize > 0) { + InternalFree((void *)MIB.AccessHistogram); } } CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) && @@ -249,7 +252,7 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef Modules, }, reinterpret_cast(&TotalAccessHistogramEntries)); const u64 NumHistogramBytes = - RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8); + RoundUpTo(TotalAccessHistogramEntries * sizeof(uint16_t), 8); const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8); diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt index 0b5c302a4ce5d..1603d47d019ed 100644 --- a/compiler-rt/lib/memprof/tests/CMakeLists.txt +++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt @@ -26,6 +26,7 @@ set(MEMPROF_SOURCES ../memprof_rawprofile.cpp) set(MEMPROF_UNITTESTS + histogram_encoding.cpp rawprofile.cpp driver.cpp) diff --git a/compiler-rt/lib/memprof/tests/histogram_encoding.cpp b/compiler-rt/lib/memprof/tests/histogram_encoding.cpp new file mode 100644 index 0000000000000..443d4cea17d20 --- /dev/null +++ b/compiler-rt/lib/memprof/tests/histogram_encoding.cpp @@ -0,0 +1,35 @@ +#include +#include + +#include "profile/MemProfData.inc" +#include "gtest/gtest.h" + +namespace llvm { +namespace memprof { +namespace { +TEST(MemProf, F16EncodeDecode) { + const std::vector TestCases = { + 0, 100, 4095, 4096, 5000, 8191, 65535, 1000000, 134213640, 200000000, + }; + + for (const uint64_t TestCase : TestCases) { + const uint16_t Encoded = 
encodeHistogramCount(TestCase); + const uint64_t Decoded = decodeHistogramCount(Encoded); + + const uint64_t MaxRepresentable = static_cast(MaxMantissa) + << MaxExponent; + + if (TestCase >= MaxRepresentable) { + EXPECT_EQ(Decoded, MaxRepresentable); + } else if (TestCase == 0) { + EXPECT_EQ(Decoded, TestCase); + } else { + // The decoded value should be close to the original value. + // The error should be less than 1/1024 for larger numbers. + EXPECT_NEAR(Decoded, TestCase, static_cast(TestCase) / 1024.0); + } + } +} +} // namespace +} // namespace memprof +} // namespace llvm diff --git a/compiler-rt/test/memprof/TestCases/memprof_histogram_uint8.cpp b/compiler-rt/test/memprof/TestCases/memprof_histogram_uint8.cpp new file mode 100644 index 0000000000000..98fceba6b4983 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/memprof_histogram_uint8.cpp @@ -0,0 +1,37 @@ +// Test the histogram support in memprof using the text format output. +// Shadow memory counters per object are limited to 8 bits. In-memory counters +// aggregating counts across multiple objects are 64 bits. + +// RUN: %clangxx_memprof -O0 -mllvm -memprof-histogram -mllvm -memprof-use-callbacks=true %s -o %t && %env_memprof_opts=print_text=1:histogram=1:log_path=stdout %run %t 2>&1 | FileCheck %s + +#include +#include + +int main() { + // Allocate memory that will create a histogram + char *buffer = (char *)malloc(1024); + if (!buffer) + return 1; + + for (int i = 0; i < 10; ++i) { + // Access every 8th byte (since shadow granularity is 8 bytes). 
+ buffer[i * 8] = 'A'; + } + + for (int j = 0; j < 200; ++j) { + buffer[8] = 'B'; // Count = previous count + 200 + } + + for (int j = 0; j < 400; ++j) { + buffer[16] = 'B'; // Count is saturated at 255 + } + + // Free the memory to trigger MIB creation with histogram + free(buffer); + + printf("Test completed successfully\n"); + return 0; +} + +// CHECK: AccessCountHistogram[128]: 1 201 255 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +// CHECK: Test completed successfully diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 3f785bd23fce3..4ef315eedb313 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -33,11 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 4ULL +#define MEMPROF_RAW_VERSION 5ULL // Currently supported versions. -#define MEMPROF_RAW_SUPPORTED_VERSIONS \ - { 3ULL, 4ULL } +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) { } __attribute__((__packed__)); #endif +constexpr int MantissaBits = 12; +constexpr int ExponentBits = 4; +constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1; +constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1; + +// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format. 
+inline uint16_t encodeHistogramCount(uint64_t Count) { + if (Count == 0) + return 0; + + const uint64_t MaxRepresentableValue = static_cast(MaxMantissa) + << MaxExponent; + if (Count > MaxRepresentableValue) + Count = MaxRepresentableValue; + + if (Count <= MaxMantissa) + return Count; + + uint64_t M = Count; + uint16_t E = 0; + while (M > MaxMantissa) { + M = (M + 1) >> 1; + E++; + } + return (E << MantissaBits) | static_cast(M); +} + +// Decodes a 16-bit scaled integer and returns the +// decoded 64-bit unsigned integer. +inline uint64_t decodeHistogramCount(uint16_t EncodedValue) { + const uint16_t E = EncodedValue >> MantissaBits; + const uint16_t M = EncodedValue & MaxMantissa; + return static_cast(M) << E; +} + } // namespace memprof } // namespace llvm diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 235b1347e0077..2605e10da5f84 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -166,6 +166,39 @@ readMemInfoBlocksV4(const char *Ptr) { return Items; } +llvm::SmallVector> +readMemInfoBlocksV5(const char *Ptr) { + using namespace support; + + const uint64_t NumItemsToRead = + endian::readNext(Ptr); + + llvm::SmallVector> Items; + for (uint64_t I = 0; I < NumItemsToRead; I++) { + const uint64_t Id = + endian::readNext(Ptr); + + MemInfoBlock MIB = *reinterpret_cast(Ptr); + Ptr += sizeof(MemInfoBlock); + + if (MIB.AccessHistogramSize > 0) { + // The in-memory representation uses uint64_t for histogram entries. + MIB.AccessHistogram = + (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t)); + for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) { + // The on-disk format for V5 uses uint16_t which is then decoded to + // uint64_t. 
+ const uint16_t Val = + endian::readNext( + Ptr); + ((uint64_t *)MIB.AccessHistogram)[J] = decodeHistogramCount(Val); + } + } + Items.push_back({Id, MIB}); + } + return Items; +} + CallStackMap readStackInfo(const char *Ptr) { using namespace support; @@ -658,6 +691,8 @@ RawMemProfReader::readMemInfoBlocks(const char *Ptr) { return readMemInfoBlocksV3(Ptr); if (MemprofRawVersion == 4ULL) return readMemInfoBlocksV4(Ptr); + if (MemprofRawVersion == 5ULL) + return readMemInfoBlocksV5(Ptr); llvm_unreachable( "Panic: Unsupported version number when reading MemInfoBlocks"); } diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe index f69c0b12a89eb..fc530a4e07650 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw index ed679dc49c53b..d4920769a5c08 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe index 14cbfeb88eaf8..8810ee1090869 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw index c3ac49e8079e9..6943c18c74792 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe index 
1b4db88d8186d..4ab80401496fe 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw index e959e7679f56c..c6aec8d0b59e1 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe index 2822f2fa20434..5af6c81f07fad 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw index 05deb2e963a27..8958af941c59d 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe index 22c6136f3dda8..e9ec22cc96708 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw index 364aa1cefdd73..3952768d44c68 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe index 34db7e784208c..e50f66341ec44 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe and 
b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw index 7a7d3a6460aed..df6fcb10cd4fe 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe index f7d172314de6d..63eea4438dad8 100755 Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe differ diff --git a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw index 0920028b55840..b6a733af50f5d 100644 Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw differ diff --git a/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test b/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test index 3d30a627bdd79..ce534db77f4f4 100644 --- a/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test @@ -7,7 +7,7 @@ We expect 5 MIBs, each with different AccessHistogramValues. 
CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 5 CHECK-NEXT: NumAllocFunctions: 3 @@ -241,4 +241,4 @@ CHECK-NEXT: MinLifetimeAccessDensity: 56000 CHECK-NEXT: MaxLifetimeAccessDensity: 56000 CHECK-NEXT: AccessHistogramSize: 8 CHECK-NEXT: AccessHistogram: {{[0-9]+}} -CHECK-NEXT: AccessHistogramValues: 168 147 126 105 84 63 42 21 \ No newline at end of file +CHECK-NEXT: AccessHistogramValues: 168 147 126 105 84 63 42 21 diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test index e15df50bc1657..81550ebce40d3 100644 --- a/llvm/test/tools/llvm-profdata/memprof-basic.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic.test @@ -8,7 +8,7 @@ additional allocations which do not originate from the main binary are pruned. CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 2 CHECK-NEXT: NumAllocFunctions: 1 @@ -96,4 +96,4 @@ CHECK-NEXT: TotalLifetimeAccessDensity: 20000 CHECK-NEXT: MinLifetimeAccessDensity: 20000 CHECK-NEXT: MaxLifetimeAccessDensity: 20000 CHECK-NEXT: AccessHistogramSize: 0 -CHECK-NEXT: AccessHistogram: 0 \ No newline at end of file +CHECK-NEXT: AccessHistogram: 0 diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test index 79ce2ad838482..4a3f6201f0a35 100644 --- a/llvm/test/tools/llvm-profdata/memprof-inline.test +++ b/llvm/test/tools/llvm-profdata/memprof-inline.test @@ -5,7 +5,7 @@ RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary % CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 2 CHECK-NEXT: NumAllocFunctions: 2 diff --git a/llvm/test/tools/llvm-profdata/memprof-multi.test 
b/llvm/test/tools/llvm-profdata/memprof-multi.test index 62439823defd0..35f94dfe2c096 100644 --- a/llvm/test/tools/llvm-profdata/memprof-multi.test +++ b/llvm/test/tools/llvm-profdata/memprof-multi.test @@ -7,7 +7,7 @@ We expect 2 MIB entries, 1 each for the malloc calls in the program. CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 2 CHECK-NEXT: NumAllocFunctions: 1 diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test index 4ba58e3c870d5..79521f3aceb6d 100644 --- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test +++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test @@ -7,7 +7,7 @@ We expect 2 different MIBs with histogram values. This test is to make sure we p CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 2 CHECK-NEXT: NumAllocFunctions: 1 @@ -96,4 +96,4 @@ CHEC-NEXT MinLifetimeAccessDensity: 8000 CHEC-NEXT MaxLifetimeAccessDensity: 8000 CHEC-NEXT AccessHistogramSize: 6 CHEC-NEXT AccessHistogram: {{[0-9]+}} -CHEC-NEXT AccessHistogramValues: -2 -0 -0 -0 -1 -1 \ No newline at end of file +CHEC-NEXT AccessHistogramValues: -2 -0 -0 -0 -1 -1 diff --git a/llvm/test/tools/llvm-profdata/memprof-pic.test b/llvm/test/tools/llvm-profdata/memprof-pic.test index 78d2c5c54feb1..66203ef9248ff 100644 --- a/llvm/test/tools/llvm-profdata/memprof-pic.test +++ b/llvm/test/tools/llvm-profdata/memprof-pic.test @@ -11,7 +11,7 @@ RUN: llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/I CHECK: MemprofProfile: CHECK-NEXT: Summary: -CHECK-NEXT: Version: 4 +CHECK-NEXT: Version: 5 CHECK-NEXT: NumSegments: {{[0-9]+}} CHECK-NEXT: NumMibInfo: 2 CHECK-NEXT: NumAllocFunctions: 1 @@ -100,4 +100,4 @@ CHECK-NEXT: TotalLifetimeAccessDensity: 20000 CHECK-NEXT: 
MinLifetimeAccessDensity: 20000 CHECK-NEXT: MaxLifetimeAccessDensity: 20000 CHECK-NEXT: AccessHistogramSize: 0 -CHECK-NEXT: AccessHistogram: 0 \ No newline at end of file +CHECK-NEXT: AccessHistogram: 0