Skip to content

Commit cf5f311

Browse files
[libc] Polish GPU benchmarking (#153900)
This patch provides cleanups and improvements for the GPU benchmarking infrastructure. The key changes are:

- Fix benchmark convergence bug: Round up the scaled iteration count (ceil) to ensure it grows properly. The previous truncating cast could leave the iteration count stuck at the same value (for example, with a scaling factor below 2, an iteration count of 1 truncates back to 1).
- Resolve the remaining compiler warning.
- Remove the unused `BenchmarkLogger` files: this was dead code that added maintenance and cognitive overhead without providing functionality.
- Improve build hygiene: Clean up headers and CMake dependencies to strictly follow the "include what you use" (IWYU) principle.
1 parent 4198649 commit cf5f311

File tree

9 files changed

+22
-153
lines changed

9 files changed

+22
-153
lines changed

libc/benchmarks/gpu/BenchmarkLogger.cpp

Lines changed: 0 additions & 97 deletions
This file was deleted.

libc/benchmarks/gpu/BenchmarkLogger.h

Lines changed: 0 additions & 29 deletions
This file was deleted.

libc/benchmarks/gpu/CMakeLists.txt

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,31 +38,25 @@ add_unittest_framework_library(
3838
SRCS
3939
LibcGpuBenchmark.cpp
4040
LibcGpuBenchmarkMain.cpp
41-
BenchmarkLogger.cpp
4241
HDRS
4342
LibcGpuBenchmark.h
44-
BenchmarkLogger.h
4543
DEPENDS
44+
libc.benchmarks.gpu.timing.timing
4645
libc.hdr.stdint_proxy
47-
libc.src.__support.big_int
48-
libc.src.__support.c_string
4946
libc.src.__support.CPP.string
5047
libc.src.__support.CPP.string_view
5148
libc.src.__support.CPP.type_traits
52-
libc.src.__support.CPP.limits
5349
libc.src.__support.CPP.algorithm
5450
libc.src.__support.CPP.atomic
5551
libc.src.__support.CPP.array
56-
libc.src.__support.fixed_point.fx_rep
57-
libc.src.__support.macros.properties.types
58-
libc.src.__support.OSUtil.osutil
59-
libc.src.__support.uint128
6052
libc.src.__support.FPUtil.fp_bits
53+
libc.src.__support.FPUtil.nearest_integer_operations
6154
libc.src.__support.FPUtil.sqrt
6255
libc.src.__support.fixedvector
63-
libc.src.time.clock
64-
libc.benchmarks.gpu.timing.timing
56+
libc.src.__support.GPU.utils
57+
libc.src.__support.time.gpu.time_utils
6558
libc.src.stdio.printf
59+
libc.src.time.clock
6660
)
6761

6862
add_subdirectory(src)

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@
22

33
#include "hdr/stdint_proxy.h"
44
#include "src/__support/CPP/algorithm.h"
5-
#include "src/__support/CPP/array.h"
65
#include "src/__support/CPP/atomic.h"
76
#include "src/__support/CPP/string.h"
87
#include "src/__support/FPUtil/FPBits.h"
8+
#include "src/__support/FPUtil/NearestIntegerOperations.h"
99
#include "src/__support/FPUtil/sqrt.h"
1010
#include "src/__support/GPU/utils.h"
1111
#include "src/__support/fixedvector.h"
1212
#include "src/__support/macros/config.h"
1313
#include "src/__support/time/gpu/time_utils.h"
1414
#include "src/stdio/printf.h"
15+
#include "src/time/clock.h"
1516

1617
namespace LIBC_NAMESPACE_DECL {
1718
namespace benchmarks {
@@ -134,11 +135,13 @@ void print_results(Benchmark *b) {
134135
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
135136

136137
LIBC_NAMESPACE::printf(
137-
"%-24s |%15.0f |%9.0f |%8llu |%8llu |%11llu |%9u |\n",
138+
"%-24s |%15.0f |%9.0f |%8llu |%8llu |%15llu |%9u |\n",
138139
b->get_test_name().data(), final_result.cycles,
139-
final_result.standard_deviation, (unsigned long long)final_result.min,
140-
(unsigned long long)final_result.max,
141-
(unsigned long long)final_result.total_iterations, (unsigned)num_threads);
140+
final_result.standard_deviation,
141+
static_cast<unsigned long long>(final_result.min),
142+
static_cast<unsigned long long>(final_result.max),
143+
static_cast<unsigned long long>(final_result.total_iterations),
144+
static_cast<unsigned>(num_threads));
142145
}
143146

144147
void print_header() {
@@ -147,7 +150,7 @@ void print_header() {
147150
benchmarks[0]->get_suite_name().data());
148151
LIBC_NAMESPACE::printf("%s", RESET);
149152
cpp::string titles = "Benchmark | Cycles (Mean) | Stddev | "
150-
" Min | Max | Iterations | Threads |\n";
153+
" Min | Max | Iterations | Threads |\n";
151154
LIBC_NAMESPACE::printf(titles.data());
152155

153156
cpp::string separator(titles.size(), '-');
@@ -226,7 +229,8 @@ BenchmarkResult benchmark(const BenchmarkOptions &options,
226229
change_ratio < options.epsilon)
227230
break;
228231

229-
iterations = static_cast<uint32_t>(iterations * options.scaling_factor);
232+
iterations = static_cast<uint32_t>(
233+
fputil::ceil(iterations * options.scaling_factor));
230234
}
231235

232236
const auto &estimator = rep.get_estimator();

libc/benchmarks/gpu/LibcGpuBenchmark.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
#ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
22
#define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
33

4-
#include "benchmarks/gpu/BenchmarkLogger.h"
54
#include "benchmarks/gpu/timing/timing.h"
5+
66
#include "hdr/stdint_proxy.h"
77
#include "src/__support/CPP/algorithm.h"
88
#include "src/__support/CPP/array.h"
9-
#include "src/__support/CPP/limits.h"
109
#include "src/__support/CPP/string_view.h"
1110
#include "src/__support/CPP/type_traits.h"
1211
#include "src/__support/FPUtil/FPBits.h"
1312
#include "src/__support/FPUtil/sqrt.h"
1413
#include "src/__support/macros/config.h"
15-
#include "src/time/clock.h"
1614

1715
namespace LIBC_NAMESPACE_DECL {
1816

libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ add_header_library(
44
timing.h
55
DEPENDS
66
libc.hdr.stdint_proxy
7-
libc.src.__support.common
87
libc.src.__support.macros.config
98
libc.src.__support.macros.attributes
109
libc.src.__support.CPP.algorithm
1110
libc.src.__support.CPP.array
11+
libc.src.__support.CPP.atomic
1212
libc.src.__support.CPP.type_traits
13+
libc.src.__support.GPU.utils
1314
)

libc/benchmarks/gpu/timing/amdgpu/timing.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "src/__support/CPP/atomic.h"
1616
#include "src/__support/CPP/type_traits.h"
1717
#include "src/__support/GPU/utils.h"
18-
#include "src/__support/common.h"
1918
#include "src/__support/macros/attributes.h"
2019
#include "src/__support/macros/config.h"
2120

libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ add_header_library(
44
timing.h
55
DEPENDS
66
libc.hdr.stdint_proxy
7-
libc.src.__support.common
87
libc.src.__support.macros.config
98
libc.src.__support.macros.attributes
109
libc.src.__support.CPP.algorithm
1110
libc.src.__support.CPP.array
11+
libc.src.__support.CPP.atomic
1212
libc.src.__support.CPP.type_traits
13+
libc.src.__support.GPU.utils
1314
)

libc/benchmarks/gpu/timing/nvptx/timing.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313
#include "src/__support/CPP/algorithm.h"
1414
#include "src/__support/CPP/array.h"
1515
#include "src/__support/CPP/atomic.h"
16-
#include "src/__support/CPP/type_traits.h"
1716
#include "src/__support/GPU/utils.h"
18-
#include "src/__support/common.h"
1917
#include "src/__support/macros/attributes.h"
2018
#include "src/__support/macros/config.h"
2119

@@ -66,7 +64,7 @@ template <typename F, typename T>
6664
uint64_t stop = gpu::processor_clock();
6765
cpp::atomic_thread_fence(cpp::MemoryOrder::ACQ_REL);
6866
asm("" ::"r"(stop));
69-
volatile T output = result;
67+
volatile auto output = result;
7068

7169
// Return the time elapsed.
7270
return stop - start;

0 commit comments

Comments
 (0)