Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lldb/include/lldb/Host/common/NativeThreadProtocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class NativeThreadProtocol {
return llvm::make_error<UnimplementedError>();
}

bool HasValidStopReason();

protected:
NativeProcessProtocol &m_process;
lldb::tid_t m_tid;
Expand Down
10 changes: 10 additions & 0 deletions lldb/source/Host/common/NativeThreadProtocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,13 @@ using namespace lldb_private;
NativeThreadProtocol::NativeThreadProtocol(NativeProcessProtocol &process,
lldb::tid_t tid)
: m_process(process), m_tid(tid) {}

bool NativeThreadProtocol::HasValidStopReason() {
ThreadStopInfo stop_info;
std::string stop_description;
if (!GetStopReason(stop_info, stop_description))
return false;

return stop_info.reason != lldb::eStopReasonInvalid &&
stop_info.reason != lldb::eStopReasonNone;
}
31 changes: 30 additions & 1 deletion lldb/test/API/gpu/amd/basic/TestBasicAmdGpuPlugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from lldbsuite.test.lldbtest import *
from amdgpu_testcase import *

SHADOW_THREAD_NAME = "AMD Native Shadow Thread"


class BasicAmdGpuTestCase(AmdGpuTestCaseBase):
def test_gpu_target_created_on_demand(self):
Expand All @@ -27,7 +29,9 @@ def test_gpu_target_created_on_demand(self):
# Make sure the GPU target was created and has the default thread.
self.assertEqual(self.dbg.GetNumTargets(), 2, "There are two targets")
gpu_thread = self.gpu_process.GetThreadAtIndex(0)
self.assertEqual(gpu_thread.GetName(), "AMD Native Shadow Thread", "GPU thread has the right name")
self.assertEqual(
gpu_thread.GetName(), SHADOW_THREAD_NAME, "GPU thread has the right name"
)

# The target should have the triple set correctly.
self.assertIn("amdgcn-amd-amdhsa", self.gpu_target.GetTriple())
Expand All @@ -45,6 +49,31 @@ def test_gpu_breakpoint_hit(self):
)
self.assertNotEqual(None, gpu_threads, "GPU should be stopped at breakpoint")

def test_num_threads(self):
"""Test that we get the expected number of threads."""
self.build()

# GPU breakpoint should get hit by at least one thread.
source = "hello_world.hip"
gpu_threads_at_bp = self.run_to_gpu_breakpoint(
source, "// GPU BREAKPOINT", "// CPU BREAKPOINT - BEFORE LAUNCH"
)
self.assertNotEqual(
None, gpu_threads_at_bp, "GPU should be stopped at breakpoint"
)

# We launch one thread for each character in the output string.
gpu_threads = self.gpu_process.threads
num_expected_threads = len("Hello, world!")
self.assertEqual(len(gpu_threads), num_expected_threads)

# The shadow thread should not be listed once we have real threads
for thread in gpu_threads:
self.assertNotEqual(SHADOW_THREAD_NAME, thread.GetName())

# All threads should be stopped at the breakpoint.
self.assertEqual(len(gpu_threads_at_bp), num_expected_threads)

def test_no_unexpected_stop(self):
"""Test that we do not unexpectedly hit a stop in the debugger when
No breakpoints are set."""
Expand Down
3 changes: 3 additions & 0 deletions lldb/test/API/gpu/amd/multi-wave/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
HIP_SOURCES := multi-wave.hip

include Makefile.rules
42 changes: 42 additions & 0 deletions lldb/test/API/gpu/amd/multi-wave/TestMultiWaveAmdGpuPlugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Basic tests for the AMDGPU plugin with a multi-wave kernel.
"""


import lldb
import lldbsuite.test.lldbutil as lldbutil
from lldbsuite.test.lldbtest import *
from amdgpu_testcase import *

SHADOW_THREAD_NAME = "AMD Native Shadow Thread"


class BasicAmdGpuTestCase(AmdGpuTestCaseBase):
def run_to_breakpoint(self):
# GPU breakpoint should get hit by at least one thread.
source = "multi-wave.hip"
gpu_threads_at_bp = self.run_to_gpu_breakpoint(
source, "// GPU BREAKPOINT", "// CPU BREAKPOINT - BEFORE LAUNCH"
)
self.assertNotEqual(
None, gpu_threads_at_bp, "GPU should be stopped at breakpoint"
)

return gpu_threads_at_bp


def test_num_threads(self):
"""Test that we get the expected number of threads."""
self.build()

gpu_threads_at_breakpoint = self.run_to_breakpoint()

# We launch 960 total threads (8 blocks * 120 threads per block).
gpu_threads = self.gpu_process.threads
self.assertEqual(len(gpu_threads), 960)

# But not all waves may reach the breakpoint at the same time.
# So here we check that we have at least one wave's worth of threads
# stopped at the breakpoint. With wave size of 64, this means we should
# have at least 56 threads (120 = 64 + 56) hitting the breakpoint.
self.assertGreaterEqual(len(gpu_threads_at_breakpoint), 56)
151 changes: 151 additions & 0 deletions lldb/test/API/gpu/amd/multi-wave/multi-wave.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#include <cstddef>
#include <hip/hip_runtime.h>

#include <iostream>
#include <limits>

/// \brief Checks if the provided error code is \p hipSuccess and if not,
/// prints an error message to the standard error output and terminates the
/// program with an error code.
constexpr int error_exit_code = -1;
#define HIP_CHECK(condition) \
{ \
const hipError_t error = condition; \
if (error != hipSuccess) { \
std::cerr << "An error encountered: \"" << hipGetErrorString(error) \
<< "\" at " << __FILE__ << ':' << __LINE__ << std::endl; \
std::exit(error_exit_code); \
} \
}

static constexpr unsigned divideCeil(unsigned Numerator, unsigned Denominator) {
assert(Denominator && "Division by zero");
uint64_t Bias = (Numerator != 0);
return (Numerator - Bias) / Denominator + Bias;
}

static std::string get_num_threads_in_wave(unsigned num_threads_in_block, unsigned wave_size ) {
std::vector<unsigned> num_threads_in_wave(divideCeil(num_threads_in_block, wave_size), wave_size);
if (num_threads_in_block % wave_size != 0) {
num_threads_in_wave.back() = num_threads_in_block % wave_size;
}

std::string result = "[";
for (size_t i = 0; i < num_threads_in_wave.size(); ++i) {
result += std::to_string(num_threads_in_wave[i]);
if (i != num_threads_in_wave.size() - 1) {
result += ", ";
}
}
result += "]";
return result;
}

__device__ unsigned block_counter = 0;
__device__ void wait_for_all_threads() {
// Wait until all threads in the block have reached this point.
__syncthreads();

// Wait until all blocks have reached this point.
const unsigned int num_blocks = gridDim.x * gridDim.y * gridDim.z;
const bool is_first_thread_in_block = (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);
if (is_first_thread_in_block) {
atomicAdd(&block_counter, 1);
while (atomicAdd(&block_counter, 0) < num_blocks) {
}
}

// Wait until all threads in the block have reached this point.
// This is to ensure that all blocks have incremented the block counter
// before continuing.
__syncthreads();
}

__device__ unsigned int get_global_idx() {
// Calculate block ID in the grid
unsigned block_id =
blockIdx.x + blockIdx.y * gridDim.y + blockIdx.z * gridDim.x * gridDim.y;

// Calculate thread ID within the block
unsigned thread_id_in_block = threadIdx.x + threadIdx.y * blockDim.x +
threadIdx.z * blockDim.x * blockDim.y;

// Calculate global thread ID
unsigned threads_per_block = blockDim.x * blockDim.y * blockDim.z;
unsigned global_thread_id = block_id * threads_per_block + thread_id_in_block;

return global_thread_id;
}

// Simple kernel that assigns each thread a unique ID
// and stores it in the output buffer.
__global__ void multi_wave_kernel(unsigned *buf, bool sync_threads) {
if (sync_threads) {
wait_for_all_threads();
}
unsigned int thread_idx = get_global_idx();
buf[thread_idx] = thread_idx; // GPU BREAKPOINT
}

int main() {
const dim3 blocks(2, 2, 2); // 3D grid specifying number of blocks to launch
const dim3 threads(5, 4, 6); // 3D grid specifying number of threads to launch
const size_t num_blocks = blocks.x * blocks.y * blocks.z;
const size_t threads_per_block = threads.x * threads.y * threads.z;
const size_t num_threads = num_blocks * threads_per_block;
const size_t buffer_size = num_threads * sizeof(unsigned);

int device;
HIP_CHECK(hipGetDevice(&device));
hipDeviceProp_t deviceProp;
HIP_CHECK(hipGetDeviceProperties(&deviceProp, device));
unsigned wave_size = deviceProp.warpSize;
printf("===================TEST CONFIGURATION===================\n");
printf("Running on device: %s\n", deviceProp.name);
printf("Wave size: %d\n", wave_size);
printf("Total number of threads: %zu\n", num_threads);
printf("Total number of blocks: %zu\n", num_blocks);
printf("Total number of waves: %d\n", divideCeil(num_threads, wave_size));
printf("Threads per block: %zu\n", threads_per_block);
printf("Waves per block: %d\n",
divideCeil(threads_per_block, wave_size));
printf("Threads per wave: %s\n",
get_num_threads_in_wave(threads_per_block, wave_size).c_str());
printf("===================TEST CONFIGURATION===================\n");
printf("\n\n");

// Allocate host vectors
std::vector<unsigned> h_buf(num_threads,
std::numeric_limits<unsigned>::max());

// Allocate device memory for the output data
unsigned *d_buf;
HIP_CHECK(hipMalloc(&d_buf, buffer_size));

// Copy data from host to device
printf("Copying data to device...\n");
HIP_CHECK(hipMemcpy(d_buf, h_buf.data(), buffer_size, hipMemcpyHostToDevice));

// Launch the kernel.
printf("Launching multi-wave kernel with %zu threads...\n",
num_threads); // CPU BREAKPOINT - BEFORE LAUNCH
multi_wave_kernel<<<blocks, threads, 0, hipStreamDefault>>>(d_buf, true);

// Copy data from device to host
printf("Copying data to host...\n"); // CPU BREAKPOINT - AFTER LAUNCH
HIP_CHECK(hipMemcpy(h_buf.data(), d_buf, buffer_size, hipMemcpyDeviceToHost));

// Free device memory
HIP_CHECK(hipFree(d_buf)); // CPU BREAKPOINT - AFTER FINISH

// Print the output
printf("Validating output...\n");
for (size_t i = 0; i < num_threads; ++i) {
if (h_buf[i] != i) {
std::cerr << "Error: Expected " << i << " but got " << h_buf[i] << std::endl;
return error_exit_code;
}
}
printf("Output matches expected values!\n");
return 0;
}
Loading