Skip to content

Commit db5f51f

Browse files
committed
Experimental unwinding - WIP
Ensure we use elf addresses instead of absolute addresses
1 parent da416ea commit db5f51f

File tree

5 files changed

+188
-7
lines changed

5 files changed

+188
-7
lines changed

include/async-profiler/codeCache.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#ifndef _CODECACHE_H
1818
#define _CODECACHE_H
1919

20-
// #include <jvmti.h>
20+
#include <stdint.h>
2121

2222
#define NO_MIN_ADDRESS ((const void *)-1)
2323
#define NO_MAX_ADDRESS ((const void *)0)
@@ -78,7 +78,7 @@ class CodeCache {
7878
short _lib_index;
7979
const void *_min_address;
8080
const void *_max_address;
81-
const char *_text_base;
81+
const void *_text_base;
8282

8383
void **_got_start;
8484
void **_got_end;
@@ -113,7 +113,7 @@ class CodeCache {
113113

114114
void setTextBase(const char *text_base) { _text_base = text_base; }
115115

116-
const char *getTextBase() { return _text_base; }
116+
const void *getTextBase() { return _text_base; }
117117

118118
void **gotStart() const { return _got_start; }
119119

@@ -136,7 +136,7 @@ class CodeCache {
136136
void makeGotPatchable();
137137

138138
void setDwarfTable(FrameDesc *table, int length);
139-
FrameDesc *findFrameDesc(const void *pc);
139+
// Finds the frame description for an address expressed relative to the text
// base of this library (callers pass `pc - getTextBase()`).
// The parameter type is uintptr_t so that this declaration matches the
// out-of-line definition in codeCache.cpp on every platform (uint64_t and
// uintptr_t are distinct types on some ABIs).
FrameDesc *findFrameDesc(uintptr_t elf_address);
140140
};
141141

142142
class CodeCacheArray {

src/async-profiler/codeCache.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@
2020
#include "codeCache.h"
2121
#include "dwarf.h"
2222
#include "os.h"
23+
2324
#include <stdint.h>
2425
#include <stdlib.h>
2526
#include <string.h>
2627
#include <sys/mman.h>
28+
#include <limits>
29+
#include <cassert>
2730

2831
char *NativeFunc::create(const char *name, short lib_index) {
2932
NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
@@ -216,8 +219,9 @@ void CodeCache::setDwarfTable(FrameDesc *table, int length) {
216219
_dwarf_table_length = length;
217220
}
218221

219-
FrameDesc *CodeCache::findFrameDesc(const void *pc) {
220-
u32 target_loc = (const char *)pc - _text_base;
222+
FrameDesc *CodeCache::findFrameDesc(uintptr_t elf_address) {
223+
assert(elf_address < std::numeric_limits<u32>::max());
224+
const u32 target_loc = (const u32)elf_address;
221225
int low = 0;
222226
int high = _dwarf_table_length - 1;
223227

src/async-profiler/stackWalker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer,
8787
CodeCacheArray *cache) {
8888
FrameDesc *f;
8989
CodeCache *cc = findLibraryByAddress(cache, sc.pc);
90-
if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) {
90+
if (cc == NULL || (f = cc->findFrameDesc(static_cast<const char*>(sc.pc) - static_cast<const char*>(cc->getTextBase()))) == NULL) {
9191
f = &FrameDesc::default_frame;
9292
}
9393
// const char *sym = cc?cc->binarySearch(sc.pc):"unknown";

src/async-profiler/symbols_linux.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,9 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array,
678678
printf("offset from get_elf_offset: %lx \n", elf_offset);
679679
printf("last readable: %lx \n", last_readable_base);
680680
}
681+
else {
682+
printf("Failed to read elf offsets \n");
683+
}
681684

682685
// Do not parse the same executable twice, e.g. on Alpine Linux
683686
if (parsed_inodes.insert(map.dev() | inode << 16).second) {

test/dwarf_unwind-ut.cc

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#include <gtest/gtest.h>
2+
3+
#include "savecontext.hpp"
4+
#include "stackWalker.h"
5+
#include "unwind_state.hpp"
6+
7+
#include <array>
8+
9+
#include "async-profiler/codeCache.h"
10+
#include "async-profiler/stack_context.h"
11+
#include "async-profiler/symbols.h"
12+
13+
// Retrieves instruction pointer
// Expands to a GNU statement expression that declares a local label at the
// point of use and evaluates to the address of that label (taken with the
// `&&label` extension), cast to unsigned long. Both constructs are GNU
// extensions, so this only builds with compilers that support them.
14+
#define _THIS_IP_ \
15+
({ \
16+
__label__ __here; \
17+
__here: \
18+
(unsigned long)&&__here; \
19+
})
20+
21+
// #include "ddprof_defs.hpp"
22+
23+
// temp copy pasta
// Size in bytes of the buffer that receives a captured stack (4096 * 8).
// NOTE(review): presumably duplicates a constant from ddprof_defs.hpp, whose
// include is commented out above - confirm and remove the copy when possible.
24+
#define PERF_SAMPLE_STACK_SIZE (4096UL * 8)
25+
26+
// File-scope buffer handed to ddprof::save_context() by funcB() and wrapped
// in an ap::StackBuffer by the `simple` test.
std::byte stack[PERF_SAMPLE_STACK_SIZE];
27+
28+
DDPROF_NOINLINE size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs);
29+
DDPROF_NOINLINE size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs);
30+
31+
// Innermost test frame: prints its own code address, then captures the
// current registers and stack through ddprof::save_context().
//
// regs   : register array forwarded to ddprof::save_context(), which is
//          expected to populate it (the caller reads it afterwards).
// return : the value returned by ddprof::save_context(); the caller uses it
//          as the length of the captured stack.
size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  std::span<const std::byte> stack_bounds = ddprof::retrieve_stack_bounds();
  size_t captured_size = ddprof::save_context(stack_bounds, regs, stack);
  return captured_size;
}
38+
39+
// Outer test frame: prints its own code address and forwards to funcB().
// It exists so that the unwinding test has a second named frame to look for.
//
// regs   : register array passed straight through to funcB().
// return : whatever funcB() returned.
size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  size_t captured_size = funcB(regs);
  return captured_size;
}
43+
44+
// Unwinds the current process from a context captured through
// funcA() -> funcB() -> save_context() and checks that these three functions
// are the innermost entries of the resulting call chain.
TEST(dwarf_unwind, simple) {
  constexpr int k_max_depth = 128;

  CodeCacheArray cache_array;
  // Load libraries
  Symbols::parsePidLibraries(getpid(), &cache_array, false);
  std::array<uint64_t, ddprof::k_perf_register_count> regs{};
  const size_t size_stack = funcA(regs);
  // Without captured stack bytes there is nothing to unwind: stop here.
  ASSERT_GT(size_stack, 0u);

  ap::StackContext sc = ap::from_regs(std::span(regs));
  ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack);

  void *callchain[k_max_depth];
  const int n = stackWalk(&cache_array, sc, buffer,
                          const_cast<const void **>(callchain), k_max_depth, 0);
  // The checks below read the three innermost frames; make sure they exist
  // before touching them.
  ASSERT_GE(n, 3);

  // Value-initialized so that a frame without a matching library keeps a null
  // symbol instead of an indeterminate pointer.
  const char *syms[k_max_depth] = {};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(&cache_array, callchain[i]);
    if (code_cache) {
      syms[i] = code_cache->binarySearch(callchain[i]);
      printf("IP = %p - %s\n", callchain[i], syms[i] ? syms[i] : "unknown");
    }
  }

  // Constructing a std::string from a null pointer is undefined behaviour, so
  // fail cleanly when a frame could not be symbolized.
  for (int i = 0; i < 3; ++i) {
    ASSERT_TRUE(syms[i] != nullptr) << "no symbol for frame " << i;
  }

  // Check that we found the expected functions during unwinding
  ASSERT_TRUE(std::string(syms[0]).find("save_context") != std::string::npos);
  ASSERT_TRUE(std::string(syms[1]).find("funcB") != std::string::npos);
  ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos);
}
75+
76+
#ifdef ALLOC_TRACKER
77+
#include "allocation_tracker.hpp"
78+
#include "perf_ringbuffer.hpp"
79+
#include "ringbuffer_holder.hpp"
80+
#include "ringbuffer_utils.hpp"
81+
#include <span>
82+
#include "defer.hpp"
83+
84+
namespace ddprof {
85+
static const uint64_t kSamplingRate = 1;
86+
87+
DDPROF_NOINLINE void func_save_sleep(size_t size);
88+
DDPROF_NOINLINE void func_intermediate_0(size_t size);
89+
DDPROF_NOINLINE void func_intermediate_1(size_t size);
90+
91+
// Innermost frame of the `remote` test's child process: reports the same fake
// allocation (address 0xdeadbeef, `size` bytes) to the allocation tracker
// 99999 times, pausing 100 microseconds after each report.
//
// size : allocation size forwarded to track_allocation_s().
DDPROF_NOINLINE void func_save_sleep(size_t size) {
  ddprof::TrackerThreadLocalState *state = AllocationTracker::get_tl_state();
  assert(state);
  for (int iteration = 1; iteration < 100000; ++iteration) {
    ddprof::AllocationTracker::track_allocation_s(0xdeadbeef, size, *state);
    // prevent tail call optimization
    getpid();
    usleep(100);
  }
}
104+
105+
// Outermost hop of the call chain
// func_intermediate_0 -> func_intermediate_1 -> func_save_sleep; it only
// forwards `size` to the next function.
void func_intermediate_0(size_t size) { func_intermediate_1(size); }
106+
107+
// Middle hop of the call chain
// func_intermediate_0 -> func_intermediate_1 -> func_save_sleep; it only
// forwards `size` to func_save_sleep().
void func_intermediate_1(size_t size) { func_save_sleep(size); }
108+
109+
// Unwinds a forked child process from the parent: the child keeps reporting
// allocations through the allocation tracker, the parent reads one sample
// from the ring buffer and walks the stack it contains using the libraries
// parsed from the child's pid.
TEST(dwarf_unwind, remote) {
  const size_t buf_size_order = 5;
  ddprof::RingBufferHolder ring_buffer{buf_size_order,
                                       RingBufferType::kMPSCRingBuffer};
  AllocationTracker::allocation_tracking_init(
      kSamplingRate,
      AllocationTracker::kDeterministicSampling |
          AllocationTracker::kTrackDeallocations,
      k_default_perf_stack_sample_size, ring_buffer.get_buffer_info(), {});
  defer { AllocationTracker::allocation_tracking_free(); };

  // Fork
  const pid_t temp_pid = fork();
  ASSERT_NE(temp_pid, -1) << "fork failed";
  if (temp_pid == 0) {
    func_intermediate_0(10);
    // char *const argList[] = {"sleep", "10", nullptr};
    // execvp("sleep", argList);
    // Leave the child right away: returning would hand control back to the
    // gtest runner inside the child and run the rest of the suite twice.
    _exit(0);
  }
  // cleanup the producer fork on every exit path, including early returns
  // taken by a failing ASSERT_* below.
  defer { kill(temp_pid, SIGTERM); };

  // Load libraries from the fork - the cache array is relevant to a single pid
  CodeCacheArray cache_array;
  // Give the child some time to start producing samples.
  sleep(1);
  Symbols::parsePidLibraries(temp_pid, &cache_array, false);
  // Establish a ring buffer ?

  ddprof::MPSCRingBufferReader reader{&ring_buffer.get_ring_buffer()};
  ASSERT_GT(reader.available_size(), 0);

  auto buf = reader.read_sample();
  ASSERT_FALSE(buf.empty());
  const perf_event_header *hdr =
      reinterpret_cast<const perf_event_header *>(buf.data());
  ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE);

  // convert based on mask for this watcher (default in this case)
  perf_event_sample *sample =
      hdr2samp(hdr, ddprof::perf_event_default_sample_type());

  std::span<const uint64_t, ddprof::k_perf_register_count> regs_span{
      sample->regs, ddprof::k_perf_register_count};
  ap::StackContext sc = ap::from_regs(regs_span);
  // Named differently from the file-scope `stack` buffer to avoid shadowing.
  std::span<const std::byte> sample_stack{
      reinterpret_cast<const std::byte *>(sample->data_stack),
      sample->size_stack};
  ap::StackBuffer buffer(sample_stack, sc.sp, sc.sp + sample->size_stack);

  void *callchain[ddprof::kMaxStackDepth];
  const int n =
      stackWalk(&cache_array, sc, buffer, const_cast<const void **>(callchain),
                ddprof::kMaxStackDepth, 0);

  // Value-initialized so that frames without a matching library keep a null
  // symbol instead of an indeterminate pointer.
  std::array<const char *, ddprof::kMaxStackDepth> syms{};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(&cache_array, callchain[i]);
    if (code_cache) {
      syms[i] = code_cache->binarySearch(callchain[i]);
      printf("IP = %p - %s\n", callchain[i], syms[i] ? syms[i] : "unknown");
    }
  }
}
173+
}
174+
#endif

0 commit comments

Comments
 (0)