diff --git a/include/common_symbol_errors.hpp b/include/common_symbol_errors.hpp index 0f5c44c78..cb5b3793d 100644 --- a/include/common_symbol_errors.hpp +++ b/include/common_symbol_errors.hpp @@ -14,10 +14,14 @@ using namespace std::string_view_literals; namespace ddprof { -inline constexpr std::array k_common_frame_names = { - "[truncated]"sv, "[unknown mapping]"sv, - "[unwind failure]"sv, "[incomplete]"sv, - "[lost]"sv, "[maximum pids]"sv}; +inline constexpr std::array k_common_frame_names = { + "[truncated]"sv, + "[unknown mapping]"sv, + "[unwind failure]"sv, + "[incomplete]"sv, + "[lost]"sv, + "[maximum pids]"sv, + "[unsampled mapping]"sv}; enum SymbolErrors : std::uint8_t { truncated_stack = 0, @@ -26,6 +30,7 @@ enum SymbolErrors : std::uint8_t { incomplete_stack, lost_event, max_pids, + unsampled_mapping, }; } // namespace ddprof diff --git a/include/live_allocation.hpp b/include/live_allocation.hpp index c8ba012df..631de1952 100644 --- a/include/live_allocation.hpp +++ b/include/live_allocation.hpp @@ -10,8 +10,10 @@ #include "unwind_output_hash.hpp" #include +#include #include #include +#include namespace ddprof { @@ -26,15 +28,39 @@ T &access_resize(std::vector &v, size_t index, class LiveAllocation { public: - // For allocations Value is the size - // This is the cumulative value and count for a given stack + struct SmapsEntry { + ProcessAddress_t start; + ProcessAddress_t end; + size_t rss_kb; + size_t accounted_size{}; + }; + struct ValueAndCount { int64_t _value = 0; int64_t _count = 0; }; + struct StackAndMapping { + const UnwindOutput *uw_output_ptr; // Pointer to an UnwindOutput in a set + ProcessAddress_t start_mmap; // Start of associated mapping + + bool operator==(const StackAndMapping &other) const { + return uw_output_ptr == other.uw_output_ptr && + start_mmap == other.start_mmap; + } + }; + + struct StackAndMappingHash { + std::size_t operator()(const StackAndMapping &s) const { + size_t seed = std::hash{}(s.uw_output_ptr); + hash_combine(seed, std::hash{}(s.start_mmap)); + return seed; + } + }; + using PprofStacks = - std::unordered_map; + std::unordered_map; + using MappingValuesMap = std::unordered_map; struct ValuePerAddress { int64_t _value = 0; @@ -42,32 +68,34 @@ class LiveAllocation { }; using AddressMap = std::unordered_map; + struct PidStacks { AddressMap _address_map; PprofStacks _unique_stacks; + std::set + unwind_output_set; // Set to store all unique UnwindOutput objects + std::vector entries; + MappingValuesMap + mapping_values; // New map to track memory usage per mapping }; using PidMap = std::unordered_map; using WatcherVector = std::vector; - // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes) - WatcherVector _watcher_vector; - // Allocation should be aggregated per stack trace - // instead of a stack, we would have a total size for this unique stack trace - // and a count. void register_allocation(const UnwindOutput &uo, uintptr_t addr, size_t size, int watcher_pos, pid_t pid) { PidMap &pid_map = access_resize(_watcher_vector, watcher_pos); PidStacks &pid_stacks = pid_map[pid]; - register_allocation(uo, addr, size, pid_stacks._unique_stacks, - pid_stacks._address_map); + if (pid_stacks.entries.empty()) { + pid_stacks.entries = parse_smaps(pid); + } + register_allocation_internal(uo, addr, size, pid_stacks); } void register_deallocation(uintptr_t addr, int watcher_pos, pid_t pid) { PidMap &pid_map = access_resize(_watcher_vector, watcher_pos); PidStacks &pid_stacks = pid_map[pid]; - if (!register_deallocation(addr, pid_stacks._unique_stacks, - pid_stacks._address_map)) { + if (!register_deallocation_internal(addr, pid_stacks)) { ++_stats._unmatched_deallocations; } } @@ -83,24 +111,30 @@ class LiveAllocation { } } - [[nodiscard]] [[nodiscard]] unsigned get_nb_unmatched_deallocations() const { + [[nodiscard]] unsigned get_nb_unmatched_deallocations() const { return _stats._unmatched_deallocations; } + static std::vector parse_smaps(pid_t pid); + + static int64_t upscale_with_mapping(const PprofStacks::value_type &stack, + PidStacks &pid_stacks); + void cycle() { _stats = {}; } + // no lint to avoid warning about member being public (should be refactored) + WatcherVector _watcher_vector; // NOLINT private: - // returns true if the deallocation was registered - static bool register_deallocation(uintptr_t address, PprofStacks &stacks, - AddressMap &address_map); - - // returns true if the allocation was registerd - static bool register_allocation(const UnwindOutput &uo, uintptr_t address, - int64_t value, PprofStacks &stacks, - AddressMap &address_map); + static bool register_deallocation_internal(uintptr_t address, + PidStacks &pid_stacks); + + static bool register_allocation_internal(const UnwindOutput &uo, + uintptr_t address, int64_t value, + PidStacks &pid_stacks); + struct { unsigned _unmatched_deallocations = {}; } _stats; }; -} // namespace ddprof \ No newline at end of file +} // namespace ddprof diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index e0dfdda0c..c0796856f 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -21,6 +21,8 @@ namespace ddprof { class Symbolizer; struct SymbolHdr; +using NumToStrCache = std::unordered_map; + struct DDProfPProf { /* single profile gathering several value types */ ddog_prof_Profile _profile{}; @@ -28,7 +30,7 @@ struct DDProfPProf { Tags _tags; bool use_process_adresses{true}; // avoid re-creating strings for all pid numbers - std::unordered_map _pid_str; + NumToStrCache _pid_str; }; struct DDProfValuePack { @@ -39,6 +41,11 @@ struct DDProfValuePack { DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx); +struct AggregationConfig { + EventAggregationModePos value_pos{kSumPos}; + bool show_samples{false}; + bool adjust_locations{true}; +}; /** * Aggregate to the existing profile the provided unwinding output. * @param uw_output @@ -49,9 +56,8 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx); DDRes pprof_aggregate(const UnwindOutput *uw_output, const SymbolHdr &symbol_hdr, const DDProfValuePack &pack, const PerfWatcher *watcher, - const FileInfoVector &file_infos, bool show_samples, - EventAggregationModePos value_pos, Symbolizer *symbolizer, - DDProfPProf *pprof); + const FileInfoVector &file_infos, AggregationConfig conf, + Symbolizer *symbolizer, DDProfPProf *pprof); DDRes pprof_reset(DDProfPProf *pprof); diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 7316365eb..f82c15bcb 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -31,11 +31,13 @@ struct FunLoc { struct UnwindOutput { void clear() { locs.clear(); + labels.clear(); container_id = k_container_id_unknown; exe_name = {}; thread_name = {}; } std::vector locs; + std::vector> labels; std::string_view container_id; std::string_view exe_name; std::string_view thread_name; diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 020af43b8..ff6c65128 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -5,6 +5,7 @@ #include "ddprof_worker.hpp" +#include "common_symbol_errors.hpp" #include "ddprof_context.hpp" #include "ddprof_perf_event.hpp" #include "ddprof_stats.hpp" @@ -59,6 +60,12 @@ DDRes report_lost_events(DDProfContext &ctx) { if (nb_lost > 0) { PerfWatcher *watcher = &ctx.watchers[watcher_idx]; + // todo: check for alternative ways to report lost events + // this only happens when we are in live mode only + if (watcher->pprof_indices[kSumPos].pprof_index == -1) { + LG_DBG("Watcher #%d can not report lost events", watcher_idx); + continue; + } UnwindState *us = ctx.worker_ctx.us; us->output.clear(); add_common_frame(us, SymbolErrors::lost_event); @@ -74,8 +81,8 @@ DDRes report_lost_events(DDProfContext &ctx) { nb_lost, value, watcher_idx); DDRES_CHECK_FWD(pprof_aggregate( &us->output, us->symbol_hdr, {value, nb_lost, 0}, watcher, - ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), false, kSumPos, - ctx.worker_ctx.symbolizer, + ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), + AggregationConfig{.value_pos = kSumPos}, ctx.worker_ctx.symbolizer, ctx.worker_ctx.pprof[ctx.worker_ctx.i_current_pprof])); ctx.worker_ctx.lost_events_per_watcher[watcher_idx] = 0; } @@ -243,60 +250,116 @@ void ddprof_reset_worker_stats() { DDRes aggregate_livealloc_stack( const LiveAllocation::PprofStacks::value_type &alloc_info, - DDProfContext &ctx, const PerfWatcher *watcher, DDProfPProf *pprof, - const SymbolHdr &symbol_hdr) { + int64_t upscalled_value, DDProfContext &ctx, const PerfWatcher *watcher, + DDProfPProf *pprof, const SymbolHdr &symbol_hdr) { + + if (upscalled_value) { + LG_DBG("Upscaling from %ld to %ld", alloc_info.second._value, + upscalled_value); + } + // default to the sampled value const DDProfValuePack pack{ - alloc_info.second._value, + upscalled_value ? upscalled_value : alloc_info.second._value, static_cast(std::max(0, alloc_info.second._count)), 0}; + AggregationConfig conf{.value_pos = kLiveSumPos, + .show_samples = ctx.params.show_samples}; + DDRES_CHECK_FWD( + pprof_aggregate(alloc_info.first.uw_output_ptr, symbol_hdr, pack, watcher, + ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), conf, + ctx.worker_ctx.symbolizer, pprof)); + return {}; +} + +DDRes aggregate_live_allocations_common(DDProfContext &ctx, + unsigned watcher_pos, pid_t pid, + LiveAllocation::PidStacks &pid_stacks) { + UnwindState *us = ctx.worker_ctx.us; + DDProfPProf *pprof = ctx.worker_ctx.pprof[ctx.worker_ctx.i_current_pprof]; + const SymbolHdr &symbol_hdr = us->symbol_hdr; + const PerfWatcher *watcher = &ctx.watchers[watcher_pos]; + + // Parse smaps entries for the current pid + auto new_entries = LiveAllocation::parse_smaps(pid); + if (!new_entries.empty()) { // Normal operation + pid_stacks.entries = new_entries; + } else { // Handle case when we get no entries (e.g., during shutdown) + for (auto &el : pid_stacks.entries) { + el.accounted_size = 0; + } + } + + // Step 1: Process existing samples + for (const auto &alloc_info : pid_stacks._unique_stacks) { + const int64_t upscaled_value = + ddprof::LiveAllocation::upscale_with_mapping(alloc_info, pid_stacks); + DDRES_CHECK_FWD(aggregate_livealloc_stack(alloc_info, upscaled_value, ctx, + watcher, pprof, symbol_hdr)); + } + + // Step 2: Add fake unwind frames for mappings without samples + // Create a fake unwind output to account for unsampled mappings + us->output.clear(); + us->output.pid = pid; + us->output.tid = 0; + add_common_frame(us, SymbolErrors::unsampled_mapping); + add_virtual_base_frame(us); + UnwindOutput &uo = us->output; + uo.labels.emplace_back("mapping", "other"); + for (const auto &el : pid_stacks.entries) { + if (el.accounted_size == 0) { + // Use the RSS value for this mapping as the "unsampled" value + const int64_t unsampled_value = el.rss_kb * 1000; // RSS in bytes + // Add the frame using the fake UnwindOutput + const DDProfValuePack pack{unsampled_value, 1, 0}; + AggregationConfig conf{.value_pos = kLiveSumPos, + .show_samples = false, + .adjust_locations = false}; + DDRES_CHECK_FWD( + pprof_aggregate(&uo, us->symbol_hdr, pack, watcher, + ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), + conf, ctx.worker_ctx.symbolizer, pprof)); + } + } + + LG_NTC( + "<%u> Number of Live allocations for PID %d = %lu, Unique stacks = %lu", + watcher_pos, pid, pid_stacks._address_map.size(), + pid_stacks._unique_stacks.size()); - DDRES_CHECK_FWD(pprof_aggregate( - &alloc_info.first, symbol_hdr, pack, watcher, - ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), - ctx.params.show_samples, kLiveSumPos, ctx.worker_ctx.symbolizer, pprof)); return {}; } +// Unified function for aggregating live allocations for a specific pid DDRes aggregate_live_allocations_for_pid(DDProfContext &ctx, pid_t pid) { - struct UnwindState *us = ctx.worker_ctx.us; - int const i_export = ctx.worker_ctx.i_current_pprof; - DDProfPProf *pprof = ctx.worker_ctx.pprof[i_export]; - const SymbolHdr &symbol_hdr = us->symbol_hdr; LiveAllocation &live_allocations = ctx.worker_ctx.live_allocation; + for (unsigned watcher_pos = 0; watcher_pos < live_allocations._watcher_vector.size(); ++watcher_pos) { auto &pid_map = live_allocations._watcher_vector[watcher_pos]; - const PerfWatcher *watcher = &ctx.watchers[watcher_pos]; - auto &pid_stacks = pid_map[pid]; - for (const auto &alloc_info : pid_stacks._unique_stacks) { - DDRES_CHECK_FWD(aggregate_livealloc_stack(alloc_info, ctx, watcher, pprof, - symbol_hdr)); - } + auto &pid_stacks = pid_map[pid]; // Get PidStacks for the specific pid + // Call the common function to handle the aggregation + DDRES_CHECK_FWD( + aggregate_live_allocations_common(ctx, watcher_pos, pid, pid_stacks)); } + return {}; } +// Unified function for aggregating live allocations across all PIDs DDRes aggregate_live_allocations(DDProfContext &ctx) { - // this would be more efficient if we could reuse the same stacks in - // libdatadog - UnwindState *us = ctx.worker_ctx.us; - int const i_export = ctx.worker_ctx.i_current_pprof; - DDProfPProf *pprof = ctx.worker_ctx.pprof[i_export]; - const SymbolHdr &symbol_hdr = us->symbol_hdr; - const LiveAllocation &live_allocations = ctx.worker_ctx.live_allocation; + LiveAllocation &live_allocations = ctx.worker_ctx.live_allocation; + for (unsigned watcher_pos = 0; watcher_pos < live_allocations._watcher_vector.size(); ++watcher_pos) { - const auto &pid_map = live_allocations._watcher_vector[watcher_pos]; - const PerfWatcher *watcher = &ctx.watchers[watcher_pos]; - for (const auto &pid_vt : pid_map) { - for (const auto &alloc_info : pid_vt.second._unique_stacks) { - DDRES_CHECK_FWD(aggregate_livealloc_stack(alloc_info, ctx, watcher, - pprof, symbol_hdr)); - } - LG_NTC("<%u> Number of Live allocations for PID%d=%lu, Unique stacks=%lu", - watcher_pos, pid_vt.first, pid_vt.second._address_map.size(), - pid_vt.second._unique_stacks.size()); + auto &pid_map = live_allocations._watcher_vector[watcher_pos]; + for (auto &pid_vt : pid_map) { + auto &pid_stacks = pid_vt.second; + // Call the common function to handle the aggregation for each PID + DDRES_CHECK_FWD(aggregate_live_allocations_common( + ctx, watcher_pos, pid_vt.first, pid_stacks)); } } + return {}; } @@ -427,6 +490,9 @@ DDRes ddprof_pr_sample(DDProfContext &ctx, perf_event_sample *sample, // Aggregate if unwinding went well (todo : fatal error propagation) if (!IsDDResFatal(res)) { struct UnwindState *us = ctx.worker_ctx.us; + if (sample->addr) { // todo: is this the correct trigger ? + us->output.labels.emplace_back("mapping", "heap"); + } if (Any(EventAggregationMode::kLiveSum & watcher->aggregation_mode) && sample->addr) { // null address means we should not account it @@ -450,11 +516,11 @@ DDRes ddprof_pr_sample(DDProfContext &ctx, perf_event_sample *sample, } const DDProfValuePack pack{static_cast(sample_val), 1, timestamp}; - - DDRES_CHECK_FWD(pprof_aggregate( - &us->output, us->symbol_hdr, pack, watcher, - us->dso_hdr.get_file_info_vector(), ctx.params.show_samples, kSumPos, - ctx.worker_ctx.symbolizer, pprof)); + AggregationConfig conf{.show_samples = ctx.params.show_samples}; + DDRES_CHECK_FWD(pprof_aggregate(&us->output, us->symbol_hdr, pack, + watcher, + us->dso_hdr.get_file_info_vector(), conf, + ctx.worker_ctx.symbolizer, pprof)); } } // We need to free the PID only after any aggregation operations diff --git a/src/live_allocation.cc b/src/live_allocation.cc index 7a27a7f53..d95a0d408 100644 --- a/src/live_allocation.cc +++ b/src/live_allocation.cc @@ -7,11 +7,19 @@ #include "logger.hpp" +#include +#include +#include + namespace ddprof { +using SmapsEntry = LiveAllocation::SmapsEntry; + +bool LiveAllocation::register_deallocation_internal(uintptr_t address, + PidStacks &pid_stacks) { + auto &stacks = pid_stacks._unique_stacks; + auto &address_map = pid_stacks._address_map; + auto &mapping_values = pid_stacks.mapping_values; -bool LiveAllocation::register_deallocation(uintptr_t address, - PprofStacks &stacks, - AddressMap &address_map) { // Find the ValuePerAddress object corresponding to the address auto map_iter = address_map.find(address); if (map_iter == address_map.end()) { @@ -21,16 +29,25 @@ bool LiveAllocation::register_deallocation(uintptr_t address, LG_DBG("Unmatched de-allocation at %lx", address); return false; } + ValuePerAddress const &v = map_iter->second; // Decrement count and value of the corresponding PprofStacks::value_type // object if (v._unique_stack) { + // Adjust the mapping values map for the allocation being deallocated + const ProcessAddress_t mapping_start = v._unique_stack->first.start_mmap; + mapping_values[mapping_start]._value -= v._value; + if (mapping_values[mapping_start]._count > 0) { + --mapping_values[mapping_start]._count; + } + v._unique_stack->second._value -= v._value; - if (v._unique_stack->second._count) { + if (v._unique_stack->second._count > 0) { --(v._unique_stack->second._count); } - if (!v._unique_stack->second._count) { + + if (v._unique_stack->second._count == 0) { // If count reaches 0, remove the UnwindOutput from stacks stacks.erase(v._unique_stack->first); } @@ -41,48 +58,216 @@ bool LiveAllocation::register_deallocation(uintptr_t address, return true; } -bool LiveAllocation::register_allocation(const UnwindOutput &uo, - uintptr_t address, int64_t value, - PprofStacks &stacks, - AddressMap &address_map) { +bool LiveAllocation::register_allocation_internal(const UnwindOutput &uo, + uintptr_t address, + int64_t value, + PidStacks &pid_stacks) { + if (uo.locs.empty()) { - // avoid sending empty stacks + // Avoid sending empty stacks LG_DBG("(LIVE_ALLOC) Avoid registering empty stack"); return false; } - // Find or create the PprofStacks::value_type object corresponding to the + + // Find or insert the UnwindOutput in the set + auto [uo_iter, inserted] = pid_stacks.unwind_output_set.insert(uo); + const UnwindOutput *uo_ptr = &(*uo_iter); + + // Find the corresponding smaps entry for the given address + auto entry = std::lower_bound(pid_stacks.entries.begin(), + pid_stacks.entries.end(), address, + [](const LiveAllocation::SmapsEntry &l, + uintptr_t addr) { return l.start < addr; }); + // If lower_bound points to the end or the start address is greater than the + // address, adjust the entry. + if (entry != pid_stacks.entries.begin() && + (entry == pid_stacks.entries.end() || address < entry->start)) { + --entry; // Move to the previous entry if it's not the beginning + } + ProcessAddress_t start_addr = 0; + if (entry == pid_stacks.entries.end() || address < entry->start) { + // Address not within any known mapping + LG_DBG("(LIVE_ALLOC) Address not within any known mapping: %lx", address); + } else { + start_addr = entry->start; + } + + // Create or find the PprofStacks::value_type object corresponding to the // UnwindOutput - auto iter = stacks.find(uo); + auto &stacks = pid_stacks._unique_stacks; + const StackAndMapping stack_key{uo_ptr, start_addr}; + auto iter = stacks.find(stack_key); if (iter == stacks.end()) { - iter = stacks.emplace(uo, ValueAndCount{}).first; + iter = stacks.emplace(stack_key, ValueAndCount{}).first; } + PprofStacks::value_type &unique_stack = *iter; - // Add the value to the address map + // Update the address map + auto &address_map = pid_stacks._address_map; ValuePerAddress &v = address_map[address]; - if (v._value) { - // unexpected, we already have an allocation here - // This means we missed a previous free - LG_DBG("Existing allocation: %lx (cleaning up)", address); + + if (unlikely(v._value)) { // we should not have double counts + // Existing allocation: handle cleanup + LG_DBG("(LIVE_ALLOC) Existing allocation found: %lx (cleaning up)", + address); if (v._unique_stack) { - // we should decrement count / value + // Adjust the mapping values map for the previous allocation + pid_stacks.mapping_values[v._unique_stack->first.start_mmap]._value -= + v._value; + if (pid_stacks.mapping_values[v._unique_stack->first.start_mmap]._count > + 0) { + --pid_stacks.mapping_values[v._unique_stack->first.start_mmap]._count; + } + v._unique_stack->second._value -= v._value; - if (v._unique_stack->second._count) { + if (v._unique_stack->second._count > 0) { --(v._unique_stack->second._count); } - // Should we erase the element here ? - // only if we are sure it is not the same as the one we are inserting. - if (v._unique_stack != &unique_stack && !v._unique_stack->second._count) { + + // Remove the old stack if its count is zero and not the same as the + // current unique stack + if (v._unique_stack != &unique_stack && + v._unique_stack->second._count == 0) { stacks.erase(v._unique_stack->first); } } } + // Set the new allocation value and stack association v._value = value; v._unique_stack = &unique_stack; v._unique_stack->second._value += value; ++(v._unique_stack->second._count); + + // Update the mapping values map + pid_stacks.mapping_values[unique_stack.first.start_mmap]._value += value; + ++pid_stacks.mapping_values[unique_stack.first.start_mmap]._count; + return true; } +std::vector LiveAllocation::parse_smaps(pid_t pid) { + const std::string smaps_file = absl::StrFormat("%s/%d/smaps", "/proc", pid); + + std::vector entries; + FILE *file = fopen(smaps_file.c_str(), "r"); + + if (!file) { + LG_WRN("Unable to access smaps file for %d", pid); + return entries; + } + char buffer[256]; + SmapsEntry current_entry; + + while (fgets(buffer, sizeof(buffer), file)) { + const std::string_view line(buffer); + + if (line.find("Rss:") == 0) { + // Extract RSS value (take characters after "Rss: ") + size_t rss = 0; + std::string_view rss_str = line.substr(4, line.find("kB") - 4); + rss_str.remove_prefix(std::min(rss_str.find_first_not_of(' '), + rss_str.size())); // trim leading spaces + auto res = + std::from_chars(rss_str.data(), rss_str.data() + rss_str.size(), rss); + if (res.ec != std::errc()) { + LG_DBG("Failed to convert RSS value in smaps file for %d", pid); + continue; + } + current_entry.rss_kb = rss; + // push back as we are not parsing other values + entries.push_back(current_entry); + current_entry = SmapsEntry(); // Reset for next entry + } else if (line.find('-') != std::string_view::npos) { + // Extract address + const std::string_view address = line.substr(0, line.find(' ')); + const size_t dash_position = address.find('-'); + unsigned long long start; + unsigned long long end; + // Convert start address + auto result = std::from_chars(address.data(), + address.data() + dash_position, start, 16); + if (result.ec != std::errc()) { + LG_DBG("Failed to convert start address in smaps file for %d", pid); + continue; + } + // Convert end address + result = std::from_chars(address.data() + dash_position + 1, + address.data() + address.size(), end, 16); + if (result.ec != std::errc()) { + LG_DBG("Failed to convert end address in smaps file for %d", pid); + // todo skip next rss + continue; + } + current_entry.end = end; + current_entry.start = start; + } + } + + fclose(file); + // should be a no-op + std::sort(entries.begin(), entries.end(), + [](const SmapsEntry &a, const SmapsEntry &b) { + return a.start < b.start; + }); + + return entries; +} + +int64_t +LiveAllocation::upscale_with_mapping(const PprofStacks::value_type &stack, + PidStacks &pid_stacks) { + const ValueAndCount &accounted_value = + pid_stacks.mapping_values[stack.first.start_mmap]; + // Find the corresponding smaps entry for the given address + auto entry = + std::lower_bound(pid_stacks.entries.begin(), pid_stacks.entries.end(), + stack.first.start_mmap, + [](const LiveAllocation::SmapsEntry &l, uintptr_t addr) { + return l.start < addr; + }); + + // We should already be matching an existing entry + // ie. entry->start == stack.first.start_mmap + if (entry != pid_stacks.entries.begin() && + (entry == pid_stacks.entries.end() || + stack.first.start_mmap < entry->start)) { + --entry; // Move to the previous entry if it's not the beginning + } + + // Check if a valid entry was found + if (entry == pid_stacks.entries.end() || + (stack.first.start_mmap != entry->start)) { + // todo: think about these cases + LG_DBG("Unable to upscale address for mapping at %lx", + stack.first.start_mmap); + return 0; + } + if (accounted_value._value == 0) { + LG_DBG("No accounted memory for mapping at %lx", stack.first.start_mmap); + return 0; + } + // save how much mem we have for this one + entry->accounted_size += stack.second._value; + // + // Mapping is 10 megs of RSS + // foo -> 10 samples / 100 kb; 10% of the memory in this mapping + // bar -> 90 samples / 900 kb; 90% of the memory in this mapping + // + // foo is upscaled to 1 Meg + // bar is upscaled to 9 megs + // + // Cases of interest: + // RSS is 0 -> is that OK not to show the allocations ? + // RSS does not shrink even if we no longer have allocations + // -> this is where malloc stats would help us + // + // What if we have a different profile type to show this? + // + constexpr int KILOBYTES_TO_BYTES = 1000; + return stack.second._value * entry->rss_kb * KILOBYTES_TO_BYTES / + accounted_value._value; +} + } // namespace ddprof diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 9114df611..aed30a965 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -30,7 +30,7 @@ using namespace std::string_view_literals; namespace ddprof { namespace { -constexpr size_t k_max_pprof_labels{8}; +constexpr size_t k_max_pprof_labels{10}; constexpr int k_max_value_types = DDPROF_PWT_LENGTH * static_cast(kNbEventAggregationModes); @@ -40,13 +40,12 @@ struct ActiveIdsResult { PerfWatcher *default_watcher = nullptr; }; -std::string_view pid_str(pid_t pid, - std::unordered_map &pid_strs) { - auto it = pid_strs.find(pid); +std::string_view pid_str(ProcessAddress_t num, NumToStrCache &pid_strs) { + auto it = pid_strs.find(num); if (it != pid_strs.end()) { return it->second; } - const auto pair = pid_strs.emplace(pid, std::to_string(pid)); + const auto pair = pid_strs.emplace(num, std::to_string(num)); return pair.first->second; } @@ -235,7 +234,7 @@ ProfValueTypes compute_pprof_values(const ActiveIdsResult &active_ids) { } size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, - std::unordered_map &pid_strs, + NumToStrCache &pid_strs, std::span labels) { constexpr std::string_view k_container_id_label = "container_id"sv; constexpr std::string_view k_process_id_label = "process_id"sv; @@ -246,6 +245,7 @@ size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, constexpr std::string_view k_thread_name_label = "thread_name"sv; constexpr std::string_view k_tracepoint_label = "tracepoint_type"sv; size_t labels_num = 0; + if (!uw_output.container_id.empty()) { labels[labels_num].key = to_CharSlice(k_container_id_label); labels[labels_num].str = to_CharSlice(uw_output.container_id); @@ -260,7 +260,7 @@ size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, labels[labels_num].str = to_CharSlice(pid_str(uw_output.pid, pid_strs)); ++labels_num; } - if (!watcher.suppress_tid) { + if (!watcher.suppress_tid && uw_output.tid != 0) { labels[labels_num].key = to_CharSlice(k_thread_id_label); labels[labels_num].str = to_CharSlice(pid_str(uw_output.tid, pid_strs)); ++labels_num; @@ -286,6 +286,15 @@ size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, labels[labels_num].str = to_CharSlice(uw_output.thread_name); ++labels_num; } + for (const auto &el : uw_output.labels) { + if (labels_num >= labels.size()) { + LG_WRN("Labels buffer exceeded at %s (%d)", __FUNCTION__, __LINE__); + break; + } + labels[labels_num].key = to_CharSlice(el.first); + labels[labels_num].str = to_CharSlice(el.second); + ++labels_num; + } DDPROF_DCHECK_FATAL(labels_num <= labels.size(), "pprof_aggregate - label buffer exceeded"); return labels_num; @@ -488,6 +497,8 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx) { // Allow the data to be split by container-id pprof->_tags.emplace_back(std::string("ddprof.custom_ctx"), std::string("container_id")); + pprof->_tags.emplace_back(std::string("ddprof.custom_ctx"), + std::string("mapping")); } if (ctx.params.remote_symbolization) { @@ -512,11 +523,10 @@ DDRes pprof_free_profile(DDProfPProf *pprof) { DDRes pprof_aggregate(const UnwindOutput *uw_output, const SymbolHdr &symbol_hdr, const DDProfValuePack &pack, const PerfWatcher *watcher, - const FileInfoVector &file_infos, bool show_samples, - EventAggregationModePos value_pos, Symbolizer *symbolizer, - DDProfPProf *pprof) { + const FileInfoVector &file_infos, AggregationConfig conf, + Symbolizer *symbolizer, DDProfPProf *pprof) { - const PProfIndices &pprof_indices = watcher->pprof_indices[value_pos]; + const PProfIndices &pprof_indices = watcher->pprof_indices[conf.value_pos]; ddog_prof_Profile *profile = &pprof->_profile; int64_t values[k_max_value_types] = {}; assert(pprof_indices.pprof_index != -1); @@ -528,7 +538,9 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, std::array locations_buff; std::span locs{uw_output->locs}; - locs = adjust_locations(watcher, locs); + if (conf.adjust_locations) { + locs = adjust_locations(watcher, locs); + } // Blaze results should remain alive until we aggregate the pprof data Symbolizer::BlazeResultsWrapper session_results; @@ -549,10 +561,10 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, .labels = {.ptr = labels.data(), .len = labels_num}, }; - if (show_samples) { + if (conf.show_samples) { ddprof_print_sample(std::span{locations_buff.data(), write_index}, - pack.value, uw_output->pid, uw_output->tid, value_pos, - *watcher); + pack.value, uw_output->pid, uw_output->tid, + conf.value_pos, *watcher); } auto res = ddog_prof_Profile_add(profile, sample, pack.timestamp); if (res.tag != DDOG_PROF_PROFILE_RESULT_OK) { @@ -575,4 +587,5 @@ DDRes pprof_reset(DDProfPProf *pprof) { pprof->_pid_str.clear(); return {}; } + } // namespace ddprof diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 648b3217c..d233892c3 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -163,7 +163,7 @@ TEST(DDProfExporter, simple) { res = pprof_create_profile(&pprofs, ctx); EXPECT_TRUE(IsDDResOK(res)); res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[0], file_infos, false, kSumPos, + &ctx.watchers[0], file_infos, AggregationConfig{}, ctx.worker_ctx.symbolizer, &pprofs); EXPECT_TRUE(IsDDResOK(res)); } diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 517c21ec2..4d4ec9845 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -72,8 +72,9 @@ TEST(DDProfPProf, aggregate) { DDRes res = pprof_create_profile(&pprof, ctx); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index != -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index != -1); + res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[0], file_infos, false, kSumPos, + &ctx.watchers[0], file_infos, AggregationConfig{}, ctx.worker_ctx.symbolizer, &pprof); EXPECT_TRUE(IsDDResOK(res)); @@ -114,9 +115,10 @@ TEST(DDProfPProf, just_live) { EXPECT_TRUE(ctx.watchers[1].pprof_indices[kLiveSumPos].pprof_count_index != -1); FileInfoVector file_infos; - res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[1], file_infos, false, kLiveSumPos, - ctx.worker_ctx.symbolizer, &pprof); + res = + pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[1], + file_infos, AggregationConfig{.value_pos = kLiveSumPos}, + ctx.worker_ctx.symbolizer, &pprof); EXPECT_TRUE(IsDDResOK(res)); test_pprof(&pprof); res = pprof_free_profile(&pprof); diff --git a/test/live_allocation-ut.cc b/test/live_allocation-ut.cc index c21e94a12..1f6a6d5b8 100644 --- a/test/live_allocation-ut.cc +++ b/test/live_allocation-ut.cc @@ -39,8 +39,9 @@ TEST(LiveAllocationTest, simple) { EXPECT_EQ(pid_stacks._address_map.size(), nb_registered_allocs); // though the stack is the same ASSERT_EQ(pid_stacks._unique_stacks.size(), 1); - const auto &el = pid_stacks._unique_stacks[uo]; - EXPECT_EQ(el._value, 100); + for (const auto &el : pid_stacks._unique_stacks) { + EXPECT_EQ(el.second._value, 100); + } { // allocate 10 uintptr_t addr = 0x10; @@ -110,9 +111,10 @@ TEST(LiveAllocationTest, overlap_registrations) { EXPECT_EQ(pid_stacks._unique_stacks.size(), 1); // Check that the value and count have the latest value - auto &el = pid_stacks._unique_stacks[uo]; - EXPECT_EQ(el._value, value * 2); - EXPECT_EQ(el._count, 1); + for (const auto &el : pid_stacks._unique_stacks) { + EXPECT_EQ(el.second._value, value * 2); + EXPECT_EQ(el.second._count, 1); + } // Deallocate the first allocation live_alloc.register_deallocation(addr, watcher_pos, pid); @@ -135,4 +137,13 @@ TEST(LiveAllocationTest, stats) { EXPECT_EQ(live_alloc.get_nb_unmatched_deallocations(), 0); } +TEST(LiveAllocationTest, smaps) { + LogHandle handle; + pid_t pid = getpid(); + auto entries = LiveAllocation::parse_smaps(pid); + for (auto entry : entries) { + LG_DBG("address: %ld, rss_kb: %zu", entry.start, entry.rss_kb); + } +} + } // namespace ddprof