Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
354 changes: 126 additions & 228 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h"
#include "core/providers/qnn/builder/qnn_def.h"
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"

namespace onnxruntime {
Expand Down Expand Up @@ -61,6 +62,13 @@ class QnnSerializerConfig {
*/
void SetGraphName(std::string graph_name);

/**
* Gets the name of the graph being serialized.
*
* \return The name of the graph being serialized.
*/
const std::string& GetGraphName() const;

/**
* Get any QNN Graph configs required to configure this serializer and perform any
* preparation, such as creating output directories.
Expand All @@ -83,7 +91,6 @@ class QnnSerializerConfig {

protected:
QnnSerializerConfig(std::string backend_path);
const std::string& GetGraphName() const;

private:
std::string backend_path_;
Expand Down Expand Up @@ -183,12 +190,13 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
// NOTE: This function locks the internal `logger_recursive_mutex_`.
Status ResetQnnLogLevel(std::optional<logging::Severity> ort_log_level = std::nullopt);

Status ExtractBackendProfilingInfo();
Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id, std::ofstream& outfile,
bool backendSupportsExtendedEventData, bool tracelogging_provider_ep_enabled);
Status ExtractBackendProfilingInfo(qnn::profile::ProfilingInfo& profiling_info);

Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id, profile::Serializer& profile_writer,
bool backendSupportsExtendedEventData);

Status ExtractProfilingEvent(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel,
std::ofstream& outfile, bool backendSupportsExtendedEventData,
bool tracelogging_provider_ep_enabled);
profile::Serializer& profile_writer, bool backendSupportsExtendedEventData);

Status SetProfilingLevelETW(ProfilingLevel profiling_level_etw_param);

Expand Down Expand Up @@ -225,6 +233,8 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
// Resets the context priority to the session default as defined by context_priority_
Status ResetContextPriority();

bool ProfilingEnabled() { return profiling_enabled_; }

private:
Status LoadBackend();

Expand Down Expand Up @@ -307,26 +317,14 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
}

Status ExtractProfilingEventBasic(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel,
std::ofstream& outfile, bool tracelogging_provider_ep_enabled);
profile::Serializer& profile_writer);

Status ExtractProfilingEventExtended(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel,
std::ofstream& outfile, bool tracelogging_provider_ep_enabled);
static const std::string& GetUnitString(QnnProfile_EventUnit_t unitType);
static const std::unordered_map<QnnProfile_EventUnit_t, std::string>& GetUnitStringMap();
static const std::string GetEventTypeString(QnnProfile_EventType_t eventType);
static const std::string ExtractQnnScalarValue(const Qnn_Scalar_t& scalar);
profile::Serializer& profile_writer);

const char* QnnProfileErrorToString(QnnProfile_Error_t error);
std::string QnnErrorHandleToString(Qnn_ErrorHandle_t error);
QnnLog_Level_t MapOrtSeverityToQNNLogLevel(logging::Severity ort_log_level);
#ifdef _WIN32
void LogQnnProfileEventAsTraceLogging(
uint64_t timestamp,
const std::string& message,
const std::string& qnnScalarValue,
const std::string& unit,
const std::string& timingSource,
const std::string& eventLevel,
const char* eventIdentifier);
#endif

// Adds a new QNN context.
// Transfers ownership of `context_handle` (i.e., responsibility of freeing it) to this instance
Expand Down Expand Up @@ -437,6 +435,8 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
ProfilingLevel profiling_level_;
ProfilingLevel profiling_level_merge_;
const std::string profiling_file_path_;
bool system_lib_loaded_ = false;
bool profiling_enabled_ = false;
bool backend_initialized_ = false;
bool device_created_ = false;
bool context_created_ = false;
Expand Down
22 changes: 22 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@

namespace onnxruntime {
namespace qnn {
// Feature gate: defines QNN_SYSTEM_PROFILE_API_ENABLED when the QNN API version is
// 2.29 or newer (any major version above 2, or major 2 with minor >= 29).
// Being a preprocessor macro, it is not scoped by the enclosing namespaces.
#if QNN_API_VERSION_MAJOR > 2 || \
(QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 29))
#define QNN_SYSTEM_PROFILE_API_ENABLED
#endif

// QNN only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose
// except the following flags for dlopen, others should be done only
// when we really need them
Expand All @@ -32,9 +37,26 @@ enum class ProfilingLevel : uint8_t {
OFF = 0,
BASIC,
DETAILED,
OPTRACE,
INVALID
};

// Tags a profiling record with the kind of operation it was captured around.
// Values are contiguous starting at 0 and are spelled out explicitly so the numeric
// mapping stays visible and stable when reading or reordering the list.
// NOTE(review): presumably these mirror the QNN system-profile method types — confirm
// against the QNN SDK headers before relying on the numeric values.
enum class ProfilingMethodType : uint8_t {
  UNKNOWN = 0,
  EXECUTE = 1,
  FINALIZE = 2,
  EXECUTE_ASYNC = 3,
  CREATE_FROM_BINARY = 4,
  DEINIT = 5,
  CONTEXT_CREATE = 6,
  COMPOSE_GRAPHS = 7,
  EXECUTE_IPS = 8,
  GRAPH_COMPONENT = 9,
  LIB_LOAD = 10,
  APPLY_BINARY_SECTION = 11,
  CONTEXT_FINALIZE = 12,
};

// Defines performance modes available for HTP backend.
enum class HtpPerformanceMode : uint8_t {
kHtpDefault = 0,
Expand Down
63 changes: 60 additions & 3 deletions onnxruntime/core/providers/qnn/builder/qnn_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
#include "core/providers/qnn/builder/qnn_utils.h"
#include "core/providers/qnn/ort_api.h"
#include "core/providers/qnn/qnn_allocator.h"
Expand Down Expand Up @@ -105,7 +106,6 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
// This name must be same with the EPContext node name
const auto& graph_name = fused_node.Name();
ORT_RETURN_IF_ERROR(SetGraphInputOutputInfo(graph_viewer, fused_node, logger));

QnnModelWrapper qnn_model_wrapper = QnnModelWrapper(graph_viewer, logger,
qnn_backend_manager_->GetQnnInterface(),
qnn_backend_manager_->GetQnnBackendHandle(),
Expand All @@ -114,11 +114,33 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
qnn_backend_manager_->GetQnnBackendType(),
model_settings);
bool rt = true;

qnn::profile::ProfilingInfo profiling_info;
#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.graph_name = graph_name;
profiling_info.start_time = qnn::utils::GetTimeStampInUs();
}
#endif

rt = qnn_model_wrapper.CreateQnnGraph(qnn_backend_manager_->GetQnnContext(), graph_name, graph_configs);

#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.stop_time = qnn::utils::GetTimeStampInUs();
profiling_info.method_type = ProfilingMethodType::COMPOSE_GRAPHS;
}
#endif

if (!rt) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to initialize qnn_model_wrapper.");
}

// NOTE: This function returns immediately when profiling is disabled.
// Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes
// and not in production. We can improve synchronization for event profiling if it becomes an issue.
ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info));

std::vector<std::unique_ptr<qnn::IQnnNodeGroup>> qnn_node_groups;
qnn_node_groups.reserve(node_unit_holder.size());

Expand Down Expand Up @@ -160,15 +182,35 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,

Status QnnModel::FinalizeGraphs(const logging::Logger& logger) {
LOGS(logger, VERBOSE) << "FinalizeGraphs started.";

qnn::profile::ProfilingInfo profiling_info;
#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.start_time = qnn::utils::GetTimeStampInUs();
}
#endif

Qnn_ErrorHandle_t status = qnn_backend_manager_->GetQnnInterface().graphFinalize(graph_info_->Graph(),
qnn_backend_manager_->GetQnnProfileHandle(),
nullptr);

#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.stop_time = qnn::utils::GetTimeStampInUs();
profiling_info.method_type = ProfilingMethodType::FINALIZE;
profiling_info.graph_name = graph_info_->Name();
}
#endif

if (QNN_GRAPH_NO_ERROR != status) {
LOGS(logger, ERROR) << "Failed to finalize QNN graph. Error code: " << status;
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to finalize QNN graph.");
}

ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo());
// NOTE: This function returns immediately when profiling is disabled.
// Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes
// and not in production. We can improve synchronization for event profiling if it becomes an issue.
ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info));

LOGS(logger, VERBOSE) << "FinalizeGraphs completed.";
return Status::OK();
Expand Down Expand Up @@ -297,6 +339,14 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context,
std::lock_guard<std::mutex> lock(graph_exec_mutex_);

LOGS(logger, VERBOSE) << "Start execute QNN graph:" << graph_info_->Name();

qnn::profile::ProfilingInfo profiling_info;
#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.start_time = qnn::utils::GetTimeStampInUs();
}
#endif

auto profile_backend_handle = qnn_backend_manager_->GetQnnProfileHandle();
execute_status = qnn_interface.graphExecute(graph_info_->Graph(),
qnn_inputs.data(),
Expand All @@ -305,11 +355,18 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context,
static_cast<uint32_t>(qnn_outputs.size()),
profile_backend_handle,
nullptr);
#ifdef QNN_SYSTEM_PROFILE_API_ENABLED
if (qnn_backend_manager_->ProfilingEnabled()) {
profiling_info.stop_time = qnn::utils::GetTimeStampInUs();
profiling_info.method_type = ProfilingMethodType::EXECUTE;
profiling_info.graph_name = graph_info_->Name();
}
#endif

// NOTE: This function returns immediately when profiling is disabled.
// Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes
// and not in production. We can improve synchronization for event profiling if it becomes an issue.
ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo());
ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info));
}

if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == execute_status) {
Expand Down
Loading
Loading