diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 5bcb8ca394346..8ee523ce039ff 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -376,31 +376,34 @@ Status QnnBackendManager::LoadQnnSerializerBackend() { } Status QnnBackendManager::LoadQnnSystemLib() { + if (!system_lib_loaded_) { #ifdef _WIN32 - std::string system_lib_file = "QnnSystem.dll"; + std::string system_lib_file = "QnnSystem.dll"; #else - std::string system_lib_file = "libQnnSystem.so"; + std::string system_lib_file = "libQnnSystem.so"; #endif // #ifdef _WIN32 - LOGS_DEFAULT(INFO) << "Loading QnnSystem lib"; - std::filesystem::path lib_file_path(backend_path_.c_str()); - std::string sys_file_path(lib_file_path.remove_filename().string() + system_lib_file); - QnnSystemInterface_t* system_interface_provider{nullptr}; - auto rt = GetQnnInterfaceProvider(sys_file_path.c_str(), - "QnnSystemInterface_getProviders", - &system_lib_handle_, - {QNN_SYSTEM_API_VERSION_MAJOR, - QNN_SYSTEM_API_VERSION_MINOR, - QNN_SYSTEM_API_VERSION_PATCH}, - &system_interface_provider); - ORT_RETURN_IF_ERROR(rt); - Qnn_Version_t system_interface_version = GetQnnInterfaceApiVersion(system_interface_provider); - qnn_sys_interface_ = system_interface_provider->QNN_SYSTEM_INTERFACE_VER_NAME; - - LOGS_DEFAULT(INFO) << "Found valid system interface, version: " << system_interface_version.major - << "." << system_interface_version.minor - << " backend provider name: " << system_interface_provider->providerName; - + LOGS_DEFAULT(INFO) << "Loading QnnSystem lib"; + std::filesystem::path lib_file_path(backend_path_.c_str()); + std::string sys_file_path(lib_file_path.remove_filename().string() + system_lib_file); + QnnSystemInterface_t* system_interface_provider{nullptr}; + auto rt = GetQnnInterfaceProvider(sys_file_path.c_str(), + "QnnSystemInterface_getProviders", + &system_lib_handle_, + {QNN_SYSTEM_API_VERSION_MAJOR, + QNN_SYSTEM_API_VERSION_MINOR, + QNN_SYSTEM_API_VERSION_PATCH}, + &system_interface_provider); + ORT_RETURN_IF_ERROR(rt); + Qnn_Version_t system_interface_version = GetQnnInterfaceApiVersion(system_interface_provider); + qnn_sys_interface_ = system_interface_provider->QNN_SYSTEM_INTERFACE_VER_NAME; + + LOGS_DEFAULT(INFO) << "Found valid system interface, version: " << system_interface_version.major + << "." << system_interface_version.minor + << " backend provider name: " << system_interface_provider->providerName; + + system_lib_loaded_ = true; + } return Status::OK(); } @@ -639,6 +642,7 @@ Status QnnBackendManager::InitializeProfiling() { return Status::OK(); } + bool enable_optrace = false; QnnProfile_Level_t qnn_profile_level = QNN_PROFILE_LEVEL_BASIC; if (ProfilingLevel::BASIC == profiling_level_merge_) { qnn_profile_level = QNN_PROFILE_LEVEL_BASIC; @@ -646,10 +650,36 @@ Status QnnBackendManager::InitializeProfiling() { } else if (ProfilingLevel::DETAILED == profiling_level_merge_) { qnn_profile_level = QNN_PROFILE_LEVEL_DETAILED; LOGS_DEFAULT(VERBOSE) << "Profiling level set to detailed."; + } else if (ProfilingLevel::OPTRACE == profiling_level_merge_) { + qnn_profile_level = QNN_PROFILE_LEVEL_DETAILED; + enable_optrace = true; + LOGS_DEFAULT(VERBOSE) << "Profiling level set to optrace."; } + Qnn_ErrorHandle_t result = qnn_interface_.profileCreate(backend_handle_, qnn_profile_level, &profile_backend_handle_); ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to create QNN profile! Error: ", QnnErrorHandleToString(result)); +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + profiling_enabled_ = true; + ORT_RETURN_IF_ERROR(LoadQnnSystemLib()); + + if (enable_optrace) { + QnnProfile_Config_t optrace_config = QNN_PROFILE_CONFIG_INIT; + optrace_config.option = QNN_PROFILE_CONFIG_OPTION_ENABLE_OPTRACE; + optrace_config.enableOptrace = enable_optrace; + + const QnnProfile_Config_t* profile_configs[] = {&optrace_config, nullptr}; + result = qnn_interface_.profileSetConfig(profile_backend_handle_, profile_configs); + + ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to enable op trace! Error: ", QnnErrorHandleToString(result)); + } +#else + if (enable_optrace) { + LOGS_DEFAULT(WARNING) << "Profiling level set to optrace, but QNN SDK Version is older than 2.29.0. " + << "Profiling level will be set to detailed instead."; + } +#endif + return Status::OK(); } @@ -1128,6 +1158,13 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t ORT_RETURN_IF(nullptr == qnn_interface_.contextCreateFromBinary, "Invalid function pointer for contextCreateFromBinary."); + qnn::profile::ProfilingInfo profiling_info; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (ProfilingEnabled()) { + profiling_info.start_time = qnn::utils::GetTimeStampInUs(); + } +#endif + rt = qnn_interface_.contextCreateFromBinary(backend_handle_, device_handle_, context_configs, @@ -1135,9 +1172,20 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t buffer_length, &context, profile_backend_handle_); + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (ProfilingEnabled()) { + profiling_info.stop_time = qnn::utils::GetTimeStampInUs(); + profiling_info.method_type = ProfilingMethodType::CREATE_FROM_BINARY; + profiling_info.graph_name = node_name; + } +#endif + ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create context from binary. Error code: ", rt); ORT_RETURN_IF_ERROR(AddQnnContextHandle(context)); + ORT_RETURN_IF_ERROR(ExtractBackendProfilingInfo(profiling_info)); + #if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 26) } #endif @@ -1158,8 +1206,6 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t qnn_sys_interface_.systemContextFree(sys_ctx_handle); sys_ctx_handle = nullptr; - - ORT_RETURN_IF_ERROR(ExtractBackendProfilingInfo()); context_created_ = true; LOGS(*logger_, VERBOSE) << "Load from cached QNN Context completed."; @@ -1569,40 +1615,40 @@ void QnnBackendManager::ReleaseResources() { return; } -Status QnnBackendManager::ExtractBackendProfilingInfo() { +Status QnnBackendManager::ExtractBackendProfilingInfo(qnn::profile::ProfilingInfo& profiling_info) { if (ProfilingLevel::OFF == profiling_level_merge_ || ProfilingLevel::INVALID == profiling_level_merge_) { return Status::OK(); } - bool tracelogging_provider_ep_enabled = false; #ifdef _WIN32 auto& provider = QnnTelemetry::Instance(); if (provider.IsEnabled()) { auto level = provider.Level(); auto keyword = provider.Keyword(); if ((keyword & static_cast(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0 && level >= 5) { - tracelogging_provider_ep_enabled = true; + profiling_info.tracelogging_provider_ep_enabled = true; } } #endif // defined(_WIN32) // ETW disabled previously, but enabled now - if (ProfilingLevel::INVALID == profiling_level_etw_ && tracelogging_provider_ep_enabled) { + if (ProfilingLevel::INVALID == profiling_level_etw_ && profiling_info.tracelogging_provider_ep_enabled) { LOGS(*logger_, ERROR) << "ETW disabled previously, but enabled now. Can't do the switch! Won't output any profiling."; return Status::OK(); } // ETW enabled previously, but disabled now - if (ProfilingLevel::INVALID != profiling_level_etw_ && !tracelogging_provider_ep_enabled) { + if (ProfilingLevel::INVALID != profiling_level_etw_ && !profiling_info.tracelogging_provider_ep_enabled) { LOGS(*logger_, ERROR) << "ETW enabled previously, but disabled now. Can't do the switch! Won't output any profiling."; return Status::OK(); } - ORT_RETURN_IF(!tracelogging_provider_ep_enabled && profiling_file_path_.empty(), + ORT_RETURN_IF(!profiling_info.tracelogging_provider_ep_enabled && profiling_file_path_.empty(), "Need to specify a CSV file via provider option profiling_file_path if ETW not enabled."); ORT_RETURN_IF(nullptr == profile_backend_handle_, "Backend profile handle not valid."); + LOGS(*logger_, VERBOSE) << "Extracting profiling events for graph " << profiling_info.graph_name; const QnnProfile_EventId_t* profile_events{nullptr}; uint32_t num_events{0}; Qnn_ErrorHandle_t result = qnn_interface_.profileGetEvents(profile_backend_handle_, &profile_events, &num_events); @@ -1629,35 +1675,35 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { LOGS(*logger_, VERBOSE) << "The QNN backend does not support extended event data."; } - std::ofstream outfile; + profiling_info.output_filepath = profiling_file_path_; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + profiling_info.num_events = num_events; + profiling_info.qnn_system_interface = qnn_sys_interface_; +#endif + + profile::Serializer profile_writer(profiling_info); if (!profiling_file_path_.empty()) { - // Write to CSV in append mode - std::ifstream infile(profiling_file_path_.c_str()); - bool exists = infile.good(); - infile.close(); - - outfile.open(profiling_file_path_, std::ios_base::app); - ORT_RETURN_IF(!outfile.is_open(), "Failed to open profiling file: ", profiling_file_path_); - // If file didn't exist before, write the header - if (!exists) { - outfile << "Msg Timestamp,Message,Time,Unit of Measurement,Timing Source,Event Level,Event Identifier\n"; - } + ORT_RETURN_IF_ERROR(profile_writer.InitFile()); } for (size_t event_idx = 0; event_idx < num_events; event_idx++) { ORT_RETURN_IF_ERROR( - ExtractProfilingEvent(*(profile_events + event_idx), "ROOT", outfile, backendSupportsExtendedEventData, - tracelogging_provider_ep_enabled)); + ExtractProfilingEvent(*(profile_events + event_idx), "ROOT", profile_writer, + backendSupportsExtendedEventData)); ORT_RETURN_IF_ERROR( - ExtractProfilingSubEvents(*(profile_events + event_idx), outfile, backendSupportsExtendedEventData, - tracelogging_provider_ep_enabled)); + ExtractProfilingSubEvents(*(profile_events + event_idx), profile_writer, + backendSupportsExtendedEventData)); } +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + ORT_RETURN_IF_ERROR(profile_writer.SerializeEvents()); +#endif - if (outfile) { + if (!profiling_file_path_.empty()) { LOGS(*logger_, VERBOSE) << "Wrote QNN profiling events (" << num_events << ") to file (" << profiling_file_path_ << ")"; } - if (tracelogging_provider_ep_enabled) { + + if (profiling_info.tracelogging_provider_ep_enabled) { LOGS(*logger_, VERBOSE) << "Wrote QNN profiling events (" << num_events << ") to ETW"; } } @@ -1667,9 +1713,8 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() { Status QnnBackendManager::ExtractProfilingSubEvents( QnnProfile_EventId_t profile_event_id, - std::ofstream& outfile, - bool useExtendedEventData, - bool tracelogging_provider_ep_enabled) { + profile::Serializer& profile_writer, + bool useExtendedEventData) { const QnnProfile_EventId_t* profile_sub_events{nullptr}; uint32_t num_sub_events{0}; Qnn_ErrorHandle_t result = qnn_interface_.profileGetSubEvents(profile_event_id, &profile_sub_events, &num_sub_events); @@ -1678,13 +1723,28 @@ Status QnnBackendManager::ExtractProfilingSubEvents( if (num_sub_events > 0) { LOGS(*logger_, VERBOSE) << "profile_sub_events: " << profile_sub_events << " num_sub_events: " << num_sub_events; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + QnnSystemProfile_ProfileEventV1_t* parent_system_event = nullptr; + parent_system_event = profile_writer.GetParentSystemEvent(profile_event_id); + if (parent_system_event == nullptr) { + parent_system_event = profile_writer.GetSystemEventPointer(profile_event_id); + profile_writer.AddSubEventList(num_sub_events, parent_system_event); + } +#endif + for (size_t sub_event_idx = 0; sub_event_idx < num_sub_events; sub_event_idx++) { + QnnProfile_EventId_t subevent_id = *(profile_sub_events + sub_event_idx); + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + + ORT_RETURN_IF_ERROR(profile_writer.SetParentSystemEvent(subevent_id, parent_system_event)); + +#endif + ORT_RETURN_IF_ERROR( - ExtractProfilingEvent(*(profile_sub_events + sub_event_idx), "SUB-EVENT", outfile, useExtendedEventData, - tracelogging_provider_ep_enabled)); + ExtractProfilingEvent(subevent_id, "SUB-EVENT", profile_writer, useExtendedEventData)); ORT_RETURN_IF_ERROR( - ExtractProfilingSubEvents(*(profile_sub_events + sub_event_idx), outfile, useExtendedEventData, - tracelogging_provider_ep_enabled)); + ExtractProfilingSubEvents(subevent_id, profile_writer, useExtendedEventData)); } LOGS(*logger_, VERBOSE) << "Wrote QNN profiling sub events (" << num_sub_events << ")"; @@ -1695,167 +1755,44 @@ Status QnnBackendManager::ExtractProfilingSubEvents( Status QnnBackendManager::ExtractProfilingEvent( QnnProfile_EventId_t profile_event_id, - const std::string& eventLevel, - std::ofstream& outfile, - bool useExtendedEventData, - bool tracelogging_provider_ep_enabled) { + const std::string& event_level, + profile::Serializer& profile_writer, + bool useExtendedEventData) { if (useExtendedEventData) { - return ExtractProfilingEventExtended(profile_event_id, eventLevel, outfile, tracelogging_provider_ep_enabled); + return ExtractProfilingEventExtended(profile_event_id, event_level, profile_writer); } else { - return ExtractProfilingEventBasic(profile_event_id, eventLevel, outfile, tracelogging_provider_ep_enabled); + return ExtractProfilingEventBasic(profile_event_id, event_level, profile_writer); } } Status QnnBackendManager::ExtractProfilingEventBasic( QnnProfile_EventId_t profile_event_id, - const std::string& eventLevel, - std::ofstream& outfile, - bool tracelogging_provider_ep_enabled) { + const std::string& event_level, + profile::Serializer& profile_writer) { QnnProfile_EventData_t event_data; Qnn_ErrorHandle_t result = qnn_interface_.profileGetEventData(profile_event_id, &event_data); QnnProfile_Error_t errorCode = static_cast(result & 0xFFFF); ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile event data: " + std::string(QnnProfileErrorToString(errorCode))); - std::string message = GetEventTypeString(event_data.type); - std::string unit = GetUnitString(event_data.unit); - - if (outfile) { - outfile << "UNKNOWN" - << "," - << message << "," - << event_data.value << "," - << unit << "," - << "BACKEND" - << "," - << eventLevel << "," - << (event_data.identifier ? event_data.identifier : "NULL") << "\n"; - } - - if (tracelogging_provider_ep_enabled) { -#ifdef _WIN32 - LogQnnProfileEventAsTraceLogging( - (uint64_t)0, - message, - std::to_string(event_data.value), - unit, - "BACKEND", - eventLevel, - (event_data.identifier ? event_data.identifier : "NULL")); -#endif - } + ORT_RETURN_IF_ERROR(profile_writer.ProcessEvent(profile_event_id, event_level, event_data)); return Status::OK(); } Status QnnBackendManager::ExtractProfilingEventExtended( QnnProfile_EventId_t profile_event_id, - const std::string& eventLevel, - std::ofstream& outfile, - bool tracelogging_provider_ep_enabled) { + const std::string& event_level, + profile::Serializer& profile_writer) { QnnProfile_ExtendedEventData_t event_data_extended; auto resultGetExtendedEventData = qnn_interface_.profileGetExtendedEventData(profile_event_id, &event_data_extended); QnnProfile_Error_t errorCode = static_cast(resultGetExtendedEventData & 0xFFFF); ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != errorCode, "Failed to get profile event data: " + std::string(QnnProfileErrorToString(errorCode))); - // need to check the version first - std::string message = GetEventTypeString(event_data_extended.v1.type); - std::string unit = GetUnitString(event_data_extended.v1.unit); - - if (outfile) { - if (event_data_extended.version == QNN_PROFILE_DATA_VERSION_1) { - outfile << event_data_extended.v1.timestamp << "," - << message << "," - << ExtractQnnScalarValue(event_data_extended.v1.value) << "," - << unit << "," - << "BACKEND" - << "," - << eventLevel << "," - << (event_data_extended.v1.identifier ? event_data_extended.v1.identifier : "NULL") << "\n"; - } - } - - if (tracelogging_provider_ep_enabled) { -#ifdef _WIN32 - LogQnnProfileEventAsTraceLogging( - event_data_extended.v1.timestamp, - message, - ExtractQnnScalarValue(event_data_extended.v1.value), - unit, - "BACKEND", - eventLevel, - (event_data_extended.v1.identifier ? event_data_extended.v1.identifier : "NULL")); -#endif - } + ORT_RETURN_IF_ERROR(profile_writer.ProcessExtendedEvent(profile_event_id, event_level, event_data_extended)); return Status::OK(); } -#ifdef _WIN32 -void QnnBackendManager::LogQnnProfileEventAsTraceLogging( - uint64_t timestamp, - const std::string& message, - const std::string& qnnScalarValue, - const std::string& unit, - const std::string& timingSource, - const std::string& eventLevel, - const char* eventIdentifier) { - QnnTelemetry& qnn_telemetry = QnnTelemetry::Instance(); - qnn_telemetry.LogQnnProfileEvent(timestamp, message, qnnScalarValue, unit, timingSource, eventLevel, eventIdentifier); -} -#endif - -const std::string& QnnBackendManager::GetUnitString(QnnProfile_EventUnit_t unitType) { - const auto& unitStringMap = GetUnitStringMap(); - auto it = unitStringMap.find(unitType); - if (it != unitStringMap.end()) { - return it->second; - } - static const std::string unknown = "UNKNOWN"; - return unknown; -} - -const std::unordered_map& QnnBackendManager::GetUnitStringMap() { - static const std::unordered_map unitStringMap = { - {QNN_PROFILE_EVENTUNIT_MICROSEC, "US"}, - {QNN_PROFILE_EVENTUNIT_BYTES, "BYTES"}, - {QNN_PROFILE_EVENTUNIT_CYCLES, "CYCLES"}, - {QNN_PROFILE_EVENTUNIT_COUNT, "COUNT"}, - {QNN_PROFILE_EVENTUNIT_OBJECT, "OBJECT"}, - {QNN_PROFILE_EVENTUNIT_BACKEND, "BACKEND"}}; - return unitStringMap; -} - -const std::string QnnBackendManager::GetEventTypeString(QnnProfile_EventType_t eventType) { - // Interpret the event type - switch (eventType) { - case QNN_PROFILE_EVENTTYPE_INIT: - return "INIT"; - case QNN_PROFILE_EVENTTYPE_FINALIZE: - return "FINALIZE"; - case QNN_PROFILE_EVENTTYPE_EXECUTE: - return "EXECUTE"; - case QNN_PROFILE_EVENTTYPE_NODE: - return "NODE"; - case QNN_PROFILE_EVENTTYPE_EXECUTE_QUEUE_WAIT: - return "EXECUTE QUEUE WAIT"; - case QNN_PROFILE_EVENTTYPE_EXECUTE_PREPROCESS: - return "EXECUTE PREPROCESS"; - case QNN_PROFILE_EVENTTYPE_EXECUTE_DEVICE: - return "EXECUTE DEVICE"; - case QNN_PROFILE_EVENTTYPE_EXECUTE_POSTPROCESS: - return "EXECUTE POSTPROCESS"; - case QNN_PROFILE_EVENTTYPE_DEINIT: - return "DE-INIT"; - case QNN_PROFILE_EVENTTYPE_BACKEND: - return "BACKEND"; - default: - if (eventType > QNN_PROFILE_EVENTTYPE_BACKEND) { - return "BACKEND"; - } - return "UNKNOWN"; - } -} - const char* QnnBackendManager::QnnProfileErrorToString(QnnProfile_Error_t error) { switch (error) { case QNN_PROFILE_NO_ERROR: @@ -1881,45 +1818,6 @@ std::string QnnBackendManager::QnnErrorHandleToString(Qnn_ErrorHandle_t error) { return utils::GetQnnErrorMessage(qnn_interface_, error); } -const std::string QnnBackendManager::ExtractQnnScalarValue(const Qnn_Scalar_t& scalar) { - switch (scalar.dataType) { - case QNN_DATATYPE_INT_8: - return std::to_string(static_cast(scalar.int8Value)); - case QNN_DATATYPE_INT_16: - return std::to_string(scalar.int16Value); - case QNN_DATATYPE_INT_32: - return std::to_string(scalar.int32Value); - case QNN_DATATYPE_INT_64: - return std::to_string(scalar.int64Value); - case QNN_DATATYPE_UINT_8: - return std::to_string(static_cast(scalar.uint8Value)); - case QNN_DATATYPE_UINT_16: - return std::to_string(scalar.uint16Value); - case QNN_DATATYPE_UINT_32: - return std::to_string(scalar.uint32Value); - case QNN_DATATYPE_UINT_64: - return std::to_string(scalar.uint64Value); - case QNN_DATATYPE_FLOAT_16: - return std::to_string(scalar.floatValue); - case QNN_DATATYPE_FLOAT_32: - return std::to_string(scalar.floatValue); - case QNN_DATATYPE_SFIXED_POINT_8: - case QNN_DATATYPE_SFIXED_POINT_16: - case QNN_DATATYPE_SFIXED_POINT_32: - return std::to_string(scalar.int32Value); // Assume using int types for signed fixed points. - case QNN_DATATYPE_UFIXED_POINT_8: - case QNN_DATATYPE_UFIXED_POINT_16: - case QNN_DATATYPE_UFIXED_POINT_32: - return std::to_string(scalar.uint32Value); // Assume using unsigned int types for unsigned fixed points. - case QNN_DATATYPE_BOOL_8: - return scalar.bool8Value ? "true" : "false"; - case QNN_DATATYPE_STRING: - return scalar.stringValue ? scalar.stringValue : "NULL"; - default: - return "UNKNOWN"; - } -} - QnnBackendManager::~QnnBackendManager() { ReleaseResources(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index 2a71c7391b180..604aa1fc78660 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -27,6 +27,7 @@ #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h" #include "core/providers/qnn/builder/qnn_def.h" +#include "core/providers/qnn/builder/qnn_profile_serializer.h" #include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h" namespace onnxruntime { @@ -61,6 +62,13 @@ class QnnSerializerConfig { */ void SetGraphName(std::string graph_name); + /** + * Gets the name of the graph being serialized. + * + * \return graph_name The name of the graph being serialized. + */ + const std::string& GetGraphName() const; + /** * Get any QNN Graph configs required to configure this serializer and perform any * preparation, such as creating output directories. @@ -83,7 +91,6 @@ class QnnSerializerConfig { protected: QnnSerializerConfig(std::string backend_path); - const std::string& GetGraphName() const; private: std::string backend_path_; @@ -183,12 +190,13 @@ class QnnBackendManager : public std::enable_shared_from_this // NOTE: This function locks the internal `logger_recursive_mutex_`. Status ResetQnnLogLevel(std::optional ort_log_level = std::nullopt); - Status ExtractBackendProfilingInfo(); - Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id, std::ofstream& outfile, - bool backendSupportsExtendedEventData, bool tracelogging_provider_ep_enabled); + Status ExtractBackendProfilingInfo(qnn::profile::ProfilingInfo& profiling_info); + + Status ExtractProfilingSubEvents(QnnProfile_EventId_t profile_event_id, profile::Serializer& profile_writer, + bool backendSupportsExtendedEventData); + Status ExtractProfilingEvent(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, - std::ofstream& outfile, bool backendSupportsExtendedEventData, - bool tracelogging_provider_ep_enabled); + profile::Serializer& profile_writer, bool backendSupportsExtendedEventData); Status SetProfilingLevelETW(ProfilingLevel profiling_level_etw_param); @@ -225,6 +233,8 @@ class QnnBackendManager : public std::enable_shared_from_this // Resets the context priority to the session default as defined by context_priority_ Status ResetContextPriority(); + bool ProfilingEnabled() { return profiling_enabled_; } + private: Status LoadBackend(); @@ -307,26 +317,14 @@ class QnnBackendManager : public std::enable_shared_from_this } Status ExtractProfilingEventBasic(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, - std::ofstream& outfile, bool tracelogging_provider_ep_enabled); + profile::Serializer& profile_writer); + Status ExtractProfilingEventExtended(QnnProfile_EventId_t profile_event_id, const std::string& eventLevel, - std::ofstream& outfile, bool tracelogging_provider_ep_enabled); - static const std::string& GetUnitString(QnnProfile_EventUnit_t unitType); - static const std::unordered_map& GetUnitStringMap(); - static const std::string GetEventTypeString(QnnProfile_EventType_t eventType); - static const std::string ExtractQnnScalarValue(const Qnn_Scalar_t& scalar); + profile::Serializer& profile_writer); + const char* QnnProfileErrorToString(QnnProfile_Error_t error); std::string QnnErrorHandleToString(Qnn_ErrorHandle_t error); QnnLog_Level_t MapOrtSeverityToQNNLogLevel(logging::Severity ort_log_level); -#ifdef _WIN32 - void LogQnnProfileEventAsTraceLogging( - uint64_t timestamp, - const std::string& message, - const std::string& qnnScalarValue, - const std::string& unit, - const std::string& timingSource, - const std::string& eventLevel, - const char* eventIdentifier); -#endif // Adds a new QNN context. // Transfers ownership of `context_handle` (i.e., responsibility of freeing it) to this instance @@ -437,6 +435,8 @@ class QnnBackendManager : public std::enable_shared_from_this ProfilingLevel profiling_level_; ProfilingLevel profiling_level_merge_; const std::string profiling_file_path_; + bool system_lib_loaded_ = false; + bool profiling_enabled_ = false; bool backend_initialized_ = false; bool device_created_ = false; bool context_created_ = false; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index 6fba6d847cb74..42f4d7bb60f34 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -14,6 +14,11 @@ namespace onnxruntime { namespace qnn { +#if QNN_API_VERSION_MAJOR > 2 || \ + (QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 29)) +#define QNN_SYSTEM_PROFILE_API_ENABLED +#endif + // QNN only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose // except the following flags for dlopen, others should be done only // when we really need them @@ -32,9 +37,26 @@ enum class ProfilingLevel : uint8_t { OFF = 0, BASIC, DETAILED, + OPTRACE, INVALID }; +enum class ProfilingMethodType : uint8_t { + UNKNOWN = 0, + EXECUTE, + FINALIZE, + EXECUTE_ASYNC, + CREATE_FROM_BINARY, + DEINIT, + CONTEXT_CREATE, + COMPOSE_GRAPHS, + EXECUTE_IPS, + GRAPH_COMPONENT, + LIB_LOAD, + APPLY_BINARY_SECTION, + CONTEXT_FINALIZE +}; + // Defines performance modes available for HTP backend. enum class HtpPerformanceMode : uint8_t { kHtpDefault = 0, diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 175a76b590895..85901ab6fdfec 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -10,6 +10,7 @@ #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h" +#include "core/providers/qnn/builder/qnn_profile_serializer.h" #include "core/providers/qnn/builder/qnn_utils.h" #include "core/providers/qnn/ort_api.h" #include "core/providers/qnn/qnn_allocator.h" @@ -105,7 +106,6 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, // This name must be same with the EPContext node name const auto& graph_name = fused_node.Name(); ORT_RETURN_IF_ERROR(SetGraphInputOutputInfo(graph_viewer, fused_node, logger)); - QnnModelWrapper qnn_model_wrapper = QnnModelWrapper(graph_viewer, logger, qnn_backend_manager_->GetQnnInterface(), qnn_backend_manager_->GetQnnBackendHandle(), @@ -114,11 +114,33 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, qnn_backend_manager_->GetQnnBackendType(), model_settings); bool rt = true; + + qnn::profile::ProfilingInfo profiling_info; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.graph_name = graph_name; + profiling_info.start_time = qnn::utils::GetTimeStampInUs(); + } +#endif + rt = qnn_model_wrapper.CreateQnnGraph(qnn_backend_manager_->GetQnnContext(), graph_name, graph_configs); + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.stop_time = qnn::utils::GetTimeStampInUs(); + profiling_info.method_type = ProfilingMethodType::COMPOSE_GRAPHS; + } +#endif + if (!rt) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to initialize qnn_model_wrapper."); } + // NOTE: This function returns immediately when profiling is disabled. + // Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes + // and not in production. We can improve synchronization for event profiling if it becomes an issue. + ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info)); + std::vector> qnn_node_groups; qnn_node_groups.reserve(node_unit_holder.size()); @@ -160,15 +182,35 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, Status QnnModel::FinalizeGraphs(const logging::Logger& logger) { LOGS(logger, VERBOSE) << "FinalizeGraphs started."; + + qnn::profile::ProfilingInfo profiling_info; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.start_time = qnn::utils::GetTimeStampInUs(); + } +#endif + Qnn_ErrorHandle_t status = qnn_backend_manager_->GetQnnInterface().graphFinalize(graph_info_->Graph(), qnn_backend_manager_->GetQnnProfileHandle(), nullptr); + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.stop_time = qnn::utils::GetTimeStampInUs(); + profiling_info.method_type = ProfilingMethodType::FINALIZE; + profiling_info.graph_name = graph_info_->Name(); + } +#endif + if (QNN_GRAPH_NO_ERROR != status) { LOGS(logger, ERROR) << "Failed to finalize QNN graph. Error code: " << status; return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to finalize QNN graph."); } - ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo()); + // NOTE: This function returns immediately when profiling is disabled. + // Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes + // and not in production. We can improve synchronization for event profiling if it becomes an issue. + ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info)); LOGS(logger, VERBOSE) << "FinalizeGraphs completed."; return Status::OK(); @@ -297,6 +339,14 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, std::lock_guard lock(graph_exec_mutex_); LOGS(logger, VERBOSE) << "Start execute QNN graph:" << graph_info_->Name(); + + qnn::profile::ProfilingInfo profiling_info; +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.start_time = qnn::utils::GetTimeStampInUs(); + } +#endif + auto profile_backend_handle = qnn_backend_manager_->GetQnnProfileHandle(); execute_status = qnn_interface.graphExecute(graph_info_->Graph(), qnn_inputs.data(), @@ -305,11 +355,18 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context, static_cast(qnn_outputs.size()), profile_backend_handle, nullptr); +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + if (qnn_backend_manager_->ProfilingEnabled()) { + profiling_info.stop_time = qnn::utils::GetTimeStampInUs(); + profiling_info.method_type = ProfilingMethodType::EXECUTE; + profiling_info.graph_name = graph_info_->Name(); + } +#endif // NOTE: This function returns immediately when profiling is disabled. // Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes // and not in production. We can improve synchronization for event profiling if it becomes an issue. - ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo()); + ORT_RETURN_IF_ERROR(qnn_backend_manager_->ExtractBackendProfilingInfo(profiling_info)); } if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == execute_status) { diff --git a/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.cc b/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.cc new file mode 100644 index 0000000000000..9405b5b63c549 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.cc @@ -0,0 +1,466 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "qnn_profile_serializer.h" +#include "core/providers/qnn/qnn_telemetry.h" + +namespace onnxruntime { +namespace qnn { +namespace profile { + +const std::unordered_map& GetUnitStringMap() { + static const std::unordered_map unitStringMap = { + {QNN_PROFILE_EVENTUNIT_MICROSEC, "US"}, + {QNN_PROFILE_EVENTUNIT_BYTES, "BYTES"}, + {QNN_PROFILE_EVENTUNIT_CYCLES, "CYCLES"}, + {QNN_PROFILE_EVENTUNIT_COUNT, "COUNT"}, + {QNN_PROFILE_EVENTUNIT_OBJECT, "OBJECT"}, + {QNN_PROFILE_EVENTUNIT_BACKEND, "BACKEND"}}; + return unitStringMap; +} + +const std::string& GetUnitString(QnnProfile_EventUnit_t unitType) { + const auto& unitStringMap = GetUnitStringMap(); + auto it = unitStringMap.find(unitType); + if (it != unitStringMap.end()) { + return it->second; + } + static const std::string unknown = "UNKNOWN"; + return unknown; +} + +std::string GetEventTypeString(QnnProfile_EventType_t event_type) { + // Interpret the event type + switch (event_type) { + case QNN_PROFILE_EVENTTYPE_INIT: + return "INIT"; + case QNN_PROFILE_EVENTTYPE_FINALIZE: + return "FINALIZE"; + case QNN_PROFILE_EVENTTYPE_EXECUTE: + return "EXECUTE"; + case QNN_PROFILE_EVENTTYPE_NODE: + return "NODE"; + case QNN_PROFILE_EVENTTYPE_EXECUTE_QUEUE_WAIT: + return "EXECUTE QUEUE WAIT"; + case QNN_PROFILE_EVENTTYPE_EXECUTE_PREPROCESS: + return "EXECUTE PREPROCESS"; + case QNN_PROFILE_EVENTTYPE_EXECUTE_DEVICE: + return "EXECUTE DEVICE"; + case QNN_PROFILE_EVENTTYPE_EXECUTE_POSTPROCESS: + return "EXECUTE POSTPROCESS"; + case QNN_PROFILE_EVENTTYPE_DEINIT: + return "DE-INIT"; + case QNN_PROFILE_EVENTTYPE_BACKEND: + return "BACKEND"; + default: + if (event_type > QNN_PROFILE_EVENTTYPE_BACKEND) { + return "BACKEND"; + } + return "UNKNOWN"; + } +} + +std::string ExtractQnnScalarValue(const Qnn_Scalar_t& scalar) { + switch (scalar.dataType) { + case QNN_DATATYPE_INT_8: + return std::to_string(static_cast(scalar.int8Value)); + case QNN_DATATYPE_INT_16: + return std::to_string(scalar.int16Value); + case QNN_DATATYPE_INT_32: + return std::to_string(scalar.int32Value); + case QNN_DATATYPE_INT_64: + return std::to_string(scalar.int64Value); + case QNN_DATATYPE_UINT_8: + return std::to_string(static_cast(scalar.uint8Value)); + case QNN_DATATYPE_UINT_16: + return std::to_string(scalar.uint16Value); + case QNN_DATATYPE_UINT_32: + return std::to_string(scalar.uint32Value); + case QNN_DATATYPE_UINT_64: + return std::to_string(scalar.uint64Value); + case QNN_DATATYPE_FLOAT_16: + return std::to_string(scalar.floatValue); + case QNN_DATATYPE_FLOAT_32: + return std::to_string(scalar.floatValue); + case QNN_DATATYPE_SFIXED_POINT_8: + case QNN_DATATYPE_SFIXED_POINT_16: + case QNN_DATATYPE_SFIXED_POINT_32: + return std::to_string(scalar.int32Value); // Assume using int types for signed fixed points. + case QNN_DATATYPE_UFIXED_POINT_8: + case QNN_DATATYPE_UFIXED_POINT_16: + case QNN_DATATYPE_UFIXED_POINT_32: + return std::to_string(scalar.uint32Value); // Assume using unsigned int types for unsigned fixed points. + case QNN_DATATYPE_BOOL_8: + return scalar.bool8Value ? "true" : "false"; + case QNN_DATATYPE_STRING: + return scalar.stringValue ? scalar.stringValue : "NULL"; + default: + return "UNKNOWN"; + } +} + +#ifdef _WIN32 +void Serializer::LogQnnProfileEventAsTraceLogging( + uint64_t timestamp, + const std::string& message, + const std::string& qnnScalarValue, + const std::string& unit, + const std::string& timingSource, + const std::string& event_level, + const char* eventIdentifier) { + QnnTelemetry& qnn_telemetry = QnnTelemetry::Instance(); + qnn_telemetry.LogQnnProfileEvent(timestamp, message, qnnScalarValue, unit, timingSource, event_level, eventIdentifier); +} +#endif + +Status Serializer::ProcessEvent(const QnnProfile_EventId_t event_id, const std::string& event_level, + const QnnProfile_EventData_t& event_data) { + const std::string& message = GetEventTypeString(event_data.type); + const std::string& unit = GetUnitString(event_data.unit); + + ORT_UNUSED_PARAMETER(event_id); + + if (outfile_) { + outfile_ << "UNKNOWN" + << "," + << message << "," + << event_data.value << "," + << unit << "," + << "BACKEND" + << "," + << event_level << "," + << (event_data.identifier ? event_data.identifier : "NULL") << "\n"; + } +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + QnnSystemProfile_ProfileEventV1_t* created_event = nullptr; + if (event_level == "SUB-EVENT") { + auto parent_system_event = GetParentSystemEvent(event_id); + ORT_RETURN_IF(parent_system_event == nullptr, "Serialization of subevent failed: parent event pointer is null"); + created_event = AddSubEvent(event_id, event_data, parent_system_event); + } else { + created_event = AddEvent(event_id, event_data); + } + + ORT_RETURN_IF(created_event == nullptr, "Serialization of event failed: Unable to create system profile event"); +#endif + + if (tracelogging_provider_ep_enabled_) { +#ifdef _WIN32 + LogQnnProfileEventAsTraceLogging( + (uint64_t)0, + message, + std::to_string(event_data.value), + unit, + "BACKEND", + event_level, + (event_data.identifier ? event_data.identifier : "NULL")); +#endif + } + + return Status::OK(); +} + +Status Serializer::ProcessExtendedEvent(const QnnProfile_EventId_t event_id, const std::string& event_level, + const QnnProfile_ExtendedEventData_t& event_data) { + // need to check the version first + const std::string& message = GetEventTypeString(event_data.v1.type); + const std::string& unit = GetUnitString(event_data.v1.unit); + + ORT_UNUSED_PARAMETER(event_id); + + if (outfile_) { + if (event_data.version == QNN_PROFILE_DATA_VERSION_1) { + outfile_ << event_data.v1.timestamp << "," + << message << "," + << ExtractQnnScalarValue(event_data.v1.value) << "," + << unit << "," + << "BACKEND" + << "," + << event_level << "," + << (event_data.v1.identifier ? event_data.v1.identifier : "NULL") + << "\n"; + } + } +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + QnnSystemProfile_ProfileEventV1_t* created_event = nullptr; + if (event_level == "SUB-EVENT") { + auto parent_system_event = GetParentSystemEvent(event_id); + ORT_RETURN_IF(parent_system_event == nullptr, "Serialization of subevent failed: parent event pointer is null"); + created_event = AddExtendedSubEvent(event_id, event_data, parent_system_event); + } else { + created_event = AddExtendedEvent(event_id, event_data); + } + + ORT_RETURN_IF(created_event == nullptr, "Serialization of event failed: Unable to create system profile event"); +#endif + + if (tracelogging_provider_ep_enabled_) { +#ifdef _WIN32 + LogQnnProfileEventAsTraceLogging( + event_data.v1.timestamp, + message, + ExtractQnnScalarValue(event_data.v1.value), + unit, + "BACKEND", + event_level, + (event_data.v1.identifier ? event_data.v1.identifier : "NULL")); +#endif + } + + return Status::OK(); +} + +Status Serializer::InitFile() { + auto output_filepath = profiling_info_.output_filepath; + // Write to CSV in append mode + std::ifstream infile(output_filepath.c_str()); + bool exists = infile.good(); + if (infile.is_open()) { + infile.close(); + } + + outfile_.open(output_filepath.c_str(), std::ios_base::app); + ORT_RETURN_IF(!outfile_.is_open(), "Failed to open profiling file: ", output_filepath); + // If file didn't exist before, write the header + if (!exists) { + outfile_ << "Msg Timestamp,Message,Time,Unit of Measurement,Timing Source,Event Level,Event Identifier\n"; + } + + return Status::OK(); +} + +#ifndef QNN_SYSTEM_PROFILE_API_ENABLED +Serializer::Serializer(const ProfilingInfo& profiling_info) + : profiling_info_(profiling_info), + tracelogging_provider_ep_enabled_(profiling_info.tracelogging_provider_ep_enabled) { +} +#else +QnnSystemProfile_MethodType_t ParseMethodType(ProfilingMethodType method_type) { + switch (method_type) { + case ProfilingMethodType::EXECUTE: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_EXECUTE; + case ProfilingMethodType::FINALIZE: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_FINALIZE; + case ProfilingMethodType::EXECUTE_ASYNC: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_EXECUTE_ASYNC; + case ProfilingMethodType::CREATE_FROM_BINARY: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_CREATE_FROM_BINARY; + case ProfilingMethodType::DEINIT: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_DEINIT; + case ProfilingMethodType::CONTEXT_CREATE: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_APP_CONTEXT_CREATE; + case ProfilingMethodType::COMPOSE_GRAPHS: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_APP_COMPOSE_GRAPHS; + case ProfilingMethodType::EXECUTE_IPS: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_APP_EXECUTE_IPS; + case ProfilingMethodType::GRAPH_COMPONENT: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_GRAPH_COMPONENT; + case ProfilingMethodType::LIB_LOAD: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_APP_BACKEND_LIB_LOAD; + case ProfilingMethodType::APPLY_BINARY_SECTION: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_BACKEND_APPLY_BINARY_SECTION; + case ProfilingMethodType::CONTEXT_FINALIZE: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_CONTEXT_FINALIZE; + default: + return QNN_SYSTEM_PROFILE_METHOD_TYPE_NONE; + } +} + +std::string GetSystemProfileErrorString(Qnn_ErrorHandle_t error) { + switch (error) { + case QNN_SYSTEM_PROFILE_ERROR_UNSUPPORTED_FEATURE: + return "Unsupported Feature"; + case QNN_SYSTEM_PROFILE_ERROR_INVALID_HANDLE: + return "Invalid Handle"; + case QNN_SYSTEM_PROFILE_ERROR_INVALID_ARGUMENT: + return "Invalid Argument"; + case QNN_SYSTEM_PROFILE_ERROR_MEM_ALLOC: + return "Memory Allocation Error"; + default: + return "Unknown"; + } +} + +Serializer::Serializer(const ProfilingInfo& profiling_info) + : qnn_system_interface_(profiling_info.qnn_system_interface), + profiling_info_(profiling_info), + tracelogging_provider_ep_enabled_(profiling_info.tracelogging_provider_ep_enabled) { + std::filesystem::path output_fs_filepath(profiling_info.output_filepath); + qnn_log_filename_ = output_fs_filepath.filename().string(); + // Remove extension (assumed to be ".csv") then add "_qnn.log" + size_t extension_start_idx = qnn_log_filename_.rfind("."); + qnn_log_filename_ = qnn_log_filename_.substr(0, extension_start_idx); + qnn_log_filename_.append("_qnn.log"); + + std::filesystem::path abs_output_path; + if (output_fs_filepath.has_root_path()) { + abs_output_path = output_fs_filepath.parent_path(); + } else { + abs_output_path = std::filesystem::current_path() / output_fs_filepath.parent_path(); + } + output_directory_ = abs_output_path.string(); + + event_data_list_.reserve(profiling_info.num_events); +} + +QnnSystemProfile_ProfileEventV1_t* Serializer::AddEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_EventData_t event) { + return CreateSystemEvent(event_data_list_, event_id, event); +} +QnnSystemProfile_ProfileEventV1_t* Serializer::AddExtendedEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_ExtendedEventData_t event) { + return CreateSystemExtendedEvent(event_data_list_, event_id, event); +} + +QnnSystemProfile_ProfileEventV1_t* Serializer::AddSubEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_EventData_t& sub_event, + QnnSystemProfile_ProfileEventV1_t* const parent_system_event) { + if (sub_event_data_map_.find(parent_system_event) == sub_event_data_map_.end()) { + return nullptr; + } + + auto& sub_event_list = sub_event_data_map_.at(parent_system_event); + return CreateSystemEvent(sub_event_list, event_id, sub_event); +} +QnnSystemProfile_ProfileEventV1_t* Serializer::AddExtendedSubEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_ExtendedEventData_t& sub_event, + QnnSystemProfile_ProfileEventV1_t* const parent_system_event) { + if (sub_event_data_map_.find(parent_system_event) == sub_event_data_map_.end()) { + return nullptr; + } + + auto& sub_event_list = sub_event_data_map_.at(parent_system_event); + return CreateSystemExtendedEvent(sub_event_list, event_id, sub_event); +} + +Status Serializer::SerializeEvents() { + bool result = nullptr == qnn_system_interface_.systemProfileCreateSerializationTarget || + nullptr == qnn_system_interface_.systemProfileSerializeEventData || + nullptr == qnn_system_interface_.systemProfileFreeSerializationTarget; + ORT_RETURN_IF(result, "Failed to get system profile API pointers."); + + auto method_type = profiling_info_.method_type; + ORT_RETURN_IF(method_type == ProfilingMethodType::UNKNOWN, "Invalid serialization method type"); + + QnnSystemProfile_SerializationTargetConfig_t config; + config.type = QNN_SYSTEM_PROFILE_SERIALIZATION_TARGET_CONFIG_SERIALIZATION_HEADER; + + std::string backend_version(std::to_string(QNN_API_VERSION_MAJOR) + "." + std::to_string(QNN_API_VERSION_MINOR) + "." + std::to_string(QNN_API_VERSION_PATCH)); + + std::string app_version(std::to_string(ORT_API_VERSION)); + config.serializationHeader.appName = "OnnxRuntime"; + config.serializationHeader.appVersion = app_version.c_str(); + config.serializationHeader.backendVersion = backend_version.c_str(); + + QnnSystemProfile_SerializationTargetFile_t serialization_file{qnn_log_filename_.c_str(), output_directory_.c_str()}; + QnnSystemProfile_SerializationTarget_t serialization_target = { + QNN_SYSTEM_PROFILE_SERIALIZATION_TARGET_FILE, + {serialization_file}}; + + QnnSystemProfile_SerializationTargetHandle_t serialization_target_handle; + + auto status = qnn_system_interface_.systemProfileCreateSerializationTarget(serialization_target, &config, 1, + &serialization_target_handle); + ORT_RETURN_IF(QNN_SYSTEM_PROFILE_NO_ERROR != status, "Failed to create serialization target handle: ", + GetSystemProfileErrorString(status)); + + ManagedSerializationTargetHandle managed_target_handle(serialization_target_handle, qnn_system_interface_); + + // Set subevent data pointers for all event data + // Must be done here as underlying array ptrs can change as vectors are resized + for (auto it = sub_event_data_map_.begin(); it != sub_event_data_map_.end(); it++) { + it->first->profileSubEventData = it->second.data(); + it->first->numSubEvents = static_cast(it->second.size()); + } + + // Create QnnSystemProfile_ProfileData_t obj here + QnnSystemProfile_ProfileData_t system_profile_data = QNN_SYSTEM_PROFILE_DATA_INIT; + system_profile_data.version = QNN_SYSTEM_PROFILE_DATA_VERSION_1; + system_profile_data.v1.header.startTime = profiling_info_.start_time; + system_profile_data.v1.header.stopTime = profiling_info_.stop_time; + system_profile_data.v1.header.graphName = profiling_info_.graph_name.c_str(); + system_profile_data.v1.header.methodType = ParseMethodType(method_type); + system_profile_data.v1.profilingEvents = event_data_list_.data(); + system_profile_data.v1.numProfilingEvents = static_cast(event_data_list_.size()); + + std::vector system_profile_data_list = {&system_profile_data}; + status = qnn_system_interface_.systemProfileSerializeEventData(serialization_target_handle, + system_profile_data_list.data(), + 1); + + ORT_RETURN_IF(QNN_SYSTEM_PROFILE_NO_ERROR != status, "Failed to serialize QNN profiling data: ", + GetSystemProfileErrorString(status)); + + status = managed_target_handle.FreeHandle(); + ORT_RETURN_IF(QNN_SYSTEM_PROFILE_NO_ERROR != status, "Failed to free serialization target: ", + GetSystemProfileErrorString(status)); + + return Status::OK(); +} + +void Serializer::AddSubEventList(const uint32_t num_sub_events, QnnSystemProfile_ProfileEventV1_t* event_ptr) { + if (num_sub_events > 0U) { + auto it = sub_event_data_map_.emplace(event_ptr, std::vector()).first; + it->second.reserve(num_sub_events); + } +} + +QnnSystemProfile_ProfileEventV1_t* Serializer::GetSystemEventPointer(QnnProfile_EventId_t event_id) { + auto it = event_profile_id_lookup_map_.find(event_id); + if (it == event_profile_id_lookup_map_.end()) { + return nullptr; + } + + return it->second; +} + +Status Serializer::SetParentSystemEvent( + const QnnProfile_EventId_t event_id, + QnnSystemProfile_ProfileEventV1_t* const system_parent_event) { + ORT_RETURN_IF(!(system_parent_event_lookup_map_.emplace(event_id, system_parent_event).second), + "Failed to add subevent-parent event mapping"); + return Status::OK(); +} +QnnSystemProfile_ProfileEventV1_t* Serializer::GetParentSystemEvent(const QnnProfile_EventId_t event_id) { + if (system_parent_event_lookup_map_.find(event_id) == system_parent_event_lookup_map_.end()) { + return nullptr; + } + + return system_parent_event_lookup_map_.at(event_id); +} + +QnnSystemProfile_ProfileEventV1_t* Serializer::CreateSystemEvent( + std::vector& event_list, + QnnProfile_EventId_t event_id, + QnnProfile_EventData_t event_data) { + auto system_event = &(event_list.emplace_back()); + + system_event->type = QNN_SYSTEM_PROFILE_EVENT_DATA; + system_event->eventData = event_data; + system_event->profileSubEventData = NULL; + system_event->numSubEvents = 0; + + event_profile_id_lookup_map_.emplace(event_id, system_event); + + return system_event; +} + +QnnSystemProfile_ProfileEventV1_t* Serializer::CreateSystemExtendedEvent(std::vector& event_list, + QnnProfile_EventId_t event_id, + QnnProfile_ExtendedEventData_t event_data) { + auto system_event = &(event_list.emplace_back()); + + system_event->type = QNN_SYSTEM_PROFILE_EXTENDED_EVENT_DATA; + system_event->extendedEventData = event_data; + system_event->profileSubEventData = NULL; + system_event->numSubEvents = 0; + + event_profile_id_lookup_map_.emplace(event_id, system_event); + + return system_event; +} +#endif // QNN_SYSTEM_PROFILE_API_ENABLED + +} // namespace profile +} // namespace qnn +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.h b/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.h new file mode 100644 index 0000000000000..ca79433ff7254 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_profile_serializer.h @@ -0,0 +1,155 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_def.h" + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + +#include + +#include +#include +#include + +#endif + +namespace onnxruntime { + +namespace qnn { +namespace profile { + +struct ProfilingInfo { + std::string graph_name = ""; + std::string output_filepath = ""; + + bool tracelogging_provider_ep_enabled = false; + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + uint64_t start_time = 0; + uint64_t stop_time = 0; + uint32_t num_events = 0; + + ProfilingMethodType method_type = ProfilingMethodType::UNKNOWN; + QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface = QNN_SYSTEM_INTERFACE_VER_TYPE_INIT; +#endif +}; + +class Serializer { + public: + Serializer(const ProfilingInfo& profiling_info); + + Status ProcessEvent(const QnnProfile_EventId_t event_Id, const std::string& event_level, + const QnnProfile_EventData_t& event_data); + + Status ProcessExtendedEvent(const QnnProfile_EventId_t event_id, const std::string& event_level, + const QnnProfile_ExtendedEventData_t& event_data); + + private: +#ifdef _WIN32 + void LogQnnProfileEventAsTraceLogging( + uint64_t timestamp, + const std::string& message, + const std::string& qnnScalarValue, + const std::string& unit, + const std::string& timingSource, + const std::string& eventLevel, + const char* eventIdentifier); +#endif + + // If QNN API is too old, turn Serializer into an ofstream wrapper class + // Keeps code clean, any performance impacts can be ignored when profiling is enabled + public: + ~Serializer() { +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + event_data_list_.clear(); + system_parent_event_lookup_map_.clear(); + event_profile_id_lookup_map_.clear(); + sub_event_data_map_.clear(); +#endif + } + + Status InitFile(); + +#ifdef QNN_SYSTEM_PROFILE_API_ENABLED + public: + Status SerializeEvents(); + + QnnSystemProfile_ProfileEventV1_t* GetParentSystemEvent(const QnnProfile_EventId_t event_id); + + QnnSystemProfile_ProfileEventV1_t* GetSystemEventPointer(const QnnProfile_EventId_t event_id); + + void AddSubEventList(const uint32_t num_sub_events, QnnSystemProfile_ProfileEventV1_t* event_ptr); + + Status SetParentSystemEvent(const QnnProfile_EventId_t event_id, + QnnSystemProfile_ProfileEventV1_t* const system_parent_event); + + private: + class ManagedSerializationTargetHandle { + public: + ManagedSerializationTargetHandle(const QnnSystemProfile_SerializationTargetHandle_t& raw_handle, + QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface) : qnn_system_interface_(qnn_system_interface), + handle_(raw_handle) {} + + ~ManagedSerializationTargetHandle() { + auto status = FreeHandle(); + ORT_UNUSED_PARAMETER(status); + } + + Qnn_ErrorHandle_t FreeHandle() { + return qnn_system_interface_.systemProfileFreeSerializationTarget(handle_); + } + + private: + QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface_; + QnnSystemProfile_SerializationTargetHandle_t handle_; + }; // ManagedSerializationTargetHandle + + QnnSystemProfile_ProfileEventV1_t* AddEvent(const QnnProfile_EventId_t event_Id, + const QnnProfile_EventData_t event); + + QnnSystemProfile_ProfileEventV1_t* AddExtendedEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_ExtendedEventData_t event); + + QnnSystemProfile_ProfileEventV1_t* AddSubEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_EventData_t& sub_event, + QnnSystemProfile_ProfileEventV1_t* const system_parent_event); + + QnnSystemProfile_ProfileEventV1_t* AddExtendedSubEvent(const QnnProfile_EventId_t event_id, + const QnnProfile_ExtendedEventData_t& sub_event, + QnnSystemProfile_ProfileEventV1_t* const system_parent_event); + + QnnSystemProfile_ProfileEventV1_t* CreateSystemEvent(std::vector& event_list, + const QnnProfile_EventId_t event_id, + QnnProfile_EventData_t event_data); + + QnnSystemProfile_ProfileEventV1_t* CreateSystemExtendedEvent(std::vector& event_list, + const QnnProfile_EventId_t event_id, + QnnProfile_ExtendedEventData_t event_data); + + std::string qnn_log_filename_; + std::string output_directory_; + std::vector event_data_list_; + std::unordered_map system_parent_event_lookup_map_; + std::unordered_map event_profile_id_lookup_map_; + std::unordered_map > sub_event_data_map_; + + QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface_; +#endif // QNN_SYSTEM_PROFILE_API_ENABLED + const ProfilingInfo profiling_info_; + bool tracelogging_provider_ep_enabled_ = false; + std::ofstream outfile_; +}; + +} // namespace profile +} // namespace qnn +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc index afa5e3bdbb6d1..6a3234e507f1f 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc @@ -1426,6 +1426,11 @@ Status GetPermToLastAxis(uint32_t axis, uint32_t rank, std::vector& pe return Status::OK(); } +uint64_t GetTimeStampInUs() { + auto timestamp = std::chrono::steady_clock::now().time_since_epoch(); + return std::chrono::duration_cast(timestamp).count(); +} + } // namespace utils } // namespace qnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.h b/onnxruntime/core/providers/qnn/builder/qnn_utils.h index b234f7df375e9..b86e4d27789b4 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_utils.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.h @@ -453,6 +453,12 @@ Status InsertConvertOp(QnnModelWrapper& qnn_model_wrapper, * @return execution status of this function */ Status GetPermToLastAxis(uint32_t axis, uint32_t rank, std::vector& perm); +/** + * Get the current timestamp in microseconds + * + * @return the current timestamp in microseconds + */ +uint64_t GetTimeStampInUs(); } // namespace utils } // namespace qnn diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 2bdbfb9c1c62e..2ff349bb8d765 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -94,6 +94,8 @@ static void ParseProfilingLevel(std::string profiling_level_string, profiling_level = qnn::ProfilingLevel::BASIC; } else if (profiling_level_string == "detailed") { profiling_level = qnn::ProfilingLevel::DETAILED; + } else if (profiling_level_string == "optrace") { + profiling_level = qnn::ProfilingLevel::OPTRACE; } else { LOGS_DEFAULT(WARNING) << "Profiling level not valid."; } @@ -400,6 +402,7 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio if (profiling_level_pos != provider_options_map.end()) { ParseProfilingLevel(profiling_level_pos->second, profiling_level); } + static const std::string PROFILING_FILE = "profiling_file_path"; auto profiling_file_pos = provider_options_map.find(PROFILING_FILE); if (profiling_file_pos != provider_options_map.end()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index f2a54b0335fe1..ff4c05b4b2c34 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -312,7 +312,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ORT_THROW("Please provide the valid file path."); } } else if (key == "profiling_level") { - std::set supported_profiling_level = {"off", "basic", "detailed"}; + std::set supported_profiling_level = {"off", "basic", "detailed", "optrace"}; if (supported_profiling_level.find(value) == supported_profiling_level.end()) { ORT_THROW("Supported profiling_level: off, basic, detailed"); } diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index c7b68b7f25a91..485417b2e904c 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -1088,14 +1088,19 @@ static GetTestModelFn BuildCastAddTestCase() { }; } +void VerifyFileExistsAndIsNonEmpty(const std::string& filepath) { + std::ifstream csv_file(filepath, std::ifstream::binary); + EXPECT_TRUE(csv_file.good()); + + csv_file.seekg(0, csv_file.end); + size_t buffer_size = static_cast(csv_file.tellg()); + EXPECT_NE(0, buffer_size); +} + TEST_F(QnnHTPBackendTests, ProfilingTest) { onnxruntime::ProviderOptions provider_options; -#if defined(_WIN32) - provider_options["backend_path"] = "QnnHtp.dll"; -#else - provider_options["backend_path"] = "libQnnHtp.so"; -#endif + provider_options["backend_type"] = "htp"; provider_options["offload_graph_io_quantization"] = "0"; provider_options["enable_htp_fp16_precision"] = "1"; provider_options["profiling_level"] = "detailed"; @@ -1108,6 +1113,42 @@ TEST_F(QnnHTPBackendTests, ProfilingTest) { 13, ExpectedEPNodeAssignment::All, 0.008f); + + VerifyFileExistsAndIsNonEmpty(provider_options["profiling_file_path"]); + std::remove(provider_options["profiling_file_path"].c_str()); + +#if QNN_API_VERSION_MAJOR > 2 || \ + (QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 29)) + VerifyFileExistsAndIsNonEmpty("detailed_profile_qnn.log"); + std::remove("detailed_profile_qnn.log"); +#endif +} + +TEST_F(QnnHTPBackendTests, OptraceTest) { + onnxruntime::ProviderOptions provider_options; + + provider_options["backend_type"] = "htp"; + provider_options["offload_graph_io_quantization"] = "0"; + provider_options["enable_htp_fp16_precision"] = "1"; + provider_options["profiling_level"] = "optrace"; + provider_options["profiling_file_path"] = "optrace_profile.csv"; + + auto input_defs = {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), + TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f)}; + RunQnnModelTest(BuildOpTestCase("Add", input_defs, {}, {}, kOnnxDomain), + provider_options, + 13, + ExpectedEPNodeAssignment::All, + 0.008f); + + VerifyFileExistsAndIsNonEmpty(provider_options["profiling_file_path"]); + std::remove(provider_options["profiling_file_path"].c_str()); + +#if QNN_API_VERSION_MAJOR > 2 || \ + (QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 29)) + VerifyFileExistsAndIsNonEmpty("optrace_profile_qnn.log"); + std::remove("optrace_profile_qnn.log"); +#endif } TEST_F(QnnHTPBackendTests, CastAddQDQU8) {