Skip to content

Commit fcb0138

Browse files
authored
ggml-hexagon: Initial Hexagon v68/v69 support (#17394)
* ggml-hexagon: fix build error with GCC
  Add the stdexcept include to fix GCC build errors.
  Signed-off-by: Mohamed Mediouni <[email protected]>
* ggml-hexagon: check VTCM acquire failures
  Signed-off-by: Mohamed Mediouni <[email protected]>
* ggml-hexagon: disable destination bypass on older than v73
  v68 errors out if bypass is enabled when the VTCM is the destination. At least on v68 this made things actually work... it is not a proper fix though, so it needs to be looked at later.
  Signed-off-by: Mohamed Mediouni <[email protected]>
* ggml-hexagon: add initial v68/v69 support
  v68 is the Hexagon revision notably used on the Snapdragon 8cx Gen 3 and the QCM6490. Also add support for v69. 8MB isn't a supported page size, so relax the requested page size constraint for HAP_compute_res_attr_set_vtcm_param_v2 to optimal.
  Signed-off-by: Mohamed Mediouni <[email protected]>
---------
Signed-off-by: Mohamed Mediouni <[email protected]>
1 parent d5bc1ad commit fcb0138

File tree

6 files changed

+56
-3
lines changed

6 files changed

+56
-3
lines changed

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ set(HTP_CMAKE_ARGS
4343
-DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
4444
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG})
4545

46+
ExternalProject_Add(htp-v68
47+
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
48+
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
49+
50+
ExternalProject_Add(htp-v69
51+
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
52+
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
53+
4654
ExternalProject_Add(htp-v73
4755
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
4856
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
@@ -61,6 +69,8 @@ ExternalProject_Add(htp-v81
6169

6270
# Install Hexagon skels required at runtime
6371
install(FILES
72+
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
73+
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
6474
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
6575
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
6676
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <chrono>
1010
#include <mutex>
1111
#include <string>
12+
#include <stdexcept>
1213

1314
#ifdef _WIN32
1415
# include <sal.h>

ggml/src/ggml-hexagon/htp-utils.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,12 @@ int get_hex_arch_ver(int domain, int * arch) {
390390
}
391391

392392
switch (arch_ver.capability & 0xff) {
393+
case 0x68:
394+
*arch = 68;
395+
return 0;
396+
case 0x69:
397+
*arch = 69;
398+
return 0;
393399
case 0x73:
394400
*arch = 73;
395401
return 0;

ggml/src/ggml-hexagon/htp/htp-dma.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ static inline bool dma_queue_push(dma_queue * q,
6666
desc->desctype = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1;
6767
desc->dstbypass = 1;
6868
desc->srcbypass = 1;
69+
#if __HVX_ARCH__ >= 73
70+
desc->dstbypass = 1;
71+
desc->srcbypass = 1;
72+
#else
73+
desc->dstbypass = 0;
74+
desc->srcbypass = 1;
75+
#endif
6976
desc->order = 0;
7077
desc->dstate = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE;
7178
desc->src = (void *) src;

ggml/src/ggml-hexagon/htp/hvx-utils.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,26 @@ typedef union {
2121
float fp32[VLEN_FP32];
2222
} __attribute__((aligned(VLEN), packed)) HVX_VectorAlias;
2323

24+
/* Q6_Vsf_equals_Vw is only available on v73+.*/
25+
#if __HVX_ARCH__ < 73
26+
/*
 * Converts a vector of 32-bit integer lanes into the value that
 * Q6_Vsf_equals_Vqf32() is later fed in this file, using only intrinsics
 * that are available when __HVX_ARCH__ < 73 (this helper is compiled only
 * under that guard).
 *
 * Steps, as written in the body:
 *   1. flag the lanes that compare equal to zero;
 *   2. obtain a per-lane shift amount from Q6_Vw_vnormamt_Vw() and shift the
 *      input left by it;
 *   3. compute an exponent field as (0x7f + 30) - shift;
 *   4. mask the shifted value with 0xFFFFFF00 and add the exponent field;
 *   5. return zero for the lanes flagged in step 1, the sum otherwise.
 *
 * NOTE(review): the exact qf32 bit layout and the semantics of the Q6_*
 * intrinsics are not visible here; the description above is derived from the
 * intrinsic names and the constants used - confirm against the HVX manual.
 */
static inline HVX_Vector int32_to_qfloat(HVX_Vector const in)
27+
{
28+
HVX_Vector const vzero = Q6_V_vzero();
29+
HVX_VectorPred is_zero = Q6_Q_vcmp_eq_VwVw(in, vzero); // lanes where in == 0
30+
HVX_Vector lshift = Q6_Vw_vnormamt_Vw(in); // presumably the per-lane normalization shift - TODO confirm
31+
HVX_Vector normalized = Q6_Vw_vasl_VwVw(in, lshift); // in << lshift
32+
HVX_Vector vexp = Q6_Vw_vsub_VwVw(Q6_V_vsplat_R(0x7f + 30), lshift); // (0x7f + 30) - lshift
33+
HVX_Vector mant = Q6_V_vand_VV(Q6_V_vsplat_R(0xFFFFFF00), normalized); // clear the low 8 bits so the add below fills them with vexp
34+
HVX_Vector ret = Q6_V_vmux_QVV(is_zero, vzero, Q6_Vw_vadd_VwVw(mant, vexp)); // presumably vzero where is_zero is set, mant + vexp elsewhere
35+
return ret;
36+
}
37+
38+
/*
 * Fallback definition of Q6_Vsf_equals_Vw for builds where
 * __HVX_ARCH__ < 73: routes the input through int32_to_qfloat() and then
 * through Q6_Vsf_equals_Vqf32().
 *
 * NOTE(review): presumably yields the same single-precision result as the
 * native v73+ intrinsic of the same name - confirm rounding behaviour.
 */
static inline HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector const in)
39+
{
40+
return Q6_Vsf_equals_Vqf32(int32_to_qfloat(in));
41+
}
42+
#endif
43+
2444
static inline HVX_Vector hvx_vec_splat_fp32(float i) {
2545
union {
2646
float f;

ggml/src/ggml-hexagon/htp/main.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,25 @@ AEEResult htp_iface_disable_etm(remote_handle64 handle) {
143143
}
144144

145145
static int vtcm_acquire(struct htp_context * ctx) {
146+
int err;
146147
if (!ctx->vtcm_valid) {
147148
// Temporarily bump thread priority to make sure it's higher than other sessions.
148149
// This way the resource manager will notify the other thread to release VTCM.
149150
// Note that we need to reacquire VTCM at normal priority for this to work next time.
150151
qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10);
151-
HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
152+
err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
153+
if (err != 0) {
154+
FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
155+
abort();
156+
}
152157
HAP_compute_res_release_cached(ctx->vtcm_rctx);
153158
qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio);
154159

155-
HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
160+
err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
161+
if (err != 0) {
162+
FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
163+
abort();
164+
}
156165
ctx->vtcm_valid = true;
157166
}
158167

@@ -201,7 +210,7 @@ static int vtcm_alloc(struct htp_context * ctx) {
201210
HAP_compute_res_attr_init(&attr);
202211
HAP_compute_res_attr_set_serialize(&attr, 0);
203212
HAP_compute_res_attr_set_cache_mode(&attr, 1);
204-
HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, vtcm_size, vtcm_size);
213+
HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size);
205214
HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx);
206215
HAP_compute_res_attr_set_hmx_param(&attr, 1);
207216

0 commit comments

Comments
 (0)