From f29995db013627b15e19683e2f05591cac9064d0 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 18 Feb 2025 11:48:38 -0300 Subject: [PATCH 1/4] Switch to portable SIMD reads --- .../wasm2c_simd_source_declarations.cc | 37 +++++++++---------- src/template/wasm2c_simd.declarations.c | 25 ++++++------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index a43a000696..19148c67fb 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -1,27 +1,18 @@ -const char* s_simd_source_declarations = R"w2c_template(#if defined(__GNUC__) && defined(__x86_64__) +const char* s_simd_source_declarations = R"w2c_template(#define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ )w2c_template" -R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); -)w2c_template" -R"w2c_template(#elif defined(__GNUC__) && defined(__aarch64__) +R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ )w2c_template" -R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); +R"w2c_template( char tmp[sizeof(t)]; \ )w2c_template" -R"w2c_template(#else +R"w2c_template( const volatile char* v_addr; \ )w2c_template" -R"w2c_template(#define SIMD_FORCE_READ(var) +R"w2c_template( v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ )w2c_template" -R"w2c_template(#endif +R"w2c_template( for (int i = 0; i < sizeof(t); i++) \ )w2c_template" -R"w2c_template(// TODO: equivalent constraint for ARM and other architectures +R"w2c_template( tmp[i] = v_addr[i]; \ )w2c_template" -R"w2c_template( -#define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ -)w2c_template" -R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ -)w2c_template" -R"w2c_template( v128 result = func(MEM_ADDR(mem, addr, sizeof(t))); \ -)w2c_template" -R"w2c_template( SIMD_FORCE_READ(result); \ +R"w2c_template( v128 result = func(&tmp); \ )w2c_template" R"w2c_template( return result; \ )w2c_template" @@ -36,9 +27,17 @@ R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 )w2c_template" R"w2c_template( v128 vec) { \ )w2c_template" -R"w2c_template( v128 result = func(MEM_ADDR(mem, addr, sizeof(t)), vec, lane); \ +R"w2c_template( char tmp[sizeof(t)]; \ +)w2c_template" +R"w2c_template( const volatile char* v_addr; \ +)w2c_template" +R"w2c_template( v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ +)w2c_template" +R"w2c_template( for (int i = 0; i < sizeof(t); i++) \ +)w2c_template" +R"w2c_template( tmp[i] = v_addr[i]; \ )w2c_template" -R"w2c_template( SIMD_FORCE_READ(result); \ +R"w2c_template( v128 result = func(&tmp, vec, lane); \ )w2c_template" R"w2c_template( return result; \ )w2c_template" diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index 39eb4578e1..63bb1b4e3a 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -1,16 +1,11 @@ -#if defined(__GNUC__) && defined(__x86_64__) -#define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); -#elif defined(__GNUC__) && defined(__aarch64__) -#define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); -#else -#define SIMD_FORCE_READ(var) -#endif -// TODO: equivalent constraint for ARM and other architectures - #define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ - v128 result = func(MEM_ADDR(mem, addr, sizeof(t))); \ - SIMD_FORCE_READ(result); \ + char tmp[sizeof(t)]; \ + const volatile char* v_addr; \ + v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ + for (int i = 0; i < sizeof(t); i++) \ + tmp[i] = v_addr[i]; \ + v128 result = func(&tmp); \ return result; \ } \ DEF_MEM_CHECKS0(name, _, t, return, v128); @@ -18,8 +13,12 @@ #define DEFINE_SIMD_LOAD_LANE(name, func, t, lane) \ static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr, \ v128 vec) { \ - v128 result = func(MEM_ADDR(mem, addr, sizeof(t)), vec, lane); \ - SIMD_FORCE_READ(result); \ + char tmp[sizeof(t)]; \ + const volatile char* v_addr; \ + v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ + for (int i = 0; i < sizeof(t); i++) \ + tmp[i] = v_addr[i]; \ + v128 result = func(&tmp, vec, lane); \ return result; \ } \ DEF_MEM_CHECKS1(name, _, t, return, v128, v128); From a4dc754929bf4da6b00ab11d1d34c29531a3b2a9 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 18 Feb 2025 12:15:33 -0300 Subject: [PATCH 2/4] Alternative impl --- .../wasm2c_simd_source_declarations.cc | 29 +++++-------------- src/template/wasm2c_simd.declarations.c | 18 ++++-------- 2 files changed, 14 insertions(+), 33 deletions(-) diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index 19148c67fb..89d8b99d9a 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -1,18 +1,13 @@ -const char* s_simd_source_declarations = R"w2c_template(#define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ +const char* s_simd_source_declarations = R"w2c_template(#define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; )w2c_template" -R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ -)w2c_template" -R"w2c_template( char tmp[sizeof(t)]; \ -)w2c_template" -R"w2c_template( const volatile char* v_addr; \ -)w2c_template" -R"w2c_template( v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ +R"w2c_template( +#define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ )w2c_template" -R"w2c_template( for (int i = 0; i < sizeof(t); i++) \ +R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ )w2c_template" -R"w2c_template( tmp[i] = v_addr[i]; \ +R"w2c_template( v128 result = func(MEM_ADDR(mem, addr, sizeof(t))); \ )w2c_template" -R"w2c_template( v128 result = func(&tmp); \ +R"w2c_template( SIMD_FORCE_READ(result); \ )w2c_template" R"w2c_template( return result; \ )w2c_template" @@ -27,17 +22,9 @@ R"w2c_template( static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 )w2c_template" R"w2c_template( v128 vec) { \ )w2c_template" -R"w2c_template( char tmp[sizeof(t)]; \ -)w2c_template" -R"w2c_template( const volatile char* v_addr; \ -)w2c_template" -R"w2c_template( v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ -)w2c_template" -R"w2c_template( for (int i = 0; i < sizeof(t); i++) \ -)w2c_template" -R"w2c_template( tmp[i] = v_addr[i]; \ +R"w2c_template( v128 result = func(MEM_ADDR(mem, addr, sizeof(t)), vec, lane); \ )w2c_template" -R"w2c_template( v128 result = func(&tmp, vec, lane); \ +R"w2c_template( SIMD_FORCE_READ(result); \ )w2c_template" R"w2c_template( return result; \ )w2c_template" diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index 63bb1b4e3a..67173284d9 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -1,11 +1,9 @@ +#define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; + #define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ - char tmp[sizeof(t)]; \ - const volatile char* v_addr; \ - v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ - for (int i = 0; i < sizeof(t); i++) \ - tmp[i] = v_addr[i]; \ - v128 result = func(&tmp); \ + v128 result = func(MEM_ADDR(mem, addr, sizeof(t))); \ + SIMD_FORCE_READ(result); \ return result; \ } \ DEF_MEM_CHECKS0(name, _, t, return, v128); @@ -13,12 +11,8 @@ #define DEFINE_SIMD_LOAD_LANE(name, func, t, lane) \ static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr, \ v128 vec) { \ - char tmp[sizeof(t)]; \ - const volatile char* v_addr; \ - v_addr = (const volatile char*)MEM_ADDR(mem, addr, sizeof(t)); \ - for (int i = 0; i < sizeof(t); i++) \ - tmp[i] = v_addr[i]; \ - v128 result = func(&tmp, vec, lane); \ + v128 result = func(MEM_ADDR(mem, addr, sizeof(t)), vec, lane); \ + SIMD_FORCE_READ(result); \ return result; \ } \ DEF_MEM_CHECKS1(name, _, t, return, v128, v128); From 132d7a0952b7aad8585960bc0a64d987d7a99712 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Mon, 17 Mar 2025 16:16:25 -0300 Subject: [PATCH 3/4] Only use volatile if we have no better option --- .../wasm2c_simd_source_declarations.cc | 22 ++++++++++++++++++- src/template/wasm2c_simd.declarations.c | 10 +++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index 89d8b99d9a..1b0790a9d0 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -1,4 +1,24 @@ -const char* s_simd_source_declarations = R"w2c_template(#define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; +const char* s_simd_source_declarations = R"w2c_template(#if defined(__GNUC__) && defined(__x86_64__) +)w2c_template" +R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); +)w2c_template" +R"w2c_template(#elif defined(__GNUC__) && defined(__aarch64__) +)w2c_template" +R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); +)w2c_template" +R"w2c_template(#elif WASM_RT_MEMCHECK_GUARD_PAGES +)w2c_template" +R"w2c_template(// best-effort using volatile +)w2c_template" +R"w2c_template(#define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define SIMD_FORCE_READ(var) +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template(// TODO: equivalent constraint for ARM and other architectures )w2c_template" R"w2c_template( #define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index 67173284d9..7007dc3d6c 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -1,4 +1,14 @@ +#if defined(__GNUC__) && defined(__x86_64__) +#define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); +#elif defined(__GNUC__) && defined(__aarch64__) +#define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); +#elif WASM_RT_MEMCHECK_GUARD_PAGES +// best-effort using volatile #define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; +#else +#define SIMD_FORCE_READ(var) +#endif +// TODO: equivalent constraint for ARM and other architectures #define DEFINE_SIMD_LOAD_FUNC(name, func, t) \ static inline v128 name##_unchecked(wasm_rt_memory_t* mem, u64 addr) { \ From 9e8e0f2fba1a1c33b1d1ab12aaa29fc93bcf5374 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Mon, 17 Mar 2025 20:47:24 -0300 Subject: [PATCH 4/4] Use force-read only when necessary --- src/prebuilt/wasm2c_simd_source_declarations.cc | 8 ++++++-- src/template/wasm2c_simd.declarations.c | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index 1b0790a9d0..0089a0cac5 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -1,4 +1,6 @@ -const char* s_simd_source_declarations = R"w2c_template(#if defined(__GNUC__) && defined(__x86_64__) +const char* s_simd_source_declarations = R"w2c_template(#if WASM_RT_MEMCHECK_GUARD_PAGES +)w2c_template" +R"w2c_template(#if defined(__GNUC__) && defined(__x86_64__) )w2c_template" R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); )w2c_template" @@ -6,12 +8,14 @@ R"w2c_template(#elif defined(__GNUC__) && defined(__aarch64__) )w2c_template" R"w2c_template(#define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); )w2c_template" -R"w2c_template(#elif WASM_RT_MEMCHECK_GUARD_PAGES +R"w2c_template(#else )w2c_template" R"w2c_template(// best-effort using volatile )w2c_template" R"w2c_template(#define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; )w2c_template" +R"w2c_template(#endif +)w2c_template" R"w2c_template(#else )w2c_template" R"w2c_template(#define SIMD_FORCE_READ(var) diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index 7007dc3d6c..8e16c1c4be 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -1,10 +1,12 @@ +#if WASM_RT_MEMCHECK_GUARD_PAGES #if defined(__GNUC__) && defined(__x86_64__) #define SIMD_FORCE_READ(var) __asm__("" ::"x"(var)); #elif defined(__GNUC__) && defined(__aarch64__) #define SIMD_FORCE_READ(var) __asm__("" ::"w"(var)); -#elif WASM_RT_MEMCHECK_GUARD_PAGES +#else // best-effort using volatile #define SIMD_FORCE_READ(var) (void)*(volatile v128*)&var; +#endif #else #define SIMD_FORCE_READ(var) #endif