From 56088811de25defb26bd1f62592f7bc67f46fbd7 Mon Sep 17 00:00:00 2001
From: Nuno Cruces
Date: Thu, 12 Jun 2025 15:07:46 +0100
Subject: [PATCH] Optional SIMD memrchr

---
 libc-top-half/musl/src/string/memrchr.c | 27 +++++++++++
 test/src/misc/strrchr.c                 | 61 +++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 test/src/misc/strrchr.c

diff --git a/libc-top-half/musl/src/string/memrchr.c b/libc-top-half/musl/src/string/memrchr.c
index e51748b80..69e9a2eff 100644
--- a/libc-top-half/musl/src/string/memrchr.c
+++ b/libc-top-half/musl/src/string/memrchr.c
@@ -1,7 +1,34 @@
 #include <string.h>
 
+#ifdef __wasm_simd128__
+#include <wasm_simd128.h>
+#endif
+
+// Finds the last byte equal to (unsigned char)c among the first n bytes
+// of m and returns a pointer to it.
 void *__memrchr(const void *m, int c, size_t n)
 {
+#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
+	// memrchr is allowed to read up to n bytes from the object.
+	// Search backward for the last matching character, one 16-byte
+	// vector at a time, starting at the end of the buffer.
+	const v128_t *v = (v128_t *)((char *)m + n);
+	const v128_t vc = wasm_i8x16_splat(c);
+	for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
+		const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--v), vc);
+		// Bitmask is slow on AArch64, any_true is much faster.
+		if (wasm_v128_any_true(cmp)) {
+			// Find the offset of the last one bit (little-endian).
+			// The leading 16 bits of the bitmask are always zero,
+			// and are to be ignored.
+			size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 16;
+			return (char *)(v + 1) - (clz + 1);
+		}
+	}
+	// Fewer than 16 bytes are left at the start of the buffer; the
+	// scalar loop below scans them.
+#endif
+
 	const unsigned char *s = m;
 	c = (unsigned char)c;
 	while (n--) if (s[n]==c) return (void *)(s+n);
diff --git a/test/src/misc/strrchr.c b/test/src/misc/strrchr.c
new file mode 100644
index 000000000..6e1128fa5
--- /dev/null
+++ b/test/src/misc/strrchr.c
@@ -0,0 +1,61 @@
+//! add-flags.py(LDFLAGS): -Wl,--stack-first -Wl,--initial-memory=327680
+
+#include <__macro_PAGESIZE.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+// Calls strrchr(ptr, 7) and prints a diagnostic if the result is not want.
+void test(char *ptr, char *want) {
+  char *got = strrchr(ptr, 7);
+  if (got != want) {
+    printf("strrchr(%p, 7) = %p, want %p\n", ptr, got, want);
+  }
+}
+
+// Runs strrchr over strings of every length, alignment and match position,
+// placed near a page boundary and flush against the end of memory.
+int main(void) {
+  // End of linear memory: its size in pages times the page size.
+  char *const LIMIT = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
+
+  for (ptrdiff_t length = 0; length < 64; length++) {
+    for (ptrdiff_t alignment = 0; alignment < 24; alignment++) {
+      for (ptrdiff_t pos = -2; pos < length + 2; pos++) {
+        // Create a buffer with the given length, at a pointer with the given
+        // alignment. Using the offset LIMIT - PAGESIZE - 8 means many buffers
+        // will straddle a (Wasm, and likely OS) page boundary. Place the
+        // character to find at every position in the buffer, including just
+        // prior to it and after its end.
+        char *ptr = LIMIT - PAGESIZE - 8 + alignment;
+        memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
+        memset(ptr, 5, pos > length ? pos : length);
+
+        // Plant it at index 0 and at pos: the last instance is found.
+        ptr[0] = 7;
+        ptr[pos] = 7;
+        ptr[length] = 0;
+
+        // Expect &ptr[pos] if pos is within range, else ptr; NULL if empty.
+        char *want = NULL;
+        if (length > 0) want = 0 <= pos && pos < length ? &ptr[pos] : ptr;
+        test(ptr, want);
+      }
+    }
+
+    // We need space for the terminator.
+    if (length <= 1) continue;
+
+    // Ensure we never read past the end of memory.
+    char *ptr = LIMIT - length;
+    memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
+    memset(ptr, 5, length);
+
+    ptr[0] = 7;
+    ptr[length - 2] = 7;
+    ptr[length - 1] = 0;
+    test(ptr, &ptr[length - 2]);
+  }
+
+  return 0;
+}