From e2bc28f7c3f28b79d1a4b18ba63c4645152252b0 Mon Sep 17 00:00:00 2001
From: auvred
Date: Thu, 21 Aug 2025 08:50:13 +0300
Subject: [PATCH] Adjust `lastIndex` to leading surrogate when inside a surrogate pair in unicode RegExp

When a unicode RegExp starts matching at an index that points at the
trailing half of a surrogate pair, begin at the leading half instead so
that the pair is seen as one code point.

The adjustment is limited to 16-bit subject buffers. An 8-bit buffer
cannot contain surrogates, and reading it through a uint16_t pointer
would combine two unrelated characters into a bogus code unit and could
read before the start of the buffer.
---
 libregexp.c           | 16 +++++++++++++++-
 tests/test_builtin.js | 15 +++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/libregexp.c b/libregexp.c
index 2b33c8695..61c69e75a 100644
--- a/libregexp.c
+++ b/libregexp.c
@@ -3165,6 +3165,20 @@ int lre_exec(uint8_t **capture,
     s->interrupt_counter = INTERRUPT_COUNTER_INIT;
     s->opaque = opaque;
 
+    const uint8_t *cptr = cbuf + (cindex << cbuf_type);
+
+    /* In unicode mode, a start index pointing at the trailing half of a
+       surrogate pair is moved back to the leading half. Only 16-bit
+       buffers are inspected: with cbuf_type == 0 the buffer holds 8-bit
+       units, which cannot encode surrogates and must not be read as
+       uint16_t. */
+    if (cbuf_type != 0 && s->is_unicode && 0 < cindex && cindex < clen) {
+        const uint16_t *p = (const uint16_t *)cptr;
+        if (is_lo_surrogate(p[0]) && is_hi_surrogate(p[-1])) {
+            cptr = (const uint8_t *)(p - 1);
+        }
+    }
+
     s->state_size = sizeof(REExecState) +
         s->capture_count * sizeof(capture[0]) * 2 +
         s->stack_size_max * sizeof(stack_buf[0]);
@@ -3177,7 +3191,7 @@ int lre_exec(uint8_t **capture,
     alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
     stack_buf = alloca(alloca_size);
     ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
-                             cbuf + (cindex << cbuf_type), FALSE);
+                             cptr, FALSE);
     lre_realloc(s->opaque, s->state_stack, 0);
     return ret;
 }
diff --git a/tests/test_builtin.js b/tests/test_builtin.js
index a541c1981..d33daa7d9 100644
--- a/tests/test_builtin.js
+++ b/tests/test_builtin.js
@@ -779,6 +779,21 @@ function test_regexp()
     /* Note: SpiderMonkey and v8 may not be correct */
     assert("abcAbC".replace(/[\q{BC|A}]/gvi,"X"), "XXXX");
     assert("abcAbC".replace(/[\q{BC|A}--a]/gvi,"X"), "aXAX");
+
+    a = /(?:)/gu;
+    a.lastIndex = 1;
+    a.exec("🐱");
+    assert(a.lastIndex, 0);
+
+    a.lastIndex = 1;
+    a.exec("a\udc00");
+    assert(a.lastIndex, 1);
+
+    a = /\u{10000}/vgd;
+    a.lastIndex = 1;
+    a = a.exec("\u{10000}_\u{10000}");
+    assert(a.indices[0][0], 0);
+    assert(a.indices[0][1], 2);
 }
 
 function test_symbol()