From f85cf1a318090b5299a1c482ad0edd82402605e0 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 26 Sep 2025 14:00:28 -0400 Subject: [PATCH 1/3] Improve performance of contains_re --- cpp/src/strings/contains.cu | 4 +- cpp/src/strings/regex/regex.cuh | 23 +++++---- cpp/src/strings/regex/regex.inl | 82 ++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 47 deletions(-) diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 67531fea579..e6b25841ed3 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ struct contains_fn { size_type end = beginning_only ? 1 // match only the beginning of the string; : -1; // match anywhere in the string - return prog.find(thread_idx, d_str, d_str.begin(), end).has_value(); + return prog.find(thread_idx, d_str, d_str.begin(), end).has_value(); } }; diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index 6071a9fdd2d..b08df7828bb 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -34,7 +34,13 @@ namespace cudf { namespace strings { namespace detail { -struct relist; +enum class positional : int8_t { + DEFAULT = 0, /// both begin and end are returned + END_ONLY = 1, /// only end is returned +}; + +template +struct reljunk; using match_pair = thrust::pair; using match_result = cuda::std::optional; @@ -187,6 +193,7 @@ class reprog_device { * Specify -1 to match any virtual positions past the end of the string. * @return If match found, returns character positions of the matches. */ + template [[nodiscard]] __device__ inline match_result find(int32_t const thread_idx, string_view const d_str, string_view::const_iterator begin, @@ -213,16 +220,6 @@ class reprog_device { cudf::size_type const group_id) const; private: - struct reljunk { - relist* __restrict__ list1; - relist* __restrict__ list2; - int32_t starttype{}; - char32_t startchar{}; - - __device__ inline reljunk(relist* list1, relist* list2, reinst const inst); - __device__ inline void swaplist(); - }; - /** * @brief Returns the regex instruction object for a given id. */ @@ -236,8 +233,9 @@ class reprog_device { /** * @brief Executes the regex pattern on the given string. */ + template [[nodiscard]] __device__ inline match_result regexec(string_view const d_str, - reljunk jnk, + reljunk

& jnk, string_view::const_iterator begin, cudf::size_type end, cudf::size_type const group_id = 0) const; @@ -245,6 +243,7 @@ class reprog_device { /** * @brief Utility wrapper to setup state memory structures for calling regexec */ + template [[nodiscard]] __device__ inline match_result call_regexec( int32_t const thread_idx, string_view const d_str, diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 906f09e4d82..080a055271f 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,20 +71,26 @@ struct alignas(8) relist { size = 0; } + template __device__ __forceinline__ bool activate(int32_t id, int32_t begin, int32_t end) { if (readMask(id)) { return false; } writeMask(id); inst_ids[size * stride] = static_cast(id); - ranges[size * stride] = int2{begin, end}; + if constexpr (P == positional::DEFAULT) { ranges[size * stride] = int2{begin, end}; } ++size; return true; } + template [[nodiscard]] __device__ __forceinline__ restate get_state(int16_t idx) const { - return restate{ranges[idx * stride], inst_ids[idx * stride]}; + if constexpr (P == positional::DEFAULT) { + return restate{ranges[idx * stride], inst_ids[idx * stride]}; + } + return restate{{0, 0}, inst_ids[idx * stride]}; } + [[nodiscard]] __device__ __forceinline__ int16_t get_size() const { return size; } private: @@ -108,23 +114,28 @@ struct alignas(8) relist { } }; -__device__ __forceinline__ reprog_device::reljunk::reljunk(relist* list1, - relist* list2, - reinst const inst) - : list1(list1), list2(list2) -{ - if (inst.type == CHAR || inst.type == BOL) { - starttype = inst.type; - startchar = inst.u1.c; - } -} +template +struct reljunk { + relist* __restrict__ list1; + relist* __restrict__ list2; + int32_t starttype{}; + char32_t startchar{}; -__device__ __forceinline__ void reprog_device::reljunk::swaplist() -{ - auto tmp = list1; - list1 = list2; - list2 = tmp; -} + __device__ inline reljunk(relist* list1, relist* list2, reinst const inst) + : list1(list1), list2(list2) + { + if (inst.type == CHAR || inst.type == BOL) { + starttype = inst.type; + startchar = inst.u1.c; + } + } + __device__ inline void swaplist() + { + auto tmp = list1; + list1 = list2; + list2 = tmp; + } +}; /** * @brief Check for supported new-line characters @@ -249,8 +260,9 @@ __device__ __forceinline__ static string_view::const_iterator find_char( * @param group_id Index of the group to match in a multi-group regex pattern. * @return >0 if match found */ +template __device__ __forceinline__ match_result reprog_device::regexec(string_view const dstr, - reljunk jnk, + reljunk

& jnk, string_view::const_iterator itr, cudf::size_type end, cudf::size_type const group_id) const @@ -288,8 +300,9 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const if (((eos < 0) || (pos < eos)) && match == 0) { auto ids = _startinst_ids; - while (*ids >= 0) - jnk.list1->activate(*ids++, (group_id == 0 ? pos : -1), -1); + while (*ids >= 0) { + jnk.list1->template activate

(*ids++, (group_id == 0 ? pos : -1), -1); + } } last_character = itr.byte_offset() >= dstr.size_bytes(); @@ -303,7 +316,7 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const expanded = false; for (int16_t i = 0; i < jnk.list1->get_size(); i++) { - auto state = jnk.list1->get_state(i); + auto state = jnk.list1->template get_state

(i); auto range = state.range; auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; @@ -316,12 +329,12 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const case NCCLASS: case END: id_activate = state.inst_id; break; case LBRA: - if (inst.u1.subid == group_id) range.x = pos; + if (inst.u1.subid == group_id) { range.x = pos; } id_activate = inst.u2.next_id; expanded = true; break; case RBRA: - if (inst.u1.subid == group_id) range.y = pos; + if (inst.u1.subid == group_id) { range.y = pos; } id_activate = inst.u2.next_id; expanded = true; break; @@ -363,12 +376,12 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const break; } case OR: - jnk.list2->activate(inst.u1.right_id, range.x, range.y); + jnk.list2->template activate

(inst.u1.right_id, range.x, range.y); id_activate = inst.u2.left_id; expanded = true; break; } - if (id_activate >= 0) jnk.list2->activate(id_activate, range.x, range.y); + if (id_activate >= 0) { jnk.list2->template activate

(id_activate, range.x, range.y); } } jnk.swaplist(); @@ -378,7 +391,7 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const bool continue_execute = true; jnk.list2->reset(); for (int16_t i = 0; continue_execute && i < jnk.list1->get_size(); i++) { - auto const state = jnk.list1->get_state(i); + auto const state = jnk.list1->template get_state

(i); auto const range = state.range; auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; @@ -408,8 +421,9 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const continue_execute = false; break; } - if (continue_execute && (id_activate >= 0)) - jnk.list2->activate(id_activate, range.x, range.y); + if (continue_execute && (id_activate >= 0)) { + jnk.list2->template activate

(id_activate, range.x, range.y); + } } ++pos; @@ -421,12 +435,13 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const return match ? match_result({begin, end}) : cuda::std::nullopt; } +template __device__ __forceinline__ match_result reprog_device::find(int32_t const thread_idx, string_view const dstr, string_view::const_iterator begin, cudf::size_type end) const { - return call_regexec(thread_idx, dstr, begin, end); + return call_regexec

(thread_idx, dstr, begin, end); } __device__ __forceinline__ match_result reprog_device::extract(int32_t const thread_idx, @@ -439,6 +454,7 @@ __device__ __forceinline__ match_result reprog_device::extract(int32_t const thr return call_regexec(thread_idx, dstr, begin, end, group_id + 1); } +template __device__ __forceinline__ match_result reprog_device::call_regexec(int32_t const thread_idx, string_view const dstr, @@ -452,8 +468,8 @@ reprog_device::call_regexec(int32_t const thread_idx, gp_ptr += relist::alloc_size(_max_insts, _thread_count); relist list2(static_cast(_max_insts), _thread_count, gp_ptr, thread_idx); - reljunk jnk(&list1, &list2, get_inst(_startinst_id)); - return regexec(dstr, jnk, begin, end, group_id); + reljunk

jnk(&list1, &list2, get_inst(_startinst_id)); + return regexec

(dstr, jnk, begin, end, group_id); } } // namespace detail From c8318821cd2c1b62c2c44487b51d45df735460cf Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 29 Sep 2025 14:43:55 -0400 Subject: [PATCH 2/3] set invalid position values --- cpp/src/strings/regex/regex.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 080a055271f..9b383ab04a3 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -88,7 +88,7 @@ struct alignas(8) relist { if constexpr (P == positional::DEFAULT) { return restate{ranges[idx * stride], inst_ids[idx * stride]}; } - return restate{{0, 0}, inst_ids[idx * stride]}; + return restate{{-1, -1}, inst_ids[idx * stride]}; } [[nodiscard]] __device__ __forceinline__ int16_t get_size() const { return size; } From 8ef8d8fb03e0223ae153fd554bb9890c1d983b97 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 2 Oct 2025 10:37:14 -0400 Subject: [PATCH 3/3] update enum names and comments --- cpp/src/strings/regex/regex.cuh | 12 ++++++++---- cpp/src/strings/regex/regex.inl | 10 +++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index b08df7828bb..fcdb1f6dcf3 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -34,9 +34,12 @@ namespace cudf { namespace strings { namespace detail { +/** + * @brief Template type used on `find` to specify desired position values in returned match_result + */ enum class positional : int8_t { - DEFAULT = 0, /// both begin and end are returned - END_ONLY = 1, /// only end is returned + BEGIN_END = 0, /// both begin and end positions are returned + END_ONLY = 1, /// only the end position is returned }; template @@ -186,6 +189,7 @@ class reprog_device { /** * @brief Does a find evaluation using the compiled expression on the given string. * + * @tparam P Desired positional values. Default includes valid begin and end match positions. * @param thread_idx The index used for mapping the state memory for this string in global memory. * @param d_str The string to search. * @param begin Position to begin the search within `d_str`. @@ -193,7 +197,7 @@ class reprog_device { * Specify -1 to match any virtual positions past the end of the string. * @return If match found, returns character positions of the matches. */ - template + template [[nodiscard]] __device__ inline match_result find(int32_t const thread_idx, string_view const d_str, string_view::const_iterator begin, @@ -243,7 +247,7 @@ class reprog_device { /** * @brief Utility wrapper to setup state memory structures for calling regexec */ - template + template [[nodiscard]] __device__ inline match_result call_regexec( int32_t const thread_idx, string_view const d_str, diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 9b383ab04a3..2ae8dbc7d62 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -71,21 +71,21 @@ struct alignas(8) relist { size = 0; } - template + template __device__ __forceinline__ bool activate(int32_t id, int32_t begin, int32_t end) { if (readMask(id)) { return false; } writeMask(id); inst_ids[size * stride] = static_cast(id); - if constexpr (P == positional::DEFAULT) { ranges[size * stride] = int2{begin, end}; } + if constexpr (P == positional::BEGIN_END) { ranges[size * stride] = int2{begin, end}; } ++size; return true; } - template + template [[nodiscard]] __device__ __forceinline__ restate get_state(int16_t idx) const { - if constexpr (P == positional::DEFAULT) { + if constexpr (P == positional::BEGIN_END) { return restate{ranges[idx * stride], inst_ids[idx * stride]}; } return restate{{-1, -1}, inst_ids[idx * stride]}; @@ -114,7 +114,7 @@ struct alignas(8) relist { } }; -template +template struct reljunk { relist* __restrict__ list1; relist* __restrict__ list2;