refactor: use upstream inline_key_fast (#17044)

zhuqi-lucas · web-flow · commit 71b92bc9e768 · 2025-08-05T15:40:49.000-04:00
diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs
@@ -289,120 +289,6 @@ impl CursorArray for StringViewArray {
     }
 }
 
-/// Todo use arrow-rs side api after: <https://github.com/apache/arrow-rs/pull/7748> and <https://github.com/apache/arrow-rs/pull/7875> released
-/// Builds a 128-bit composite key for an inline value:
-///
-/// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting).
-/// - Low  32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings
-///   (or those with fewer meaningful bytes) always numerically sort before longer ones.
-///
-/// This function extracts the length and the 12-byte inline string data from the raw
-/// little-endian `u128` representation, converts them to big-endian ordering, and packs them
-/// into a single `u128` value suitable for fast, branchless comparisons.
-///
-/// # Why include length?
-///
-/// A pure 96-bit content comparison can’t distinguish between two values whose inline bytes
-/// compare equal—either because one is a true prefix of the other or because zero-padding
-/// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare
-/// handles both content and length in one go.
-///
-/// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes)
-///
-/// | String     | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding)    |
-/// |------------|-----------------------|---------------------------------|
-/// | `"bar"`   | `03 00 00 00`         | `62 61 72` + 9 × `00`           |
-/// | `"bar\0"`| `04 00 00 00`         | `62 61 72 00` + 8 × `00`        |
-///
-/// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field
-/// then differentiates:
-///
-/// ```text
-/// key("bar")   = 0x0000000000000000000062617200000003
-/// key("bar\0") = 0x0000000000000000000062617200000004
-/// ⇒ key("bar") < key("bar\0")
-/// ```
-/// # Inlining and Endianness
-///
-/// - We start by calling `.to_le_bytes()` on the `raw` `u128`, because Rust’s native in‑memory
-///   representation is little‑endian on x86/ARM.
-/// - We extract the low 32 bits numerically (`raw as u32`)—this step is endianness‑free.
-/// - We copy the 12 bytes of inline data (original order) into `buf[0..12]`.
-/// - We serialize `length` as big‑endian into `buf[12..16]`.
-/// - Finally, `u128::from_be_bytes(buf)` treats `buf[0]` as the most significant byte
-///   and `buf[15]` as the least significant, producing a `u128` whose integer value
-///   directly encodes “inline data then length” in big‑endian form.
-///
-/// This ensures that a simple `u128` comparison is equivalent to the desired
-/// lexicographical comparison of the inline bytes followed by length.
-#[inline(always)]
-pub fn inline_key_fast(raw: u128) -> u128 {
-    // 1. Decompose `raw` into little‑endian bytes:
-    //    - raw_bytes[0..4]  = length in LE
-    //    - raw_bytes[4..16] = inline string data
-    let raw_bytes = raw.to_le_bytes();
-
-    // 2. Numerically truncate to get the low 32‑bit length (endianness‑free).
-    let length = raw as u32;
-
-    // 3. Build a 16‑byte buffer in big‑endian order:
-    //    - buf[0..12]  = inline string bytes (in original order)
-    //    - buf[12..16] = length.to_be_bytes() (BE)
-    let mut buf = [0u8; 16];
-    buf[0..12].copy_from_slice(&raw_bytes[4..16]); // inline data
-
-    // Why convert length to big-endian for comparison?
-    //
-    // Rust (on most platforms) stores integers in little-endian format,
-    // meaning the least significant byte is at the lowest memory address.
-    // For example, an u32 value like 0x22345677 is stored in memory as:
-    //
-    //   [0x77, 0x56, 0x34, 0x22]  // little-endian layout
-    //    ^     ^     ^     ^
-    //  LSB   ↑↑↑           MSB
-    //
-    // This layout is efficient for arithmetic but *not* suitable for
-    // lexicographic (dictionary-style) comparison of byte arrays.
-    //
-    // To compare values by byte order—e.g., for sorted keys or binary trees—
-    // we must convert them to **big-endian**, where:
-    //
-    //   - The most significant byte (MSB) comes first (index 0)
-    //   - The least significant byte (LSB) comes last (index N-1)
-    //
-    // In big-endian, the same u32 = 0x22345677 would be represented as:
-    //
-    //   [0x22, 0x34, 0x56, 0x77]
-    //
-    // This ordering aligns with natural string/byte sorting, so calling
-    // `.to_be_bytes()` allows us to construct
-    // keys where standard numeric comparison (e.g., `<`, `>`) behaves
-    // like lexicographic byte comparison.
-    buf[12..16].copy_from_slice(&length.to_be_bytes()); // length in BE
-
-    // 4. Deserialize the buffer as a big‑endian u128:
-    //    buf[0] is MSB, buf[15] is LSB.
-    // Details:
-    // Note on endianness and layout:
-    //
-    // Although `buf[0]` is stored at the lowest memory address,
-    // calling `u128::from_be_bytes(buf)` interprets it as the **most significant byte (MSB)**,
-    // and `buf[15]` as the **least significant byte (LSB)**.
-    //
-    // This is the core principle of **big-endian decoding**:
-    //   - Byte at index 0 maps to bits 127..120 (highest)
-    //   - Byte at index 1 maps to bits 119..112
-    //   - ...
-    //   - Byte at index 15 maps to bits 7..0 (lowest)
-    //
-    // So even though memory layout goes from low to high (left to right),
-    // big-endian treats the **first byte** as highest in value.
-    //
-    // This guarantees that comparing two `u128` keys is equivalent to lexicographically
-    // comparing the original inline bytes, followed by length.
-    u128::from_be_bytes(buf)
-}
-
 impl CursorValues for StringViewArray {
     fn len(&self) -> usize {
         self.views().len()
@@ -460,7 +346,8 @@ impl CursorValues for StringViewArray {
         if l.data_buffers().is_empty() && r.data_buffers().is_empty() {
             let l_view = unsafe { l.views().get_unchecked(l_idx) };
             let r_view = unsafe { r.views().get_unchecked(r_idx) };
-            return inline_key_fast(*l_view).cmp(&inline_key_fast(*r_view));
+            return StringViewArray::inline_key_fast(*l_view)
+                .cmp(&StringViewArray::inline_key_fast(*r_view));
         }
 
         unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) }
@@ -555,7 +442,6 @@ impl<T: CursorValues> CursorValues for ArrayValues<T> {
 
 #[cfg(test)]
 mod tests {
-    use arrow::array::GenericBinaryArray;
     use datafusion_execution::memory_pool::{
         GreedyMemoryPool, MemoryConsumer, MemoryPool,
     };
@@ -720,100 +606,4 @@ mod tests {
         b.advance();
         assert_eq!(a.cmp(&b), Ordering::Less);
     }
-
-    /// Integration tests for `inline_key_fast` covering:
-    ///
-    /// 1. Monotonic ordering across increasing lengths and lexical variations.
-    /// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence.
-    ///
-    /// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why
-    /// the length field is required even when all inline bytes fit in 12 bytes.
-    ///
-    /// The test includes strings that verify correct byte order (prevent reversal bugs),
-    /// and length-based tie-breaking in the composite key.
-    ///
-    /// The test confirms that `inline_key_fast` produces keys which sort consistently
-    /// with the expected lexicographical order of the raw byte arrays.
-    #[test]
-    fn test_inline_key_fast_various_lengths_and_lexical() {
-        /// Helper to create a raw u128 value representing an inline ByteView:
-        /// - `length`: number of meaningful bytes (must be ≤ 12)
-        /// - `data`: the actual inline data bytes
-        ///
-        /// The first 4 bytes encode length in little-endian,
-        /// the following 12 bytes contain the inline string data (unpadded).
-        fn make_raw_inline(length: u32, data: &[u8]) -> u128 {
-            assert!(length as usize <= 12, "Inline length must be ≤ 12");
-            assert!(
-                data.len() == length as usize,
-                "Data length must match `length`"
-            );
-
-            let mut raw_bytes = [0u8; 16];
-            raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // length stored little-endian
-            raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data
-            u128::from_le_bytes(raw_bytes)
-        }
-
-        // Test inputs: various lengths and lexical orders,
-        // plus special cases for byte order and length tie-breaking
-        let test_inputs: Vec<&[u8]> = vec![
-            b"a",
-            b"aa",
-            b"aaa",
-            b"aab",
-            b"abcd",
-            b"abcde",
-            b"abcdef",
-            b"abcdefg",
-            b"abcdefgh",
-            b"abcdefghi",
-            b"abcdefghij",
-            b"abcdefghijk",
-            b"abcdefghijkl",
-            // Tests for byte-order reversal bug:
-            // Without the fix, "backend one" would compare as "eno dnekcab",
-            // causing incorrect sort order relative to "backend two".
-            b"backend one",
-            b"backend two",
-            // Tests length-tiebreaker logic:
-            // "bar" (3 bytes) and "bar\0" (4 bytes) have identical inline data,
-            // so only the length differentiates their ordering.
-            b"bar",
-            b"bar\0",
-            // Additional lexical and length tie-breaking cases with same prefix, in correct lex order:
-            b"than12Byt",
-            b"than12Bytes",
-            b"than12Bytes\0",
-            b"than12Bytesx",
-            b"than12Bytex",
-            b"than12Bytez",
-            // Additional lexical tests
-            b"xyy",
-            b"xyz",
-            b"xza",
-        ];
-
-        // Create a GenericBinaryArray for cross-comparison of lex order
-        let array: GenericBinaryArray<i32> = GenericBinaryArray::from(
-            test_inputs.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
-        );
-
-        for i in 0..array.len() - 1 {
-            let v1 = array.value(i);
-            let v2 = array.value(i + 1);
-
-            // Assert the array's natural lexical ordering is correct
-            assert!(v1 < v2, "Array compare failed: {v1:?} !< {v2:?}");
-
-            // Assert the keys produced by inline_key_fast reflect the same ordering
-            let key1 = inline_key_fast(make_raw_inline(v1.len() as u32, v1));
-            let key2 = inline_key_fast(make_raw_inline(v2.len() as u32, v2));
-
-            assert!(
-                key1 < key2,
-                "Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}",
-            );
-        }
-    }
 }