Skip to content

Commit 0ff6084

Browse files
committed
add test case to check length of values buffer
1 parent 6b9e69a commit 0ff6084

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

arrow-row/src/lib.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,4 +3166,43 @@ mod tests {
31663166
Ok(_) => panic!("Expected NotYetImplemented error for map data type"),
31673167
}
31683168
}
3169+
3170+
/// Pins the size of the values buffer that `RowConverter::convert_rows` produces for a
/// `Utf8View` column that contains only short strings (each <= 12 bytes) and a null.
///
/// Two conversion paths are compared:
/// * rows obtained directly from `convert_columns` (UTF-8 validation disabled), which are
///   expected to yield an empty values buffer;
/// * rows round-tripped through a binary array and `RowParser` (UTF-8 validation enabled),
///   which are expected to yield a values buffer that also holds the short strings.
#[test]
fn test_values_buffer_smaller_when_utf8_validation_disabled() {
    // StringViewArray with inline strings only
    let col = Arc::new(StringViewArray::from_iter([
        Some("hello"), // short(5)
        None,          // null
        Some("short"), // short(5)
        Some("tiny"),  // short(4)
    ])) as ArrayRef;

    // 1. Convert cols into rows. `col` is not needed afterwards, so it is moved into the
    //    slice instead of being cloned.
    let converter = RowConverter::new(vec![SortField::new(DataType::Utf8View)]).unwrap();
    let rows = converter.convert_columns(&[col]).unwrap();

    // 2a. Convert rows into cols (validate_utf8 = false)
    let converted_without_utf8_validation = converter.convert_rows(&rows).unwrap();

    // 2b. Convert rows into cols (validate_utf8 = true since Row is initialized through RowParser)
    let rows = rows.try_into_binary().expect("reasonable size");
    let parser = converter.parser();
    let converted_with_utf8_validation = converter
        .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
        .unwrap();

    // A single sort field must come back as a single column on both paths.
    // `assert_eq!` reports the actual length if this ever fails.
    assert_eq!(converted_without_utf8_validation.len(), 1);
    assert_eq!(converted_with_utf8_validation.len(), 1);

    let array_1 = &converted_without_utf8_validation[0];
    let array_2 = &converted_with_utf8_validation[0];

    let values_buffer_1 = &array_1.as_string_view().data_buffers()[0];
    let values_buffer_2 = &array_2.as_string_view().data_buffers()[0];

    // Since there are no long (>12) strings, len of values buffer is 0
    assert_eq!(values_buffer_1.len(), 0);
    // When utf8 validation enabled, values buffer includes inline strings (5+5+4)
    assert_eq!(values_buffer_2.len(), 14);
}
31693208
}

0 commit comments

Comments
 (0)