@@ -3166,4 +3166,43 @@ mod tests {
3166
3166
Ok ( _) => panic ! ( "Expected NotYetImplemented error for map data type" ) ,
3167
3167
}
3168
3168
}
3169
+
3170
/// Checks that decoding rows back into a `Utf8View` column yields a smaller
/// values buffer when UTF-8 validation is skipped than when it is performed.
///
/// The same rows are decoded twice: once directly from the `Rows` produced by
/// the converter, and once after a round trip through a binary array and the
/// converter's parser. Only the first data buffer of each resulting string
/// view array is compared.
#[test]
fn test_values_buffer_smaller_when_utf8_validation_disabled() {
    // StringViewArray with inline strings only (every non-null value is <= 12 bytes)
    let col = Arc::new(StringViewArray::from_iter([
        Some("hello"), // short(5)
        None,          // null
        Some("short"), // short(5)
        Some("tiny"),  // short(4)
    ])) as ArrayRef;

    // 1. Convert cols into rows
    let converter = RowConverter::new(vec![SortField::new(DataType::Utf8View)]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();

    // 2a. Convert rows into cols (validate_utf8 = false)
    let converted_without_utf8_validation = converter.convert_rows(&rows).unwrap();

    // 2b. Convert rows into cols (validate_utf8 = true since Row is initialized through RowParser)
    let rows = rows.try_into_binary().expect("reasonable size");
    let parser = converter.parser();
    let converted_with_utf8_validation = converter
        .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
        .unwrap();

    // Exactly one column went in, so exactly one column must come back out.
    // `assert_eq!` reports the actual length if this ever fails.
    assert_eq!(converted_without_utf8_validation.len(), 1);
    assert_eq!(converted_with_utf8_validation.len(), 1);

    let array_1 = &converted_without_utf8_validation[0];
    let array_2 = &converted_with_utf8_validation[0];

    let values_buffer_1 = &array_1.as_string_view().data_buffers()[0];
    let values_buffer_2 = &array_2.as_string_view().data_buffers()[0];

    // Since there are no long (>12) strings, len of values buffer is 0
    assert_eq!(values_buffer_1.len(), 0);
    // When utf8 validation enabled, values buffer includes inline strings (5+5+4)
    assert_eq!(values_buffer_2.len(), 14);
}
3169
3208
}
0 commit comments