@@ -23,10 +23,11 @@ use arrow::array::{
23
23
TimestampSecondArray ,
24
24
} ;
25
25
use arrow:: datatypes:: {
26
- i256, BinaryType , BinaryViewType , Date32Type , Date64Type , Decimal128Type , Decimal256Type ,
27
- Decimal32Type , Decimal64Type , Float16Type , Float32Type , Float64Type , Int16Type , Int32Type ,
28
- Int64Type , Int8Type , LargeBinaryType , Time32MillisecondType , Time32SecondType ,
29
- Time64MicrosecondType , Time64NanosecondType , UInt16Type , UInt32Type , UInt64Type , UInt8Type ,
26
+ i256, ArrowNativeType , BinaryType , BinaryViewType , Date32Type , Date64Type , Decimal128Type ,
27
+ Decimal256Type , Decimal32Type , Decimal64Type , Float16Type , Float32Type , Float64Type , Int16Type ,
28
+ Int32Type , Int64Type , Int8Type , LargeBinaryType , RunEndIndexType , Time32MillisecondType ,
29
+ Time32SecondType , Time64MicrosecondType , Time64NanosecondType , UInt16Type , UInt32Type ,
30
+ UInt64Type , UInt8Type ,
30
31
} ;
31
32
use arrow:: temporal_conversions:: {
32
33
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_s_to_datetime,
@@ -502,6 +503,17 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
502
503
builder
503
504
) ;
504
505
}
506
+ DataType :: RunEndEncoded ( run_ends, _) => match run_ends. data_type ( ) {
507
+ DataType :: Int16 => process_run_end_encoded :: < Int16Type > ( input, & mut builder) ?,
508
+ DataType :: Int32 => process_run_end_encoded :: < Int32Type > ( input, & mut builder) ?,
509
+ DataType :: Int64 => process_run_end_encoded :: < Int64Type > ( input, & mut builder) ?,
510
+ _ => {
511
+ return Err ( ArrowError :: CastError ( format ! (
512
+ "Unsupported run ends type: {:?}" ,
513
+ run_ends. data_type( )
514
+ ) ) ) ;
515
+ }
516
+ } ,
505
517
DataType :: Dictionary ( _, _) => {
506
518
let dict_array = input. as_any_dictionary ( ) ;
507
519
let values_variant_array = cast_to_variant ( dict_array. values ( ) . as_ref ( ) ) ?;
@@ -532,6 +544,41 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
532
544
Ok ( builder. build ( ) )
533
545
}
534
546
547
+ /// Generic function to process run-end encoded arrays
548
+ fn process_run_end_encoded < R : RunEndIndexType > (
549
+ input : & dyn Array ,
550
+ builder : & mut VariantArrayBuilder ,
551
+ ) -> Result < ( ) , ArrowError > {
552
+ let run_array = input. as_run :: < R > ( ) ;
553
+ let values_variant_array = cast_to_variant ( run_array. values ( ) . as_ref ( ) ) ?;
554
+
555
+ // Process runs in batches for better performance
556
+ let run_ends = run_array. run_ends ( ) . values ( ) ;
557
+ let mut logical_start = 0 ;
558
+
559
+ for ( physical_idx, & run_end) in run_ends. iter ( ) . enumerate ( ) {
560
+ let logical_end = run_end. as_usize ( ) ;
561
+ let run_length = logical_end - logical_start;
562
+
563
+ if values_variant_array. is_null ( physical_idx) {
564
+ // Append nulls for the entire run
565
+ for _ in 0 ..run_length {
566
+ builder. append_null ( ) ;
567
+ }
568
+ } else {
569
+ // Get the value once and append it for the entire run
570
+ let value = values_variant_array. value ( physical_idx) ;
571
+ for _ in 0 ..run_length {
572
+ builder. append_variant ( value. clone ( ) ) ;
573
+ }
574
+ }
575
+
576
+ logical_start = logical_end;
577
+ }
578
+
579
+ Ok ( ( ) )
580
+ }
581
+
535
582
// TODO do we need a cast_with_options to allow specifying conversion behavior,
536
583
// e.g. how to handle overflows, whether to convert to Variant::Null or return
537
584
// an error, etc. ?
@@ -544,9 +591,9 @@ mod tests {
544
591
Decimal256Array , Decimal32Array , Decimal64Array , DictionaryArray , FixedSizeBinaryBuilder ,
545
592
Float16Array , Float32Array , Float64Array , GenericByteBuilder , GenericByteViewBuilder ,
546
593
Int16Array , Int32Array , Int64Array , Int8Array , IntervalYearMonthArray , LargeStringArray ,
547
- NullArray , StringArray , StringViewArray , StructArray , Time32MillisecondArray ,
548
- Time32SecondArray , Time64MicrosecondArray , Time64NanosecondArray , UInt16Array , UInt32Array ,
549
- UInt64Array , UInt8Array ,
594
+ NullArray , StringArray , StringRunBuilder , StringViewArray , StructArray ,
595
+ Time32MillisecondArray , Time32SecondArray , Time64MicrosecondArray , Time64NanosecondArray ,
596
+ UInt16Array , UInt32Array , UInt64Array , UInt8Array ,
550
597
} ;
551
598
use arrow:: buffer:: NullBuffer ;
552
599
use arrow_schema:: { Field , Fields } ;
@@ -1847,6 +1894,58 @@ mod tests {
1847
1894
) ;
1848
1895
}
1849
1896
1897
#[test]
fn test_cast_to_variant_run_end_encoded() {
    // Each entry is (value, number of consecutive repetitions).
    let runs = [("apple", 2), ("banana", 3), ("cherry", 1)];

    let mut builder = StringRunBuilder::<Int32Type>::new();
    let mut expected = Vec::new();
    for (text, repeat) in runs {
        for _ in 0..repeat {
            // Build the input and its expected variant rows side by side.
            builder.append_value(text);
            expected.push(Some(Variant::from(text)));
        }
    }

    run_test(Arc::new(builder.finish()), expected);
}
1920
+
1921
#[test]
fn test_cast_to_variant_run_end_encoded_with_nulls() {
    use arrow::array::StringRunBuilder;
    use arrow::datatypes::Int32Type;

    // Logical rows of the run-end encoded input; `None` marks a null row.
    let rows = [
        Some("apple"),
        None,
        Some("banana"),
        Some("banana"),
        None,
        None,
    ];

    let mut builder = StringRunBuilder::<Int32Type>::new();
    let mut expected = Vec::new();
    for row in rows {
        match row {
            Some(text) => {
                builder.append_value(text);
                expected.push(Some(Variant::from(text)));
            }
            None => {
                builder.append_null();
                expected.push(None);
            }
        }
    }

    run_test(Arc::new(builder.finish()), expected);
}
1948
+
1850
1949
#[ test]
1851
1950
fn test_cast_to_variant_dictionary ( ) {
1852
1951
let values = StringArray :: from ( vec ! [ "apple" , "banana" , "cherry" , "date" ] ) ;
0 commit comments