Skip to content

Commit 4009514

Browse files
Weijun-H and alamb authored
[Variant] feat: add support for casting MapArray to VariantArray (#8177)
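# Which issue does this PR close?

- Closes #8063

# Rationale for this change

Maps are now cast to `Variant::Object`s.

# What changes are included in this PR?

Adds a `DataType::Map` arm to `cast_to_variant`: map keys are cast to `Utf8` and used as object field names, and map values are cast to variants recursively.

# Are these changes tested?

Yes

# Are there any user-facing changes?

---------

Co-authored-by: Andrew Lamb <[email protected]>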
1 parent cc1dc6c commit 4009514

File tree

1 file changed

+143
-5
lines changed

1 file changed

+143
-5
lines changed

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 143 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ use arrow::array::{
2323
TimestampSecondArray,
2424
};
2525
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
26+
use arrow::compute::kernels::cast;
2627
use arrow::datatypes::{
2728
i256, ArrowNativeType, BinaryType, BinaryViewType, Date32Type, Date64Type, Decimal128Type,
2829
Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, Float32Type, Float64Type, Int16Type,
@@ -535,6 +536,46 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
535536
builder.append_variant(value);
536537
}
537538
}
539+
540+
DataType::Map(field, _) => match field.data_type() {
541+
DataType::Struct(_) => {
542+
let map_array = input.as_map();
543+
let keys = cast(map_array.keys(), &DataType::Utf8)?;
544+
let key_strings = keys.as_string::<i32>();
545+
let values = cast_to_variant(map_array.values())?;
546+
let offsets = map_array.offsets();
547+
548+
let mut start_offset = offsets[0];
549+
for end_offset in offsets.iter().skip(1) {
550+
if start_offset >= *end_offset {
551+
builder.append_null();
552+
continue;
553+
}
554+
555+
let length = (end_offset - start_offset) as usize;
556+
557+
let mut variant_builder = VariantBuilder::new();
558+
let mut object_builder = variant_builder.new_object();
559+
560+
for i in start_offset..*end_offset {
561+
let value = values.value(i as usize);
562+
object_builder.insert(key_strings.value(i as usize), value);
563+
}
564+
object_builder.finish()?;
565+
let (metadata, value) = variant_builder.finish();
566+
let variant = Variant::try_new(&metadata, &value)?;
567+
568+
builder.append_variant(variant);
569+
570+
start_offset += length as i32;
571+
}
572+
}
573+
_ => {
574+
return Err(ArrowError::CastError(format!(
575+
"Unsupported map field type for casting to Variant: {field:?}",
576+
)));
577+
}
578+
},
538579
DataType::List(_) => {
539580
let list_array = input.as_list::<i32>();
540581
let values = list_array.values();
@@ -575,7 +616,6 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
575616
builder.append_variant(Variant::List(variant_list.clone()))
576617
}
577618
}
578-
579619
DataType::LargeList(_) => {
580620
let large_list_array = input.as_list::<i64>();
581621
let values = large_list_array.values();
@@ -673,11 +713,12 @@ mod tests {
673713
Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, FixedSizeBinaryBuilder,
674714
Float16Array, Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder,
675715
Int16Array, Int32Array, Int64Array, Int8Array, IntervalYearMonthArray, LargeListArray,
676-
LargeStringArray, ListArray, NullArray, StringArray, StringRunBuilder, StringViewArray,
677-
StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
678-
Time64NanosecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
716+
LargeStringArray, ListArray, MapArray, NullArray, StringArray, StringRunBuilder,
717+
StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray,
718+
Time64MicrosecondArray, Time64NanosecondArray, UInt16Array, UInt32Array, UInt64Array,
719+
UInt8Array,
679720
};
680-
use arrow::buffer::NullBuffer;
721+
use arrow::buffer::{NullBuffer, OffsetBuffer};
681722
use arrow_schema::{Field, Fields};
682723
use arrow_schema::{
683724
DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
@@ -2065,6 +2106,103 @@ mod tests {
20652106
);
20662107
}
20672108

2109+
#[test]
fn test_cast_map_to_variant_object() {
    // Three string-keyed entries are split into two map rows by the offsets:
    //   row 0 -> entries [0, 1) => {"key1": 1}
    //   row 1 -> entries [1, 3) => {"key2": 2, "key3": 3}
    let keys = vec!["key1", "key2", "key3"];
    let values_data = Int32Array::from(vec![1, 2, 3]);
    let entry_offsets = vec![0, 1, 3];
    let map_array =
        MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();

    let result = cast_to_variant(&map_array).unwrap();

    // Row 0 is a single-field object: {"key1": 1}
    let variant1 = result.value(0);
    let object1 = variant1.as_object().unwrap();
    assert_eq!(object1.get("key1").unwrap(), Variant::from(1));

    // Row 1 is ONE object with two fields: {"key2": 2, "key3": 3}
    let variant2 = result.value(1);
    let object2 = variant2.as_object().unwrap();
    assert_eq!(object2.get("key2").unwrap(), Variant::from(2));
    assert_eq!(object2.get("key3").unwrap(), Variant::from(3));
}
2137+
2138+
#[test]
fn test_cast_map_to_variant_object_with_nulls() {
    // The offsets describe three map rows over three entries:
    //   row 0 -> entries [0, 1) => {"key1": 1}
    //   row 1 -> entries [1, 1) => empty range
    //   row 2 -> entries [1, 3) => {"key2": 2, "key3": 3}
    let keys = vec!["key1", "key2", "key3"];
    let values_data = Int32Array::from(vec![1, 2, 3]);
    let entry_offsets = vec![0, 1, 1, 3];
    let map_array =
        MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();

    let result = cast_to_variant(&map_array).unwrap();

    // Row 0 is a single-field object: {"key1": 1}
    let variant1 = result.value(0);
    let object1 = variant1.as_object().unwrap();
    assert_eq!(object1.get("key1").unwrap(), Variant::from(1));

    // Row 1 has no entries (start offset == end offset); the cast reports such
    // a row as a null variant.
    // NOTE(review): this row is an *empty* map, not a map whose validity bit is
    // cleared -- confirm that mapping empty maps to null is the intended contract.
    assert!(result.is_null(1));

    // Row 2 is ONE object with two fields: {"key2": 2, "key3": 3}
    let variant2 = result.value(2);
    let object2 = variant2.as_object().unwrap();
    assert_eq!(object2.get("key2").unwrap(), Variant::from(2));
    assert_eq!(object2.get("key3").unwrap(), Variant::from(3));
}
2169+
2170+
#[test]
fn test_cast_map_with_non_string_keys_to_variant_object() {
    // Build a map with Int32 keys by hand. The offsets split the three entries
    // into two rows: row 0 -> [0, 1), row 1 -> [1, 3).
    let offsets = OffsetBuffer::new(vec![0, 1, 3].into());
    let fields = Fields::from(vec![
        Field::new("key", DataType::Int32, false),
        Field::new("values", DataType::Int32, false),
    ]);
    let columns = vec![
        Arc::new(Int32Array::from(vec![1, 2, 3])) as _,
        Arc::new(Int32Array::from(vec![1, 2, 3])) as _,
    ];

    // `fields` is needed twice (entries struct and the map's entry field type),
    // so only this first use clones it; everything else is moved.
    let entries = StructArray::new(fields.clone(), columns, None);
    let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

    let map_array = MapArray::new(field, offsets, entries, None, false);

    let result = cast_to_variant(&map_array).unwrap();

    // The cast converts map keys to Utf8 strings, so the integer keys are
    // looked up by their string form ("1", "2", "3").
    let variant1 = result.value(0);
    let object1 = variant1.as_object().unwrap();
    assert_eq!(object1.get("1").unwrap(), Variant::from(1));

    let variant2 = result.value(1);
    let object2 = variant2.as_object().unwrap();
    assert_eq!(object2.get("2").unwrap(), Variant::from(2));
    assert_eq!(object2.get("3").unwrap(), Variant::from(3));
}
2205+
20682206
#[test]
20692207
fn test_cast_to_variant_list() {
20702208
// List Array

0 commit comments

Comments
 (0)