Skip to content

Commit 1f77ac5

Browse files
klion26 and alamb
authored
[Variant] Support Variant to PrimitiveArrow for unsigned integer (#8369)
# Which issue does this PR close? - Closes #8368. # Rationale for this change - Add support for converting variants to Arrow primitive arrays of unsigned integer types - Add tests for variant-to-Arrow-primitive conversion of signed and unsigned integers # Are these changes tested? Covered by added unit tests. # Are there any user-facing changes? No. If there are any breaking changes to public APIs, please call them out. Co-authored-by: Andrew Lamb <[email protected]>
1 parent d6f40ce commit 1f77ac5

File tree

3 files changed

+118
-23
lines changed

3 files changed

+118
-23
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,30 @@ impl VariantAsPrimitive<datatypes::Float64Type> for Variant<'_, '_> {
7474
}
7575
}
7676

77+
// `VariantAsPrimitive` for Arrow's `UInt8Type`: `as_primitive` forwards to
// `Variant::as_u8` and returns its `Option<u8>` unchanged.
impl VariantAsPrimitive<datatypes::UInt8Type> for Variant<'_, '_> {
78+
fn as_primitive(&self) -> Option<u8> {
79+
self.as_u8()
80+
}
81+
}
82+
83+
// `VariantAsPrimitive` for Arrow's `UInt16Type`: `as_primitive` forwards to
// `Variant::as_u16` and returns its `Option<u16>` unchanged.
impl VariantAsPrimitive<datatypes::UInt16Type> for Variant<'_, '_> {
84+
fn as_primitive(&self) -> Option<u16> {
85+
self.as_u16()
86+
}
87+
}
88+
89+
// `VariantAsPrimitive` for Arrow's `UInt32Type`: `as_primitive` forwards to
// `Variant::as_u32` and returns its `Option<u32>` unchanged.
impl VariantAsPrimitive<datatypes::UInt32Type> for Variant<'_, '_> {
90+
fn as_primitive(&self) -> Option<u32> {
91+
self.as_u32()
92+
}
93+
}
94+
95+
// `VariantAsPrimitive` for Arrow's `UInt64Type`: `as_primitive` forwards to
// `Variant::as_u64` and returns its `Option<u64>` unchanged.
impl VariantAsPrimitive<datatypes::UInt64Type> for Variant<'_, '_> {
96+
fn as_primitive(&self) -> Option<u64> {
97+
self.as_u64()
98+
}
99+
}
100+
77101
/// Convert the value at a specific index in the given array into a `Variant`.
78102
macro_rules! non_generic_conversion_single_value {
79103
($array:expr, $cast_fn:expr, $index:expr) => {{

parquet-variant-compute/src/variant_get.rs

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ mod test {
304304
};
305305
use arrow::buffer::NullBuffer;
306306
use arrow::compute::CastOptions;
307+
use arrow::datatypes::DataType::{Int16, Int32, Int64, UInt16, UInt32, UInt64, UInt8};
307308
use arrow_schema::{DataType, Field, FieldRef, Fields};
308309
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
309310

@@ -661,19 +662,6 @@ mod test {
661662
numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
662663
}
663664

664-
/// Shredding: Extract the typed value as Int32Array
665-
#[test]
666-
fn get_variant_perfectly_shredded_int32_as_int32() {
667-
// Extract the typed value as Int32Array
668-
let array = perfectly_shredded_int32_variant_array();
669-
// specify we want the typed value as Int32
670-
let field = Field::new("typed_value", DataType::Int32, true);
671-
let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
672-
let result = variant_get(&array, options).unwrap();
673-
let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]));
674-
assert_eq!(&result, &expected)
675-
}
676-
677665
/// AllNull: extract a value as a VariantArray
678666
#[test]
679667
fn get_variant_all_null_as_variant() {
@@ -708,18 +696,69 @@ mod test {
708696
assert_eq!(&result, &expected)
709697
}
710698

711-
#[test]
712-
fn get_variant_perfectly_shredded_int16_as_int16() {
713-
// Extract the typed value as Int16Array
714-
let array = perfectly_shredded_int16_variant_array();
715-
// specify we want the typed value as Int16
716-
let field = Field::new("typed_value", DataType::Int16, true);
717-
let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
718-
let result = variant_get(&array, options).unwrap();
719-
let expected: ArrayRef = Arc::new(Int16Array::from(vec![Some(1), Some(2), Some(3)]));
720-
assert_eq!(&result, &expected)
699+
// Generates one `#[test]` function per invocation.
//
// Arguments:
// - `$name`: identifier used as the name of the generated test function.
// - `$primitive_type`: identifier passed as the data type of the requested
//   "typed_value" field.
// - `$perfectly_shredded_array_gen_fun`: zero-argument function whose result
//   is used as the input to `variant_get`.
// - `$expected_array`: expression for the expected output; it is wrapped in
//   an `Arc` and compared against the result as an `ArrayRef`.
macro_rules! perfectly_shredded_to_arrow_primitive_test {
700+
($name:ident, $primitive_type:ident, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
701+
#[test]
702+
fn $name() {
703+
let array = $perfectly_shredded_array_gen_fun();
704+
// Ask `variant_get` to return the value as `$primitive_type` (nullable field).
let field = Field::new("typed_value", $primitive_type, true);
705+
let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
706+
// The call is expected to succeed; `unwrap` fails the test otherwise.
let result = variant_get(&array, options).unwrap();
707+
let expected_array: ArrayRef = Arc::new($expected_array);
708+
assert_eq!(&result, &expected_array);
709+
}
710+
};
721711
}
722712

713+
// Int16: the array from `perfectly_shredded_int16_variant_array()` requested
// as `Int16` must equal an `Int16Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
714+
get_variant_perfectly_shredded_int16_as_int16,
715+
Int16,
716+
perfectly_shredded_int16_variant_array,
717+
Int16Array::from(vec![Some(1), Some(2), Some(3)])
718+
);
719+
720+
// Int32: the array from `perfectly_shredded_int32_variant_array()` requested
// as `Int32` must equal an `Int32Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
721+
get_variant_perfectly_shredded_int32_as_int32,
722+
Int32,
723+
perfectly_shredded_int32_variant_array,
724+
Int32Array::from(vec![Some(1), Some(2), Some(3)])
725+
);
726+
727+
// Int64: the array from `perfectly_shredded_int64_variant_array()` requested
// as `Int64` must equal an `Int64Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
728+
get_variant_perfectly_shredded_int64_as_int64,
729+
Int64,
730+
perfectly_shredded_int64_variant_array,
731+
Int64Array::from(vec![Some(1), Some(2), Some(3)])
732+
);
733+
734+
// UInt8: the array from `perfectly_shredded_uint8_variant_array()` requested
// as `UInt8` must equal a `UInt8Array` of [1, 2, 3].
// The generated test is named `..._uint8_as_uint8` (not `..._as_int8`) so the
// name matches the `UInt8` target type and the sibling tests' naming scheme.
perfectly_shredded_to_arrow_primitive_test!(
735+
get_variant_perfectly_shredded_uint8_as_uint8,
736+
UInt8,
737+
perfectly_shredded_uint8_variant_array,
738+
UInt8Array::from(vec![Some(1), Some(2), Some(3)])
739+
);
740+
741+
// UInt16: the array from `perfectly_shredded_uint16_variant_array()` requested
// as `UInt16` must equal a `UInt16Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
742+
get_variant_perfectly_shredded_uint16_as_uint16,
743+
UInt16,
744+
perfectly_shredded_uint16_variant_array,
745+
UInt16Array::from(vec![Some(1), Some(2), Some(3)])
746+
);
747+
748+
// UInt32: the array from `perfectly_shredded_uint32_variant_array()` requested
// as `UInt32` must equal a `UInt32Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
749+
get_variant_perfectly_shredded_uint32_as_uint32,
750+
UInt32,
751+
perfectly_shredded_uint32_variant_array,
752+
UInt32Array::from(vec![Some(1), Some(2), Some(3)])
753+
);
754+
755+
// UInt64: the array from `perfectly_shredded_uint64_variant_array()` requested
// as `UInt64` must equal a `UInt64Array` of [1, 2, 3].
perfectly_shredded_to_arrow_primitive_test!(
756+
get_variant_perfectly_shredded_uint64_as_uint64,
757+
UInt64,
758+
perfectly_shredded_uint64_variant_array,
759+
UInt64Array::from(vec![Some(1), Some(2), Some(3)])
760+
);
761+
723762
/// Return a VariantArray that represents a perfectly "shredded" variant
724763
/// for the given typed value.
725764
///

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ pub(crate) enum VariantToArrowRowBuilder<'a> {
3939
Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
4040
Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
4141
Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
42+
UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
43+
UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
44+
UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
45+
UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
4246
BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
4347

4448
// Path extraction wrapper - contains a boxed enum for any of the above
@@ -53,6 +57,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
5357
Int16(b) => b.append_null(),
5458
Int32(b) => b.append_null(),
5559
Int64(b) => b.append_null(),
60+
UInt8(b) => b.append_null(),
61+
UInt16(b) => b.append_null(),
62+
UInt32(b) => b.append_null(),
63+
UInt64(b) => b.append_null(),
5664
Float16(b) => b.append_null(),
5765
Float32(b) => b.append_null(),
5866
Float64(b) => b.append_null(),
@@ -68,6 +76,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
6876
Int16(b) => b.append_value(value),
6977
Int32(b) => b.append_value(value),
7078
Int64(b) => b.append_value(value),
79+
UInt8(b) => b.append_value(value),
80+
UInt16(b) => b.append_value(value),
81+
UInt32(b) => b.append_value(value),
82+
UInt64(b) => b.append_value(value),
7183
Float16(b) => b.append_value(value),
7284
Float32(b) => b.append_value(value),
7385
Float64(b) => b.append_value(value),
@@ -83,6 +95,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
8395
Int16(b) => b.finish(),
8496
Int32(b) => b.finish(),
8597
Int64(b) => b.finish(),
98+
UInt8(b) => b.finish(),
99+
UInt16(b) => b.finish(),
100+
UInt32(b) => b.finish(),
101+
UInt64(b) => b.finish(),
86102
Float16(b) => b.finish(),
87103
Float32(b) => b.finish(),
88104
Float64(b) => b.finish(),
@@ -132,6 +148,22 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>(
132148
cast_options,
133149
capacity,
134150
)),
151+
Some(DataType::UInt8) => UInt8(VariantToPrimitiveArrowRowBuilder::new(
152+
cast_options,
153+
capacity,
154+
)),
155+
Some(DataType::UInt16) => UInt16(VariantToPrimitiveArrowRowBuilder::new(
156+
cast_options,
157+
capacity,
158+
)),
159+
Some(DataType::UInt32) => UInt32(VariantToPrimitiveArrowRowBuilder::new(
160+
cast_options,
161+
capacity,
162+
)),
163+
Some(DataType::UInt64) => UInt64(VariantToPrimitiveArrowRowBuilder::new(
164+
cast_options,
165+
capacity,
166+
)),
135167
_ => {
136168
return Err(ArrowError::NotYetImplemented(format!(
137169
"variant_get with path={:?} and data_type={:?} not yet implemented",

0 commit comments

Comments
 (0)