Skip to content

Commit 56649bf

Browse files
committed
[Variant] Support primitive variant to arrow row for boolean
1 parent e06f0ee commit 56649bf

File tree

2 files changed

+115
-19
lines changed

2 files changed

+115
-19
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ mod test {
307307
use arrow::buffer::NullBuffer;
308308
use arrow::compute::CastOptions;
309309
use arrow::datatypes::DataType::{Int16, Int32, Int64};
310+
use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
310311
use arrow_schema::{DataType, Field, FieldRef, Fields};
311312
use chrono::DateTime;
312313
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
@@ -711,6 +712,13 @@ mod test {
711712
};
712713
}
713714

715+
// Int8 case: reading the perfectly shredded Int8 variant array back as `Int8`
// is expected to yield the same three values. The test name follows the
// `<source>_as_<target>` pattern used by the sibling Int16/Int32/Int64 cases.
perfectly_shredded_to_arrow_primitive_test!(
    get_variant_perfectly_shredded_int8_as_int8,
    Int8,
    perfectly_shredded_int8_variant_array,
    Int8Array::from(vec![Some(1), Some(2), Some(3)])
);
721+
714722
perfectly_shredded_to_arrow_primitive_test!(
715723
get_variant_perfectly_shredded_int16_as_int16,
716724
Int16,
@@ -732,31 +740,37 @@ mod test {
732740
Int64Array::from(vec![Some(1), Some(2), Some(3)])
733741
);
734742

735-
/// Return a VariantArray that represents a perfectly "shredded" variant
736-
/// for the given typed value.
737-
///
738-
/// The schema of the corresponding `StructArray` would look like this:
739-
///
740-
/// ```text
741-
/// StructArray {
742-
/// metadata: BinaryViewArray,
743-
/// typed_value: Int32Array,
744-
/// }
745-
/// ```
746-
macro_rules! numeric_perfectly_shredded_variant_array_fn {
747-
($func:ident, $array_type:ident, $primitive_type:ty) => {
743+
perfectly_shredded_to_arrow_primitive_test!(
744+
get_variant_perfectly_shredded_float32_as_float32,
745+
Float32,
746+
perfectly_shredded_float32_variant_array,
747+
Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
748+
);
749+
750+
perfectly_shredded_to_arrow_primitive_test!(
751+
get_variant_perfectly_shredded_float64_as_float64,
752+
Float64,
753+
perfectly_shredded_float64_variant_array,
754+
Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
755+
);
756+
757+
perfectly_shredded_to_arrow_primitive_test!(
758+
get_variant_perfectly_shredded_boolean_as_boolean,
759+
Boolean,
760+
perfectly_shredded_bool_variant_array,
761+
BooleanArray::from(vec![Some(true), Some(false), Some(true)])
762+
);
763+
764+
macro_rules! perfectly_shredded_variant_array_fn {
765+
($func:ident, $typed_value_gen:expr) => {
748766
fn $func() -> ArrayRef {
749767
// At the time of writing, the `VariantArrayBuilder` does not support shredding,
750768
// so we must construct the array manually. See https://github.com/apache/arrow-rs/issues/7895
751769
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
752770
EMPTY_VARIANT_METADATA_BYTES,
753771
3,
754772
));
755-
let typed_value = $array_type::from(vec![
756-
Some(<$primitive_type>::try_from(1u8).unwrap()),
757-
Some(<$primitive_type>::try_from(2u8).unwrap()),
758-
Some(<$primitive_type>::try_from(3u8).unwrap()),
759-
]);
773+
let typed_value = $typed_value_gen();
760774

761775
let struct_array = StructArrayBuilder::new()
762776
.with_field("metadata", Arc::new(metadata), false)
@@ -770,6 +784,33 @@ mod test {
770784
};
771785
}
772786

787+
perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
788+
BooleanArray::from(vec![Some(true), Some(false), Some(true)])
789+
});
790+
791+
/// Defines a function `$func` that returns a perfectly "shredded" variant
/// array whose `typed_value` column is a numeric `$array_type` holding the
/// values 1, 2 and 3.
///
/// The schema of the corresponding `StructArray` would look like this
/// (shown here for `Int32Array`):
///
/// ```text
/// StructArray {
///   metadata: BinaryViewArray,
///   typed_value: Int32Array,
/// }
/// ```
///
/// Each value is produced by converting a `u8` into `$primitive_type` via
/// `try_from`, so the same macro serves every numeric type that can be built
/// from a `u8`. The surrounding array construction is delegated to
/// `perfectly_shredded_variant_array_fn!`.
macro_rules! numeric_perfectly_shredded_variant_array_fn {
    ($func:ident, $array_type:ident, $primitive_type:ty) => {
        perfectly_shredded_variant_array_fn!($func, || {
            // 1, 2, 3 always fit in the target type, so the `unwrap` cannot
            // fail for the numeric types this macro is instantiated with.
            let values: Vec<Option<$primitive_type>> = (1u8..=3)
                .map(|raw| Some(<$primitive_type>::try_from(raw).unwrap()))
                .collect();
            $array_type::from(values)
        });
    };
}
813+
773814
numeric_perfectly_shredded_variant_array_fn!(
774815
perfectly_shredded_int8_variant_array,
775816
Int8Array,

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder};
18+
use arrow::array::{
19+
Array, ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveArray, PrimitiveBuilder,
20+
};
1921
use arrow::compute::CastOptions;
2022
use arrow::datatypes::{self, ArrowPrimitiveType, DataType};
2123
use arrow::error::{ArrowError, Result};
@@ -30,6 +32,7 @@ use std::sync::Arc;
3032
/// `VariantToArrowRowBuilder` (below) and `VariantToShreddedPrimitiveVariantRowBuilder` (in
3133
/// `shred_variant.rs`).
3234
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
35+
Boolean(VariantToBooleanArrowRowBuilder<'a>),
3336
Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
3437
Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
3538
Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
@@ -41,6 +44,7 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
4144
Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
4245
Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
4346
Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
47+
Boolean(VariantToBooleanArrowRowBuilder<'a>),
4448
}
4549

4650
/// Builder for converting variant values into strongly typed Arrow arrays.
@@ -59,6 +63,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
5963
pub fn append_null(&mut self) -> Result<()> {
6064
use PrimitiveVariantToArrowRowBuilder::*;
6165
match self {
66+
Boolean(b) => b.append_null(),
6267
Int8(b) => b.append_null(),
6368
Int16(b) => b.append_null(),
6469
Int32(b) => b.append_null(),
@@ -70,6 +75,8 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
7075
Float16(b) => b.append_null(),
7176
Float32(b) => b.append_null(),
7277
Float64(b) => b.append_null(),
78+
TimestampMicro(b) => b.append_null(),
79+
TimestampNano(b) => b.append_null(),
7380
}
7481
}
7582

@@ -87,12 +94,14 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
8794
Float16(b) => b.append_value(value),
8895
Float32(b) => b.append_value(value),
8996
Float64(b) => b.append_value(value),
97+
Boolean(b) => b.append_value(value),
9098
}
9199
}
92100

93101
pub fn finish(self) -> Result<ArrayRef> {
94102
use PrimitiveVariantToArrowRowBuilder::*;
95103
match self {
104+
Boolean(b) => b.finish(),
96105
Int8(b) => b.finish(),
97106
Int16(b) => b.finish(),
98107
Int32(b) => b.finish(),
@@ -104,6 +113,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
104113
Float16(b) => b.finish(),
105114
Float32(b) => b.finish(),
106115
Float64(b) => b.finish(),
116+
Boolean(b) => b.finish(),
107117
}
108118
}
109119
}
@@ -146,6 +156,7 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
146156
use PrimitiveVariantToArrowRowBuilder::*;
147157

148158
let builder = match data_type {
159+
DataType::Boolean => Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
149160
DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
150161
cast_options,
151162
capacity,
@@ -190,6 +201,7 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
190201
cast_options,
191202
capacity,
192203
)),
204+
DataType::Boolean => Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
193205
_ if data_type.is_primitive() => {
194206
return Err(ArrowError::NotYetImplemented(format!(
195207
"Primitive data_type {data_type:?} not yet implemented"
@@ -297,6 +309,49 @@ fn get_type_name<T: ArrowPrimitiveType>() -> &'static str {
297309
}
298310
}
299311

312+
/// Builder for converting variant values to boolean values
313+
/// Boolean is not primitive types in Arrow, so we need a separate builder
314+
pub(crate) struct VariantToBooleanArrowRowBuilder<'a> {
315+
builder: arrow::array::BooleanBuilder,
316+
cast_options: &'a CastOptions<'a>,
317+
}
318+
319+
impl<'a> VariantToBooleanArrowRowBuilder<'a> {
320+
fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
321+
Self {
322+
builder: arrow::array::BooleanBuilder::with_capacity(capacity),
323+
cast_options,
324+
}
325+
}
326+
327+
fn append_null(&mut self) -> Result<()> {
328+
self.builder.append_null();
329+
Ok(())
330+
}
331+
332+
fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
333+
if let Some(v) = value.as_boolean() {
334+
self.builder.append_value(v);
335+
Ok(true)
336+
} else {
337+
if !self.cast_options.safe {
338+
// Unsafe casting: return error on conversion failure
339+
return Err(ArrowError::CastError(format!(
340+
"Failed to extract boolean from variant {:?} at path VariantPath([])",
341+
value
342+
)));
343+
}
344+
// Safe casting: append null on conversion failure
345+
self.builder.append_null();
346+
Ok(false)
347+
}
348+
}
349+
350+
fn finish(mut self) -> Result<ArrayRef> {
351+
Ok(Arc::new(self.builder.finish()))
352+
}
353+
}
354+
300355
/// Builder for converting variant values to primitive values
301356
pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T: ArrowPrimitiveType> {
302357
builder: arrow::array::PrimitiveBuilder<T>,

0 commit comments

Comments
 (0)