From b124132d33f850a52ea4c68ffb2fbb98cb905b64 Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 11 Aug 2025 10:13:51 +0800 Subject: [PATCH 1/8] Extract type_conversion and support typed value get for DataType::Int16 --- .../src/cast_to_variant.rs | 140 +++------------ parquet-variant-compute/src/lib.rs | 1 + .../src/type_conversion.rs | 166 ++++++++++++++++++ parquet-variant-compute/src/variant_array.rs | 10 +- .../src/variant_get/mod.rs | 58 ++++-- .../src/variant_get/output/mod.rs | 7 +- .../src/variant_get/output/primitive.rs | 8 +- .../src/variant_get/output/variant.rs | 18 +- 8 files changed, 258 insertions(+), 150 deletions(-) create mode 100644 parquet-variant-compute/src/type_conversion.rs diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 43ee8ccb3929..5e5a6868e676 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -17,6 +17,10 @@ use std::sync::Arc; +use crate::{ + cast_conversion_nongeneric, cast_conversion_string, decimal_to_variant_decimal, + generic_conversion_array, non_generic_conversion_array, primitive_conversion, +}; use crate::{VariantArray, VariantArrayBuilder}; use arrow::array::{ Array, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, @@ -40,55 +44,6 @@ use parquet_variant::{ Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, }; -/// Convert the input array of a specific primitive type to a `VariantArray` -/// row by row -macro_rules! primitive_conversion { - ($t:ty, $input:expr, $builder:expr) => {{ - let array = $input.as_primitive::<$t>(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - $builder.append_variant(Variant::from(array.value(i))); - } - }}; -} - -/// Convert the input array to a `VariantArray` row by row, using `method` -/// requiring a generic type to downcast the generic array to a specific -/// array type and `cast_fn` to transform each element to a type compatible with Variant -macro_rules! generic_conversion { - ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method::<$t>(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } - }}; -} - -/// Convert the input array to a `VariantArray` row by row, using `method` -/// not requiring a generic type to downcast the generic array to a specific -/// array type and `cast_fn` to transform each element to a type compatible with Variant -macro_rules! non_generic_conversion { - ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } - }}; -} - fn convert_timestamp( time_unit: &TimeUnit, time_zone: &Option>, @@ -157,61 +112,6 @@ fn convert_timestamp( } } -/// Convert a decimal value to a `VariantDecimal` -macro_rules! decimal_to_variant_decimal { - ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => { - if *$scale < 0 { - // For negative scale, we need to multiply the value by 10^|scale| - // For example: 123 with scale -2 becomes 12300 - let multiplier = (10 as $value_type).pow((-*$scale) as u32); - // Check for overflow - if $v > 0 && $v > <$value_type>::MAX / multiplier { - return Variant::Null; - } - if $v < 0 && $v < <$value_type>::MIN / multiplier { - return Variant::Null; - } - <$variant_type>::try_new($v * multiplier, 0) - .map(|v| v.into()) - .unwrap_or(Variant::Null) - } else { - <$variant_type>::try_new($v, *$scale as u8) - .map(|v| v.into()) - .unwrap_or(Variant::Null) - } - }; -} - -/// Convert arrays that don't need generic type parameters -macro_rules! cast_conversion_nongeneric { - ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } - }}; -} - -/// Convert string arrays using the offset size as the type parameter -macro_rules! cast_conversion_string { - ($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method::<$offset_type>(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } - }}; -} - /// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you /// need to convert a specific data type /// @@ -248,17 +148,17 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { // todo: handle other types like Boolean, Date, Timestamp, etc. match input_type { DataType::Boolean => { - non_generic_conversion!(as_boolean, |v| v, input, builder); + non_generic_conversion_array!(as_boolean, |v| v, input, builder); } DataType::Binary => { - generic_conversion!(BinaryType, as_bytes, |v| v, input, builder); + generic_conversion_array!(BinaryType, as_bytes, |v| v, input, builder); } DataType::LargeBinary => { - generic_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder); + generic_conversion_array!(LargeBinaryType, as_bytes, |v| v, input, builder); } DataType::BinaryView => { - generic_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder); + generic_conversion_array!(BinaryViewType, as_byte_view, |v| v, input, builder); } DataType::Int8 => { primitive_conversion!(Int8Type, input, builder); @@ -285,7 +185,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { primitive_conversion!(UInt64Type, input, builder); } DataType::Float16 => { - generic_conversion!( + generic_conversion_array!( Float16Type, as_primitive, |v: f16| -> f32 { v.into() }, @@ -300,7 +200,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { primitive_conversion!(Float64Type, input, builder); } DataType::Decimal32(_, scale) => { - generic_conversion!( + generic_conversion_array!( Decimal32Type, as_primitive, |v| decimal_to_variant_decimal!(v, scale, i32, VariantDecimal4), @@ -309,7 +209,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::Decimal64(_, scale) => { - generic_conversion!( + generic_conversion_array!( Decimal64Type, as_primitive, |v| decimal_to_variant_decimal!(v, scale, i64, VariantDecimal8), @@ -318,7 +218,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::Decimal128(_, scale) => { - generic_conversion!( + generic_conversion_array!( Decimal128Type, as_primitive, |v| decimal_to_variant_decimal!(v, scale, i128, VariantDecimal16), @@ -327,7 +227,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::Decimal256(_, scale) => { - generic_conversion!( + generic_conversion_array!( Decimal256Type, as_primitive, |v: i256| { @@ -345,7 +245,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::FixedSizeBinary(_) => { - non_generic_conversion!(as_fixed_size_binary, |v| v, input, builder); + non_generic_conversion_array!(as_fixed_size_binary, |v| v, input, builder); } DataType::Null => { for _ in 0..input.len() { @@ -358,7 +258,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { DataType::Time32(unit) => { match *unit { TimeUnit::Second => { - generic_conversion!( + generic_conversion_array!( Time32SecondType, as_primitive, // nano second are always 0 @@ -368,7 +268,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } TimeUnit::Millisecond => { - generic_conversion!( + generic_conversion_array!( Time32MillisecondType, as_primitive, |v| NaiveTime::from_num_seconds_from_midnight_opt( @@ -391,7 +291,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { DataType::Time64(unit) => { match *unit { TimeUnit::Microsecond => { - generic_conversion!( + generic_conversion_array!( Time64MicrosecondType, as_primitive, |v| NaiveTime::from_num_seconds_from_midnight_opt( @@ -404,7 +304,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } TimeUnit::Nanosecond => { - generic_conversion!( + generic_conversion_array!( Time64NanosecondType, as_primitive, |v| NaiveTime::from_num_seconds_from_midnight_opt( @@ -486,7 +386,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { } } DataType::Date32 => { - generic_conversion!( + generic_conversion_array!( Date32Type, as_primitive, |v: i32| -> NaiveDate { Date32Type::to_naive_date(v) }, @@ -495,7 +395,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::Date64 => { - generic_conversion!( + generic_conversion_array!( Date64Type, as_primitive, |v: i64| { Date64Type::to_naive_date_opt(v).unwrap() }, diff --git a/parquet-variant-compute/src/lib.rs b/parquet-variant-compute/src/lib.rs index 245e344488ce..ef674d9614b5 100644 --- a/parquet-variant-compute/src/lib.rs +++ b/parquet-variant-compute/src/lib.rs @@ -38,6 +38,7 @@ pub mod cast_to_variant; mod from_json; mod to_json; +mod type_conversion; mod variant_array; mod variant_array_builder; pub mod variant_get; diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs new file mode 100644 index 000000000000..8b4321b2d354 --- /dev/null +++ b/parquet-variant-compute/src/type_conversion.rs @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Module for transforming a typed arrow `Array` to `VariantArray`. + +/// Convert the input array of a specific primitive type to a `VariantArray` +/// row by row +#[macro_export] +macro_rules! primitive_conversion { + ($t:ty, $input:expr, $builder:expr) => {{ + let array = $input.as_primitive::<$t>(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + $builder.append_variant(Variant::from(array.value(i))); + } + }}; +} + +/// Convert the value at a specific index in the given array into a `Variant`. +#[macro_export] +macro_rules! primitive_conversion_single_value { + ($t:ty, $input:expr, $index:expr) => {{ + let array = $input.as_primitive::<$t>(); + if array.is_null($index) { + return Variant::Null; + } + Variant::from(array.value($index)) + }}; +} + +/// Convert the input array to a `VariantArray` row by row, using `method` +/// requiring a generic type to downcast the generic array to a specific +/// array type and `cast_fn` to transform each element to a type compatible with Variant +#[macro_export] +macro_rules! generic_conversion_array { + ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ + let array = $input.$method::<$t>(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); + } + }}; +} + +/// Convert the value at a specific index in the given array into a `Variant`, +/// using `method` requiring a generic type to downcast the generic array +/// to a specific array type and `cast_fn` to transform the element. +#[macro_export] +macro_rules! generic_conversion_single_value { + ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ + let array = $input.$method::<$t>(); + if array.is_null($index) { + return Variant::Null; + } + let cast_value = $cast_fn(array.value($index)); + Variant::from(cast_value) + }}; +} + +/// Convert the input array to a `VariantArray` row by row, using `method` +/// not requiring a generic type to downcast the generic array to a specific +/// array type and `cast_fn` to transform each element to a type compatible with Variant +#[macro_export] +macro_rules! non_generic_conversion_array { + ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ + let array = $input.$method(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); + } + }}; +} + +#[macro_export] +macro_rules! non_generic_conversion_single_value { + ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ + let array = $input.$method(); + if array.is_null($index) { + return Variant::Null; + } + Variant::from(cast_value) + }}; +} + +/// Convert a decimal value to a `VariantDecimal` +#[macro_export] +macro_rules! decimal_to_variant_decimal { + ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => { + if *$scale < 0 { + // For negative scale, we need to multiply the value by 10^|scale| + // For example: 123 with scale -2 becomes 12300 + let multiplier = (10 as $value_type).pow((-*$scale) as u32); + // Check for overflow + if $v > 0 && $v > <$value_type>::MAX / multiplier { + return Variant::Null; + } + if $v < 0 && $v < <$value_type>::MIN / multiplier { + return Variant::Null; + } + <$variant_type>::try_new($v * multiplier, 0) + .map(|v| v.into()) + .unwrap_or(Variant::Null) + } else { + <$variant_type>::try_new($v, *$scale as u8) + .map(|v| v.into()) + .unwrap_or(Variant::Null) + } + }; +} + +/// Convert arrays that don't need generic type parameters +#[macro_export] +macro_rules! cast_conversion_nongeneric { + ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ + let array = $input.$method(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); + } + }}; +} + +/// Convert string arrays using the offset size as the type parameter +#[macro_export] +macro_rules! cast_conversion_string { + ($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ + let array = $input.$method::<$offset_type>(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); + } + }}; +} diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index c54125894222..22244df97041 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -19,12 +19,14 @@ use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType}; use parquet_variant::Variant; use std::any::Any; use std::sync::Arc; +use crate::primitive_conversion_single_value; + /// An array of Parquet [`Variant`] values /// /// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying @@ -350,8 +352,10 @@ impl ShreddingState { fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '_> { match typed_value.data_type() { DataType::Int32 => { - let typed_value = typed_value.as_primitive::(); - Variant::from(typed_value.value(index)) + primitive_conversion_single_value!(Int32Type, typed_value, index) + } + DataType::Int16 => { + primitive_conversion_single_value!(Int16Type, typed_value, index) } // todo other types here (note this is very similar to cast_to_variant.rs) // so it would be great to figure out how to share this code diff --git a/parquet-variant-compute/src/variant_get/mod.rs b/parquet-variant-compute/src/variant_get/mod.rs index 0c9d2686c032..4460705cba0b 100644 --- a/parquet-variant-compute/src/variant_get/mod.rs +++ b/parquet-variant-compute/src/variant_get/mod.rs @@ -107,7 +107,10 @@ impl<'a> GetOptions<'a> { mod test { use std::sync::Arc; - use arrow::array::{Array, ArrayRef, BinaryViewArray, Int32Array, StringArray, StructArray}; + use arrow::array::{ + Array, ArrayRef, BinaryViewArray, Int16Array, Int32Array, PrimitiveArray, StringArray, + StructArray, + }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; use arrow_schema::{DataType, Field, FieldRef, Fields}; @@ -258,7 +261,8 @@ mod test { /// Perfect Shredding: extract the typed value as a VariantArray #[test] fn get_variant_perfectly_shredded_int32_as_variant() { - let array = perfectly_shredded_int32_variant_array(); + let array = + perfectly_shredded_variant_array(Int32Array::from(vec![Some(1), Some(2), Some(3)])); let options = GetOptions::new(); let result = variant_get(&array, options).unwrap(); @@ -276,7 +280,8 @@ mod test { #[test] fn get_variant_perfectly_shredded_int32_as_int32() { // Extract the typed value as Int32Array - let array = perfectly_shredded_int32_variant_array(); + let array = + perfectly_shredded_variant_array(Int32Array::from(vec![Some(1), Some(2), Some(3)])); // specify we want the typed value as Int32 let field = Field::new("typed_value", DataType::Int32, true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); @@ -319,14 +324,38 @@ mod test { assert_eq!(&result, &expected) } + #[test] + fn get_variant_perfectly_shredded_int16_as_variant() { + let array = + perfectly_shredded_variant_array(Int16Array::from(vec![Some(1), Some(2), Some(3)])); + let options = GetOptions::new(); + let result = variant_get(&array, options).unwrap(); + + // expect the result is a VariantArray + let result: &VariantArray = result.as_any().downcast_ref().unwrap(); + assert_eq!(result.len(), 3); + + // Expect the values are the same as the original values + assert_eq!(result.value(0), Variant::Int16(1)); + assert_eq!(result.value(1), Variant::Int16(2)); + assert_eq!(result.value(2), Variant::Int16(3)); + } + + #[test] + fn get_variant_perfectly_shredded_int16_as_int16() { + // Extract the typed value as Int16Array + let array = + perfectly_shredded_variant_array(Int16Array::from(vec![Some(1), Some(2), Some(3)])); + // specify we want the typed value as Int16 + let field = Field::new("typed_value", DataType::Int16, true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&array, options).unwrap(); + let expected: ArrayRef = Arc::new(Int16Array::from(vec![Some(1), Some(2), Some(3)])); + assert_eq!(&result, &expected) + } + /// Return a VariantArray that represents a perfectly "shredded" variant - /// for the following example (3 Variant::Int32 values): - /// - /// ```text - /// 1 - /// 2 - /// 3 - /// ``` + /// for the given typed value. /// /// The schema of the corresponding `StructArray` would look like this: /// @@ -336,13 +365,16 @@ mod test { /// typed_value: Int32Array, /// } /// ``` - fn perfectly_shredded_int32_variant_array() -> ArrayRef { + fn perfectly_shredded_variant_array(typed_value: PrimitiveArray) -> ArrayRef + where + T: arrow::datatypes::ArrowPrimitiveType, + { // At the time of writing, the `VariantArrayBuilder` does not support shredding. // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() }; - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); - let typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3)]); + let metadata = + BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, typed_value.len())); let struct_array = StructArrayBuilder::new() .with_field("metadata", Arc::new(metadata)) diff --git a/parquet-variant-compute/src/variant_get/output/mod.rs b/parquet-variant-compute/src/variant_get/output/mod.rs index 52a8f5bc0288..3ca21d482f31 100644 --- a/parquet-variant-compute/src/variant_get/output/mod.rs +++ b/parquet-variant-compute/src/variant_get/output/mod.rs @@ -23,7 +23,7 @@ use crate::variant_get::output::variant::VariantOutputBuilder; use crate::variant_get::GetOptions; use crate::VariantArray; use arrow::array::{ArrayRef, BinaryViewArray}; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{Int16Type, Int32Type}; use arrow::error::Result; use arrow_schema::{ArrowError, DataType}; @@ -87,6 +87,11 @@ pub(crate) fn instantiate_output_builder<'a>( as_type, cast_options, ))), + DataType::Int16 => Ok(Box::new(PrimitiveOutputBuilder::::new( + path, + as_type, + cast_options, + ))), dt => Err(ArrowError::NotYetImplemented(format!( "variant_get with as_type={dt} is not implemented yet", ))), diff --git a/parquet-variant-compute/src/variant_get/output/primitive.rs b/parquet-variant-compute/src/variant_get/output/primitive.rs index aabc9827a7b7..ff3e58c3c340 100644 --- a/parquet-variant-compute/src/variant_get/output/primitive.rs +++ b/parquet-variant-compute/src/variant_get/output/primitive.rs @@ -24,7 +24,7 @@ use arrow::array::{ NullBufferBuilder, PrimitiveArray, }; use arrow::compute::{cast_with_options, CastOptions}; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, FieldRef}; use parquet_variant::{Variant, VariantPath}; use std::marker::PhantomData; @@ -176,3 +176,9 @@ impl ArrowPrimitiveVariant for Int32Type { variant.as_int32() } } + +impl ArrowPrimitiveVariant for Int16Type { + fn from_variant(variant: &Variant) -> Option { + variant.as_int16() + } +} diff --git a/parquet-variant-compute/src/variant_get/output/variant.rs b/parquet-variant-compute/src/variant_get/output/variant.rs index 7c8b4da2f5c1..be8ad02ec948 100644 --- a/parquet-variant-compute/src/variant_get/output/variant.rs +++ b/parquet-variant-compute/src/variant_get/output/variant.rs @@ -16,9 +16,9 @@ // under the License. use crate::variant_get::output::OutputBuilder; -use crate::{VariantArray, VariantArrayBuilder}; +use crate::{primitive_conversion, VariantArray, VariantArrayBuilder}; use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray}; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType}; use parquet_variant::{Variant, VariantPath}; use std::sync::Arc; @@ -93,16 +93,10 @@ impl OutputBuilder for VariantOutputBuilder<'_> { let mut array_builder = VariantArrayBuilder::new(variant_array.len()); match typed_value.data_type() { DataType::Int32 => { - let primitive_array = typed_value.as_primitive::(); - for i in 0..variant_array.len() { - if primitive_array.is_null(i) { - array_builder.append_null(); - continue; - } - - let int_value = primitive_array.value(i); - array_builder.append_variant(Variant::from(int_value)); - } + primitive_conversion!(Int32Type, typed_value, array_builder); + } + DataType::Int16 => { + primitive_conversion!(Int16Type, typed_value, array_builder); } dt => { // https://github.com/apache/arrow-rs/issues/8087 From f813b0826b02a9c79e83d5e2c24cc29890290b6e Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 13:36:18 +0800 Subject: [PATCH 2/8] address comment --- .../src/cast_to_variant.rs | 30 ++++----- .../src/type_conversion.rs | 62 ++++--------------- .../src/variant_get/output/variant.rs | 6 +- 3 files changed, 30 insertions(+), 68 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 5e5a6868e676..f40849796f05 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -18,8 +18,8 @@ use std::sync::Arc; use crate::{ - cast_conversion_nongeneric, cast_conversion_string, decimal_to_variant_decimal, - generic_conversion_array, non_generic_conversion_array, primitive_conversion, + decimal_to_variant_decimal, generic_conversion_array, non_generic_conversion_array, + primitive_conversion_array, }; use crate::{VariantArray, VariantArrayBuilder}; use arrow::array::{ @@ -161,28 +161,28 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { generic_conversion_array!(BinaryViewType, as_byte_view, |v| v, input, builder); } DataType::Int8 => { - primitive_conversion!(Int8Type, input, builder); + primitive_conversion_array!(Int8Type, input, builder); } DataType::Int16 => { - primitive_conversion!(Int16Type, input, builder); + primitive_conversion_array!(Int16Type, input, builder); } DataType::Int32 => { - primitive_conversion!(Int32Type, input, builder); + primitive_conversion_array!(Int32Type, input, builder); } DataType::Int64 => { - primitive_conversion!(Int64Type, input, builder); + primitive_conversion_array!(Int64Type, input, builder); } DataType::UInt8 => { - primitive_conversion!(UInt8Type, input, builder); + primitive_conversion_array!(UInt8Type, input, builder); } DataType::UInt16 => { - primitive_conversion!(UInt16Type, input, builder); + primitive_conversion_array!(UInt16Type, input, builder); } DataType::UInt32 => { - primitive_conversion!(UInt32Type, input, builder); + primitive_conversion_array!(UInt32Type, input, builder); } DataType::UInt64 => { - primitive_conversion!(UInt64Type, input, builder); + primitive_conversion_array!(UInt64Type, input, builder); } DataType::Float16 => { generic_conversion_array!( @@ -194,10 +194,10 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::Float32 => { - primitive_conversion!(Float32Type, input, builder); + primitive_conversion_array!(Float32Type, input, builder); } DataType::Float64 => { - primitive_conversion!(Float64Type, input, builder); + primitive_conversion_array!(Float64Type, input, builder); } DataType::Decimal32(_, scale) => { generic_conversion_array!( @@ -332,13 +332,13 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { )); } DataType::Utf8 => { - cast_conversion_string!(i32, as_string, |v| v, input, builder); + generic_conversion_array!(i32, as_string, |v| v, input, builder); } DataType::LargeUtf8 => { - cast_conversion_string!(i64, as_string, |v| v, input, builder); + generic_conversion_array!(i64, as_string, |v| v, input, builder); } DataType::Utf8View => { - cast_conversion_nongeneric!(as_string_view, |v| v, input, builder); + non_generic_conversion_array!(as_string_view, |v| v, input, builder); } DataType::Struct(_) => { let struct_array = input.as_struct(); diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 8b4321b2d354..f3c75d53b5da 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -20,7 +20,7 @@ /// Convert the input array of a specific primitive type to a `VariantArray` /// row by row #[macro_export] -macro_rules! primitive_conversion { +macro_rules! primitive_conversion_array { ($t:ty, $input:expr, $builder:expr) => {{ let array = $input.as_primitive::<$t>(); for i in 0..array.len() { @@ -96,6 +96,7 @@ macro_rules! non_generic_conversion_array { }}; } +/// Convert the value at a specific index in the given array into a `Variant`. #[macro_export] macro_rules! non_generic_conversion_single_value { ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ @@ -103,6 +104,7 @@ macro_rules! non_generic_conversion_single_value { if array.is_null($index) { return Variant::Null; } + let cast_value = $cast_fn(array.value($index)); Variant::from(cast_value) }}; } @@ -110,57 +112,17 @@ macro_rules! non_generic_conversion_single_value { /// Convert a decimal value to a `VariantDecimal` #[macro_export] macro_rules! decimal_to_variant_decimal { - ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => { - if *$scale < 0 { + ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => {{ + let (v, scale) = if *$scale < 0 { // For negative scale, we need to multiply the value by 10^|scale| - // For example: 123 with scale -2 becomes 12300 - let multiplier = (10 as $value_type).pow((-*$scale) as u32); - // Check for overflow - if $v > 0 && $v > <$value_type>::MAX / multiplier { - return Variant::Null; - } - if $v < 0 && $v < <$value_type>::MIN / multiplier { - return Variant::Null; - } - <$variant_type>::try_new($v * multiplier, 0) - .map(|v| v.into()) - .unwrap_or(Variant::Null) + // For example: 123 with scale -2 becomes 12300 with scale 0 + let multiplier = <$value_type>::pow(10, (-*$scale) as u32); + (<$value_type>::checked_mul($v, multiplier), 0u8) } else { - <$variant_type>::try_new($v, *$scale as u8) - .map(|v| v.into()) - .unwrap_or(Variant::Null) - } - }; -} + (Some($v), *$scale as u8) + }; -/// Convert arrays that don't need generic type parameters -#[macro_export] -macro_rules! cast_conversion_nongeneric { - ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } - }}; -} - -/// Convert string arrays using the offset size as the type parameter -#[macro_export] -macro_rules! cast_conversion_string { - ($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method::<$offset_type>(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } + v.and_then(|v| <$variant_type>::try_new(v, scale).ok()) + .map_or(Variant::Null, Variant::from) }}; } diff --git a/parquet-variant-compute/src/variant_get/output/variant.rs b/parquet-variant-compute/src/variant_get/output/variant.rs index be8ad02ec948..8e219d246b59 100644 --- a/parquet-variant-compute/src/variant_get/output/variant.rs +++ b/parquet-variant-compute/src/variant_get/output/variant.rs @@ -16,7 +16,7 @@ // under the License. use crate::variant_get::output::OutputBuilder; -use crate::{primitive_conversion, VariantArray, VariantArrayBuilder}; +use crate::{primitive_conversion_array, VariantArray, VariantArrayBuilder}; use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray}; use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType}; @@ -93,10 +93,10 @@ impl OutputBuilder for VariantOutputBuilder<'_> { let mut array_builder = VariantArrayBuilder::new(variant_array.len()); match typed_value.data_type() { DataType::Int32 => { - primitive_conversion!(Int32Type, typed_value, array_builder); + primitive_conversion_array!(Int32Type, typed_value, array_builder); } DataType::Int16 => { - primitive_conversion!(Int16Type, typed_value, array_builder); + primitive_conversion_array!(Int16Type, typed_value, array_builder); } dt => { // https://github.com/apache/arrow-rs/issues/8087 From 07c45ce4a1d7025b791ed0b715837a8edb184643 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 14:58:48 +0800 Subject: [PATCH 3/8] refine macros --- .../src/cast_to_variant.rs | 6 +- .../src/type_conversion.rs | 61 +++++++------------ .../src/variant_get/output/variant.rs | 5 +- 3 files changed, 30 insertions(+), 42 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index f40849796f05..fa62c8938f7f 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -148,7 +148,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { // todo: handle other types like Boolean, Date, Timestamp, etc. match input_type { DataType::Boolean => { - non_generic_conversion_array!(as_boolean, |v| v, input, builder); + non_generic_conversion_array!(input.as_boolean(), |v| v, builder); } DataType::Binary => { @@ -245,7 +245,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { ); } DataType::FixedSizeBinary(_) => { - non_generic_conversion_array!(as_fixed_size_binary, |v| v, input, builder); + non_generic_conversion_array!(input.as_fixed_size_binary(), |v| v, builder); } DataType::Null => { for _ in 0..input.len() { @@ -338,7 +338,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { generic_conversion_array!(i64, as_string, |v| v, input, builder); } DataType::Utf8View => { - non_generic_conversion_array!(as_string_view, |v| v, input, builder); + non_generic_conversion_array!(input.as_string_view(), |v| v, builder); } DataType::Struct(_) => { let struct_array = input.as_struct(); diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index f3c75d53b5da..7668dbd8d813 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,31 +17,34 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -/// Convert the input array of a specific primitive type to a `VariantArray` -/// row by row +/// Convert the input array to a `VariantArray` row by row, using `method` +/// not requiring a generic type to downcast the generic array to a specific +/// array type and `cast_fn` to transform each element to a type compatible with Variant #[macro_export] -macro_rules! primitive_conversion_array { - ($t:ty, $input:expr, $builder:expr) => {{ - let array = $input.as_primitive::<$t>(); +macro_rules! non_generic_conversion_array { + ($array:expr, $cast_fn:expr, $builder:expr) => {{ + let array = $array; for i in 0..array.len() { if array.is_null(i) { $builder.append_null(); continue; } - $builder.append_variant(Variant::from(array.value(i))); + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); } }}; } /// Convert the value at a specific index in the given array into a `Variant`. #[macro_export] -macro_rules! primitive_conversion_single_value { - ($t:ty, $input:expr, $index:expr) => {{ - let array = $input.as_primitive::<$t>(); +macro_rules! non_generic_conversion_single_value { + ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ + let array = $input.$method(); if array.is_null($index) { return Variant::Null; } - Variant::from(array.value($index)) + let cast_value = $cast_fn(array.value($index)); + Variant::from(cast_value) }}; } @@ -51,15 +54,7 @@ macro_rules! primitive_conversion_single_value { #[macro_export] macro_rules! generic_conversion_array { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method::<$t>(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } + non_generic_conversion_array!($input.$method::<$t>(), $cast_fn, $builder) }}; } @@ -78,34 +73,24 @@ macro_rules! generic_conversion_single_value { }}; } -/// Convert the input array to a `VariantArray` row by row, using `method` -/// not requiring a generic type to downcast the generic array to a specific -/// array type and `cast_fn` to transform each element to a type compatible with Variant +/// Convert the input array of a specific primitive type to a `VariantArray` +/// row by row #[macro_export] -macro_rules! non_generic_conversion_array { - ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - let array = $input.$method(); - for i in 0..array.len() { - if array.is_null(i) { - $builder.append_null(); - continue; - } - let cast_value = $cast_fn(array.value(i)); - $builder.append_variant(Variant::from(cast_value)); - } +macro_rules! primitive_conversion_array { + ($t:ty, $input:expr, $builder:expr) => {{ + generic_conversion_array!($t, as_primitive, |v| v, $input, $builder) }}; } /// Convert the value at a specific index in the given array into a `Variant`. #[macro_export] -macro_rules! non_generic_conversion_single_value { - ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ - let array = $input.$method(); +macro_rules! primitive_conversion_single_value { + ($t:ty, $input:expr, $index:expr) => {{ + let array = $input.as_primitive::<$t>(); if array.is_null($index) { return Variant::Null; } - let cast_value = $cast_fn(array.value($index)); - Variant::from(cast_value) + Variant::from(array.value($index)) }}; } diff --git a/parquet-variant-compute/src/variant_get/output/variant.rs b/parquet-variant-compute/src/variant_get/output/variant.rs index 8e219d246b59..028d0c4e32df 100644 --- a/parquet-variant-compute/src/variant_get/output/variant.rs +++ b/parquet-variant-compute/src/variant_get/output/variant.rs @@ -16,7 +16,10 @@ // under the License. use crate::variant_get::output::OutputBuilder; -use crate::{primitive_conversion_array, VariantArray, VariantArrayBuilder}; +use crate::{ + generic_conversion_array, non_generic_conversion_array, primitive_conversion_array, + VariantArray, VariantArrayBuilder, +}; use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray}; use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType}; From 74e0f38208e5603d9a57f0733242ea3a8164fe48 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 21:12:19 +0800 Subject: [PATCH 4/8] refine macro --- parquet-variant-compute/src/type_conversion.rs | 4 ++-- parquet-variant-compute/src/variant_get/output/variant.rs | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 7668dbd8d813..6d757cb1530c 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -54,7 +54,7 @@ macro_rules! non_generic_conversion_single_value { #[macro_export] macro_rules! generic_conversion_array { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - non_generic_conversion_array!($input.$method::<$t>(), $cast_fn, $builder) + $crate::non_generic_conversion_array!($input.$method::<$t>(), $cast_fn, $builder) }}; } @@ -78,7 +78,7 @@ macro_rules! generic_conversion_single_value { #[macro_export] macro_rules! primitive_conversion_array { ($t:ty, $input:expr, $builder:expr) => {{ - generic_conversion_array!($t, as_primitive, |v| v, $input, $builder) + $crate::generic_conversion_array!($t, as_primitive, |v| v, $input, $builder) }}; } diff --git a/parquet-variant-compute/src/variant_get/output/variant.rs b/parquet-variant-compute/src/variant_get/output/variant.rs index 028d0c4e32df..8e219d246b59 100644 --- a/parquet-variant-compute/src/variant_get/output/variant.rs +++ b/parquet-variant-compute/src/variant_get/output/variant.rs @@ -16,10 +16,7 @@ // under the License. use crate::variant_get::output::OutputBuilder; -use crate::{ - generic_conversion_array, non_generic_conversion_array, primitive_conversion_array, - VariantArray, VariantArrayBuilder, -}; +use crate::{primitive_conversion_array, VariantArray, VariantArrayBuilder}; use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray}; use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType}; From e1752b78d88839d6201cec2d4cce69810f7c7781 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 21:31:05 +0800 Subject: [PATCH 5/8] address comments --- .../src/cast_to_variant.rs | 8 +------- .../src/type_conversion.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index fa62c8938f7f..0c9697ec1d36 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -185,13 +185,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { primitive_conversion_array!(UInt64Type, input, builder); } DataType::Float16 => { - generic_conversion_array!( - Float16Type, - as_primitive, - |v: f16| -> f32 { v.into() }, - input, - builder - ); + generic_conversion_array!(Float16Type, as_primitive, f32::from, input, builder); } DataType::Float32 => { primitive_conversion_array!(Float32Type, input, builder); diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 6d757cb1530c..230fad69ecc5 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -41,10 +41,11 @@ macro_rules! non_generic_conversion_single_value { ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ let array = $input.$method(); if array.is_null($index) { - return Variant::Null; + Variant::Null + } else { + let cast_value = $cast_fn(array.value($index)); + Variant::from(cast_value) } - let cast_value = $cast_fn(array.value($index)); - Variant::from(cast_value) }}; } @@ -66,10 +67,11 @@ macro_rules! generic_conversion_single_value { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ let array = $input.$method::<$t>(); if array.is_null($index) { - return Variant::Null; + Variant::Null + } else { + let cast_value = $cast_fn(array.value($index)); + Variant::from(cast_value) } - let cast_value = $cast_fn(array.value($index)); - Variant::from(cast_value) }}; } @@ -88,9 +90,10 @@ macro_rules! primitive_conversion_single_value { ($t:ty, $input:expr, $index:expr) => {{ let array = $input.as_primitive::<$t>(); if array.is_null($index) { - return Variant::Null; + Variant::Null + } else { + Variant::from(array.value($index)) } - Variant::from(array.value($index)) }}; } From efa327a8d6caebd3e3960c038552a033640188e6 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 21:35:14 +0800 Subject: [PATCH 6/8] fix style --- parquet-variant-compute/src/cast_to_variant.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 0c9697ec1d36..afbf421e310a 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -39,7 +39,6 @@ use arrow::temporal_conversions::{ }; use arrow_schema::{ArrowError, DataType, TimeUnit}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; -use half::f16; use parquet_variant::{ Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, }; From 6d1c9292f4d801d4b1b9bd2ca8009f5cee95df93 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 21:40:54 +0800 Subject: [PATCH 7/8] fix clippy check --- parquet-variant-compute/src/cast_to_variant.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index afbf421e310a..b3bd1deb7581 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -39,6 +39,8 @@ use arrow::temporal_conversions::{ }; use arrow_schema::{ArrowError, DataType, TimeUnit}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; +#[allow(unused_imports)] +use half::f16; use parquet_variant::{ Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, }; From 59239b39758250ee0f44249bbc4191ba94b37e42 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 21 Aug 2025 23:34:06 +0800 Subject: [PATCH 8/8] address comments --- .../src/cast_to_variant.rs | 5 +- .../src/type_conversion.rs | 57 +++++++++++-------- parquet-variant-compute/src/variant_array.rs | 2 +- .../src/variant_get/output/variant.rs | 2 +- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index b3bd1deb7581..dd745c9dc957 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use crate::{ +use crate::type_conversion::{ decimal_to_variant_decimal, generic_conversion_array, non_generic_conversion_array, primitive_conversion_array, }; @@ -39,8 +39,6 @@ use arrow::temporal_conversions::{ }; use arrow_schema::{ArrowError, DataType, TimeUnit}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; -#[allow(unused_imports)] -use half::f16; use parquet_variant::{ Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, }; @@ -495,6 +493,7 @@ mod tests { use arrow_schema::{ DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, }; + use half::f16; use parquet_variant::{Variant, VariantDecimal16}; use std::{sync::Arc, vec}; diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 230fad69ecc5..647d2c705ff0 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -20,7 +20,6 @@ /// Convert the input array to a `VariantArray` row by row, using `method` /// not requiring a generic type to downcast the generic array to a specific /// array type and `cast_fn` to transform each element to a type compatible with Variant -#[macro_export] macro_rules! non_generic_conversion_array { ($array:expr, $cast_fn:expr, $builder:expr) => {{ let array = $array; @@ -34,12 +33,12 @@ macro_rules! non_generic_conversion_array { } }}; } +pub(crate) use non_generic_conversion_array; /// Convert the value at a specific index in the given array into a `Variant`. -#[macro_export] macro_rules! non_generic_conversion_single_value { - ($method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ - let array = $input.$method(); + ($array:expr, $cast_fn:expr, $index:expr) => {{ + let array = $array; if array.is_null($index) { Variant::Null } else { @@ -48,57 +47,66 @@ macro_rules! non_generic_conversion_single_value { } }}; } +pub(crate) use non_generic_conversion_single_value; /// Convert the input array to a `VariantArray` row by row, using `method` /// requiring a generic type to downcast the generic array to a specific /// array type and `cast_fn` to transform each element to a type compatible with Variant -#[macro_export] macro_rules! generic_conversion_array { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ - $crate::non_generic_conversion_array!($input.$method::<$t>(), $cast_fn, $builder) + $crate::type_conversion::non_generic_conversion_array!( + $input.$method::<$t>(), + $cast_fn, + $builder + ) }}; } +pub(crate) use generic_conversion_array; /// Convert the value at a specific index in the given array into a `Variant`, /// using `method` requiring a generic type to downcast the generic array /// to a specific array type and `cast_fn` to transform the element. -#[macro_export] macro_rules! generic_conversion_single_value { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{ - let array = $input.$method::<$t>(); - if array.is_null($index) { - Variant::Null - } else { - let cast_value = $cast_fn(array.value($index)); - Variant::from(cast_value) - } + $crate::type_conversion::non_generic_conversion_single_value!( + $input.$method::<$t>(), + $cast_fn, + $index + ) }}; } +pub(crate) use generic_conversion_single_value; /// Convert the input array of a specific primitive type to a `VariantArray` /// row by row -#[macro_export] macro_rules! primitive_conversion_array { ($t:ty, $input:expr, $builder:expr) => {{ - $crate::generic_conversion_array!($t, as_primitive, |v| v, $input, $builder) + $crate::type_conversion::generic_conversion_array!( + $t, + as_primitive, + |v| v, + $input, + $builder + ) }}; } +pub(crate) use primitive_conversion_array; /// Convert the value at a specific index in the given array into a `Variant`. -#[macro_export] macro_rules! primitive_conversion_single_value { ($t:ty, $input:expr, $index:expr) => {{ - let array = $input.as_primitive::<$t>(); - if array.is_null($index) { - Variant::Null - } else { - Variant::from(array.value($index)) - } + $crate::type_conversion::generic_conversion_single_value!( + $t, + as_primitive, + |v| v, + $input, + $index + ) }}; } +pub(crate) use primitive_conversion_single_value; /// Convert a decimal value to a `VariantDecimal` -#[macro_export] macro_rules! decimal_to_variant_decimal { ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => {{ let (v, scale) = if *$scale < 0 { @@ -114,3 +122,4 @@ macro_rules! decimal_to_variant_decimal { .map_or(Variant::Null, Variant::from) }}; } +pub(crate) use decimal_to_variant_decimal; diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 22244df97041..10fb5f67eec6 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -25,7 +25,7 @@ use parquet_variant::Variant; use std::any::Any; use std::sync::Arc; -use crate::primitive_conversion_single_value; +use crate::type_conversion::primitive_conversion_single_value; /// An array of Parquet [`Variant`] values /// diff --git a/parquet-variant-compute/src/variant_get/output/variant.rs b/parquet-variant-compute/src/variant_get/output/variant.rs index 8e219d246b59..203fab233b02 100644 --- a/parquet-variant-compute/src/variant_get/output/variant.rs +++ b/parquet-variant-compute/src/variant_get/output/variant.rs @@ -16,7 +16,7 @@ // under the License. use crate::variant_get::output::OutputBuilder; -use crate::{primitive_conversion_array, VariantArray, VariantArrayBuilder}; +use crate::{type_conversion::primitive_conversion_array, VariantArray, VariantArrayBuilder}; use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray}; use arrow::datatypes::{Int16Type, Int32Type}; use arrow_schema::{ArrowError, DataType};