Skip to content

Commit 19e0b2b

Browse files
committed
Extract type_conversion and support typed value get for DataType::Int16
1 parent 6c1b96f commit 19e0b2b

File tree

8 files changed

+256
-148
lines changed

8 files changed

+256
-148
lines changed

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 18 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
use std::sync::Arc;
1919

20+
use crate::{
21+
cast_conversion_nongeneric, cast_conversion_string, decimal_to_variant_decimal,
22+
generic_conversion_array, non_generic_conversion_array, primitive_conversion,
23+
};
2024
use crate::{VariantArray, VariantArrayBuilder};
2125
use arrow::array::{
2226
Array, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
@@ -39,55 +43,6 @@ use parquet_variant::{
3943
Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
4044
};
4145

42-
/// Convert the input array of a specific primitive type to a `VariantArray`
43-
/// row by row
44-
macro_rules! primitive_conversion {
45-
($t:ty, $input:expr, $builder:expr) => {{
46-
let array = $input.as_primitive::<$t>();
47-
for i in 0..array.len() {
48-
if array.is_null(i) {
49-
$builder.append_null();
50-
continue;
51-
}
52-
$builder.append_variant(Variant::from(array.value(i)));
53-
}
54-
}};
55-
}
56-
57-
/// Convert the input array to a `VariantArray` row by row, using `method`
58-
/// requiring a generic type to downcast the generic array to a specific
59-
/// array type and `cast_fn` to transform each element to a type compatible with Variant
60-
macro_rules! generic_conversion {
61-
($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
62-
let array = $input.$method::<$t>();
63-
for i in 0..array.len() {
64-
if array.is_null(i) {
65-
$builder.append_null();
66-
continue;
67-
}
68-
let cast_value = $cast_fn(array.value(i));
69-
$builder.append_variant(Variant::from(cast_value));
70-
}
71-
}};
72-
}
73-
74-
/// Convert the input array to a `VariantArray` row by row, using `method`
75-
/// not requiring a generic type to downcast the generic array to a specific
76-
/// array type and `cast_fn` to transform each element to a type compatible with Variant
77-
macro_rules! non_generic_conversion {
78-
($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
79-
let array = $input.$method();
80-
for i in 0..array.len() {
81-
if array.is_null(i) {
82-
$builder.append_null();
83-
continue;
84-
}
85-
let cast_value = $cast_fn(array.value(i));
86-
$builder.append_variant(Variant::from(cast_value));
87-
}
88-
}};
89-
}
90-
9146
fn convert_timestamp(
9247
time_unit: &TimeUnit,
9348
time_zone: &Option<Arc<str>>,
@@ -156,61 +111,6 @@ fn convert_timestamp(
156111
}
157112
}
158113

159-
/// Convert a decimal value to a `VariantDecimal`
160-
macro_rules! decimal_to_variant_decimal {
161-
($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => {
162-
if *$scale < 0 {
163-
// For negative scale, we need to multiply the value by 10^|scale|
164-
// For example: 123 with scale -2 becomes 12300
165-
let multiplier = (10 as $value_type).pow((-*$scale) as u32);
166-
// Check for overflow
167-
if $v > 0 && $v > <$value_type>::MAX / multiplier {
168-
return Variant::Null;
169-
}
170-
if $v < 0 && $v < <$value_type>::MIN / multiplier {
171-
return Variant::Null;
172-
}
173-
<$variant_type>::try_new($v * multiplier, 0)
174-
.map(|v| v.into())
175-
.unwrap_or(Variant::Null)
176-
} else {
177-
<$variant_type>::try_new($v, *$scale as u8)
178-
.map(|v| v.into())
179-
.unwrap_or(Variant::Null)
180-
}
181-
};
182-
}
183-
184-
/// Convert arrays that don't need generic type parameters
185-
macro_rules! cast_conversion_nongeneric {
186-
($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
187-
let array = $input.$method();
188-
for i in 0..array.len() {
189-
if array.is_null(i) {
190-
$builder.append_null();
191-
continue;
192-
}
193-
let cast_value = $cast_fn(array.value(i));
194-
$builder.append_variant(Variant::from(cast_value));
195-
}
196-
}};
197-
}
198-
199-
/// Convert string arrays using the offset size as the type parameter
200-
macro_rules! cast_conversion_string {
201-
($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
202-
let array = $input.$method::<$offset_type>();
203-
for i in 0..array.len() {
204-
if array.is_null(i) {
205-
$builder.append_null();
206-
continue;
207-
}
208-
let cast_value = $cast_fn(array.value(i));
209-
$builder.append_variant(Variant::from(cast_value));
210-
}
211-
}};
212-
}
213-
214114
/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you
215115
/// need to convert a specific data type
216116
///
@@ -247,17 +147,17 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
247147
// todo: handle other types like Boolean, Date, Timestamp, etc.
248148
match input_type {
249149
DataType::Boolean => {
250-
non_generic_conversion!(as_boolean, |v| v, input, builder);
150+
non_generic_conversion_array!(as_boolean, |v| v, input, builder);
251151
}
252152

253153
DataType::Binary => {
254-
generic_conversion!(BinaryType, as_bytes, |v| v, input, builder);
154+
generic_conversion_array!(BinaryType, as_bytes, |v| v, input, builder);
255155
}
256156
DataType::LargeBinary => {
257-
generic_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder);
157+
generic_conversion_array!(LargeBinaryType, as_bytes, |v| v, input, builder);
258158
}
259159
DataType::BinaryView => {
260-
generic_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder);
160+
generic_conversion_array!(BinaryViewType, as_byte_view, |v| v, input, builder);
261161
}
262162
DataType::Int8 => {
263163
primitive_conversion!(Int8Type, input, builder);
@@ -284,7 +184,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
284184
primitive_conversion!(UInt64Type, input, builder);
285185
}
286186
DataType::Float16 => {
287-
generic_conversion!(
187+
generic_conversion_array!(
288188
Float16Type,
289189
as_primitive,
290190
|v: f16| -> f32 { v.into() },
@@ -299,7 +199,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
299199
primitive_conversion!(Float64Type, input, builder);
300200
}
301201
DataType::Decimal32(_, scale) => {
302-
generic_conversion!(
202+
generic_conversion_array!(
303203
Decimal32Type,
304204
as_primitive,
305205
|v| decimal_to_variant_decimal!(v, scale, i32, VariantDecimal4),
@@ -308,7 +208,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
308208
);
309209
}
310210
DataType::Decimal64(_, scale) => {
311-
generic_conversion!(
211+
generic_conversion_array!(
312212
Decimal64Type,
313213
as_primitive,
314214
|v| decimal_to_variant_decimal!(v, scale, i64, VariantDecimal8),
@@ -317,7 +217,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
317217
);
318218
}
319219
DataType::Decimal128(_, scale) => {
320-
generic_conversion!(
220+
generic_conversion_array!(
321221
Decimal128Type,
322222
as_primitive,
323223
|v| decimal_to_variant_decimal!(v, scale, i128, VariantDecimal16),
@@ -326,7 +226,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
326226
);
327227
}
328228
DataType::Decimal256(_, scale) => {
329-
generic_conversion!(
229+
generic_conversion_array!(
330230
Decimal256Type,
331231
as_primitive,
332232
|v: i256| {
@@ -344,7 +244,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
344244
);
345245
}
346246
DataType::FixedSizeBinary(_) => {
347-
non_generic_conversion!(as_fixed_size_binary, |v| v, input, builder);
247+
non_generic_conversion_array!(as_fixed_size_binary, |v| v, input, builder);
348248
}
349249
DataType::Null => {
350250
for _ in 0..input.len() {
@@ -357,7 +257,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
357257
DataType::Time32(unit) => {
358258
match *unit {
359259
TimeUnit::Second => {
360-
generic_conversion!(
260+
generic_conversion_array!(
361261
Time32SecondType,
362262
as_primitive,
363263
// nano second are always 0
@@ -367,7 +267,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
367267
);
368268
}
369269
TimeUnit::Millisecond => {
370-
generic_conversion!(
270+
generic_conversion_array!(
371271
Time32MillisecondType,
372272
as_primitive,
373273
|v| NaiveTime::from_num_seconds_from_midnight_opt(
@@ -390,7 +290,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
390290
DataType::Time64(unit) => {
391291
match *unit {
392292
TimeUnit::Microsecond => {
393-
generic_conversion!(
293+
generic_conversion_array!(
394294
Time64MicrosecondType,
395295
as_primitive,
396296
|v| NaiveTime::from_num_seconds_from_midnight_opt(
@@ -403,7 +303,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
403303
);
404304
}
405305
TimeUnit::Nanosecond => {
406-
generic_conversion!(
306+
generic_conversion_array!(
407307
Time64NanosecondType,
408308
as_primitive,
409309
|v| NaiveTime::from_num_seconds_from_midnight_opt(

parquet-variant-compute/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
pub mod cast_to_variant;
3939
mod from_json;
4040
mod to_json;
41+
mod type_conversion;
4142
mod variant_array;
4243
mod variant_array_builder;
4344
pub mod variant_get;

0 commit comments

Comments
 (0)