Skip to content

Commit b124132

Browse files
committed
Extract type_conversion and support typed value get for DataType::Int16
1 parent ebb6ede commit b124132

File tree

8 files changed

+258
-150
lines changed

8 files changed

+258
-150
lines changed

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 20 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
use std::sync::Arc;
1919

20+
use crate::{
21+
cast_conversion_nongeneric, cast_conversion_string, decimal_to_variant_decimal,
22+
generic_conversion_array, non_generic_conversion_array, primitive_conversion,
23+
};
2024
use crate::{VariantArray, VariantArrayBuilder};
2125
use arrow::array::{
2226
Array, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
@@ -40,55 +44,6 @@ use parquet_variant::{
4044
Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
4145
};
4246

43-
/// Convert the input array of a specific primitive type to a `VariantArray`
44-
/// row by row
45-
macro_rules! primitive_conversion {
46-
($t:ty, $input:expr, $builder:expr) => {{
47-
let array = $input.as_primitive::<$t>();
48-
for i in 0..array.len() {
49-
if array.is_null(i) {
50-
$builder.append_null();
51-
continue;
52-
}
53-
$builder.append_variant(Variant::from(array.value(i)));
54-
}
55-
}};
56-
}
57-
58-
/// Convert the input array to a `VariantArray` row by row, using `method`
59-
/// (which requires a generic type parameter) to downcast the input to a specific
60-
/// array type, and `cast_fn` to transform each element into a type compatible with Variant
61-
macro_rules! generic_conversion {
62-
($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
63-
let array = $input.$method::<$t>();
64-
for i in 0..array.len() {
65-
if array.is_null(i) {
66-
$builder.append_null();
67-
continue;
68-
}
69-
let cast_value = $cast_fn(array.value(i));
70-
$builder.append_variant(Variant::from(cast_value));
71-
}
72-
}};
73-
}
74-
75-
/// Convert the input array to a `VariantArray` row by row, using `method`
76-
/// (which takes no generic type parameter) to downcast the input to a specific
77-
/// array type, and `cast_fn` to transform each element into a type compatible with Variant
78-
macro_rules! non_generic_conversion {
79-
($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
80-
let array = $input.$method();
81-
for i in 0..array.len() {
82-
if array.is_null(i) {
83-
$builder.append_null();
84-
continue;
85-
}
86-
let cast_value = $cast_fn(array.value(i));
87-
$builder.append_variant(Variant::from(cast_value));
88-
}
89-
}};
90-
}
91-
9247
fn convert_timestamp(
9348
time_unit: &TimeUnit,
9449
time_zone: &Option<Arc<str>>,
@@ -157,61 +112,6 @@ fn convert_timestamp(
157112
}
158113
}
159114

160-
/// Convert a decimal value to a `VariantDecimal`
161-
macro_rules! decimal_to_variant_decimal {
162-
($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => {
163-
if *$scale < 0 {
164-
// For negative scale, we need to multiply the value by 10^|scale|
165-
// For example: 123 with scale -2 becomes 12300
166-
let multiplier = (10 as $value_type).pow((-*$scale) as u32);
167-
// Check for overflow
168-
if $v > 0 && $v > <$value_type>::MAX / multiplier {
169-
return Variant::Null;
170-
}
171-
if $v < 0 && $v < <$value_type>::MIN / multiplier {
172-
return Variant::Null;
173-
}
174-
<$variant_type>::try_new($v * multiplier, 0)
175-
.map(|v| v.into())
176-
.unwrap_or(Variant::Null)
177-
} else {
178-
<$variant_type>::try_new($v, *$scale as u8)
179-
.map(|v| v.into())
180-
.unwrap_or(Variant::Null)
181-
}
182-
};
183-
}
184-
185-
/// Convert arrays that don't need generic type parameters
186-
macro_rules! cast_conversion_nongeneric {
187-
($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
188-
let array = $input.$method();
189-
for i in 0..array.len() {
190-
if array.is_null(i) {
191-
$builder.append_null();
192-
continue;
193-
}
194-
let cast_value = $cast_fn(array.value(i));
195-
$builder.append_variant(Variant::from(cast_value));
196-
}
197-
}};
198-
}
199-
200-
/// Convert string arrays using the offset size as the type parameter
201-
macro_rules! cast_conversion_string {
202-
($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
203-
let array = $input.$method::<$offset_type>();
204-
for i in 0..array.len() {
205-
if array.is_null(i) {
206-
$builder.append_null();
207-
continue;
208-
}
209-
let cast_value = $cast_fn(array.value(i));
210-
$builder.append_variant(Variant::from(cast_value));
211-
}
212-
}};
213-
}
214-
215115
/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you
216116
/// need to convert a specific data type
217117
///
@@ -248,17 +148,17 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
248148
// todo: handle any remaining data types not yet covered by the match below
249149
match input_type {
250150
DataType::Boolean => {
251-
non_generic_conversion!(as_boolean, |v| v, input, builder);
151+
non_generic_conversion_array!(as_boolean, |v| v, input, builder);
252152
}
253153

254154
DataType::Binary => {
255-
generic_conversion!(BinaryType, as_bytes, |v| v, input, builder);
155+
generic_conversion_array!(BinaryType, as_bytes, |v| v, input, builder);
256156
}
257157
DataType::LargeBinary => {
258-
generic_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder);
158+
generic_conversion_array!(LargeBinaryType, as_bytes, |v| v, input, builder);
259159
}
260160
DataType::BinaryView => {
261-
generic_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder);
161+
generic_conversion_array!(BinaryViewType, as_byte_view, |v| v, input, builder);
262162
}
263163
DataType::Int8 => {
264164
primitive_conversion!(Int8Type, input, builder);
@@ -285,7 +185,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
285185
primitive_conversion!(UInt64Type, input, builder);
286186
}
287187
DataType::Float16 => {
288-
generic_conversion!(
188+
generic_conversion_array!(
289189
Float16Type,
290190
as_primitive,
291191
|v: f16| -> f32 { v.into() },
@@ -300,7 +200,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
300200
primitive_conversion!(Float64Type, input, builder);
301201
}
302202
DataType::Decimal32(_, scale) => {
303-
generic_conversion!(
203+
generic_conversion_array!(
304204
Decimal32Type,
305205
as_primitive,
306206
|v| decimal_to_variant_decimal!(v, scale, i32, VariantDecimal4),
@@ -309,7 +209,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
309209
);
310210
}
311211
DataType::Decimal64(_, scale) => {
312-
generic_conversion!(
212+
generic_conversion_array!(
313213
Decimal64Type,
314214
as_primitive,
315215
|v| decimal_to_variant_decimal!(v, scale, i64, VariantDecimal8),
@@ -318,7 +218,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
318218
);
319219
}
320220
DataType::Decimal128(_, scale) => {
321-
generic_conversion!(
221+
generic_conversion_array!(
322222
Decimal128Type,
323223
as_primitive,
324224
|v| decimal_to_variant_decimal!(v, scale, i128, VariantDecimal16),
@@ -327,7 +227,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
327227
);
328228
}
329229
DataType::Decimal256(_, scale) => {
330-
generic_conversion!(
230+
generic_conversion_array!(
331231
Decimal256Type,
332232
as_primitive,
333233
|v: i256| {
@@ -345,7 +245,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
345245
);
346246
}
347247
DataType::FixedSizeBinary(_) => {
348-
non_generic_conversion!(as_fixed_size_binary, |v| v, input, builder);
248+
non_generic_conversion_array!(as_fixed_size_binary, |v| v, input, builder);
349249
}
350250
DataType::Null => {
351251
for _ in 0..input.len() {
@@ -358,7 +258,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
358258
DataType::Time32(unit) => {
359259
match *unit {
360260
TimeUnit::Second => {
361-
generic_conversion!(
261+
generic_conversion_array!(
362262
Time32SecondType,
363263
as_primitive,
364264
// nanoseconds are always 0
@@ -368,7 +268,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
368268
);
369269
}
370270
TimeUnit::Millisecond => {
371-
generic_conversion!(
271+
generic_conversion_array!(
372272
Time32MillisecondType,
373273
as_primitive,
374274
|v| NaiveTime::from_num_seconds_from_midnight_opt(
@@ -391,7 +291,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
391291
DataType::Time64(unit) => {
392292
match *unit {
393293
TimeUnit::Microsecond => {
394-
generic_conversion!(
294+
generic_conversion_array!(
395295
Time64MicrosecondType,
396296
as_primitive,
397297
|v| NaiveTime::from_num_seconds_from_midnight_opt(
@@ -404,7 +304,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
404304
);
405305
}
406306
TimeUnit::Nanosecond => {
407-
generic_conversion!(
307+
generic_conversion_array!(
408308
Time64NanosecondType,
409309
as_primitive,
410310
|v| NaiveTime::from_num_seconds_from_midnight_opt(
@@ -486,7 +386,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
486386
}
487387
}
488388
DataType::Date32 => {
489-
generic_conversion!(
389+
generic_conversion_array!(
490390
Date32Type,
491391
as_primitive,
492392
|v: i32| -> NaiveDate { Date32Type::to_naive_date(v) },
@@ -495,7 +395,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
495395
);
496396
}
497397
DataType::Date64 => {
498-
generic_conversion!(
398+
generic_conversion_array!(
499399
Date64Type,
500400
as_primitive,
501401
|v: i64| { Date64Type::to_naive_date_opt(v).unwrap() },

parquet-variant-compute/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
pub mod cast_to_variant;
3939
mod from_json;
4040
mod to_json;
41+
mod type_conversion;
4142
mod variant_array;
4243
mod variant_array_builder;
4344
pub mod variant_get;

0 commit comments

Comments
 (0)