17
17
18
18
use std:: sync:: Arc ;
19
19
20
+ use crate :: type_conversion:: {
21
+ decimal_to_variant_decimal, generic_conversion_array, non_generic_conversion_array,
22
+ primitive_conversion_array,
23
+ } ;
20
24
use crate :: { VariantArray , VariantArrayBuilder } ;
21
25
use arrow:: array:: {
22
26
Array , AsArray , TimestampMicrosecondArray , TimestampMillisecondArray , TimestampNanosecondArray ,
@@ -37,60 +41,10 @@ use arrow::temporal_conversions::{
37
41
} ;
38
42
use arrow_schema:: { ArrowError , DataType , TimeUnit } ;
39
43
use chrono:: { DateTime , NaiveDate , NaiveDateTime , NaiveTime , TimeZone , Utc } ;
40
- use half:: f16;
41
44
use parquet_variant:: {
42
45
Variant , VariantBuilder , VariantDecimal16 , VariantDecimal4 , VariantDecimal8 ,
43
46
} ;
44
47
45
- /// Convert the input array of a specific primitive type to a `VariantArray`
46
- /// row by row
47
- macro_rules! primitive_conversion {
48
- ( $t: ty, $input: expr, $builder: expr) => { {
49
- let array = $input. as_primitive:: <$t>( ) ;
50
- for i in 0 ..array. len( ) {
51
- if array. is_null( i) {
52
- $builder. append_null( ) ;
53
- continue ;
54
- }
55
- $builder. append_variant( Variant :: from( array. value( i) ) ) ;
56
- }
57
- } } ;
58
- }
59
-
60
- /// Convert the input array to a `VariantArray` row by row, using `method`
61
- /// requiring a generic type to downcast the generic array to a specific
62
- /// array type and `cast_fn` to transform each element to a type compatible with Variant
63
- macro_rules! generic_conversion {
64
- ( $t: ty, $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { {
65
- let array = $input. $method:: <$t>( ) ;
66
- for i in 0 ..array. len( ) {
67
- if array. is_null( i) {
68
- $builder. append_null( ) ;
69
- continue ;
70
- }
71
- let cast_value = $cast_fn( array. value( i) ) ;
72
- $builder. append_variant( Variant :: from( cast_value) ) ;
73
- }
74
- } } ;
75
- }
76
-
77
- /// Convert the input array to a `VariantArray` row by row, using `method`
78
- /// not requiring a generic type to downcast the generic array to a specific
79
- /// array type and `cast_fn` to transform each element to a type compatible with Variant
80
- macro_rules! non_generic_conversion {
81
- ( $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { {
82
- let array = $input. $method( ) ;
83
- for i in 0 ..array. len( ) {
84
- if array. is_null( i) {
85
- $builder. append_null( ) ;
86
- continue ;
87
- }
88
- let cast_value = $cast_fn( array. value( i) ) ;
89
- $builder. append_variant( Variant :: from( cast_value) ) ;
90
- }
91
- } } ;
92
- }
93
-
94
48
fn convert_timestamp (
95
49
time_unit : & TimeUnit ,
96
50
time_zone : & Option < Arc < str > > ,
@@ -159,61 +113,6 @@ fn convert_timestamp(
159
113
}
160
114
}
161
115
162
- /// Convert a decimal value to a `VariantDecimal`
163
- macro_rules! decimal_to_variant_decimal {
164
- ( $v: ident, $scale: expr, $value_type: ty, $variant_type: ty) => {
165
- if * $scale < 0 {
166
- // For negative scale, we need to multiply the value by 10^|scale|
167
- // For example: 123 with scale -2 becomes 12300
168
- let multiplier = ( 10 as $value_type) . pow( ( -* $scale) as u32 ) ;
169
- // Check for overflow
170
- if $v > 0 && $v > <$value_type>:: MAX / multiplier {
171
- return Variant :: Null ;
172
- }
173
- if $v < 0 && $v < <$value_type>:: MIN / multiplier {
174
- return Variant :: Null ;
175
- }
176
- <$variant_type>:: try_new( $v * multiplier, 0 )
177
- . map( |v| v. into( ) )
178
- . unwrap_or( Variant :: Null )
179
- } else {
180
- <$variant_type>:: try_new( $v, * $scale as u8 )
181
- . map( |v| v. into( ) )
182
- . unwrap_or( Variant :: Null )
183
- }
184
- } ;
185
- }
186
-
187
- /// Convert arrays that don't need generic type parameters
188
- macro_rules! cast_conversion_nongeneric {
189
- ( $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { {
190
- let array = $input. $method( ) ;
191
- for i in 0 ..array. len( ) {
192
- if array. is_null( i) {
193
- $builder. append_null( ) ;
194
- continue ;
195
- }
196
- let cast_value = $cast_fn( array. value( i) ) ;
197
- $builder. append_variant( Variant :: from( cast_value) ) ;
198
- }
199
- } } ;
200
- }
201
-
202
- /// Convert string arrays using the offset size as the type parameter
203
- macro_rules! cast_conversion_string {
204
- ( $offset_type: ty, $method: ident, $cast_fn: expr, $input: expr, $builder: expr) => { {
205
- let array = $input. $method:: <$offset_type>( ) ;
206
- for i in 0 ..array. len( ) {
207
- if array. is_null( i) {
208
- $builder. append_null( ) ;
209
- continue ;
210
- }
211
- let cast_value = $cast_fn( array. value( i) ) ;
212
- $builder. append_variant( Variant :: from( cast_value) ) ;
213
- }
214
- } } ;
215
- }
216
-
217
116
/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you
218
117
/// need to convert a specific data type
219
118
///
@@ -250,58 +149,52 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
250
149
// todo: handle other types like Boolean, Date, Timestamp, etc.
251
150
match input_type {
252
151
DataType :: Boolean => {
253
- non_generic_conversion ! ( as_boolean, |v| v, input , builder) ;
152
+ non_generic_conversion_array ! ( input . as_boolean( ) , |v| v, builder) ;
254
153
}
255
154
DataType :: Binary => {
256
- generic_conversion ! ( BinaryType , as_bytes, |v| v, input, builder) ;
155
+ generic_conversion_array ! ( BinaryType , as_bytes, |v| v, input, builder) ;
257
156
}
258
157
DataType :: LargeBinary => {
259
- generic_conversion ! ( LargeBinaryType , as_bytes, |v| v, input, builder) ;
158
+ generic_conversion_array ! ( LargeBinaryType , as_bytes, |v| v, input, builder) ;
260
159
}
261
160
DataType :: BinaryView => {
262
- generic_conversion ! ( BinaryViewType , as_byte_view, |v| v, input, builder) ;
161
+ generic_conversion_array ! ( BinaryViewType , as_byte_view, |v| v, input, builder) ;
263
162
}
264
163
DataType :: Int8 => {
265
- primitive_conversion ! ( Int8Type , input, builder) ;
164
+ primitive_conversion_array ! ( Int8Type , input, builder) ;
266
165
}
267
166
DataType :: Int16 => {
268
- primitive_conversion ! ( Int16Type , input, builder) ;
167
+ primitive_conversion_array ! ( Int16Type , input, builder) ;
269
168
}
270
169
DataType :: Int32 => {
271
- primitive_conversion ! ( Int32Type , input, builder) ;
170
+ primitive_conversion_array ! ( Int32Type , input, builder) ;
272
171
}
273
172
DataType :: Int64 => {
274
- primitive_conversion ! ( Int64Type , input, builder) ;
173
+ primitive_conversion_array ! ( Int64Type , input, builder) ;
275
174
}
276
175
DataType :: UInt8 => {
277
- primitive_conversion ! ( UInt8Type , input, builder) ;
176
+ primitive_conversion_array ! ( UInt8Type , input, builder) ;
278
177
}
279
178
DataType :: UInt16 => {
280
- primitive_conversion ! ( UInt16Type , input, builder) ;
179
+ primitive_conversion_array ! ( UInt16Type , input, builder) ;
281
180
}
282
181
DataType :: UInt32 => {
283
- primitive_conversion ! ( UInt32Type , input, builder) ;
182
+ primitive_conversion_array ! ( UInt32Type , input, builder) ;
284
183
}
285
184
DataType :: UInt64 => {
286
- primitive_conversion ! ( UInt64Type , input, builder) ;
185
+ primitive_conversion_array ! ( UInt64Type , input, builder) ;
287
186
}
288
187
DataType :: Float16 => {
289
- generic_conversion ! (
290
- Float16Type ,
291
- as_primitive,
292
- |v: f16| -> f32 { v. into( ) } ,
293
- input,
294
- builder
295
- ) ;
188
+ generic_conversion_array ! ( Float16Type , as_primitive, f32 :: from, input, builder) ;
296
189
}
297
190
DataType :: Float32 => {
298
- primitive_conversion ! ( Float32Type , input, builder) ;
191
+ primitive_conversion_array ! ( Float32Type , input, builder) ;
299
192
}
300
193
DataType :: Float64 => {
301
- primitive_conversion ! ( Float64Type , input, builder) ;
194
+ primitive_conversion_array ! ( Float64Type , input, builder) ;
302
195
}
303
196
DataType :: Decimal32 ( _, scale) => {
304
- generic_conversion ! (
197
+ generic_conversion_array ! (
305
198
Decimal32Type ,
306
199
as_primitive,
307
200
|v| decimal_to_variant_decimal!( v, scale, i32 , VariantDecimal4 ) ,
@@ -310,7 +203,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
310
203
) ;
311
204
}
312
205
DataType :: Decimal64 ( _, scale) => {
313
- generic_conversion ! (
206
+ generic_conversion_array ! (
314
207
Decimal64Type ,
315
208
as_primitive,
316
209
|v| decimal_to_variant_decimal!( v, scale, i64 , VariantDecimal8 ) ,
@@ -319,7 +212,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
319
212
) ;
320
213
}
321
214
DataType :: Decimal128 ( _, scale) => {
322
- generic_conversion ! (
215
+ generic_conversion_array ! (
323
216
Decimal128Type ,
324
217
as_primitive,
325
218
|v| decimal_to_variant_decimal!( v, scale, i128 , VariantDecimal16 ) ,
@@ -328,7 +221,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
328
221
) ;
329
222
}
330
223
DataType :: Decimal256 ( _, scale) => {
331
- generic_conversion ! (
224
+ generic_conversion_array ! (
332
225
Decimal256Type ,
333
226
as_primitive,
334
227
|v: i256| {
@@ -346,7 +239,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
346
239
) ;
347
240
}
348
241
DataType :: FixedSizeBinary ( _) => {
349
- non_generic_conversion ! ( as_fixed_size_binary, |v| v, input , builder) ;
242
+ non_generic_conversion_array ! ( input . as_fixed_size_binary( ) , |v| v, builder) ;
350
243
}
351
244
DataType :: Null => {
352
245
for _ in 0 ..input. len ( ) {
@@ -359,7 +252,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
359
252
DataType :: Time32 ( unit) => {
360
253
match * unit {
361
254
TimeUnit :: Second => {
362
- generic_conversion ! (
255
+ generic_conversion_array ! (
363
256
Time32SecondType ,
364
257
as_primitive,
365
258
// nanoseconds are always 0
@@ -369,7 +262,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
369
262
) ;
370
263
}
371
264
TimeUnit :: Millisecond => {
372
- generic_conversion ! (
265
+ generic_conversion_array ! (
373
266
Time32MillisecondType ,
374
267
as_primitive,
375
268
|v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -392,7 +285,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
392
285
DataType :: Time64 ( unit) => {
393
286
match * unit {
394
287
TimeUnit :: Microsecond => {
395
- generic_conversion ! (
288
+ generic_conversion_array ! (
396
289
Time64MicrosecondType ,
397
290
as_primitive,
398
291
|v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -405,7 +298,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
405
298
) ;
406
299
}
407
300
TimeUnit :: Nanosecond => {
408
- generic_conversion ! (
301
+ generic_conversion_array ! (
409
302
Time64NanosecondType ,
410
303
as_primitive,
411
304
|v| NaiveTime :: from_num_seconds_from_midnight_opt(
@@ -433,13 +326,13 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
433
326
) ) ;
434
327
}
435
328
DataType :: Utf8 => {
436
- cast_conversion_string ! ( i32 , as_string, |v| v, input, builder) ;
329
+ generic_conversion_array ! ( i32 , as_string, |v| v, input, builder) ;
437
330
}
438
331
DataType :: LargeUtf8 => {
439
- cast_conversion_string ! ( i64 , as_string, |v| v, input, builder) ;
332
+ generic_conversion_array ! ( i64 , as_string, |v| v, input, builder) ;
440
333
}
441
334
DataType :: Utf8View => {
442
- cast_conversion_nongeneric ! ( as_string_view, |v| v, input , builder) ;
335
+ non_generic_conversion_array ! ( input . as_string_view( ) , |v| v, builder) ;
443
336
}
444
337
DataType :: Struct ( _) => {
445
338
let struct_array = input. as_struct ( ) ;
@@ -487,7 +380,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
487
380
}
488
381
}
489
382
DataType :: Date32 => {
490
- generic_conversion ! (
383
+ generic_conversion_array ! (
491
384
Date32Type ,
492
385
as_primitive,
493
386
|v: i32 | -> NaiveDate { Date32Type :: to_naive_date( v) } ,
@@ -496,7 +389,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
496
389
) ;
497
390
}
498
391
DataType :: Date64 => {
499
- generic_conversion ! (
392
+ generic_conversion_array ! (
500
393
Date64Type ,
501
394
as_primitive,
502
395
|v: i64 | { Date64Type :: to_naive_date_opt( v) . unwrap( ) } ,
@@ -723,6 +616,7 @@ mod tests {
723
616
use arrow_schema:: {
724
617
DECIMAL128_MAX_PRECISION , DECIMAL32_MAX_PRECISION , DECIMAL64_MAX_PRECISION ,
725
618
} ;
619
+ use half:: f16;
726
620
use parquet_variant:: { Variant , VariantDecimal16 } ;
727
621
use std:: { sync:: Arc , vec} ;
728
622
0 commit comments