Skip to content

Commit f24c3a8

Browse files
committed
[Variant] Add Variant type access for DataType::Binary
1 parent 109612e commit f24c3a8

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -309,10 +309,10 @@ mod test {
309309
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
310310
use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
311311
use arrow::array::{
312-
Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array, Decimal32Array,
313-
Decimal64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int8Array,
314-
Int16Array, Int32Array, Int64Array, LargeStringArray, NullBuilder, StringArray,
315-
StructArray, Time64MicrosecondArray,
312+
Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
313+
Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, Float32Array,
314+
Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, LargeStringArray, NullBuilder,
315+
StringArray, StructArray, Time64MicrosecondArray,
316316
};
317317
use arrow::buffer::NullBuffer;
318318
use arrow::compute::CastOptions;
@@ -1321,6 +1321,25 @@ mod test {
13211321
])
13221322
);
13231323

1324+
// Fixture for the Binary shredding test: registers `perfectly_shredded_binary_variant_array`
// through `perfectly_shredded_variant_array_fn!`, handing it a closure that builds a
// three-element `BinaryArray` with no nulls. Each literal is cast to `&[u8]` so the
// differently sized byte-string literals share one element type inside the `vec!`.
// NOTE(review): the macro definition is not visible in this hunk; presumably it wraps the
// closure's array as the typed value of a fully shredded VariantArray — confirm.
perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || {
1325+
BinaryArray::from(vec![
1326+
Some(b"Arrow-parquet-variant" as &[u8]),
1327+
Some(b"binary-value" as &[u8]),
1328+
Some(b"apache-arrow-rs-parquet-variant-binary" as &[u8]),
1329+
])
1330+
});
1331+
1332+
// Test case `get_variant_perfectly_shredded_binary_as_binary`: requests `DataType::Binary`
// from the `perfectly_shredded_binary_variant_array` fixture and supplies the expected
// `BinaryArray`, which repeats the fixture's three byte strings in the same order.
// NOTE(review): the macro body is not visible here; presumably it runs variant_get with the
// given type and asserts the result equals the expected array — confirm.
perfectly_shredded_to_arrow_primitive_test!(
1333+
get_variant_perfectly_shredded_binary_as_binary,
1334+
DataType::Binary,
1335+
perfectly_shredded_binary_variant_array,
1336+
BinaryArray::from(vec![
1337+
Some(b"Arrow-parquet-variant" as &[u8]),
1338+
Some(b"binary-value" as &[u8]),
1339+
Some(b"apache-arrow-rs-parquet-variant-binary" as &[u8]),
1340+
])
1341+
);
1342+
13241343
/// Return a VariantArray that represents a normal "shredded" variant
13251344
/// for the following example
13261345
///

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,11 @@
1616
// under the License.
1717

1818
use arrow::array::{
19-
ArrayRef, BinaryViewArray, BooleanBuilder, GenericStringBuilder, NullArray, NullBufferBuilder,
20-
OffsetSizeTrait, PrimitiveBuilder,
19+
ArrayRef, BinaryViewArray, BooleanBuilder, GenericByteBuilder, GenericStringBuilder, NullArray,
20+
NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder,
2121
};
2222
use arrow::compute::{CastOptions, DecimalCast};
23-
use arrow::datatypes::{self, DataType, DecimalType};
23+
use arrow::datatypes::{self, DataType, DecimalType, GenericBinaryType};
2424
use arrow::error::{ArrowError, Result};
2525
use parquet_variant::{Variant, VariantPath};
2626

@@ -63,6 +63,7 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
6363
Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
6464
Utf8(VariantToUtf8ArrowRowBuilder<'a, i32>),
6565
LargeUtf8(VariantToUtf8ArrowRowBuilder<'a, i64>),
66+
Binary(VariantToBinaryRowBuilder<'a>),
6667
}
6768

6869
/// Builder for converting variant values into strongly typed Arrow arrays.
@@ -106,6 +107,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
106107
Date(b) => b.append_null(),
107108
Utf8(b) => b.append_null(),
108109
LargeUtf8(b) => b.append_null(),
110+
Binary(b) => b.append_null(),
109111
}
110112
}
111113

@@ -137,6 +139,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
137139
Date(b) => b.append_value(value),
138140
Utf8(b) => b.append_value(value),
139141
LargeUtf8(b) => b.append_value(value),
142+
Binary(b) => b.append_value(value),
140143
}
141144
}
142145

@@ -168,6 +171,7 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
168171
Date(b) => b.finish(),
169172
Utf8(b) => b.finish(),
170173
LargeUtf8(b) => b.finish(),
174+
Binary(b) => b.finish(),
171175
}
172176
}
173177
}
@@ -294,6 +298,7 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
294298
DataType::LargeUtf8 => {
295299
LargeUtf8(VariantToUtf8ArrowRowBuilder::new(cast_options, capacity))
296300
}
301+
DataType::Binary => Binary(VariantToBinaryRowBuilder::new(cast_options, capacity)),
297302
_ if data_type.is_primitive() => {
298303
return Err(ArrowError::NotYetImplemented(format!(
299304
"Primitive data_type {data_type:?} not yet implemented"
@@ -478,6 +483,13 @@ define_variant_to_primitive_builder!(
478483
type_name: format!("{}Utf8", O::PREFIX)
479484
);
480485

486+
// Declares `VariantToBinaryRowBuilder`, the row builder selected for `DataType::Binary`
// by `make_primitive_variant_to_arrow_row_builder`.
//
// * Builder: `GenericByteBuilder<GenericBinaryType<i32>>`. `capacity` is forwarded as the
//   first `with_capacity` argument; the second argument is the fixed literal 1024.
//   NOTE(review): per arrow's `GenericByteBuilder::with_capacity(item_capacity, data_capacity)`
//   the 1024 should be the initial byte-buffer size — confirm, and consider whether it
//   ought to scale with `capacity`.
// * Conversion: each variant goes through `value.as_u8_slice()`.
//   NOTE(review): presumably this yields `None` for non-binary variants, leaving the
//   macro-generated code to apply `cast_options` — verify against `parquet_variant::Variant`.
// * `type_name: "Binary"` is the label passed to the macro (likely used in cast error
//   messages; the macro definition is outside this hunk).
define_variant_to_primitive_builder!(
487+
struct VariantToBinaryRowBuilder<'a>
488+
|capacity| -> GenericByteBuilder<GenericBinaryType<i32>> { GenericByteBuilder::<GenericBinaryType<i32>>::with_capacity(capacity, 1024) },
489+
|value| value.as_u8_slice(),
490+
type_name: "Binary"
491+
);
492+
481493
/// Builder for converting variant values to arrow Decimal values
482494
pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
483495
where

0 commit comments

Comments
 (0)