Skip to content

Commit 7a5f6d3

Browse files
Implement DataType::{Binary, LargeBinary, BinaryView} => Variant::Binary (#8074)
# Which issue does this PR close?

- Closes #8050

# Rationale for this change

Adds Binary, LargeBinary, and BinaryView conversions to the `cast_to_variant` kernel.

# What changes are included in this PR?

- A macro to simplify array type conversions
- Conversion of `DataType::{Binary, LargeBinary, BinaryView}` => `Variant::Binary`

# Are these changes tested?

Yes, additional unit tests have been added.

# Are there any user-facing changes?

Yes, adds new type conversions to the kernel.

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 25bbb3d commit 7a5f6d3

File tree

1 file changed

+80
-9
lines changed

1 file changed

+80
-9
lines changed

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
use crate::{VariantArray, VariantArrayBuilder};
1919
use arrow::array::{Array, AsArray};
2020
use arrow::datatypes::{
21-
Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
22-
UInt32Type, UInt64Type, UInt8Type,
21+
BinaryType, BinaryViewType, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type,
22+
Int64Type, Int8Type, LargeBinaryType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
2323
};
2424
use arrow_schema::{ArrowError, DataType};
2525
use half::f16;
@@ -40,11 +40,12 @@ macro_rules! primitive_conversion {
4040
}};
4141
}
4242

43-
/// Convert the input array to a `VariantArray` row by row,
44-
/// transforming each element with `cast_fn`
43+
/// Convert the input array to a `VariantArray` row by row, using `method`
44+
/// to downcast the generic array to a specific array type and `cast_fn`
45+
/// to transform each element to a type compatible with Variant
4546
macro_rules! cast_conversion {
46-
($t:ty, $cast_fn:expr, $input:expr, $builder:expr) => {{
47-
let array = $input.as_primitive::<$t>();
47+
($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
48+
let array = $input.$method::<$t>();
4849
for i in 0..array.len() {
4950
if array.is_null(i) {
5051
$builder.append_null();
@@ -85,6 +86,15 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
8586
let input_type = input.data_type();
8687
// todo: handle other types like Boolean, Strings, Date, Timestamp, etc.
8788
match input_type {
89+
DataType::Binary => {
90+
cast_conversion!(BinaryType, as_bytes, |v| v, input, builder);
91+
}
92+
DataType::LargeBinary => {
93+
cast_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder);
94+
}
95+
DataType::BinaryView => {
96+
cast_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder);
97+
}
8898
DataType::Int8 => {
8999
primitive_conversion!(Int8Type, input, builder);
90100
}
@@ -110,7 +120,13 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
110120
primitive_conversion!(UInt64Type, input, builder);
111121
}
112122
DataType::Float16 => {
113-
cast_conversion!(Float16Type, |v: f16| -> f32 { v.into() }, input, builder);
123+
cast_conversion!(
124+
Float16Type,
125+
as_primitive,
126+
|v: f16| -> f32 { v.into() },
127+
input,
128+
builder
129+
);
114130
}
115131
DataType::Float32 => {
116132
primitive_conversion!(Float32Type, input, builder);
@@ -135,12 +151,67 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
135151
mod tests {
136152
use super::*;
137153
use arrow::array::{
138-
ArrayRef, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
139-
Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
154+
ArrayRef, Float16Array, Float32Array, Float64Array, GenericByteBuilder,
155+
GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array,
156+
UInt32Array, UInt64Array, UInt8Array,
140157
};
141158
use parquet_variant::{Variant, VariantDecimal16};
142159
use std::sync::Arc;
143160

161+
#[test]
162+
fn test_cast_to_variant_binary() {
163+
// Each section below builds a four-row array (b"hello", b"", null, b"world")
// and hands it to `run_test` together with the expected rows:
// `Some(Variant::Binary(..))` for the values and `None` for the null row.
// NOTE(review): `run_test` is defined outside this view; presumably it runs
// `cast_to_variant` on the array and compares the result to the expected rows.
//
// BinaryType: array built with `GenericByteBuilder::<BinaryType>`
164+
let mut builder = GenericByteBuilder::<BinaryType>::new();
165+
builder.append_value(b"hello");
166+
builder.append_value(b"");
167+
builder.append_null();
168+
builder.append_value(b"world");
169+
let binary_array = builder.finish();
170+
run_test(
171+
Arc::new(binary_array),
172+
vec![
173+
Some(Variant::Binary(b"hello")),
174+
Some(Variant::Binary(b"")),
175+
None,
176+
Some(Variant::Binary(b"world")),
177+
],
178+
);
179+
180+
// LargeBinaryType: same rows, array built with `GenericByteBuilder::<LargeBinaryType>`
181+
let mut builder = GenericByteBuilder::<LargeBinaryType>::new();
182+
builder.append_value(b"hello");
183+
builder.append_value(b"");
184+
builder.append_null();
185+
builder.append_value(b"world");
186+
let large_binary_array = builder.finish();
187+
run_test(
188+
Arc::new(large_binary_array),
189+
vec![
190+
Some(Variant::Binary(b"hello")),
191+
Some(Variant::Binary(b"")),
192+
None,
193+
Some(Variant::Binary(b"world")),
194+
],
195+
);
196+
197+
// BinaryViewType: same rows, array built with `GenericByteViewBuilder::<BinaryViewType>`
198+
let mut builder = GenericByteViewBuilder::<BinaryViewType>::new();
199+
builder.append_value(b"hello");
200+
builder.append_value(b"");
201+
builder.append_null();
202+
builder.append_value(b"world");
203+
let byte_view_array = builder.finish();
204+
run_test(
205+
Arc::new(byte_view_array),
206+
vec![
207+
Some(Variant::Binary(b"hello")),
208+
Some(Variant::Binary(b"")),
209+
None,
210+
Some(Variant::Binary(b"world")),
211+
],
212+
);
213+
}
214+
144215
#[test]
145216
fn test_cast_to_variant_int8() {
146217
run_test(

0 commit comments

Comments
 (0)