Skip to content

Commit bfdc31b

Browse files
scovich and alamb authored
[Variant] Add nullable arg to StructArrayBuilder::with_field (#8342)
# Which issue does this PR close?

We generally require a GitHub issue to be filed for all bug fixes and enhancements, and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.

- Closes #NNN.

# Rationale for this change

`StructArrayBuilder::with_field` is currently hard-wired to assume the field will be nullable. This is unhelpful when adding non-nullable fields such as a variant `metadata` column.

# What changes are included in this PR?

Add a third parameter, `nullable`.

# Are these changes tested?

The builder is heavily used by unit tests, which have been adjusted to pass the new param.

# Are there any user-facing changes?

No.

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 20ccf5f commit bfdc31b

File tree

2 files changed

+49
-84
lines changed

2 files changed

+49
-84
lines changed

parquet-variant-compute/src/variant_array.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,12 @@ impl VariantArray {
140140
nulls: Option<NullBuffer>,
141141
) -> Self {
142142
let mut builder =
143-
StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()));
143+
StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false);
144144
if let Some(value) = value.clone() {
145-
builder = builder.with_field("value", Arc::new(value));
145+
builder = builder.with_field("value", Arc::new(value), true);
146146
}
147147
if let Some(typed_value) = typed_value.clone() {
148-
builder = builder.with_field("typed_value", typed_value);
148+
builder = builder.with_field("typed_value", typed_value, true);
149149
}
150150
if let Some(nulls) = nulls {
151151
builder = builder.with_nulls(nulls);
@@ -564,8 +564,8 @@ impl StructArrayBuilder {
564564
}
565565

566566
/// Add an array to this struct array as a field with the specified name.
567-
pub fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self {
568-
let field = Field::new(field_name, array.data_type().clone(), true);
567+
pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self {
568+
let field = Field::new(field_name, array.data_type().clone(), nullable);
569569
self.fields.push(Arc::new(field));
570570
self.arrays.push(array);
571571
self

parquet-variant-compute/src/variant_get.rs

Lines changed: 44 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,8 @@ mod test {
307307
use parquet_variant::{Variant, VariantPath};
308308

309309
use crate::json_to_variant;
310-
use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
310+
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
311+
use crate::VariantArray;
311312

312313
use super::{variant_get, GetOptions};
313314

@@ -692,8 +693,8 @@ mod test {
692693
]);
693694

694695
let struct_array = StructArrayBuilder::new()
695-
.with_field("metadata", Arc::new(metadata))
696-
.with_field("typed_value", Arc::new(typed_value))
696+
.with_field("metadata", Arc::new(metadata), false)
697+
.with_field("typed_value", Arc::new(typed_value), true)
697698
.build();
698699

699700
Arc::new(
@@ -821,9 +822,9 @@ mod test {
821822
]);
822823

823824
let struct_array = StructArrayBuilder::new()
824-
.with_field("metadata", Arc::new(metadata))
825-
.with_field("typed_value", Arc::new(typed_value))
826-
.with_field("value", Arc::new(values))
825+
.with_field("metadata", Arc::new(metadata), false)
826+
.with_field("typed_value", Arc::new(typed_value), true)
827+
.with_field("value", Arc::new(values), true)
827828
.with_nulls(nulls)
828829
.build();
829830

@@ -926,9 +927,9 @@ mod test {
926927
]);
927928

928929
let struct_array = StructArrayBuilder::new()
929-
.with_field("metadata", Arc::new(metadata))
930-
.with_field("typed_value", Arc::new(typed_value))
931-
.with_field("value", Arc::new(values))
930+
.with_field("metadata", Arc::new(metadata), true)
931+
.with_field("typed_value", Arc::new(typed_value), true)
932+
.with_field("value", Arc::new(values), true)
932933
.with_nulls(nulls)
933934
.build();
934935

@@ -937,45 +938,6 @@ mod test {
937938
)
938939
}
939940

940-
/// Builds struct arrays from component fields
941-
///
942-
/// TODO: move to arrow crate
943-
#[derive(Debug, Default, Clone)]
944-
struct StructArrayBuilder {
945-
fields: Vec<FieldRef>,
946-
arrays: Vec<ArrayRef>,
947-
nulls: Option<NullBuffer>,
948-
}
949-
950-
impl StructArrayBuilder {
951-
fn new() -> Self {
952-
Default::default()
953-
}
954-
955-
/// Add an array to this struct array as a field with the specified name.
956-
fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self {
957-
let field = Field::new(field_name, array.data_type().clone(), true);
958-
self.fields.push(Arc::new(field));
959-
self.arrays.push(array);
960-
self
961-
}
962-
963-
/// Set the null buffer for this struct array.
964-
fn with_nulls(mut self, nulls: NullBuffer) -> Self {
965-
self.nulls = Some(nulls);
966-
self
967-
}
968-
969-
pub fn build(self) -> StructArray {
970-
let Self {
971-
fields,
972-
arrays,
973-
nulls,
974-
} = self;
975-
StructArray::new(Fields::from(fields), arrays, nulls)
976-
}
977-
}
978-
979941
/// Return a VariantArray that represents an "all null" variant
980942
/// for the following example (3 null values):
981943
///
@@ -1005,7 +967,7 @@ mod test {
1005967
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
1006968

1007969
let struct_array = StructArrayBuilder::new()
1008-
.with_field("metadata", Arc::new(metadata))
970+
.with_field("metadata", Arc::new(metadata), false)
1009971
.with_nulls(nulls)
1010972
.build();
1011973

@@ -1096,8 +1058,8 @@ mod test {
10961058
let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
10971059

10981060
// For perfect shredding of the x field, no "value" column, only typed_value
1099-
let x_field_struct = crate::variant_array::StructArrayBuilder::new()
1100-
.with_field("typed_value", Arc::new(x_field_typed_value))
1061+
let x_field_struct = StructArrayBuilder::new()
1062+
.with_field("typed_value", Arc::new(x_field_typed_value), true)
11011063
.build();
11021064

11031065
// Wrap the x field struct in a ShreddedVariantFieldArray
@@ -1118,10 +1080,10 @@ mod test {
11181080
.unwrap();
11191081

11201082
// Create the main VariantArray
1121-
let main_struct = crate::variant_array::StructArrayBuilder::new()
1122-
.with_field("metadata", Arc::new(metadata_array))
1123-
.with_field("value", Arc::new(value_array))
1124-
.with_field("typed_value", Arc::new(typed_value_struct))
1083+
let main_struct = StructArrayBuilder::new()
1084+
.with_field("metadata", Arc::new(metadata_array), false)
1085+
.with_field("value", Arc::new(value_array), true)
1086+
.with_field("typed_value", Arc::new(typed_value_struct), true)
11251087
.build();
11261088

11271089
Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array"))
@@ -1476,8 +1438,8 @@ mod test {
14761438
let x_field_typed_value = Int32Array::from(vec![Some(42), None]);
14771439

14781440
// For the x field, only typed_value (perfect shredding when possible)
1479-
let x_field_struct = crate::variant_array::StructArrayBuilder::new()
1480-
.with_field("typed_value", Arc::new(x_field_typed_value))
1441+
let x_field_struct = StructArrayBuilder::new()
1442+
.with_field("typed_value", Arc::new(x_field_typed_value), true)
14811443
.build();
14821444

14831445
let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct))
@@ -1494,10 +1456,10 @@ mod test {
14941456
.unwrap();
14951457

14961458
// Build final VariantArray
1497-
let struct_array = crate::variant_array::StructArrayBuilder::new()
1498-
.with_field("metadata", Arc::new(metadata_array))
1499-
.with_field("value", Arc::new(value_array))
1500-
.with_field("typed_value", Arc::new(typed_value_struct))
1459+
let struct_array = StructArrayBuilder::new()
1460+
.with_field("metadata", Arc::new(metadata_array), false)
1461+
.with_field("value", Arc::new(value_array), true)
1462+
.with_field("typed_value", Arc::new(typed_value_struct), true)
15011463
.build();
15021464

15031465
Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray"))
@@ -1555,8 +1517,8 @@ mod test {
15551517
// Create the nested shredded structure
15561518
// Level 2: x field (the deepest level)
15571519
let x_typed_value = Int32Array::from(vec![Some(55), None]);
1558-
let x_field_struct = crate::variant_array::StructArrayBuilder::new()
1559-
.with_field("typed_value", Arc::new(x_typed_value))
1520+
let x_field_struct = StructArrayBuilder::new()
1521+
.with_field("typed_value", Arc::new(x_typed_value), true)
15601522
.build();
15611523
let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct))
15621524
.expect("should create ShreddedVariantFieldArray for x");
@@ -1582,15 +1544,16 @@ mod test {
15821544
x_field_shredded.data_type().clone(),
15831545
true,
15841546
)]);
1585-
let a_inner_struct = crate::variant_array::StructArrayBuilder::new()
1547+
let a_inner_struct = StructArrayBuilder::new()
15861548
.with_field(
15871549
"typed_value",
15881550
Arc::new(
15891551
StructArray::try_new(a_inner_fields, vec![Arc::new(x_field_shredded)], None)
15901552
.unwrap(),
15911553
),
1554+
true,
15921555
)
1593-
.with_field("value", Arc::new(a_value_array))
1556+
.with_field("value", Arc::new(a_value_array), true)
15941557
.build();
15951558
let a_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(a_inner_struct))
15961559
.expect("should create ShreddedVariantFieldArray for a");
@@ -1606,10 +1569,10 @@ mod test {
16061569
.unwrap();
16071570

16081571
// Build final VariantArray
1609-
let struct_array = crate::variant_array::StructArrayBuilder::new()
1610-
.with_field("metadata", Arc::new(metadata_array))
1611-
.with_field("value", Arc::new(value_array))
1612-
.with_field("typed_value", Arc::new(typed_value_struct))
1572+
let struct_array = StructArrayBuilder::new()
1573+
.with_field("metadata", Arc::new(metadata_array), false)
1574+
.with_field("value", Arc::new(value_array), true)
1575+
.with_field("typed_value", Arc::new(typed_value_struct), true)
16131576
.build();
16141577

16151578
Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray"))
@@ -1660,8 +1623,8 @@ mod test {
16601623

16611624
// Level 3: x field (deepest level)
16621625
let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
1663-
let x_field_struct = crate::variant_array::StructArrayBuilder::new()
1664-
.with_field("typed_value", Arc::new(x_typed_value))
1626+
let x_field_struct = StructArrayBuilder::new()
1627+
.with_field("typed_value", Arc::new(x_typed_value), true)
16651628
.build();
16661629
let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct))
16671630
.expect("should create ShreddedVariantFieldArray for x");
@@ -1685,15 +1648,16 @@ mod test {
16851648
x_field_shredded.data_type().clone(),
16861649
true,
16871650
)]);
1688-
let b_inner_struct = crate::variant_array::StructArrayBuilder::new()
1651+
let b_inner_struct = StructArrayBuilder::new()
16891652
.with_field(
16901653
"typed_value",
16911654
Arc::new(
16921655
StructArray::try_new(b_inner_fields, vec![Arc::new(x_field_shredded)], None)
16931656
.unwrap(),
16941657
),
1658+
true,
16951659
)
1696-
.with_field("value", Arc::new(b_value_array))
1660+
.with_field("value", Arc::new(b_value_array), true)
16971661
.build();
16981662
let b_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(b_inner_struct))
16991663
.expect("should create ShreddedVariantFieldArray for b");
@@ -1717,15 +1681,16 @@ mod test {
17171681
b_field_shredded.data_type().clone(),
17181682
true,
17191683
)]);
1720-
let a_inner_struct = crate::variant_array::StructArrayBuilder::new()
1684+
let a_inner_struct = StructArrayBuilder::new()
17211685
.with_field(
17221686
"typed_value",
17231687
Arc::new(
17241688
StructArray::try_new(a_inner_fields, vec![Arc::new(b_field_shredded)], None)
17251689
.unwrap(),
17261690
),
1691+
true,
17271692
)
1728-
.with_field("value", Arc::new(a_value_array))
1693+
.with_field("value", Arc::new(a_value_array), true)
17291694
.build();
17301695
let a_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(a_inner_struct))
17311696
.expect("should create ShreddedVariantFieldArray for a");
@@ -1741,10 +1706,10 @@ mod test {
17411706
.unwrap();
17421707

17431708
// Build final VariantArray
1744-
let struct_array = crate::variant_array::StructArrayBuilder::new()
1745-
.with_field("metadata", Arc::new(metadata_array))
1746-
.with_field("value", Arc::new(value_array))
1747-
.with_field("typed_value", Arc::new(typed_value_struct))
1709+
let struct_array = StructArrayBuilder::new()
1710+
.with_field("metadata", Arc::new(metadata_array), false)
1711+
.with_field("value", Arc::new(value_array), true)
1712+
.with_field("typed_value", Arc::new(typed_value_struct), true)
17481713
.build();
17491714

17501715
Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray"))

0 commit comments

Comments
 (0)