Skip to content

Commit 89b4b13

Browse files
authored
[Variant] Minor code cleanups (#8356)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Closes #NNN. # Rationale for this change Small code cleanups that accumulated during other work. # What changes are included in this PR? Now that `VariantToArrowRowBuilder` is an enum instead of a dyn trait, its `finish` method can take `self` instead of `&mut self`, which simplifies both its semantics and its call sites. Additionally, pass the input length to the row builders so they can reserve capacity accordingly. # Are these changes tested? Existing unit tests cover this code # Are there any user-facing changes? No
1 parent 477d69e commit 89b4b13

File tree

2 files changed

+39
-18
lines changed

2 files changed

+39
-18
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ fn shredded_get_path(
135135
let shred_basic_variant =
136136
|target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
137137
let as_type = as_field.map(|f| f.data_type());
138-
let mut builder = make_variant_to_arrow_row_builder(path, as_type, cast_options)?;
138+
let mut builder =
139+
make_variant_to_arrow_row_builder(path, as_type, cast_options, target.len())?;
139140
for i in 0..target.len() {
140141
if target.is_null(i) {
141142
builder.append_null()?;

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ impl<'a> VariantToArrowRowBuilder<'a> {
7676
}
7777
}
7878

79-
pub fn finish(&mut self) -> Result<ArrayRef> {
79+
pub fn finish(self) -> Result<ArrayRef> {
8080
use VariantToArrowRowBuilder::*;
8181
match self {
8282
Int8(b) => b.finish(),
@@ -97,19 +97,41 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>(
9797
path: VariantPath<'a>,
9898
data_type: Option<&'a DataType>,
9999
cast_options: &'a CastOptions,
100+
capacity: usize,
100101
) -> Result<VariantToArrowRowBuilder<'a>> {
101102
use VariantToArrowRowBuilder::*;
102103

103104
let mut builder = match data_type {
104105
// If no data type was requested, build an unshredded VariantArray.
105-
None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(16)),
106-
Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
107-
Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
108-
Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
109-
Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
110-
Some(DataType::Float16) => Float16(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
111-
Some(DataType::Float32) => Float32(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
112-
Some(DataType::Float64) => Float64(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
106+
None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(capacity)),
107+
Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new(
108+
cast_options,
109+
capacity,
110+
)),
111+
Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new(
112+
cast_options,
113+
capacity,
114+
)),
115+
Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new(
116+
cast_options,
117+
capacity,
118+
)),
119+
Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new(
120+
cast_options,
121+
capacity,
122+
)),
123+
Some(DataType::Float16) => Float16(VariantToPrimitiveArrowRowBuilder::new(
124+
cast_options,
125+
capacity,
126+
)),
127+
Some(DataType::Float32) => Float32(VariantToPrimitiveArrowRowBuilder::new(
128+
cast_options,
129+
capacity,
130+
)),
131+
Some(DataType::Float64) => Float64(VariantToPrimitiveArrowRowBuilder::new(
132+
cast_options,
133+
capacity,
134+
)),
113135
_ => {
114136
return Err(ArrowError::NotYetImplemented(format!(
115137
"variant_get with path={:?} and data_type={:?} not yet implemented",
@@ -150,7 +172,7 @@ impl<'a> VariantPathRowBuilder<'a> {
150172
}
151173
}
152174

153-
fn finish(&mut self) -> Result<ArrayRef> {
175+
fn finish(self) -> Result<ArrayRef> {
154176
self.builder.finish()
155177
}
156178
}
@@ -180,9 +202,9 @@ pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T: ArrowPrimitiveType> {
180202
}
181203

182204
impl<'a, T: ArrowPrimitiveType> VariantToPrimitiveArrowRowBuilder<'a, T> {
183-
fn new(cast_options: &'a CastOptions<'a>) -> Self {
205+
fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
184206
Self {
185-
builder: PrimitiveBuilder::<T>::new(),
207+
builder: PrimitiveBuilder::<T>::with_capacity(capacity),
186208
cast_options,
187209
}
188210
}
@@ -217,7 +239,7 @@ where
217239
}
218240
}
219241

220-
fn finish(&mut self) -> Result<ArrayRef> {
242+
fn finish(mut self) -> Result<ArrayRef> {
221243
Ok(Arc::new(self.builder.finish()))
222244
}
223245
}
@@ -253,9 +275,7 @@ impl VariantToBinaryVariantArrowRowBuilder {
253275
Ok(true)
254276
}
255277

256-
fn finish(&mut self) -> Result<ArrayRef> {
257-
// VariantArrayBuilder::build takes ownership, so we need to replace it
258-
let builder = std::mem::replace(&mut self.builder, VariantArrayBuilder::new(0));
259-
Ok(Arc::new(builder.build()))
278+
fn finish(self) -> Result<ArrayRef> {
279+
Ok(Arc::new(self.builder.build()))
260280
}
261281
}

0 commit comments

Comments
 (0)