Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions parquet-variant-compute/src/variant_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ mod test {
use arrow::buffer::NullBuffer;
use arrow::compute::CastOptions;
use arrow_schema::{DataType, Field, FieldRef, Fields};
use parquet_variant::{Variant, VariantPath};
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};

use crate::json_to_variant;
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
Expand Down Expand Up @@ -701,8 +701,10 @@ mod test {
fn $func() -> ArrayRef {
// At the time of writing, the `VariantArrayBuilder` does not support shredding.
// so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895
let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() };
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
EMPTY_VARIANT_METADATA_BYTES,
3,
));
let typed_value = $array_type::from(vec![
Some(<$primitive_type>::try_from(1u8).unwrap()),
Some(<$primitive_type>::try_from(2u8).unwrap()),
Expand Down Expand Up @@ -1032,16 +1034,15 @@ mod test {
/// }
/// ```
fn all_null_variant_array() -> ArrayRef {
let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() };

let nulls = NullBuffer::from(vec![
false, // row 0 is null
false, // row 1 is null
false, // row 2 is null
]);

// metadata is the same for all rows (though they're all null)
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
let metadata =
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));

let struct_array = StructArrayBuilder::new()
.with_field("metadata", Arc::new(metadata), false)
Expand Down Expand Up @@ -2502,8 +2503,8 @@ mod test {
.build();

// Build final VariantArray with top-level nulls
let (metadata, _) = parquet_variant::VariantBuilder::new().finish();
let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
let metadata_array =
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
let nulls = NullBuffer::from(vec![
true, // row 0: inner struct exists with typed_value=42
true, // row 1: inner field NULL
Expand Down
2 changes: 1 addition & 1 deletion parquet-variant/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
pub use self::list::VariantList;
pub use self::metadata::VariantMetadata;
pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES};
pub use self::object::VariantObject;
use crate::decoder::{
self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
Expand Down
33 changes: 33 additions & 0 deletions parquet-variant/src/variant/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,39 @@ pub struct VariantMetadata<'m> {
// could increase the size of Variant. All those size increases could hurt performance.
const _: () = crate::utils::expect_size_of::<VariantMetadata>(32);

/// Canonical serialized form of an empty variant metadata dictionary.
///
/// A default metadata builder that never had a field name added produces exactly these bytes,
/// as the example below checks:
///
/// ```
/// # use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, VariantMetadata, WritableMetadataBuilder};
/// let mut metadata_builder = WritableMetadataBuilder::default();
/// metadata_builder.finish();
/// let metadata_bytes = metadata_builder.into_inner();
/// assert_eq!(&metadata_bytes, EMPTY_VARIANT_METADATA_BYTES);
/// ```
pub const EMPTY_VARIANT_METADATA_BYTES: &[u8] = &[
    // NOTE(review): byte roles below are presumed from the Variant metadata layout
    // (header, dictionary size, offsets) -- confirm against the encoding spec.
    0x01, // header byte
    0x00, // dictionary size: zero entries
    0x00, // the single remaining offset entry
];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


/// A ready-made [`VariantMetadata`] describing the empty dictionary, backed by
/// [`EMPTY_VARIANT_METADATA_BYTES`].
///
/// It compares equal to the metadata obtained by parsing the output of an empty builder:
///
/// ```
/// # use parquet_variant::{EMPTY_VARIANT_METADATA, VariantMetadata, WritableMetadataBuilder};
/// let mut metadata_builder = WritableMetadataBuilder::default();
/// metadata_builder.finish();
/// let metadata_bytes = metadata_builder.into_inner();
/// let empty_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
/// assert_eq!(empty_metadata, EMPTY_VARIANT_METADATA);
/// ```
pub const EMPTY_VARIANT_METADATA: VariantMetadata = VariantMetadata {
    header: VariantMetadataHeader {
        offset_size: OffsetSizeBytes::One,
        is_sorted: false,
        version: CORRECT_VERSION_VALUE,
    },
    bytes: EMPTY_VARIANT_METADATA_BYTES,
    // The dictionary holds no entries.
    dictionary_size: 0,
    // Equal to the length of `EMPTY_VARIANT_METADATA_BYTES` (three bytes).
    first_value_byte: 3,
    // Constructed directly in the already-validated state.
    validated: true,
};

impl<'m> VariantMetadata<'m> {
/// Attempts to interpret `bytes` as a variant metadata instance, with full [validation] of all
/// dictionary entries.
Expand Down
Loading