Skip to content
2 changes: 1 addition & 1 deletion parquet-testing
Submodule parquet-testing updated 276 files
13 changes: 10 additions & 3 deletions parquet-variant-compute/src/variant_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@

//! [`VariantArray`] implementation

use crate::type_conversion::primitive_conversion_single_value;
use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{
Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
use parquet_variant::Uuid;
use parquet_variant::Variant;
use std::any::Any;
use std::sync::Arc;

use crate::type_conversion::primitive_conversion_single_value;

/// An array of Parquet [`Variant`] values
///
/// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
Expand Down Expand Up @@ -556,8 +556,15 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
let value = boolean_array.value(index);
Variant::from(value)
}
DataType::FixedSizeBinary(_) => {
DataType::FixedSizeBinary(binary_len) => {
let array = typed_value.as_fixed_size_binary();
// Try to treat 16-byte FixedSizeBinary as UUID
let value = array.value(index);
if *binary_len == 16 {
if let Ok(uuid) = Uuid::from_slice(value) {
return Variant::from(uuid);
}
}
let value = array.value(index);
Variant::from(value)
}
Expand Down
7 changes: 5 additions & 2 deletions parquet-variant/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
pub use self::list::VariantList;
pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES};
pub use self::object::VariantObject;

// Publicly export types used in the API
pub use half::f16;
pub use uuid::Uuid;

use crate::decoder::{
self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
};
Expand All @@ -28,8 +33,6 @@ use std::ops::Deref;

use arrow_schema::ArrowError;
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
use half::f16;
use uuid::Uuid;

mod decimal;
mod list;
Expand Down
12 changes: 11 additions & 1 deletion parquet-variant/src/variant/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ impl VariantMetadataHeader {
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
#[derive(Debug, Clone, PartialEq)]
pub struct VariantMetadata<'m> {
/// (Only) the bytes that make up this metadata instance.
pub(crate) bytes: &'m [u8],
header: VariantMetadataHeader,
dictionary_size: u32,
Expand Down Expand Up @@ -332,7 +333,7 @@ impl<'m> VariantMetadata<'m> {
self.header.version
}

/// Gets an offset array entry by index.
/// Gets an offset into the dictionary entry by index.
///
/// This offset is an index into the dictionary, at the boundary between string `i-1` and string
/// `i`. See [`Self::get`] to retrieve a specific dictionary entry.
Expand All @@ -342,6 +343,15 @@ impl<'m> VariantMetadata<'m> {
self.header.offset_size.unpack_u32(bytes, i)
}

/// Returns the total size, in bytes, of the metadata.
///
/// Note this value may be smaller than what was passed to [`Self::new`] or
/// [`Self::try_new`] if the input was larger than necessary to encode the
/// metadata dictionary.
pub fn size(&self) -> usize {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed to expose this information because, in the test cases, the variant metadata and data are appended together in a single .bin file.

self.bytes.len()
}

/// Attempts to retrieve a dictionary entry by index, failing if out of bounds or if the
/// underlying bytes are [invalid].
///
Expand Down
5 changes: 5 additions & 0 deletions parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ name = "encryption"
required-features = ["arrow"]
path = "./tests/encryption/mod.rs"

[[test]]
name = "variant_integration"
required-features = ["arrow", "variant_experimental", "serde"]
path = "./tests/variant_integration.rs"

[[bin]]
name = "parquet-read"
required-features = ["cli"]
Expand Down
Loading
Loading