Skip to content

Commit 8e4b034

Browse files
committed
[ADD] add support for path operations on different data types
1 parent de9d386 commit 8e4b034

File tree

3 files changed

+418
-88
lines changed

3 files changed

+418
-88
lines changed

parquet-variant-compute/src/field_operations.rs

Lines changed: 82 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -346,13 +346,38 @@ impl FieldOperations {
346346
Ok(Some(current_value))
347347
}
348348

349+
/// Get the value at a specific path and return its type and data
350+
pub fn get_path_with_type(
351+
metadata_bytes: &[u8],
352+
value_bytes: &[u8],
353+
path: &VariantPath,
354+
) -> Result<Option<(crate::variant_parser::VariantType, Vec<u8>)>, ArrowError> {
355+
if let Some(value_bytes) = Self::get_path_bytes(metadata_bytes, value_bytes, path)? {
356+
if !value_bytes.is_empty() {
357+
let variant_type = VariantParser::parse_variant_header(value_bytes[0])?;
358+
return Ok(Some((variant_type, value_bytes)));
359+
}
360+
}
361+
Ok(None)
362+
}
363+
349364
/// Get field bytes from an object at the byte level.
///
/// As of this change the function no longer forwards unconditionally to
/// `extract_field_bytes`: it first classifies the value by passing its
/// first byte to `VariantParser::parse_variant_header` and only delegates
/// when the result is `VariantType::Object`.
///
/// Returns `Ok(None)` when `value_bytes` is empty or when the header does
/// not describe an object; otherwise returns whatever
/// `extract_field_bytes` produces for `field_name`. An error from
/// `parse_variant_header` or `extract_field_bytes` is propagated.
350365
fn get_field_bytes(
351366
metadata_bytes: &[u8],
352367
value_bytes: &[u8],
353368
field_name: &str,
354369
) -> Result<Option<Vec<u8>>, ArrowError> {
355-
Self::extract_field_bytes(metadata_bytes, value_bytes, field_name)
370+
// Use the general dispatch parser to ensure we're dealing with an object
// before attempting a field lookup; the emptiness check guards the
// `value_bytes[0]` index below.
371+
if !value_bytes.is_empty() {
372+
match VariantParser::parse_variant_header(value_bytes[0])? {
373+
crate::variant_parser::VariantType::Object(_) => {
374+
Self::extract_field_bytes(metadata_bytes, value_bytes, field_name)
375+
}
376+
_ => Ok(None), // Not an object, can't extract fields
377+
}
378+
} else {
379+
Ok(None)
380+
}
356381
}
357382

358383
/// Get array element bytes at the byte level
@@ -361,72 +386,67 @@ impl FieldOperations {
361386
value_bytes: &[u8],
362387
index: usize,
363388
) -> Result<Option<Vec<u8>>, ArrowError> {
364-
// Check if this is an array
389+
// Use the general dispatch parser to ensure we're dealing with an array
365390
if value_bytes.is_empty() {
366391
return Ok(None);
367392
}
368393

369-
let header_byte = value_bytes[0];
370-
let basic_type = VariantParser::get_basic_type(header_byte);
371-
372-
// Only handle arrays (basic_type == 3 according to variant spec)
373-
if basic_type != 3 {
374-
return Ok(None);
375-
}
376-
377-
// Parse array header to get element count and offsets
378-
let array_header = VariantParser::parse_array_header(header_byte)?;
379-
let num_elements = VariantParser::unpack_int(
380-
&value_bytes[1..],
381-
array_header.num_elements_size
382-
)?;
383-
384-
// Check bounds
385-
if index >= num_elements {
386-
return Ok(None);
387-
}
388-
389-
// Calculate array offsets
390-
let offsets = VariantParser::calculate_array_offsets(&array_header, num_elements);
391-
392-
// Get element offset
393-
let element_offset_start = offsets.element_offsets_start + index * array_header.element_offset_size;
394-
let element_offset_end = element_offset_start + array_header.element_offset_size;
395-
396-
if element_offset_end > value_bytes.len() {
397-
return Err(ArrowError::InvalidArgumentError(
398-
"Element offset exceeds value buffer".to_string()
399-
));
400-
}
401-
402-
let element_offset = VariantParser::unpack_int(
403-
&value_bytes[element_offset_start..element_offset_end],
404-
array_header.element_offset_size
405-
)?;
406-
407-
// Get next element offset (or end of data)
408-
let next_offset = if index + 1 < num_elements {
409-
let next_element_offset_start = offsets.element_offsets_start + (index + 1) * array_header.element_offset_size;
410-
let next_element_offset_end = next_element_offset_start + array_header.element_offset_size;
411-
VariantParser::unpack_int(
412-
&value_bytes[next_element_offset_start..next_element_offset_end],
413-
array_header.element_offset_size
414-
)?
415-
} else {
416-
value_bytes.len()
417-
};
418-
419-
// Extract element bytes
420-
let element_start = offsets.elements_start + element_offset;
421-
let element_end = offsets.elements_start + next_offset;
422-
423-
if element_end > value_bytes.len() {
424-
return Err(ArrowError::InvalidArgumentError(
425-
"Element data exceeds value buffer".to_string()
426-
));
394+
match VariantParser::parse_variant_header(value_bytes[0])? {
395+
crate::variant_parser::VariantType::Array(array_header) => {
396+
let num_elements = VariantParser::unpack_int(
397+
&value_bytes[1..],
398+
array_header.num_elements_size
399+
)?;
400+
401+
// Check bounds
402+
if index >= num_elements {
403+
return Ok(None);
404+
}
405+
406+
// Calculate array offsets
407+
let offsets = VariantParser::calculate_array_offsets(&array_header, num_elements);
408+
409+
// Get element offset
410+
let element_offset_start = offsets.element_offsets_start + index * array_header.element_offset_size;
411+
let element_offset_end = element_offset_start + array_header.element_offset_size;
412+
413+
if element_offset_end > value_bytes.len() {
414+
return Err(ArrowError::InvalidArgumentError(
415+
"Element offset exceeds value buffer".to_string()
416+
));
417+
}
418+
419+
let element_offset = VariantParser::unpack_int(
420+
&value_bytes[element_offset_start..element_offset_end],
421+
array_header.element_offset_size
422+
)?;
423+
424+
// Get next element offset (or end of data)
425+
let next_offset = if index + 1 < num_elements {
426+
let next_element_offset_start = offsets.element_offsets_start + (index + 1) * array_header.element_offset_size;
427+
let next_element_offset_end = next_element_offset_start + array_header.element_offset_size;
428+
VariantParser::unpack_int(
429+
&value_bytes[next_element_offset_start..next_element_offset_end],
430+
array_header.element_offset_size
431+
)?
432+
} else {
433+
value_bytes.len()
434+
};
435+
436+
// Extract element bytes
437+
let element_start = offsets.elements_start + element_offset;
438+
let element_end = offsets.elements_start + next_offset;
439+
440+
if element_end > value_bytes.len() {
441+
return Err(ArrowError::InvalidArgumentError(
442+
"Element data exceeds value buffer".to_string()
443+
));
444+
}
445+
446+
Ok(Some(value_bytes[element_start..element_end].to_vec()))
447+
}
448+
_ => Ok(None), // Not an array, can't extract elements
427449
}
428-
429-
Ok(Some(value_bytes[element_start..element_end].to_vec()))
430450
}
431451
}
432452

parquet-variant-compute/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ pub mod to_json;
2727
pub use variant_array::VariantArray;
2828
pub use variant_array_builder::VariantArrayBuilder;
2929
pub use field_operations::{VariantPath, VariantPathElement};
30+
pub use variant_parser::{VariantType, PrimitiveType, ShortStringHeader, ObjectHeader, ArrayHeader};
3031
pub use from_json::batch_json_string_to_variant;
3132
pub use to_json::batch_variant_to_json_string;

0 commit comments

Comments
 (0)