Skip to content

Commit ff3dc75

Browse files
committed
Add support for arrays
1 parent d0980b1 commit ff3dc75

File tree

2 files changed

+144
-4
lines changed

2 files changed

+144
-4
lines changed

arrow-array/src/ffi.rs

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,8 @@ pub unsafe fn export_array_into_raw(
140140
Ok(())
141141
}
142142

143-
// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144-
// This is set by the Arrow specification
143+
/// Returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144+
/// This is set by the Arrow specification.
145145
fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146146
if let Some(primitive) = data_type.primitive_width() {
147147
return match i {
@@ -180,6 +180,10 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
180180
| (DataType::List(_), 1)
181181
| (DataType::Map(_, _), 1) => i32::BITS as _,
182182
(DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
183+
// List views have two i32 buffers, offsets and sizes
184+
(DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
185+
// Large list views have two i64 buffers, offsets and sizes
186+
(DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
183187
(DataType::List(_), _) | (DataType::Map(_, _), _) => {
184188
return Err(ArrowError::CDataInterface(format!(
185189
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
@@ -351,6 +355,8 @@ impl ImportedArrowArray<'_> {
351355
DataType::List(field)
352356
| DataType::FixedSizeList(field, _)
353357
| DataType::LargeList(field)
358+
| DataType::ListView(field)
359+
| DataType::LargeListView(field)
354360
| DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
355361
DataType::Struct(fields) => {
356362
assert!(fields.len() == self.array.num_children());
@@ -471,6 +477,14 @@ impl ImportedArrowArray<'_> {
471477
debug_assert_eq!(bits % 8, 0);
472478
(length + 1) * (bits / 8)
473479
}
480+
(DataType::ListView(_), 1)
481+
| (DataType::ListView(_), 2)
482+
| (DataType::LargeListView(_), 1)
483+
| (DataType::LargeListView(_), 2) => {
484+
let bits = bit_width(data_type, i)?;
485+
debug_assert_eq!(bits % 8, 0);
486+
length * (bits / 8)
487+
}
474488
(DataType::Utf8, 2) | (DataType::Binary, 2) => {
475489
if self.array.is_empty() {
476490
return Ok(0);
@@ -553,7 +567,7 @@ mod tests_to_then_from_ffi {
553567
use std::collections::HashMap;
554568
use std::mem::ManuallyDrop;
555569

556-
use arrow_buffer::NullBuffer;
570+
use arrow_buffer::{ArrowNativeType, NullBuffer};
557571
use arrow_schema::Field;
558572

559573
use crate::builder::UnionBuilder;
@@ -783,6 +797,71 @@ mod tests_to_then_from_ffi {
783797
test_generic_list::<i64>()
784798
}
785799

800+
fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
801+
// Construct a value array
802+
let value_data = ArrayData::builder(DataType::Int16)
803+
.len(8)
804+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
805+
.build()
806+
.unwrap();
807+
808+
// Construct a buffer for value offsets, for the nested array:
809+
// [[0, 1, 2], [3, 4, 5], [6, 7]]
810+
let value_offsets = [0_usize, 3, 6]
811+
.iter()
812+
.map(|i| Offset::from_usize(*i).unwrap())
813+
.collect::<Buffer>();
814+
815+
let sizes_buffer = [3_usize, 3, 2]
816+
.iter()
817+
.map(|i| Offset::from_usize(*i).unwrap())
818+
.collect::<Buffer>();
819+
820+
// Construct the list view data type and array data from the values, offsets and sizes above
821+
let list_view_dt = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
822+
Field::new_list_field(DataType::Int16, false),
823+
));
824+
825+
let list_data = ArrayData::builder(list_view_dt)
826+
.len(3)
827+
.add_buffer(value_offsets)
828+
.add_buffer(sizes_buffer)
829+
.add_child_data(value_data)
830+
.build()
831+
.unwrap();
832+
833+
let original = GenericListViewArray::<Offset>::from(list_data.clone());
834+
835+
// export it
836+
let (array, schema) = to_ffi(&original.to_data())?;
837+
838+
// (simulate consumer) import it
839+
let data = unsafe { from_ffi(array, &schema) }?;
840+
let array = make_array(data);
841+
842+
// downcast
843+
let array = array
844+
.as_any()
845+
.downcast_ref::<GenericListViewArray<Offset>>()
846+
.unwrap();
847+
848+
assert_eq!(&array.value(0), &original.value(0));
849+
assert_eq!(&array.value(1), &original.value(1));
850+
assert_eq!(&array.value(2), &original.value(2));
851+
852+
Ok(())
853+
}
854+
855+
#[test]
856+
fn test_list_view() -> Result<()> {
857+
test_generic_list_view::<i32>()
858+
}
859+
860+
#[test]
861+
fn test_large_list_view() -> Result<()> {
862+
test_generic_list_view::<i64>()
863+
}
864+
786865
fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
787866
// create an array natively
788867
let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
@@ -1315,6 +1394,7 @@ mod tests_from_ffi {
13151394
use std::ptr::NonNull;
13161395
use std::sync::Arc;
13171396

1397+
use arrow_buffer::NullBuffer;
13181398
#[cfg(not(feature = "force_validate"))]
13191399
use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
13201400
#[cfg(feature = "force_validate")]
@@ -1325,6 +1405,7 @@ mod tests_from_ffi {
13251405
use arrow_schema::{DataType, Field};
13261406

13271407
use super::Result;
1408+
13281409
use crate::builder::GenericByteViewBuilder;
13291410
use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
13301411
use crate::{
@@ -1528,6 +1609,65 @@ mod tests_from_ffi {
15281609
test_round_trip(&data)
15291610
}
15301611

1612+
#[test]
1613+
fn test_list_view() -> Result<()> {
1614+
// Construct a value array
1615+
let value_data = ArrayData::builder(DataType::Int16)
1616+
.len(8)
1617+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1618+
.build()
1619+
.unwrap();
1620+
1621+
// Construct a buffer for value offsets, for the nested array:
1622+
// [[0, 1, 2], [3, 4, 5], [6, 7]]
1623+
let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
1624+
let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);
1625+
1626+
// Construct the list view data type and array data from the values, offsets and sizes above
1627+
let list_view_dt =
1628+
DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, false)));
1629+
1630+
let list_view_data = ArrayData::builder(list_view_dt)
1631+
.len(3)
1632+
.add_buffer(value_offsets)
1633+
.add_buffer(sizes_buffer)
1634+
.add_child_data(value_data)
1635+
.build()
1636+
.unwrap();
1637+
1638+
test_round_trip(&list_view_data)
1639+
}
1640+
1641+
#[test]
1642+
fn test_list_view_with_nulls() -> Result<()> {
1643+
// Construct a value array
1644+
let value_data = ArrayData::builder(DataType::Int16)
1645+
.len(8)
1646+
.add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1647+
.build()
1648+
.unwrap();
1649+
1650+
// Construct a buffer for value offsets, for the nested array:
1651+
// [[0, 1, 2], [3, 4, 5], [6, 7], null]
1652+
let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
1653+
let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);
1654+
1655+
// Construct the list view data type and array data from the values, offsets and sizes above
1656+
let list_view_dt =
1657+
DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, true)));
1658+
1659+
let list_view_data = ArrayData::builder(list_view_dt)
1660+
.len(4)
1661+
.add_buffer(value_offsets)
1662+
.add_buffer(sizes_buffer)
1663+
.add_child_data(value_data)
1664+
.nulls(Some(NullBuffer::from(vec![true, true, true, false])))
1665+
.build()
1666+
.unwrap();
1667+
1668+
test_round_trip(&list_view_data)
1669+
}
1670+
15311671
#[test]
15321672
#[cfg(not(feature = "force_validate"))]
15331673
fn test_empty_string_with_non_zero_offset() -> Result<()> {

arrow-data/src/data.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1783,7 +1783,7 @@ impl DataTypeLayout {
17831783
},
17841784
],
17851785
can_contain_null_mask: true,
1786-
variadic: true,
1786+
variadic: false,
17871787
}
17881788
}
17891789
}

0 commit comments

Comments
 (0)