diff --git a/rust/arrow/examples/dynamic_types.rs b/rust/arrow/examples/dynamic_types.rs index 95e0a2831e3..58e41560e23 100644 --- a/rust/arrow/examples/dynamic_types.rs +++ b/rust/arrow/examples/dynamic_types.rs @@ -95,7 +95,7 @@ fn process(batch: &RecordBatch) { Arc::new(projected_schema), vec![ id.clone(), // NOTE: this is cloning the Arc not the array data - Arc::new(Float64Array::from(nested_c.data())), + Arc::new(Float64Array::from(nested_c.data().clone())), ], ); } diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index 23ac173fadb..63d41dffd82 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -19,7 +19,6 @@ use std::fmt; use std::sync::Arc; use std::{any::Any, convert::TryFrom}; -use super::ArrayDataRef; use super::*; use crate::array::equal_json::JsonEqual; use crate::buffer::{Buffer, MutableBuffer}; @@ -57,11 +56,13 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual { /// ``` fn as_any(&self) -> &Any; - /// Returns a reference-counted pointer to the underlying data of this array. - fn data(&self) -> ArrayDataRef; + /// Returns a reference to the underlying data of this array. + fn data(&self) -> &ArrayData; - /// Returns a borrowed & reference-counted pointer to the underlying data of this array. - fn data_ref(&self) -> &ArrayDataRef; + /// Returns a reference to the underlying data of this array (equivalent to `data`). + fn data_ref(&self) -> &ArrayData { + self.data() + } /// Returns a reference to the [`DataType`](crate::datatypes::DataType) of this array. /// @@ -93,7 +94,7 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual { /// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4])); /// ``` fn slice(&self, offset: usize, length: usize) -> ArrayRef { - make_array(Arc::new(self.data_ref().as_ref().slice(offset, length))) + make_array(self.data_ref().slice(offset, length)) } /// Returns the length (i.e., number of elements) of this array. 
@@ -206,7 +207,7 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual { fn to_raw( &self, ) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> { - let data = self.data().as_ref().clone(); + let data = self.data().clone(); let array = ffi::ArrowArray::try_from(data)?; Ok(ffi::ArrowArray::into_raw(array)) } @@ -217,7 +218,7 @@ pub type ArrayRef = Arc; /// Constructs an array using the input `data`. /// Returns a reference-counted `Array` instance. -pub fn make_array(data: ArrayDataRef) -> ArrayRef { +pub fn make_array(data: ArrayData) -> ArrayRef { match data.data_type() { DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef, DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef, @@ -325,7 +326,7 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef { /// Creates a new empty array pub fn new_empty_array(data_type: &DataType) -> ArrayRef { let data = ArrayData::new_empty(data_type); - make_array(Arc::new(data)) + make_array(data) } /// Creates a new array of `data_type` of length `length` filled entirely of `NULL` values pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { @@ -334,7 +335,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { DataType::Null => Arc::new(NullArray::new(length)), DataType::Boolean => { let null_buf: Buffer = MutableBuffer::new_null(length).into(); - make_array(Arc::new(ArrayData::new( + make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -342,7 +343,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { 0, vec![null_buf], vec![], - ))) + )) } DataType::Int8 => new_null_sized_array::(data_type, length), DataType::UInt8 => new_null_sized_array::(data_type, length), @@ -371,7 +372,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { new_null_sized_array::(data_type, length) } }, - DataType::FixedSizeBinary(value_len) => make_array(Arc::new(ArrayData::new( + 
DataType::FixedSizeBinary(value_len) => make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -379,7 +380,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { 0, vec![Buffer::from(vec![0u8; *value_len as usize * length])], vec![], - ))), + )), DataType::Binary | DataType::Utf8 => { new_null_binary_array::(data_type, length) } @@ -392,21 +393,20 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { DataType::LargeList(field) => { new_null_list_array::(data_type, field.data_type(), length) } - DataType::FixedSizeList(field, value_len) => { - make_array(Arc::new(ArrayData::new( - data_type.clone(), - length, - Some(length), - Some(MutableBuffer::new_null(length).into()), - 0, - vec![], - vec![ - new_null_array(field.data_type(), *value_len as usize * length) - .data(), - ], - ))) - } - DataType::Struct(fields) => make_array(Arc::new(ArrayData::new( + DataType::FixedSizeList(field, value_len) => make_array(ArrayData::new( + data_type.clone(), + length, + Some(length), + Some(MutableBuffer::new_null(length).into()), + 0, + vec![], + vec![ + new_null_array(field.data_type(), *value_len as usize * length) + .data() + .clone(), + ], + )), + DataType::Struct(fields) => make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -415,22 +415,22 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { vec![], fields .iter() - .map(|field| Arc::new(ArrayData::new_empty(field.data_type()))) + .map(|field| ArrayData::new_empty(field.data_type())) .collect(), - ))), + )), DataType::Union(_) => { unimplemented!("Creating null Union array not yet supported") } DataType::Dictionary(_, value) => { - make_array(Arc::new(ArrayData::new( + make_array(ArrayData::new( data_type.clone(), length, Some(length), Some(MutableBuffer::new_null(length).into()), 0, vec![MutableBuffer::new(0).into()], // values are empty - vec![new_empty_array(value.as_ref()).data()], - ))) + 
vec![new_empty_array(value.as_ref()).data().clone()], + )) } DataType::Decimal(_, _) => { unimplemented!("Creating null Decimal array not yet supported") @@ -444,7 +444,7 @@ fn new_null_list_array( child_data_type: &DataType, length: usize, ) -> ArrayRef { - make_array(Arc::new(ArrayData::new( + make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -453,8 +453,8 @@ fn new_null_list_array( vec![Buffer::from( vec![OffsetSize::zero(); length + 1].to_byte_slice(), )], - vec![Arc::new(ArrayData::new_empty(child_data_type))], - ))) + vec![ArrayData::new_empty(child_data_type)], + )) } #[inline] @@ -462,7 +462,7 @@ fn new_null_binary_array( data_type: &DataType, length: usize, ) -> ArrayRef { - make_array(Arc::new(ArrayData::new( + make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -473,7 +473,7 @@ fn new_null_binary_array( MutableBuffer::new(0).into(), ], vec![], - ))) + )) } #[inline] @@ -481,7 +481,7 @@ fn new_null_sized_array( data_type: &DataType, length: usize, ) -> ArrayRef { - make_array(Arc::new(ArrayData::new( + make_array(ArrayData::new( data_type.clone(), length, Some(length), @@ -489,7 +489,7 @@ fn new_null_sized_array( 0, vec![Buffer::from(vec![0u8; length * T::get_byte_width()])], vec![], - ))) + )) } /// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface @@ -501,7 +501,7 @@ pub unsafe fn make_array_from_raw( schema: *const ffi::FFI_ArrowSchema, ) -> Result { let array = ffi::ArrowArray::try_from_raw(array, schema)?; - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; Ok(make_array(data)) } // Helper function for printing potentially long arrays. 
diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index efd07f83d17..b5fdd616277 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -15,16 +15,13 @@ // specific language governing permissions and limitations // under the License. +use std::convert::{From, TryInto}; use std::fmt; use std::mem; use std::{any::Any, iter::FromIterator}; -use std::{ - convert::{From, TryInto}, - sync::Arc, -}; use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef, + array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait, }; use crate::buffer::Buffer; @@ -47,7 +44,7 @@ impl BinaryOffsetSizeTrait for i64 { } pub struct GenericBinaryArray { - data: ArrayDataRef, + data: ArrayData, value_offsets: RawPtrBox, value_data: RawPtrBox, } @@ -199,11 +196,7 @@ impl Array for GenericBinaryArray self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -218,10 +211,10 @@ impl Array for GenericBinaryArray } } -impl From +impl From for GenericBinaryArray { - fn from(data: ArrayDataRef) -> Self { + fn from(data: ArrayData) -> Self { assert_eq!( data.data_type(), &::DATA_TYPE, @@ -324,7 +317,7 @@ impl From> for GenericBinaryArray< /// A type of `FixedSizeListArray` whose elements are binaries. pub struct FixedSizeBinaryArray { - data: ArrayDataRef, + data: ArrayData, value_data: RawPtrBox, length: i32, } @@ -453,7 +446,7 @@ impl FixedSizeBinaryArray { vec![buffer.into()], vec![], ); - Ok(FixedSizeBinaryArray::from(Arc::new(array_data))) + Ok(FixedSizeBinaryArray::from(array_data)) } /// Create an array from an iterable argument of byte slices. 
@@ -521,8 +514,8 @@ impl FixedSizeBinaryArray { } } -impl From for FixedSizeBinaryArray { - fn from(data: ArrayDataRef) -> Self { +impl From for FixedSizeBinaryArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 1, @@ -583,11 +576,7 @@ impl Array for FixedSizeBinaryArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -604,7 +593,7 @@ impl Array for FixedSizeBinaryArray { /// A type of `DecimalArray` whose elements are binaries. pub struct DecimalArray { - data: ArrayDataRef, + data: ArrayData, value_data: RawPtrBox, precision: usize, scale: usize, @@ -692,8 +681,8 @@ impl DecimalArray { } } -impl From for DecimalArray { - fn from(data: ArrayDataRef) -> Self { +impl From for DecimalArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 1, @@ -730,11 +719,7 @@ impl Array for DecimalArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs index e4b83e69794..2512a956db4 100644 --- a/rust/arrow/src/array/array_boolean.rs +++ b/rust/arrow/src/array/array_boolean.rs @@ -16,10 +16,10 @@ // under the License. use std::borrow::Borrow; +use std::convert::From; use std::iter::{FromIterator, IntoIterator}; use std::mem; use std::{any::Any, fmt}; -use std::{convert::From, sync::Arc}; use super::*; use super::{array::print_long_array, raw_pointer::RawPtrBox}; @@ -28,7 +28,7 @@ use crate::util::bit_util; /// Array of bools pub struct BooleanArray { - data: ArrayDataRef, + data: ArrayData, /// Pointer to the value array. The lifetime of this must be <= to the value buffer /// stored in `data`, so it's safe to store. 
raw_values: RawPtrBox, @@ -81,11 +81,7 @@ impl Array for BooleanArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -125,8 +121,8 @@ impl From>> for BooleanArray { } } -impl From for BooleanArray { - fn from(data: ArrayDataRef) -> Self { +impl From for BooleanArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 1, @@ -187,7 +183,7 @@ impl>> FromIterator for BooleanArray { vec![val_buf.into()], vec![], ); - BooleanArray::from(Arc::new(data)) + BooleanArray::from(data) } } diff --git a/rust/arrow/src/array/array_dictionary.rs b/rust/arrow/src/array/array_dictionary.rs index 312e465f50a..5948658157e 100644 --- a/rust/arrow/src/array/array_dictionary.rs +++ b/rust/arrow/src/array/array_dictionary.rs @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; use std::fmt; use std::iter::IntoIterator; use std::mem; -use std::{any::Any, sync::Arc}; use std::{convert::From, iter::FromIterator}; use super::{ - make_array, Array, ArrayData, ArrayDataRef, ArrayRef, PrimitiveArray, - PrimitiveBuilder, StringArray, StringBuilder, StringDictionaryBuilder, + make_array, Array, ArrayData, ArrayRef, PrimitiveArray, PrimitiveBuilder, + StringArray, StringBuilder, StringDictionaryBuilder, }; use crate::datatypes::ArrowNativeType; use crate::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType, DataType}; @@ -54,7 +54,7 @@ use crate::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType, DataType}; pub struct DictionaryArray { /// Data of this dictionary. Note that this is _not_ compatible with the C Data interface, /// as, in the current implementation, `values` below are the first child of this struct. - data: ArrayDataRef, + data: ArrayData, /// The keys of this dictionary. These are constructed from the buffer and null bitmap /// of `data`. 
@@ -87,7 +87,7 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray { data.buffers().to_vec(), vec![], ); - PrimitiveArray::::from(Arc::new(keys_data)) + PrimitiveArray::::from(keys_data) } /// Returns the lookup key by doing reverse dictionary lookup @@ -128,8 +128,8 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray { } /// Constructs a `DictionaryArray` from an array data reference. -impl From for DictionaryArray { - fn from(data: ArrayDataRef) -> Self { +impl From for DictionaryArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 1, @@ -146,7 +146,7 @@ impl From for DictionaryArray { panic!("DictionaryArray's data type must match.") }; // create a zero-copy of the keys' data - let keys = PrimitiveArray::::from(Arc::new(ArrayData::new( + let keys = PrimitiveArray::::from(ArrayData::new( T::DATA_TYPE, data.len(), Some(data.null_count()), @@ -154,7 +154,7 @@ impl From for DictionaryArray { data.offset(), data.buffers().to_vec(), vec![], - ))); + )); let values = make_array(data.child_data()[0].clone()); Self { data, @@ -221,11 +221,7 @@ impl Array for DictionaryArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -292,7 +288,7 @@ mod tests { let dict_array = Int16DictionaryArray::from(dict_data); let values = dict_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int8, dict_array.value_type()); assert_eq!(3, dict_array.len()); @@ -311,7 +307,7 @@ mod tests { let dict_array = Int16DictionaryArray::from(dict_data); let values = dict_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int8, dict_array.value_type()); assert_eq!(2, dict_array.len()); assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4])); diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs 
index a94dc2643b9..0e334631adf 100644 --- a/rust/arrow/src/array/array_list.rs +++ b/rust/arrow/src/array/array_list.rs @@ -23,7 +23,7 @@ use num::Num; use super::{ array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayData, - ArrayDataRef, ArrayRef, BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray, + ArrayRef, BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray, }; use crate::{ buffer::MutableBuffer, @@ -51,7 +51,7 @@ impl OffsetSizeTrait for i64 { } pub struct GenericListArray { - data: ArrayDataRef, + data: ArrayData, values: ArrayRef, value_offsets: RawPtrBox, } @@ -183,15 +183,15 @@ impl GenericListArray { let data = ArrayData::builder(data_type) .len(null_buf.len()) .add_buffer(offsets.into()) - .add_child_data(values.data()) + .add_child_data(values.data().clone()) .null_bit_buffer(null_buf.into()) .build(); Self::from(data) } } -impl From for GenericListArray { - fn from(data: ArrayDataRef) -> Self { +impl From for GenericListArray { + fn from(data: ArrayData) -> Self { Self::try_new_from_array_data(data).expect( "Expected infallable creation of GenericListArray from ArrayDataRef failed", ) @@ -199,7 +199,7 @@ impl From for GenericListArray GenericListArray { - fn try_new_from_array_data(data: ArrayDataRef) -> Result { + fn try_new_from_array_data(data: ArrayData) -> Result { if data.buffers().len() != 1 { return Err(ArrowError::InvalidArgumentError( format!("ListArray data should contain a single buffer only (value offsets), had {}", @@ -255,11 +255,7 @@ impl Array for GenericListArray ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -297,7 +293,7 @@ pub type LargeListArray = GenericListArray; /// A list array where each element is a fixed-size sequence of values with the same /// type whose maximum length is represented by a i32. 
pub struct FixedSizeListArray { - data: ArrayDataRef, + data: ArrayData, values: ArrayRef, length: i32, } @@ -341,8 +337,8 @@ impl FixedSizeListArray { } } -impl From for FixedSizeListArray { - fn from(data: ArrayDataRef) -> Self { +impl From for FixedSizeListArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 0, @@ -385,11 +381,7 @@ impl Array for FixedSizeListArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -488,7 +480,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -526,7 +518,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -574,7 +566,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -612,7 +604,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -657,7 +649,7 @@ mod tests { let list_array = FixedSizeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, 
values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -686,7 +678,7 @@ mod tests { let list_array = FixedSizeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(3, list_array.len()); assert_eq!(0, list_array.null_count()); @@ -757,7 +749,7 @@ mod tests { let list_array = ListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(9, list_array.len()); assert_eq!(4, list_array.null_count()); @@ -819,7 +811,7 @@ mod tests { let list_array = LargeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(9, list_array.len()); assert_eq!(4, list_array.null_count()); @@ -916,7 +908,7 @@ mod tests { let list_array = FixedSizeListArray::from(list_data); let values = list_array.values(); - assert_eq!(value_data, values.data()); + assert_eq!(&value_data, values.data()); assert_eq!(DataType::Int32, list_array.value_type()); assert_eq!(5, list_array.len()); assert_eq!(2, list_array.null_count()); diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 39a9dc4c4e6..588e3bed6d5 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -21,7 +21,6 @@ use std::convert::From; use std::fmt; use std::iter::{FromIterator, IntoIterator}; use std::mem; -use std::sync::Arc; use chrono::prelude::*; @@ -49,7 +48,7 @@ pub struct PrimitiveArray { /// Underlying ArrayData /// # Safety /// must have exactly one buffer, 
aligned to type T - data: ArrayDataRef, + data: ArrayData, /// Pointer to the value array. The lifetime of this must be <= to the value buffer /// stored in `data`, so it's safe to store. /// # Safety @@ -111,7 +110,7 @@ impl PrimitiveArray { vec![val_buf], vec![], ); - PrimitiveArray::from(Arc::new(data)) + PrimitiveArray::from(data) } } @@ -120,11 +119,7 @@ impl Array for PrimitiveArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -135,7 +130,7 @@ impl Array for PrimitiveArray { /// Returns the total number of bytes of memory occupied physically by this [PrimitiveArray]. fn get_array_memory_size(&self) -> usize { - self.data.get_array_memory_size() + mem::size_of_val(self) + self.data.get_array_memory_size() + mem::size_of::>() } } @@ -298,7 +293,7 @@ impl::Native vec![buffer], vec![], ); - PrimitiveArray::from(Arc::new(data)) + PrimitiveArray::from(data) } } @@ -321,7 +316,7 @@ impl PrimitiveArray { let data = ArrayData::new(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]); - PrimitiveArray::from(Arc::new(data)) + PrimitiveArray::from(data) } } @@ -420,8 +415,8 @@ impl PrimitiveArray { } /// Constructs a `PrimitiveArray` from an array data reference. -impl From for PrimitiveArray { - fn from(data: ArrayDataRef) -> Self { +impl From for PrimitiveArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.buffers().len(), 1, @@ -458,14 +453,9 @@ mod tests { assert!(arr.is_valid(i)); assert_eq!(i as i32, arr.value(i)); } - assert_eq!(&[0, 1, 2, 3, 4], arr.values()); assert_eq!(64, arr.get_buffer_memory_size()); - let internals_of_primitive_array = 8 + 72; // RawPtrBox & Arc combined. 
- assert_eq!( - arr.get_buffer_memory_size() + internals_of_primitive_array, - arr.get_array_memory_size() - ); + assert_eq!(136, arr.get_array_memory_size()); } #[test] @@ -487,11 +477,7 @@ mod tests { } assert_eq!(128, arr.get_buffer_memory_size()); - let internals_of_primitive_array = 8 + 72 + 16; // RawPtrBox & Arc and it's null_bitmap combined. - assert_eq!( - arr.get_buffer_memory_size() + internals_of_primitive_array, - arr.get_array_memory_size() - ); + assert_eq!(216, arr.get_array_memory_size()); } #[test] diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index bc6f2743072..0519148e6f4 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -21,8 +21,8 @@ use std::mem; use std::{any::Any, iter::FromIterator}; use super::{ - array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef, - GenericListArray, GenericStringIter, OffsetSizeTrait, + array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericListArray, + GenericStringIter, OffsetSizeTrait, }; use crate::buffer::Buffer; use crate::util::bit_util; @@ -44,7 +44,7 @@ impl StringOffsetSizeTrait for i64 { /// Generic struct for \[Large\]StringArray pub struct GenericStringArray { - data: ArrayDataRef, + data: ArrayData, value_offsets: RawPtrBox, value_data: RawPtrBox, } @@ -133,15 +133,15 @@ impl GenericStringArray { (i.e. List>)." ); assert_eq!( - v.data_ref().child_data()[0].data_type(), + v.data().child_data()[0].data_type(), &DataType::UInt8, "StringArray can only be created from List arrays, mismatched data types." 
); let mut builder = ArrayData::builder(OffsetSize::DATA_TYPE) .len(v.len()) - .add_buffer(v.data_ref().buffers()[0].clone()) - .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); + .add_buffer(v.data().buffers()[0].clone()) + .add_buffer(v.data().child_data()[0].buffers()[0].clone()); if let Some(bitmap) = v.data().null_bitmap() { builder = builder.null_bit_buffer(bitmap.bits.clone()) } @@ -283,11 +283,7 @@ impl Array for GenericStringArray self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -302,10 +298,10 @@ impl Array for GenericStringArray } } -impl From +impl From for GenericStringArray { - fn from(data: ArrayDataRef) -> Self { + fn from(data: ArrayData) -> Self { assert_eq!( data.data_type(), &::DATA_TYPE, @@ -389,7 +385,7 @@ mod tests { #[should_panic(expected = "[Large]StringArray expects Datatype::[Large]Utf8")] fn test_string_array_from_int() { let array = LargeStringArray::from(vec!["a", "b"]); - StringArray::from(array.data()); + StringArray::from(array.data().clone()); } #[test] diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index 55f5f775e5b..59ee527e5f8 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; use std::convert::{From, TryFrom}; use std::fmt; use std::iter::IntoIterator; use std::mem; -use std::{any::Any, sync::Arc}; -use super::{make_array, Array, ArrayData, ArrayDataRef, ArrayRef}; +use super::{make_array, Array, ArrayData, ArrayRef}; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; use crate::{ @@ -32,7 +32,7 @@ use crate::{ /// A nested array type where each child (called *field*) is represented by a separate /// array. 
pub struct StructArray { - data: ArrayDataRef, + data: ArrayData, pub(crate) boxed_fields: Vec, } @@ -81,12 +81,12 @@ impl StructArray { } } -impl From for StructArray { - fn from(data: ArrayDataRef) -> Self { +impl From for StructArray { + fn from(data: ArrayData) -> Self { let mut boxed_fields = vec![]; for cd in data.child_data() { let child_data = if data.offset() != 0 || data.len() != cd.len() { - Arc::new(cd.slice(data.offset(), data.len())) + cd.slice(data.offset(), data.len()) } else { cd.clone() }; @@ -170,11 +170,7 @@ impl Array for StructArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -214,7 +210,7 @@ impl From> for StructArray { } let data = ArrayData::builder(DataType::Struct(field_types)) - .child_data(field_values.into_iter().map(|a| a.data()).collect()) + .child_data(field_values.into_iter().map(|a| a.data().clone()).collect()) .len(length) .build(); Self::from(data) @@ -261,7 +257,7 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray { let data = ArrayData::builder(DataType::Struct(field_types)) .null_bit_buffer(pair.1) - .child_data(field_values.into_iter().map(|a| a.data()).collect()) + .child_data(field_values.into_iter().map(|a| a.data().clone()).collect()) .len(length) .build(); Self::from(data) @@ -286,8 +282,10 @@ mod tests { #[test] fn test_struct_array_builder() { - let boolean_data = BooleanArray::from(vec![false, false, true, true]).data(); - let int_data = Int64Array::from(vec![42, 28, 19, 31]).data(); + let array = BooleanArray::from(vec![false, false, true, true]); + let boolean_data = array.data(); + let array = Int64Array::from(vec![42, 28, 19, 31]); + let int_data = array.data(); let fields = vec![ Field::new("a", DataType::Boolean, false), @@ -364,7 +362,7 @@ mod tests { .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice())) .build(); - assert_eq!(expected_string_data, arr.column(0).data()); + 
assert_eq!(&expected_string_data, arr.column(0).data()); // TODO: implement equality for ArrayData assert_eq!(expected_int_data.len(), arr.column(1).data().len()); @@ -459,8 +457,8 @@ mod tests { assert!(struct_array.is_valid(2)); assert!(struct_array.is_null(3)); assert!(struct_array.is_valid(4)); - assert_eq!(boolean_data, struct_array.column(0).data()); - assert_eq!(int_data, struct_array.column(1).data()); + assert_eq!(&boolean_data, struct_array.column(0).data()); + assert_eq!(&int_data, struct_array.column(1).data()); let c0 = struct_array.column(0); let c0 = c0.as_any().downcast_ref::().unwrap(); diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index cc5598f8da2..083d5bba15b 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -73,9 +73,7 @@ //! # Ok(()) //! # } //! ``` -use crate::array::{ - data::count_nulls, make_array, Array, ArrayData, ArrayDataRef, ArrayRef, -}; +use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef}; use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -87,7 +85,7 @@ use std::mem::size_of; /// An Array that can represent slots of varying types. 
pub struct UnionArray { - data: ArrayDataRef, + data: ArrayData, boxed_fields: Vec, } @@ -127,7 +125,7 @@ impl UnionArray { let len = type_ids.len(); let mut builder = ArrayData::builder(DataType::Union(field_types)) .add_buffer(type_ids) - .child_data(field_values.into_iter().map(|a| a.data()).collect()) + .child_data(field_values.into_iter().map(|a| a.data().clone()).collect()) .len(len); if let Some(bitmap) = bitmap_data { builder = builder.null_bit_buffer(bitmap) @@ -260,8 +258,8 @@ impl UnionArray { } } -impl From for UnionArray { - fn from(data: ArrayDataRef) -> Self { +impl From for UnionArray { + fn from(data: ArrayData) -> Self { let mut boxed_fields = vec![]; for cd in data.child_data() { boxed_fields.push(make_array(cd.clone())); @@ -275,11 +273,7 @@ impl Array for UnionArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -403,16 +397,6 @@ mod tests { let value = slot.value(0); assert_eq!(expected_value, &value); } - - assert_eq!( - 4 * 8 * 4 * mem::size_of::(), - union.get_buffer_memory_size() - ); - let internals_of_union_array = (8 + 72) + (union.boxed_fields.len() * 144); // Arc & Vec combined. 
- assert_eq!( - union.get_buffer_memory_size() + internals_of_union_array, - union.get_array_memory_size() - ); } #[test] diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 1ac679ddbfc..c77577acc5f 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -671,7 +671,7 @@ impl PrimitiveBuilder { if null_count > 0 { builder = builder.null_bit_buffer(null_bit_buffer.unwrap()); } - builder = builder.add_child_data(values.data()); + builder = builder.add_child_data(values.data().clone()); DictionaryArray::::from(builder.build()) } @@ -802,7 +802,7 @@ where let data = ArrayData::builder(data_type) .len(len) .add_buffer(offset_buffer) - .add_child_data(values_data) + .add_child_data(values_data.clone()) .null_bit_buffer(null_bit_buffer) .build(); @@ -931,7 +931,7 @@ where self.list_len, )) .len(len) - .add_child_data(values_data) + .add_child_data(values_data.clone()) .null_bit_buffer(null_bit_buffer) .build(); @@ -1481,7 +1481,7 @@ impl StructBuilder { let mut child_data = Vec::with_capacity(self.field_builders.len()); for f in &mut self.field_builders { let arr = f.finish(); - child_data.push(arr.data()); + child_data.push(arr.data().clone()); } let null_bit_buffer = self.bitmap_builder.finish(); @@ -2883,7 +2883,7 @@ mod tests { .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4])) .build(); - assert_eq!(expected_string_data, arr.column(0).data()); + assert_eq!(&expected_string_data, arr.column(0).data()); // TODO: implement equality for ArrayData assert_eq!(expected_int_data.len(), arr.column(1).data().len()); diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index 0a10e9f462a..7ae3858e35c 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -229,7 +229,7 @@ pub struct ArrayData { /// The child(ren) of this array. Only non-empty for nested types, currently /// `ListArray` and `StructArray`. - child_data: Vec, + child_data: Vec, /// The null bitmap. 
A `None` value for this indicates all values are non-null in /// this array. @@ -246,7 +246,7 @@ impl ArrayData { null_bit_buffer: Option, offset: usize, buffers: Vec, - child_data: Vec, + child_data: Vec, ) -> Self { let null_count = match null_count { None => count_nulls(null_bit_buffer.as_ref(), offset, len), @@ -282,7 +282,7 @@ impl ArrayData { } /// Returns a slice of children data arrays - pub fn child_data(&self) -> &[ArrayDataRef] { + pub fn child_data(&self) -> &[ArrayData] { &self.child_data[..] } @@ -444,21 +444,21 @@ impl ArrayData { | DataType::FixedSizeBinary(_) | DataType::Decimal(_, _) => vec![], DataType::List(field) => { - vec![Arc::new(Self::new_empty(field.data_type()))] + vec![Self::new_empty(field.data_type())] } DataType::FixedSizeList(field, _) => { - vec![Arc::new(Self::new_empty(field.data_type()))] + vec![Self::new_empty(field.data_type())] } DataType::LargeList(field) => { - vec![Arc::new(Self::new_empty(field.data_type()))] + vec![Self::new_empty(field.data_type())] } DataType::Struct(fields) => fields .iter() - .map(|field| Arc::new(Self::new_empty(field.data_type()))) + .map(|field| Self::new_empty(field.data_type())) .collect(), DataType::Union(_) => unimplemented!(), DataType::Dictionary(_, data_type) => { - vec![Arc::new(Self::new_empty(data_type))] + vec![Self::new_empty(data_type)] } DataType::Float16 => unreachable!(), }; @@ -482,7 +482,7 @@ pub struct ArrayDataBuilder { null_bit_buffer: Option, offset: usize, buffers: Vec, - child_data: Vec, + child_data: Vec, } impl ArrayDataBuilder { @@ -526,18 +526,18 @@ impl ArrayDataBuilder { self } - pub fn child_data(mut self, v: Vec) -> Self { + pub fn child_data(mut self, v: Vec) -> Self { self.child_data = v; self } - pub fn add_child_data(mut self, r: ArrayDataRef) -> Self { + pub fn add_child_data(mut self, r: ArrayData) -> Self { self.child_data.push(r); self } - pub fn build(self) -> ArrayDataRef { - let data = ArrayData::new( + pub fn build(self) -> ArrayData { + ArrayData::new( 
self.data_type, self.len, self.null_count, @@ -545,8 +545,7 @@ impl ArrayDataBuilder { self.offset, self.buffers, self.child_data, - ); - Arc::new(data) + ) } } @@ -554,8 +553,6 @@ impl ArrayDataBuilder { mod tests { use super::*; - use std::sync::Arc; - use crate::buffer::Buffer; use crate::util::bit_util; @@ -572,7 +569,7 @@ mod tests { #[test] fn test_builder() { - let child_arr_data = Arc::new(ArrayData::new( + let child_arr_data = ArrayData::new( DataType::Int32, 5, Some(0), @@ -580,7 +577,7 @@ mod tests { 0, vec![Buffer::from_slice_ref(&[1i32, 2, 3, 4, 5])], vec![], - )); + ); let v = vec![0, 1, 2, 3]; let b1 = Buffer::from(&v[..]); let arr_data = ArrayData::builder(DataType::Int32) @@ -651,7 +648,6 @@ mod tests { .len(16) .null_bit_buffer(Buffer::from(bit_v)) .build(); - let data = data.as_ref(); let new_data = data.slice(1, 15); assert_eq!(data.len() - 1, new_data.len()); assert_eq!(1, new_data.offset()); diff --git a/rust/arrow/src/array/equal/dictionary.rs b/rust/arrow/src/array/equal/dictionary.rs index 81eedce9697..22add2494d2 100644 --- a/rust/arrow/src/array/equal/dictionary.rs +++ b/rust/arrow/src/array/equal/dictionary.rs @@ -34,8 +34,8 @@ pub(super) fn dictionary_equal( let lhs_keys = lhs.buffer::(0); let rhs_keys = rhs.buffer::(0); - let lhs_values = lhs.child_data()[0].as_ref(); - let rhs_values = rhs.child_data()[0].as_ref(); + let lhs_values = &lhs.child_data()[0]; + let rhs_values = &rhs.child_data()[0]; let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); diff --git a/rust/arrow/src/array/equal/fixed_list.rs b/rust/arrow/src/array/equal/fixed_list.rs index a107285e5da..e708a06efcd 100644 --- a/rust/arrow/src/array/equal/fixed_list.rs +++ b/rust/arrow/src/array/equal/fixed_list.rs @@ -36,8 +36,8 @@ pub(super) fn fixed_list_equal( _ => unreachable!(), }; - let lhs_values = lhs.child_data()[0].as_ref(); - let rhs_values = rhs.child_data()[0].as_ref(); + let lhs_values = 
&lhs.child_data()[0]; + let rhs_values = &rhs.child_data()[0]; let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs index 7acd47796c3..331cdc7c614 100644 --- a/rust/arrow/src/array/equal/list.rs +++ b/rust/arrow/src/array/equal/list.rs @@ -115,8 +115,8 @@ pub(super) fn list_equal( return true; } - let lhs_values = lhs.child_data()[0].as_ref(); - let rhs_values = rhs.child_data()[0].as_ref(); + let lhs_values = &lhs.child_data()[0]; + let rhs_values = &rhs.child_data()[0]; let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index ad6f122aa17..0924fc193a6 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -59,73 +59,73 @@ use variable_size::variable_sized_equal; impl PartialEq for dyn Array { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for dyn Array { fn eq(&self, other: &T) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for NullArray { fn eq(&self, other: &NullArray) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for PrimitiveArray { fn eq(&self, other: &PrimitiveArray) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for BooleanArray { fn eq(&self, other: &BooleanArray) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for GenericStringArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), 
other.data()) } } impl PartialEq for GenericBinaryArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for FixedSizeBinaryArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for DecimalArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for GenericListArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for FixedSizeListArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } impl PartialEq for StructArray { fn eq(&self, other: &Self) -> bool { - equal(self.data().as_ref(), other.data().as_ref()) + equal(self.data(), other.data()) } } @@ -290,10 +290,10 @@ mod tests { use std::sync::Arc; use crate::array::{ - array::Array, ArrayDataBuilder, ArrayDataRef, ArrayRef, BinaryOffsetSizeTrait, - BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, - GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, - StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray, + array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, + DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, + Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, + StringDictionaryBuilder, StringOffsetSizeTrait, StructArray, }; use crate::array::{GenericStringArray, Int32Array}; use crate::buffer::Buffer; @@ -303,11 +303,14 @@ mod tests { #[test] fn test_null_equal() { - let a = NullArray::new(12).data(); - let b = NullArray::new(12).data(); + let a = NullArray::new(12); + let a = a.data(); + let b = 
NullArray::new(12); + let b = b.data(); test_equal(&a, &b, true); - let b = NullArray::new(10).data(); + let b = NullArray::new(10); + let b = b.data(); test_equal(&a, &b, false); // Test the case where offset != 0 @@ -323,36 +326,43 @@ mod tests { #[test] fn test_boolean_equal() { - let a = BooleanArray::from(vec![false, false, true]).data(); - let b = BooleanArray::from(vec![false, false, true]).data(); - test_equal(a.as_ref(), b.as_ref(), true); + let a = BooleanArray::from(vec![false, false, true]); + let a = a.data(); + let b = BooleanArray::from(vec![false, false, true]); + let b = b.data(); + test_equal(&a, &b, true); - let b = BooleanArray::from(vec![false, false, false]).data(); - test_equal(a.as_ref(), b.as_ref(), false); + let b = BooleanArray::from(vec![false, false, false]); + let b = b.data(); + test_equal(&a, &b, false); } #[test] - fn test_boolean_equal_null() { - let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data(); - let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data(); - test_equal(a.as_ref(), b.as_ref(), true); + fn test_boolean_equal_nulls() { + let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]); + let a = a.data(); + let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]); + let b = b.data(); + test_equal(&a, &b, true); - let b = BooleanArray::from(vec![None, None, None, Some(true)]).data(); - test_equal(a.as_ref(), b.as_ref(), false); + let b = BooleanArray::from(vec![None, None, None, Some(true)]); + let b = b.data(); + test_equal(&a, &b, false); - let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]).data(); - test_equal(a.as_ref(), b.as_ref(), false); + let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]); + let b = b.data(); + test_equal(&a, &b, false); } #[test] fn test_boolean_equal_offset() { - let a = - BooleanArray::from(vec![false, true, false, true, false, false, true]).data(); + let a = BooleanArray::from(vec![false, 
true, false, true, false, false, true]); + let a = a.data(); let b = - BooleanArray::from(vec![true, false, false, false, true, false, true, true]) - .data(); - assert_eq!(equal(a.as_ref(), b.as_ref()), false); - assert_eq!(equal(b.as_ref(), a.as_ref()), false); + BooleanArray::from(vec![true, false, false, false, true, false, true, true]); + let b = b.data(); + assert_eq!(equal(a, b), false); + assert_eq!(equal(b, a), false); let a_slice = a.slice(2, 3); let b_slice = b.slice(3, 3); @@ -368,15 +378,19 @@ mod tests { // Elements fill in `u8`'s exactly. let mut vector = vec![false, false, true, true, true, true, true, true]; - let a = BooleanArray::from(vector.clone()).data(); - let b = BooleanArray::from(vector.clone()).data(); - test_equal(a.as_ref(), b.as_ref(), true); + let a = BooleanArray::from(vector.clone()); + let a = a.data(); + let b = BooleanArray::from(vector.clone()); + let b = b.data(); + test_equal(&a, &b, true); // Elements fill in `u8`s + suffix bits. vector.push(true); - let a = BooleanArray::from(vector.clone()).data(); - let b = BooleanArray::from(vector).data(); - test_equal(a.as_ref(), b.as_ref(), true); + let a = BooleanArray::from(vector.clone()); + let a = a.data(); + let b = BooleanArray::from(vector); + let b = b.data(); + test_equal(&a, &b, true); } #[test] @@ -410,8 +424,10 @@ mod tests { ]; for (lhs, rhs, expected) in cases { - let lhs = Int32Array::from(lhs).data(); - let rhs = Int32Array::from(rhs).data(); + let lhs = Int32Array::from(lhs); + let lhs = lhs.data(); + let rhs = Int32Array::from(rhs); + let rhs = rhs.data(); test_equal(&lhs, &rhs, expected); } } @@ -457,9 +473,11 @@ mod tests { ]; for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases { - let lhs = Int32Array::from(lhs).data(); + let lhs = Int32Array::from(lhs); + let lhs = lhs.data(); let lhs = lhs.slice(slice_lhs.0, slice_lhs.1); - let rhs = Int32Array::from(rhs).data(); + let rhs = Int32Array::from(rhs); + let rhs = rhs.data(); let rhs = rhs.slice(slice_rhs.0, 
slice_rhs.1); test_equal(&lhs, &rhs, expected); @@ -514,9 +532,11 @@ mod tests { for (lhs, rhs, expected) in cases { let lhs = lhs.iter().map(|x| x.as_deref()).collect(); let rhs = rhs.iter().map(|x| x.as_deref()).collect(); - let lhs = GenericStringArray::::from_opt_vec(lhs).data(); - let rhs = GenericStringArray::::from_opt_vec(rhs).data(); - test_equal(lhs.as_ref(), rhs.as_ref(), expected); + let lhs = GenericStringArray::::from_opt_vec(lhs); + let lhs = lhs.data(); + let rhs = GenericStringArray::::from_opt_vec(rhs); + let rhs = rhs.data(); + test_equal(lhs, rhs, expected); } } @@ -542,9 +562,11 @@ mod tests { .iter() .map(|x| x.as_deref().map(|x| x.as_bytes())) .collect(); - let lhs = GenericBinaryArray::::from_opt_vec(lhs).data(); - let rhs = GenericBinaryArray::::from_opt_vec(rhs).data(); - test_equal(lhs.as_ref(), rhs.as_ref(), expected); + let lhs = GenericBinaryArray::::from_opt_vec(lhs); + let lhs = lhs.data(); + let rhs = GenericBinaryArray::::from_opt_vec(rhs); + let rhs = rhs.data(); + test_equal(lhs, rhs, expected); } } @@ -560,18 +582,21 @@ mod tests { #[test] fn test_string_offset() { - let a = StringArray::from(vec![Some("a"), None, Some("b")]).data(); + let a = StringArray::from(vec![Some("a"), None, Some("b")]); + let a = a.data(); let a = a.slice(2, 1); - let b = StringArray::from(vec![Some("b")]).data(); + let b = StringArray::from(vec![Some("b")]); + let b = b.data(); - test_equal(&a, b.as_ref(), true); + test_equal(&a, &b, true); } #[test] fn test_string_offset_larger() { - let a = - StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data(); - let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data(); + let a = StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]); + let a = a.data(); + let b = StringArray::from(vec![None, Some("b"), None, Some("c")]); + let b = b.data(); test_equal(&a.slice(2, 2), &b.slice(0, 2), false); test_equal(&a.slice(2, 2), &b.slice(1, 2), true); @@ -580,17 +605,18 @@ 
mod tests { #[test] fn test_null() { - let a = NullArray::new(2).data(); - let b = NullArray::new(2).data(); - test_equal(a.as_ref(), b.as_ref(), true); + let a = NullArray::new(2); + let a = a.data(); + let b = NullArray::new(2); + let b = b.data(); + test_equal(&a, &b, true); - let b = NullArray::new(1).data(); - test_equal(a.as_ref(), b.as_ref(), false); + let b = NullArray::new(1); + let b = b.data(); + test_equal(&a, &b, false); } - fn create_list_array, T: AsRef<[Option]>>( - data: T, - ) -> ArrayDataRef { + fn create_list_array, T: AsRef<[Option]>>(data: T) -> ArrayData { let mut builder = ListBuilder::new(Int32Builder::new(10)); for d in data.as_ref() { if let Some(v) = d { @@ -600,17 +626,17 @@ mod tests { builder.append(false).unwrap() } } - builder.finish().data() + builder.finish().data().clone() } #[test] fn test_list_equal() { let a = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]); let b = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } // Test the case where null_count > 0 @@ -620,7 +646,7 @@ mod tests { create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]); let b = create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_list_array(&[ Some(&[1, 2]), @@ -630,11 +656,11 @@ mod tests { None, None, ]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); let b = create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); // a list where the nullness of values is determined by the list's bitmap let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]); @@ -645,7 +671,7 @@ mod tests { 
)))) .len(6) .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice())) - .add_child_data(c_values.data()) + .add_child_data(c_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00001001])) .build(); @@ -666,10 +692,10 @@ mod tests { )))) .len(6) .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice())) - .add_child_data(d_values.data()) + .add_child_data(d_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00001001])) .build(); - test_equal(c.as_ref(), d.as_ref(), true); + test_equal(&c, &d, true); } // Test the case where offset != 0 @@ -695,7 +721,7 @@ mod tests { fn create_fixed_size_binary_array, T: AsRef<[Option]>>( data: T, - ) -> ArrayDataRef { + ) -> ArrayData { let mut builder = FixedSizeBinaryBuilder::new(15, 5); for d in data.as_ref() { @@ -705,17 +731,17 @@ mod tests { builder.append_null().unwrap(); } } - builder.finish().data() + builder.finish().data().clone() } #[test] fn test_fixed_size_binary_equal() { let a = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]); let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"arrow")]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } // Test the case where null_count > 0 @@ -723,13 +749,13 @@ mod tests { fn test_fixed_size_binary_null() { let a = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]); let b = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world"), None]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); let b = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"arrow")]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, 
false); } #[test] @@ -769,7 +795,7 @@ mod tests { test_equal(&a_slice, &b_slice, false); } - fn create_decimal_array(data: &[Option]) -> ArrayDataRef { + fn create_decimal_array(data: &[Option]) -> ArrayData { let mut builder = DecimalBuilder::new(20, 23, 6); for d in data { @@ -779,17 +805,17 @@ mod tests { builder.append_null().unwrap(); } } - builder.finish().data() + builder.finish().data().clone() } #[test] fn test_decimal_equal() { let a = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]); let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_decimal_array(&[Some(15_887_000_000), Some(-8_887_000_000)]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } // Test the case where null_count > 0 @@ -797,13 +823,13 @@ mod tests { fn test_decimal_null() { let a = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]); let b = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000), None]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); let b = create_decimal_array(&[Some(15_887_000_000), None, Some(-8_887_000_000)]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } #[test] @@ -863,7 +889,7 @@ mod tests { /// Create a fixed size list of 2 value lengths fn create_fixed_size_list_array, T: AsRef<[Option]>>( data: T, - ) -> ArrayDataRef { + ) -> ArrayData { let mut builder = FixedSizeListBuilder::new(Int32Builder::new(10), 3); for d in data.as_ref() { @@ -877,17 +903,17 @@ mod tests { builder.append(false).unwrap() } } - builder.finish().data() + builder.finish().data().clone() } #[test] fn test_fixed_size_list_equal() { let a = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 
6])]); let b = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } // Test the case where null_count > 0 @@ -909,7 +935,7 @@ mod tests { None, None, ]); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); let b = create_fixed_size_list_array(&[ Some(&[1, 2, 3]), @@ -919,7 +945,7 @@ mod tests { None, None, ]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); let b = create_fixed_size_list_array(&[ Some(&[1, 2, 3]), @@ -929,7 +955,7 @@ mod tests { None, None, ]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } #[test] @@ -984,14 +1010,13 @@ mod tests { let a = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap() - .data(); + .unwrap(); + let a = a.data(); - let b = StructArray::try_from(vec![("f1", strings), ("f2", ints)]) - .unwrap() - .data(); + let b = StructArray::try_from(vec![("f1", strings), ("f2", ints)]).unwrap(); + let b = b.data(); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); } #[test] @@ -1159,7 +1184,7 @@ mod tests { test_equal(a.data_ref(), c.data_ref(), false); } - fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayDataRef { + fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); let mut builder = StringDictionaryBuilder::new_with_dictionary( PrimitiveBuilder::::new(3), @@ -1173,7 +1198,7 @@ mod tests { builder.append_null().unwrap() } } - builder.finish().data() + builder.finish().data().clone() } #[test] @@ -1188,26 +1213,26 @@ mod tests { &["a", "c", "b"], &[Some("a"), Some("b"), Some("a"), Some("c")], ); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); // 
different len let b = create_dictionary_array(&["a", "c", "b"], &[Some("a"), Some("b"), Some("a")]); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); // different key let b = create_dictionary_array( &["a", "c", "b"], &[Some("a"), Some("b"), Some("a"), Some("a")], ); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); // different values, same keys let b = create_dictionary_array( &["a", "b", "d"], &[Some("a"), Some("b"), Some("a"), Some("d")], ); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } #[test] @@ -1219,34 +1244,34 @@ mod tests { ); // equal to self - test_equal(a.as_ref(), a.as_ref(), true); + test_equal(&a, &a, true); // different representation (values and keys are swapped), same result let b = create_dictionary_array( &["a", "c", "b"], &[Some("a"), None, Some("a"), Some("c")], ); - test_equal(a.as_ref(), b.as_ref(), true); + test_equal(&a, &b, true); // different null position let b = create_dictionary_array( &["a", "c", "b"], &[Some("a"), Some("b"), Some("a"), None], ); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); // different key let b = create_dictionary_array( &["a", "c", "b"], &[Some("a"), None, Some("a"), Some("a")], ); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); // different values, same keys let b = create_dictionary_array( &["a", "b", "d"], &[Some("a"), None, Some("a"), Some("d")], ); - test_equal(a.as_ref(), b.as_ref(), false); + test_equal(&a, &b, false); } } diff --git a/rust/arrow/src/array/ffi.rs b/rust/arrow/src/array/ffi.rs index 6fe44306f83..a9b5d29ed79 100644 --- a/rust/arrow/src/array/ffi.rs +++ b/rust/arrow/src/array/ffi.rs @@ -105,19 +105,22 @@ mod tests { #[test] fn test_u32() -> Result<()> { - let data = UInt32Array::from(vec![Some(2), None, Some(1), None]).data(); - test_round_trip(data.as_ref()) + let array = UInt32Array::from(vec![Some(2), None, Some(1), None]); + let data = array.data(); + 
test_round_trip(data) } #[test] fn test_u64() -> Result<()> { - let data = UInt64Array::from(vec![Some(2), None, Some(1), None]).data(); - test_round_trip(data.as_ref()) + let array = UInt64Array::from(vec![Some(2), None, Some(1), None]); + let data = array.data(); + test_round_trip(data) } #[test] fn test_i64() -> Result<()> { - let data = Int64Array::from(vec![Some(2), None, Some(1), None]).data(); - test_round_trip(data.as_ref()) + let array = Int64Array::from(vec![Some(2), None, Some(1), None]); + let data = array.data(); + test_round_trip(data) } } diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs index 9cb84cab7bb..8e95bb00ed1 100644 --- a/rust/arrow/src/array/null.rs +++ b/rust/arrow/src/array/null.rs @@ -38,12 +38,12 @@ use std::any::Any; use std::fmt; use std::mem; -use crate::array::{Array, ArrayData, ArrayDataRef}; +use crate::array::{Array, ArrayData}; use crate::datatypes::*; /// An Array where all elements are nulls pub struct NullArray { - data: ArrayDataRef, + data: ArrayData, } impl NullArray { @@ -59,11 +59,7 @@ impl Array for NullArray { self } - fn data(&self) -> ArrayDataRef { - self.data.clone() - } - - fn data_ref(&self) -> &ArrayDataRef { + fn data(&self) -> &ArrayData { &self.data } @@ -92,12 +88,12 @@ impl Array for NullArray { /// Returns the total number of bytes of memory occupied physically by this [NullArray]. 
fn get_array_memory_size(&self) -> usize { - self.data.get_array_memory_size() + mem::size_of_val(self) + mem::size_of_val(self) } } -impl From for NullArray { - fn from(data: ArrayDataRef) -> Self { +impl From for NullArray { + fn from(data: ArrayData) -> Self { assert_eq!( data.data_type(), &DataType::Null, @@ -135,9 +131,8 @@ mod tests { assert_eq!(null_arr.is_valid(0), false); assert_eq!(0, null_arr.get_buffer_memory_size()); - let internals_of_null_array = 64; // Arc assert_eq!( - null_arr.get_buffer_memory_size() + internals_of_null_array, + null_arr.get_buffer_memory_size() + std::mem::size_of::(), null_arr.get_array_memory_size() ); } diff --git a/rust/arrow/src/array/ord.rs b/rust/arrow/src/array/ord.rs index 79359cc44ef..efd68b12264 100644 --- a/rust/arrow/src/array/ord.rs +++ b/rust/arrow/src/array/ord.rs @@ -93,8 +93,8 @@ where let left_keys = left.keys_array(); let right_keys = right.keys_array(); - let left_values = StringArray::from(left.values().data()); - let right_values = StringArray::from(left.values().data()); + let left_values = StringArray::from(left.values().data().clone()); + let right_values = StringArray::from(left.values().data().clone()); Box::new(move |i: usize, j: usize| { let key_left = left_keys.value(i).to_usize().unwrap(); diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 2d9cbe9b3ef..82dfc1c4707 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -15,13 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-use std::sync::Arc; - use crate::{buffer::MutableBuffer, datatypes::DataType, util::bit_util}; use super::{ data::{into_buffers, new_buffers}, - ArrayData, ArrayDataRef, + ArrayData, }; mod boolean; @@ -58,7 +56,7 @@ struct _MutableArrayData<'a> { } impl<'a> _MutableArrayData<'a> { - fn freeze(self, dictionary: Option) -> ArrayData { + fn freeze(self, dictionary: Option) -> ArrayData { let buffers = into_buffers(&self.data_type, self.buffer1, self.buffer2); let child_data = match self.data_type { @@ -66,7 +64,7 @@ impl<'a> _MutableArrayData<'a> { _ => { let mut child_data = Vec::with_capacity(self.child_data.len()); for child in self.child_data { - child_data.push(Arc::new(child.freeze())); + child_data.push(child.freeze()); } child_data } @@ -120,18 +118,18 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits /// # Example: /// /// ``` -/// use std::sync::Arc; /// use arrow::{array::{Int32Array, Array, MutableArrayData}}; /// -/// let array = Int32Array::from(vec![1, 2, 3, 4, 5]).data(); +/// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); +/// let array = array.data(); /// // Create a new `MutableArrayData` from an array and with a capacity of 4. /// // Capacity here is equivalent to `Vec::with_capacity` -/// let arrays = vec![array.as_ref()]; +/// let arrays = vec![array]; /// let mut mutable = MutableArrayData::new(arrays, false, 4); /// mutable.extend(0, 1, 3); // extend from the slice [1..3], [2,3] /// mutable.extend(0, 0, 3); // extend from the slice [0..3], [1,2,3] /// // `.freeze()` to convert `MutableArrayData` into a `ArrayData`. -/// let new_array = Int32Array::from(Arc::new(mutable.freeze())); +/// let new_array = Int32Array::from(mutable.freeze()); /// assert_eq!(Int32Array::from(vec![2, 3, 1, 2, 3]), new_array); /// ``` pub struct MutableArrayData<'a> { @@ -145,7 +143,7 @@ pub struct MutableArrayData<'a> { // the child data of the `Array` in Dictionary arrays. 
// This is not stored in `MutableArrayData` because these values constant and only needed // at the end, when freezing [_MutableArrayData]. - dictionary: Option, + dictionary: Option, // function used to extend values from arrays. This function's lifetime is bound to the array // because it reads values from it. @@ -319,7 +317,7 @@ impl<'a> MutableArrayData<'a> { DataType::List(_) | DataType::LargeList(_) => { let childs = arrays .iter() - .map(|array| array.child_data()[0].as_ref()) + .map(|array| &array.child_data()[0]) .collect::>(); vec![MutableArrayData::new(childs, use_nulls, capacity)] } @@ -330,7 +328,7 @@ impl<'a> MutableArrayData<'a> { .map(|i| { let child_arrays = arrays .iter() - .map(|array| array.child_data()[i].as_ref()) + .map(|array| &array.child_data()[i]) .collect::>(); MutableArrayData::new(child_arrays, use_nulls, capacity) }) @@ -400,15 +398,16 @@ impl<'a> MutableArrayData<'a> { } } +/* #[cfg(test)] mod tests { - use std::convert::TryFrom; + use std::{convert::TryFrom, sync::Arc}; use super::*; use crate::{ array::{ - Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, + Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray, FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array, Int64Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, StringDictionaryBuilder, StructArray, UInt8Array, @@ -424,12 +423,13 @@ mod tests { /// tests extending from a primitive array w/ offset nor nulls #[test] fn test_primitive() { - let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]).data(); - let arrays = vec![b.as_ref()]; + let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); + let b = b.data(); + let arrays = vec![b]; let mut a = MutableArrayData::new(arrays, false, 3); a.extend(0, 0, 2); let result = a.freeze(); - let array = UInt8Array::from(Arc::new(result)); + let array = UInt8Array::from(result); let expected = UInt8Array::from(vec![Some(1), Some(2)]); assert_eq!(array, expected); } @@ -438,12 +438,13 @@ 
mod tests { #[test] fn test_primitive_offset() { let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); - let b = b.slice(1, 2).data(); - let arrays = vec![b.as_ref()]; + let b = b.slice(1, 2); + let b = b.data(); + let arrays = vec![b]; let mut a = MutableArrayData::new(arrays, false, 2); a.extend(0, 0, 2); let result = a.freeze(); - let array = UInt8Array::from(Arc::new(result)); + let array = UInt8Array::from(result); let expected = UInt8Array::from(vec![Some(2), Some(3)]); assert_eq!(array, expected); } @@ -453,11 +454,11 @@ mod tests { fn test_primitive_null_offset() { let b = UInt8Array::from(vec![Some(1), None, Some(3)]); let b = b.slice(1, 2).data(); - let arrays = vec![b.as_ref()]; + let arrays = vec![b]; let mut a = MutableArrayData::new(arrays, false, 2); a.extend(0, 0, 2); let result = a.freeze(); - let array = UInt8Array::from(Arc::new(result)); + let array = UInt8Array::from(result); let expected = UInt8Array::from(vec![None, Some(3)]); assert_eq!(array, expected); } @@ -466,13 +467,13 @@ mod tests { fn test_primitive_null_offset_nulls() { let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); let b = b.slice(1, 2).data(); - let arrays = vec![b.as_ref()]; + let arrays = vec![b]; let mut a = MutableArrayData::new(arrays, true, 2); a.extend(0, 0, 2); a.extend_nulls(3); a.extend(0, 1, 2); let result = a.freeze(); - let array = UInt8Array::from(Arc::new(result)); + let array = UInt8Array::from(result); let expected = UInt8Array::from(vec![Some(2), Some(3), None, None, None, Some(3)]); assert_eq!(array, expected); @@ -489,13 +490,13 @@ mod tests { builder.values().append_slice(&[6, 7, 8])?; builder.append(true)?; let array = builder.finish().data(); - let arrays = vec![array.as_ref()]; + let arrays = vec![array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 0, 1); let result = mutable.freeze(); - let array = ListArray::from(Arc::new(result)); + let array = ListArray::from(result); let int_builder = 
Int64Builder::new(24); let mut builder = ListBuilder::::new(int_builder); @@ -513,14 +514,14 @@ mod tests { fn test_variable_sized_nulls() { let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data(); - let arrays = vec![array.as_ref()]; + let arrays = vec![array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); let result = mutable.freeze(); - let result = StringArray::from(Arc::new(result)); + let result = StringArray::from(result); let expected = StringArray::from(vec![Some("bc"), None]); assert_eq!(result, expected); @@ -541,7 +542,7 @@ mod tests { mutable.extend(0, 0, 3); let result = mutable.freeze(); - let result = StringArray::from(Arc::new(result)); + let result = StringArray::from(result); let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]); assert_eq!(result, expected); @@ -560,7 +561,7 @@ mod tests { mutable.extend(0, 0, 3); let result = mutable.freeze(); - let result = StringArray::from(Arc::new(result)); + let result = StringArray::from(result); let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]); assert_eq!(result, expected); @@ -571,7 +572,7 @@ mod tests { let array1 = StringArray::from(vec!["hello", "world"]).data(); let array2 = StringArray::from(vec![Some("1"), None]).data(); - let arrays = vec![array1.as_ref(), array2.as_ref()]; + let arrays = vec![array1, array2]; let mut mutable = MutableArrayData::new(arrays, false, 5); @@ -579,7 +580,7 @@ mod tests { mutable.extend(1, 0, 2); let result = mutable.freeze(); - let result = StringArray::from(Arc::new(result)); + let result = StringArray::from(result); let expected = StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]); @@ -600,7 +601,7 @@ mod tests { mutable.extend_nulls(1); let result = mutable.freeze(); - let result = StringArray::from(Arc::new(result)); + let result = StringArray::from(result); let expected = StringArray::from(vec![None, Some("defh"), None]); 
assert_eq!(result, expected); @@ -610,14 +611,14 @@ mod tests { fn test_bool() { let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]).data(); - let arrays = vec![array.as_ref()]; + let arrays = vec![array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); let result = mutable.freeze(); - let result = BooleanArray::from(Arc::new(result)); + let result = BooleanArray::from(result); let expected = BooleanArray::from(vec![Some(true), None]); assert_eq!(result, expected); @@ -627,7 +628,7 @@ mod tests { fn test_null() { let array1 = NullArray::new(10).data(); let array2 = NullArray::new(5).data(); - let arrays = vec![array1.as_ref(), array2.as_ref()]; + let arrays = vec![array1, array2]; let mut mutable = MutableArrayData::new(arrays, false, 0); @@ -635,13 +636,13 @@ mod tests { mutable.extend(1, 0, 1); let result = mutable.freeze(); - let result = NullArray::from(Arc::new(result)); + let result = NullArray::from(result); let expected = NullArray::new(3); assert_eq!(result, expected); } - fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayDataRef { + fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); let mut builder = StringDictionaryBuilder::new_with_dictionary( PrimitiveBuilder::::new(3), @@ -655,7 +656,7 @@ mod tests { builder.append_null().unwrap() } } - builder.finish().data() + builder.finish().data().clone() } #[test] @@ -665,14 +666,14 @@ mod tests { &["a", "b", "c"], &[Some("a"), Some("b"), None, Some("c")], ); - let arrays = vec![array.as_ref()]; + let arrays = vec![&array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); let result = mutable.freeze(); - let result = DictionaryArray::from(Arc::new(result)); + let result = DictionaryArray::from(result); let expected = Int16Array::from(vec![Some(1), None]); assert_eq!(result.keys(), &expected); @@ -699,12 +700,12 
@@ mod tests { StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap() .data(); - let arrays = vec![array.as_ref()]; + let arrays = vec![array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); let data = mutable.freeze(); - let array = StructArray::from(Arc::new(data)); + let array = StructArray::from(data); let expected = StructArray::try_from(vec![ ("f1", strings.slice(1, 2)), @@ -775,13 +776,13 @@ mod tests { StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap() .data(); - let arrays = vec![array.as_ref()]; + let arrays = vec![array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); let data = mutable.freeze(); - let array = StructArray::from(Arc::new(data)); + let array = StructArray::from(data); let expected_string = Arc::new(StringArray::from(vec![None, None])) as ArrayRef; let expected_int = Arc::new(Int32Array::from(vec![Some(2), None])) as ArrayRef; @@ -813,13 +814,13 @@ mod tests { StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) .unwrap() .data(); - let arrays = vec![array.as_ref(), array.as_ref()]; + let arrays = vec![array, array]; let mut mutable = MutableArrayData::new(arrays, false, 0); mutable.extend(0, 1, 3); mutable.extend(1, 0, 2); let data = mutable.freeze(); - let array = StructArray::from(Arc::new(data)); + let array = StructArray::from(data); let expected_string = Arc::new(StringArray::from(vec![None, None, Some("joe"), None])) as ArrayRef; @@ -850,7 +851,7 @@ mod tests { mutable.extend(0, 0, 1); let result = mutable.freeze(); - let result = FixedSizeBinaryArray::from(Arc::new(result)); + let result = FixedSizeBinaryArray::from(result); let expected = FixedSizeBinaryArray::try_from_iter(vec![vec![0, 2], vec![0, 1]].into_iter()) @@ -882,8 +883,7 @@ mod tests { let b = b.data(); let c = b.slice(1, 2); - let mut mutable = - MutableArrayData::new(vec![a.as_ref(), b.as_ref(), &c], false, 1); 
+ let mut mutable = MutableArrayData::new(vec![a, b, &c], false, 1); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); mutable.extend(2, 0, c.len()); @@ -920,7 +920,7 @@ mod tests { None, 0, vec![list_value_offsets], - vec![expected_int_array.data()], + vec![expected_int_array.data().clone()], ); assert_eq!(finished, expected_list_data); @@ -957,8 +957,7 @@ mod tests { let c = b.slice(1, 2); let d = b.slice(2, 2); - let mut mutable = - MutableArrayData::new(vec![a.as_ref(), b.as_ref(), &c, &d], false, 10); + let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -1002,7 +1001,7 @@ mod tests { Some(Buffer::from(&[0b11011011, 0b1110])), 0, vec![list_value_offsets], - vec![expected_int_array.data()], + vec![expected_int_array.data().clone()], ); assert_eq!(result, expected_list_data); @@ -1032,7 +1031,7 @@ mod tests { builder.append(true)?; let b = builder.finish().data(); - let mut mutable = MutableArrayData::new(vec![a.as_ref(), b.as_ref()], false, 10); + let mut mutable = MutableArrayData::new(vec![a, b], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -1073,7 +1072,7 @@ mod tests { None, 0, vec![list_value_offsets], - vec![expected_string_array.data()], + vec![expected_string_array.data().clone()], ); assert_eq!(result, expected_list_data); Ok(()) @@ -1098,7 +1097,7 @@ mod tests { .expect("Failed to create FixedSizeBinaryArray from iterable") .data(); - let mut mutable = MutableArrayData::new(vec![a.as_ref(), b.as_ref()], false, 10); + let mut mutable = MutableArrayData::new(vec![a, b], false, 10); mutable.extend(0, 0, a.len()); mutable.extend(1, 0, b.len()); @@ -1213,7 +1212,7 @@ mod tests { vec![expected_int_array.data()], ); let expected_list = - FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayDataRef); + FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayData); assert_eq!(&expected_list.values(), 
&finished.values()); assert_eq!(expected_list.len(), finished.len()); @@ -1221,3 +1220,4 @@ mod tests { } */ } + */ diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs index a40e5ea4308..d7aadf144d4 100644 --- a/rust/arrow/src/compute/kernels/arithmetic.rs +++ b/rust/arrow/src/compute/kernels/arithmetic.rs @@ -23,14 +23,15 @@ //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. use std::ops::{Add, Div, Mul, Neg, Sub}; -use std::sync::Arc; use num::{One, Zero}; use crate::buffer::Buffer; #[cfg(simd)] use crate::buffer::MutableBuffer; -use crate::compute::{kernels::arity::unary, util::combine_option_bitmap}; +#[cfg(not(simd))] +use crate::compute::kernels::arity::unary; +use crate::compute::util::combine_option_bitmap; use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; @@ -87,7 +88,7 @@ where vec![result.into()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } #[cfg(simd)] @@ -136,7 +137,7 @@ where vec![result.into()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// Helper function to perform math lambda function on values from two arrays. If either @@ -185,7 +186,7 @@ where vec![buffer], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// Helper function to divide two arrays. @@ -253,7 +254,7 @@ where vec![buffer], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// Scalar-divisor version of `math_divide`. @@ -281,7 +282,7 @@ where vec![buffer], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// SIMD vectorized version of `math_op` above. 
@@ -344,7 +345,7 @@ where vec![result.into()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// SIMD vectorized implementation of `left / right`. @@ -560,7 +561,7 @@ where vec![result.into()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// SIMD vectorized version of `divide_scalar`. @@ -606,7 +607,7 @@ where vec![result.into()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// Perform `left + right` operation on two arrays. If either left or right value is null diff --git a/rust/arrow/src/compute/kernels/arity.rs b/rust/arrow/src/compute/kernels/arity.rs index 11139f83270..4aa7f3d6e5d 100644 --- a/rust/arrow/src/compute/kernels/arity.rs +++ b/rust/arrow/src/compute/kernels/arity.rs @@ -70,5 +70,5 @@ where let buffer = unsafe { Buffer::from_trusted_len_iter(values) }; let data = into_primitive_array_data::<_, O>(array, buffer); - PrimitiveArray::::from(std::sync::Arc::new(data)) + PrimitiveArray::::from(data) } diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs index b0f7f9f2fc2..54f5413b05a 100644 --- a/rust/arrow/src/compute/kernels/boolean.rs +++ b/rust/arrow/src/compute/kernels/boolean.rs @@ -23,7 +23,6 @@ //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. use std::ops::Not; -use std::sync::Arc; use crate::array::{Array, ArrayData, BooleanArray, PrimitiveArray}; use crate::buffer::{ @@ -71,7 +70,7 @@ where vec![values], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Performs `AND` operation on two arrays. If either left or right value is null then the @@ -153,7 +152,7 @@ pub fn not(left: &BooleanArray) -> Result { vec![values], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Returns a non-null [BooleanArray] with whether each value of the array is null. 
@@ -185,7 +184,7 @@ pub fn is_null(input: &Array) -> Result { let data = ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Returns a non-null [BooleanArray] with whether each value of the array is not null. @@ -219,7 +218,7 @@ pub fn is_not_null(input: &Array) -> Result { let data = ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Copies original array, setting null bit to true if a secondary comparison boolean array is set to true. @@ -310,11 +309,13 @@ where data_buffers, left_data.child_data().to_vec(), ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } #[cfg(test)] mod tests { + use std::sync::Arc; + use super::*; use crate::array::{ArrayRef, Int32Array}; diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index 0d8dc822fd9..2a519a06aec 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -268,10 +268,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (_, Struct(_)) => Err(ArrowError::ComputeError( "Cannot cast to struct from other types".to_string(), )), - (List(_), List(ref to)) => cast_list_inner::(&**array, to, to_type), - (LargeList(_), LargeList(ref to)) => { - cast_list_inner::(&**array, to, to_type) - } + (List(_), List(ref to)) => cast_list_inner::(array, to, to_type), + (LargeList(_), LargeList(ref to)) => cast_list_inner::(array, to, to_type), (List(list_from), LargeList(list_to)) => { if list_to.data_type() != list_from.data_type() { Err(ArrowError::ComputeError( @@ -635,7 +633,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { } //(Time32(TimeUnit::Second), Time64(_)) => {}, (Time32(from_unit), Time64(to_unit)) => { - let time_array = Int32Array::from(array.data()); + let time_array = 
Int32Array::from(array.data().clone()); // note: (numeric_cast + SIMD multiply) is faster than (cast & multiply) let c: Int64Array = numeric_cast(&time_array); let from_size = time_unit_multiple(&from_unit); @@ -678,7 +676,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { Ok(Arc::new(values) as ArrayRef) } (Time64(from_unit), Time32(to_unit)) => { - let time_array = Int64Array::from(array.data()); + let time_array = Int64Array::from(array.data().clone()); let from_size = time_unit_multiple(&from_unit); let to_size = time_unit_multiple(&to_unit); let divisor = from_size / to_size; @@ -715,7 +713,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { } } (Timestamp(from_unit, _), Timestamp(to_unit, _)) => { - let time_array = Int64Array::from(array.data()); + let time_array = Int64Array::from(array.data().clone()); let from_size = time_unit_multiple(&from_unit); let to_size = time_unit_multiple(&to_unit); // we either divide or multiply, depending on size of each unit @@ -752,7 +750,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { } } (Timestamp(from_unit, _), Date32) => { - let time_array = Int64Array::from(array.data()); + let time_array = Int64Array::from(array.data().clone()); let from_size = time_unit_multiple(&from_unit) * SECONDS_IN_DAY; let mut b = Date32Builder::new(array.len()); for i in 0..array.len() { @@ -775,7 +773,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { match to_size.cmp(&from_size) { std::cmp::Ordering::Less => { - let time_array = Date64Array::from(array.data()); + let time_array = Date64Array::from(array.data().clone()); Ok(Arc::new(divide( &time_array, &Date64Array::from(vec![from_size / to_size; array.len()]), @@ -785,7 +783,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { cast_array_data::(array, to_type.clone()) } std::cmp::Ordering::Greater => { - let time_array = Date64Array::from(array.data()); + let time_array = 
Date64Array::from(array.data().clone()); Ok(Arc::new(multiply( &time_array, &Date64Array::from(vec![to_size / from_size; array.len()]), @@ -853,7 +851,7 @@ fn cast_array_data(array: &ArrayRef, to_type: DataType) -> Result where TO: ArrowNumericType, { - let data = Arc::new(ArrayData::new( + let data = ArrayData::new( to_type, array.len(), Some(array.null_count()), @@ -861,7 +859,7 @@ where array.data().offset(), array.data().buffers().to_vec(), vec![], - )); + ); Ok(Arc::new(PrimitiveArray::::from(data)) as ArrayRef) } @@ -1158,7 +1156,7 @@ fn dictionary_cast( } // keys are data, child_data is values (dictionary) - let data = Arc::new(ArrayData::new( + let data = ArrayData::new( to_type.clone(), cast_keys.len(), Some(cast_keys.null_count()), @@ -1169,8 +1167,8 @@ fn dictionary_cast( .map(|bitmap| bitmap.bits), cast_keys.data().offset(), cast_keys.data().buffers().to_vec(), - vec![cast_values.data()], - )); + vec![cast_values.data().clone()], + ); // create the appropriate array type let new_array: ArrayRef = match **to_index_type { @@ -1342,17 +1340,17 @@ fn cast_primitive_to_list( .map(|bitmap| bitmap.bits), 0, vec![offsets.into()], - vec![cast_array.data()], + vec![cast_array.data().clone()], ); let list_array = - Arc::new(GenericListArray::::from(Arc::new(list_data))) as ArrayRef; + Arc::new(GenericListArray::::from(list_data)) as ArrayRef; Ok(list_array) } /// Helper function that takes an Generic list container and casts the inner datatype. 
fn cast_list_inner( - array: &dyn Array, + array: &Arc, to: &Field, to_type: &DataType, ) -> Result { @@ -1371,9 +1369,9 @@ fn cast_list_inner( array.offset(), // reuse offset buffer data.buffers().to_vec(), - vec![cast_array.data()], + vec![cast_array.data().clone()], ); - let list = GenericListArray::::from(Arc::new(array_data)); + let list = GenericListArray::::from(array_data); Ok(Arc::new(list) as ArrayRef) } @@ -1684,7 +1682,9 @@ mod tests { #[test] fn test_cast_list_i32_to_list_u16() { // Construct a value array - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]) + .data() + .clone(); let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]); @@ -1742,8 +1742,9 @@ mod tests { )] fn test_cast_list_i32_to_list_timestamp() { // Construct a value array - let value_data = - Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]) + .data() + .clone(); let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 9]); diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 3d96d7760bc..823c4e46c4d 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -24,7 +24,6 @@ use regex::Regex; use std::collections::HashMap; -use std::sync::Arc; use crate::array::*; use crate::buffer::{Buffer, MutableBuffer}; @@ -60,7 +59,7 @@ macro_rules! compare_op { vec![buffer], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) }}; } @@ -81,7 +80,7 @@ macro_rules! 
compare_op_scalar { vec![buffer], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) }}; } @@ -176,7 +175,7 @@ pub fn like_utf8( vec![result.finish()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } fn is_like_pattern(c: char) -> bool { @@ -246,7 +245,7 @@ pub fn like_utf8_scalar( vec![bool_buf.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] / @@ -298,7 +297,7 @@ pub fn nlike_utf8( vec![result.finish()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Perform SQL `left NOT LIKE right` operation on [`StringArray`] / @@ -351,7 +350,7 @@ pub fn nlike_utf8_scalar( vec![result.finish()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`]. @@ -537,7 +536,7 @@ where vec![result.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Helper function to perform boolean lambda function on values from an array and a scalar value using @@ -620,7 +619,7 @@ where vec![result.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Perform `left == right` operation on two arrays. 
@@ -822,7 +821,7 @@ where vec![bool_buf.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`] @@ -880,7 +879,7 @@ where vec![bool_buf.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } // create a buffer and fill it with valid bits @@ -1198,7 +1197,8 @@ mod tests { None, Some(7), ]) - .data(); + .data() + .clone(); let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 6, 9]); let list_data_type = DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true))); diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index aee2a1c83aa..32880286a72 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -30,8 +30,6 @@ //! assert_eq!(arr.len(), 3); //! ``` -use std::sync::Arc; - use crate::array::*; use crate::error::{ArrowError, Result}; @@ -56,10 +54,7 @@ pub fn concat(arrays: &[&Array]) -> Result { let lengths = arrays.iter().map(|array| array.len()).collect::>(); let capacity = lengths.iter().sum(); - let arrays = arrays - .iter() - .map(|a| a.data_ref().as_ref()) - .collect::>(); + let arrays = arrays.iter().map(|a| a.data()).collect::>(); let mut mutable = MutableArrayData::new(arrays, false, capacity); @@ -67,7 +62,7 @@ pub fn concat(arrays: &[&Array]) -> Result { mutable.extend(i, 0, *len) } - Ok(make_array(Arc::new(mutable.freeze()))) + Ok(make_array(mutable.freeze())) } #[cfg(test)] diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs index 08965023f20..68feb0a546e 100644 --- a/rust/arrow/src/compute/kernels/filter.rs +++ b/rust/arrow/src/compute/kernels/filter.rs @@ -20,7 +20,7 @@ use crate::error::Result; use crate::record_batch::RecordBatch; use crate::{array::*, util::bit_chunk_iterator::BitChunkIterator}; -use std::{iter::Enumerate, sync::Arc}; 
+use std::iter::Enumerate; /// Function that can filter arbitrary arrays pub type Filter<'a> = Box ArrayData + 'a>; @@ -227,7 +227,7 @@ pub fn filter(array: &Array, filter: &BooleanArray) -> Result { MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count); iter.for_each(|(start, end)| mutable.extend(0, start, end)); let data = mutable.freeze(); - Ok(make_array(Arc::new(data))) + Ok(make_array(data)) } /// Returns a new [RecordBatch] with arrays containing only values matching the filter. @@ -241,7 +241,7 @@ pub fn filter_record_batch( let filtered_arrays = record_batch .columns() .iter() - .map(|a| make_array(Arc::new(filter(&a.data())))) + .map(|a| make_array(filter(&a.data()))) .collect(); RecordBatch::try_new(record_batch.schema(), filtered_arrays) } diff --git a/rust/arrow/src/compute/kernels/length.rs b/rust/arrow/src/compute/kernels/length.rs index ed1fda4a062..4d704d27078 100644 --- a/rust/arrow/src/compute/kernels/length.rs +++ b/rust/arrow/src/compute/kernels/length.rs @@ -26,7 +26,6 @@ use crate::{ datatypes::{DataType, Int32Type, Int64Type}, error::{ArrowError, Result}, }; -use std::sync::Arc; fn unary_offsets_string( array: &GenericStringArray, @@ -67,7 +66,7 @@ where vec![buffer], vec![], ); - make_array(Arc::new(data)) + make_array(data) } fn octet_length( diff --git a/rust/arrow/src/compute/kernels/limit.rs b/rust/arrow/src/compute/kernels/limit.rs index ee7d031c245..4b4b08572a2 100644 --- a/rust/arrow/src/compute/kernels/limit.rs +++ b/rust/arrow/src/compute/kernels/limit.rs @@ -164,8 +164,8 @@ mod tests { assert_eq!(5, struct_array.len()); assert_eq!(1, struct_array.null_count()); - assert_eq!(boolean_data, struct_array.column(0).data()); - assert_eq!(int_data, struct_array.column(1).data()); + assert_eq!(&boolean_data, struct_array.column(0).data()); + assert_eq!(&int_data, struct_array.column(1).data()); let array: ArrayRef = Arc::new(struct_array); diff --git a/rust/arrow/src/compute/kernels/sort.rs 
b/rust/arrow/src/compute/kernels/sort.rs index 738178f15d4..bf8eda353e6 100644 --- a/rust/arrow/src/compute/kernels/sort.rs +++ b/rust/arrow/src/compute/kernels/sort.rs @@ -18,7 +18,6 @@ //! Defines sort kernel for `ArrayRef` use std::cmp::Ordering; -use std::sync::Arc; use crate::array::*; use crate::buffer::MutableBuffer; @@ -440,7 +439,7 @@ fn sort_boolean( } } - let result_data = Arc::new(ArrayData::new( + let result_data = ArrayData::new( DataType::UInt32, len, Some(0), @@ -448,7 +447,7 @@ fn sort_boolean( 0, vec![result.into()], vec![], - )); + ); Ok(UInt32Array::from(result_data)) } @@ -517,7 +516,7 @@ where } } - let result_data = Arc::new(ArrayData::new( + let result_data = ArrayData::new( DataType::UInt32, len, Some(0), @@ -525,7 +524,7 @@ where 0, vec![result.into()], vec![], - )); + ); Ok(UInt32Array::from(result_data)) } @@ -808,7 +807,7 @@ pub fn lexsort_to_indices( let flat_columns = columns .iter() .map( - |column| -> Result<(&ArrayDataRef, DynComparator, SortOptions)> { + |column| -> Result<(&ArrayData, DynComparator, SortOptions)> { // flatten and convert build comparators // use ArrayData for is_valid checks later to avoid dynamic call let values = column.values.as_ref(); @@ -820,7 +819,7 @@ pub fn lexsort_to_indices( )) }, ) - .collect::>>()?; + .collect::>>()?; let lex_comparator = |a_idx: &usize, b_idx: &usize| -> Ordering { for (data, comparator, sort_option) in flat_columns.iter() { diff --git a/rust/arrow/src/compute/kernels/substring.rs b/rust/arrow/src/compute/kernels/substring.rs index c6f5cc710e7..d9956b89687 100644 --- a/rust/arrow/src/compute/kernels/substring.rs +++ b/rust/arrow/src/compute/kernels/substring.rs @@ -22,7 +22,6 @@ use crate::{ datatypes::DataType, error::{ArrowError, Result}, }; -use std::sync::Arc; #[allow(clippy::unnecessary_wraps)] fn generic_substring( @@ -87,7 +86,7 @@ fn generic_substring( ], vec![], ); - Ok(make_array(Arc::new(data))) + Ok(make_array(data)) } /// Returns an ArrayRef with a substring starting 
from `start` and with optional length `length` of each of the elements in `array`. diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index adae71d72aa..0217573dc5d 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -446,7 +446,7 @@ where vec![buffer], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(PrimitiveArray::::from(data)) } /// `take` implementation for boolean arrays @@ -521,7 +521,7 @@ where vec![val_buf.into()], vec![], ); - Ok(BooleanArray::from(Arc::new(data))) + Ok(BooleanArray::from(data)) } /// `take` implementation for string arrays @@ -681,7 +681,7 @@ where .len(indices.len()) .null_bit_buffer(null_buf.into()) .offset(0) - .add_child_data(taken.data()) + .add_child_data(taken.data().clone()) .add_buffer(value_offsets) .build(); Ok(GenericListArray::::from(list_data)) @@ -722,7 +722,7 @@ where .len(indices.len()) .null_bit_buffer(null_buf.into()) .offset(0) - .add_child_data(taken.data()) + .add_child_data(taken.data().clone()) .build(); Ok(FixedSizeListArray::from(list_data)) @@ -745,7 +745,7 @@ where let new_keys = take_primitive::(&values.keys_array(), indices)?; let new_keys_data = new_keys.data_ref(); - let data = Arc::new(ArrayData::new( + let data = ArrayData::new( values.data_type().clone(), new_keys.len(), Some(new_keys_data.null_count()), @@ -753,7 +753,7 @@ where 0, new_keys_data.buffers().to_vec(), values.data().child_data().to_vec(), - )); + ); Ok(DictionaryArray::::from(data)) } @@ -812,8 +812,10 @@ mod tests { // create a simple struct for testing purposes fn create_test_struct() -> StructArray { - let boolean_data = BooleanArray::from(vec![true, false, false, true]).data(); - let int_data = Int32Array::from(vec![42, 28, 19, 31]).data(); + let boolean_data = BooleanArray::from(vec![true, false, false, true]) + .data() + .clone(); + let int_data = Int32Array::from(vec![42, 28, 19, 31]).data().clone(); let mut field_types = vec![]; 
field_types.push(Field::new("a", DataType::Boolean, true)); field_types.push(Field::new("b", DataType::Int32, true)); @@ -1133,7 +1135,9 @@ mod tests { macro_rules! test_take_list { ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{ // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]] - let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]) + .data() + .clone(); // Construct offsets let value_offsets: [$offset_type; 4] = [0, 3, 6, 8]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1171,7 +1175,8 @@ mod tests { Some(0), Some(0), ]) - .data(); + .data() + .clone(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 2, 2, 5, 7, 10]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1205,7 +1210,8 @@ mod tests { Some(5), None, ]) - .data(); + .data() + .clone(); // Construct offsets let value_offsets: [$offset_type; 5] = [0, 3, 6, 7, 9]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1243,7 +1249,8 @@ mod tests { None, Some(0), ]) - .data(); + .data() + .clone(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 1, 1, 4, 6, 9]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1276,7 +1283,8 @@ mod tests { Some(5), None, ]) - .data(); + .data() + .clone(); // Construct offsets let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8]; let value_offsets = Buffer::from_slice_ref(&value_offsets); @@ -1313,7 +1321,8 @@ mod tests { None, Some(0), ]) - .data(); + .data() + .clone(); // construct offsets let expected_offsets: [$offset_type; 6] = [0, 0, 0, 3, 5, 8]; let expected_offsets = Buffer::from_slice_ref(&expected_offsets); @@ -1445,7 +1454,9 @@ mod tests { #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")] fn test_take_list_out_of_bounds() { // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]] - let value_data = 
Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]) + .data() + .clone(); // Construct offsets let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]); // Construct a list array from the above two @@ -1475,9 +1486,10 @@ mod tests { assert_eq!(index.len(), a.len()); assert_eq!(0, a.null_count()); - let expected_bool_data = - BooleanArray::from(vec![true, true, false, true, false]).data(); - let expected_int_data = Int32Array::from(vec![42, 31, 28, 42, 19]).data(); + let expected_bool_data = BooleanArray::from(vec![true, true, false, true, false]) + .data() + .clone(); + let expected_int_data = Int32Array::from(vec![42, 31, 28, 42, 19]).data().clone(); let mut field_types = vec![]; field_types.push(Field::new("a", DataType::Boolean, true)); field_types.push(Field::new("b", DataType::Int32, true)); @@ -1503,9 +1515,12 @@ mod tests { let expected_bool_data = BooleanArray::from(vec![None, Some(true), Some(false), None, Some(true)]) - .data(); + .data() + .clone(); let expected_int_data = - Int32Array::from(vec![None, Some(31), Some(28), None, Some(42)]).data(); + Int32Array::from(vec![None, Some(31), Some(28), None, Some(42)]) + .data() + .clone(); let mut field_types = vec![]; field_types.push(Field::new("a", DataType::Boolean, true)); @@ -1585,7 +1600,7 @@ mod tests { .downcast_ref::>() .unwrap(); - let result_values: StringArray = result.values().data().into(); + let result_values: StringArray = result.values().data().clone().into(); // dictionary values should stay the same let expected_values = StringArray::from(vec!["foo", "bar", ""]); diff --git a/rust/arrow/src/compute/kernels/window.rs b/rust/arrow/src/compute/kernels/window.rs index cc30293fd9c..82e712c3079 100644 --- a/rust/arrow/src/compute/kernels/window.rs +++ b/rust/arrow/src/compute/kernels/window.rs @@ -19,7 +19,6 @@ use crate::compute::concat; use num::{abs, clamp}; -use std::sync::Arc; use crate::{ array::{make_array, 
ArrayData, PrimitiveArray}, @@ -74,7 +73,7 @@ where ); // Concatenate both arrays, add nulls after if shift > 0 else before - let null_arr = make_array(Arc::new(null_data)); + let null_arr = make_array(null_data); if offset > 0 { concat(&[null_arr.as_ref(), slice.as_ref()]) } else { diff --git a/rust/arrow/src/compute/kernels/zip.rs b/rust/arrow/src/compute/kernels/zip.rs index ba8456829bb..0ee8e47bede 100644 --- a/rust/arrow/src/compute/kernels/zip.rs +++ b/rust/arrow/src/compute/kernels/zip.rs @@ -18,7 +18,6 @@ use crate::array::*; use crate::compute::SlicesIterator; use crate::error::{ArrowError, Result}; -use std::sync::Arc; /// Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy` /// are taken, where the mask evaluates `false` values of `falsy` are taken. @@ -68,7 +67,7 @@ pub fn zip( } let data = mutable.freeze(); - Ok(make_array(Arc::new(data))) + Ok(make_array(data)) } #[cfg(test)] diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 0e6de913daf..56de5948301 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -29,8 +29,8 @@ use std::ops::Add; /// This function is useful when implementing operations on higher level arrays. #[allow(clippy::unnecessary_wraps)] pub(super) fn combine_option_bitmap( - left_data: &ArrayDataRef, - right_data: &ArrayDataRef, + left_data: &ArrayData, + right_data: &ArrayData, len_in_bits: usize, ) -> Result> { let left_offset_in_bits = left_data.offset(); @@ -63,8 +63,8 @@ pub(super) fn combine_option_bitmap( /// This function is useful when implementing operations on higher level arrays. 
#[allow(clippy::unnecessary_wraps)] pub(super) fn compare_option_bitmap( - left_data: &ArrayDataRef, - right_data: &ArrayDataRef, + left_data: &ArrayData, + right_data: &ArrayData, len_in_bits: usize, ) -> Result> { let left_offset_in_bits = left_data.offset(); @@ -302,7 +302,7 @@ pub(super) mod tests { offset.push(values.len() as i64); } - let value_data = PrimitiveArray::::from(values).data(); + let value_data = PrimitiveArray::::from(values).data().clone(); let (list_data_type, value_offsets) = if TypeId::of::() == TypeId::of::() { ( @@ -391,7 +391,7 @@ pub(super) mod tests { length, ); - let child_data = PrimitiveArray::::from(values).data(); + let child_data = PrimitiveArray::::from(values).data().clone(); let list_data = ArrayData::builder(list_data_type) .len(list_len) diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs index ffaf7423b61..6df5690fd41 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -643,7 +643,6 @@ mod tests { }; use crate::compute::kernels; use std::convert::TryFrom; - use std::sync::Arc; #[test] fn test_round_trip() -> Result<()> { @@ -651,10 +650,10 @@ mod tests { let array = Int32Array::from(vec![1, 2, 3]); // export it - let array = ArrowArray::try_from(array.data().as_ref().clone())?; + let array = ArrowArray::try_from(array.data().clone())?; // (simulate consumer) import it - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; let array = make_array(data); // perform some operation @@ -675,10 +674,10 @@ mod tests { GenericStringArray::::from(vec![Some("a"), None, Some("aaa")]); // export it - let array = ArrowArray::try_from(array.data().as_ref().clone())?; + let array = ArrowArray::try_from(array.data().clone())?; // (simulate consumer) import it - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; let array = make_array(data); // perform some operation @@ -719,10 +718,10 @@ mod tests { let array = 
GenericBinaryArray::::from(array); // export it - let array = ArrowArray::try_from(array.data().as_ref().clone())?; + let array = ArrowArray::try_from(array.data().clone())?; // (simulate consumer) import it - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; let array = make_array(data); // perform some operation @@ -764,10 +763,10 @@ mod tests { let array = BooleanArray::from(vec![None, Some(true), Some(false)]); // export it - let array = ArrowArray::try_from(array.data().as_ref().clone())?; + let array = ArrowArray::try_from(array.data().clone())?; // (simulate consumer) import it - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; let array = make_array(data); // perform some operation @@ -790,10 +789,10 @@ mod tests { let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]); // export it - let array = ArrowArray::try_from(array.data().as_ref().clone())?; + let array = ArrowArray::try_from(array.data().clone())?; // (simulate consumer) import it - let data = Arc::new(ArrayData::try_from(array)?); + let data = ArrayData::try_from(array)?; let array = make_array(data); // perform some operation diff --git a/rust/arrow/src/ipc/reader.rs b/rust/arrow/src/ipc/reader.rs index 2e50080796e..3c893cdf2ff 100644 --- a/rust/arrow/src/ipc/reader.rs +++ b/rust/arrow/src/ipc/reader.rs @@ -264,7 +264,7 @@ fn create_primitive_array( let values = Arc::new(Int64Array::from(builder.build())) as ArrayRef; // this cast is infallible, the unwrap is safe let casted = cast(&values, data_type).unwrap(); - casted.data() + casted.data().clone() } else { let mut builder = ArrayData::builder(data_type.clone()) .len(length) @@ -289,7 +289,7 @@ fn create_primitive_array( let values = Arc::new(Float64Array::from(builder.build())) as ArrayRef; // this cast is infallible, the unwrap is safe let casted = cast(&values, data_type).unwrap(); - casted.data() + casted.data().clone() } else { let mut 
builder = ArrayData::builder(data_type.clone()) .len(length) @@ -350,7 +350,7 @@ fn create_list_array( .len(field_node.length() as usize) .buffers(buffers[1..2].to_vec()) .offset(0) - .child_data(vec![child_array.data()]); + .child_data(vec![child_array.data().clone()]); if null_count > 0 { builder = builder.null_bit_buffer(buffers[0].clone()) } @@ -361,7 +361,7 @@ fn create_list_array( .len(field_node.length() as usize) .buffers(buffers[1..2].to_vec()) .offset(0) - .child_data(vec![child_array.data()]); + .child_data(vec![child_array.data().clone()]); if null_count > 0 { builder = builder.null_bit_buffer(buffers[0].clone()) } @@ -372,7 +372,7 @@ fn create_list_array( .len(field_node.length() as usize) .buffers(buffers[1..1].to_vec()) .offset(0) - .child_data(vec![child_array.data()]); + .child_data(vec![child_array.data().clone()]); if null_count > 0 { builder = builder.null_bit_buffer(buffers[0].clone()) } @@ -396,7 +396,7 @@ fn create_dictionary_array( .len(field_node.length() as usize) .buffers(buffers[1..2].to_vec()) .offset(0) - .child_data(vec![value_array.data()]); + .child_data(vec![value_array.data().clone()]); if null_count > 0 { builder = builder.null_bit_buffer(buffers[0].clone()) } diff --git a/rust/arrow/src/ipc/writer.rs b/rust/arrow/src/ipc/writer.rs index fd104e66416..f5a90fa1716 100644 --- a/rust/arrow/src/ipc/writer.rs +++ b/rust/arrow/src/ipc/writer.rs @@ -25,7 +25,7 @@ use std::io::{BufWriter, Write}; use flatbuffers::FlatBufferBuilder; -use crate::array::{ArrayDataRef, ArrayRef}; +use crate::array::{ArrayData, ArrayRef}; use crate::buffer::{Buffer, MutableBuffer}; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -233,7 +233,7 @@ impl IpcDataGenerator { fn dictionary_batch_to_bytes( &self, dict_id: i64, - array_data: &ArrayDataRef, + array_data: &ArrayData, write_options: &IpcWriteOptions, ) -> EncodedData { let mut fbb = FlatBufferBuilder::new(); @@ -656,7 +656,7 @@ fn write_continuation( /// Write array data to a vector of 
bytes fn write_array_data( - array_data: &ArrayDataRef, + array_data: &ArrayData, mut buffers: &mut Vec, mut arrow_data: &mut Vec, mut nodes: &mut Vec, diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs index bc94bb50624..31c496c9293 100644 --- a/rust/arrow/src/json/reader.rs +++ b/rust/arrow/src/json/reader.rs @@ -961,7 +961,7 @@ impl Decoder { }); let valid_len = cur_offset.to_usize().unwrap(); let array_data = match list_field.data_type() { - DataType::Null => NullArray::new(valid_len).data(), + DataType::Null => NullArray::new(valid_len).data().clone(), DataType::Boolean => { let num_bytes = bit_util::ceil(valid_len, 8); let mut bool_values = MutableBuffer::from_len_zeroed(num_bytes); @@ -1021,20 +1021,22 @@ impl Decoder { DataType::Utf8 => { StringArray::from_iter(flatten_json_string_values(rows).into_iter()) .data() + .clone() } DataType::LargeUtf8 => { LargeStringArray::from_iter(flatten_json_string_values(rows).into_iter()) .data() + .clone() } DataType::List(field) => { let child = self .build_nested_list_array::(&flatten_json_values(rows), field)?; - child.data() + child.data().clone() } DataType::LargeList(field) => { let child = self .build_nested_list_array::(&flatten_json_values(rows), field)?; - child.data() + child.data().clone() } DataType::Struct(fields) => { // extract list values, with non-lists converted to Value::Null @@ -1073,7 +1075,7 @@ impl Decoder { ArrayDataBuilder::new(data_type) .len(rows.len()) .null_bit_buffer(buf) - .child_data(arrays.into_iter().map(|a| a.data()).collect()) + .child_data(arrays.into_iter().map(|a| a.data().clone()).collect()) .build() } datatype => { @@ -1275,7 +1277,9 @@ impl Decoder { let data = ArrayDataBuilder::new(data_type) .len(len) .null_bit_buffer(null_buffer.into()) - .child_data(arrays.into_iter().map(|a| a.data()).collect()) + .child_data( + arrays.into_iter().map(|a| a.data().clone()).collect(), + ) .build(); Ok(make_array(data)) } @@ -1316,7 +1320,7 @@ impl Decoder { } /// Read 
the primitive list's values into ArrayData - fn read_primitive_list_values(&self, rows: &[Value]) -> ArrayDataRef + fn read_primitive_list_values(&self, rows: &[Value]) -> ArrayData where T: ArrowPrimitiveType + ArrowNumericType, T::Native: num::NumCast, @@ -1344,7 +1348,7 @@ impl Decoder { }) .collect::>>(); let array = PrimitiveArray::::from_iter(values.iter()); - array.data() + array.data().clone() } } @@ -2031,13 +2035,13 @@ mod tests { let d = StringArray::from(vec![Some("text"), None, Some("text"), None]); let c = ArrayDataBuilder::new(c_field.data_type().clone()) .len(4) - .add_child_data(d.data()) + .add_child_data(d.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00000101])) .build(); let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]); let a = ArrayDataBuilder::new(a_field.data_type().clone()) .len(4) - .add_child_data(b.data()) + .add_child_data(b.data().clone()) .add_child_data(c) .null_bit_buffer(Buffer::from(vec![0b00000111])) .build(); @@ -2094,7 +2098,7 @@ mod tests { ]); let c = ArrayDataBuilder::new(c_field.data_type().clone()) .len(7) - .add_child_data(d.data()) + .add_child_data(d.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00111011])) .build(); let b = BooleanArray::from(vec![ @@ -2108,7 +2112,7 @@ mod tests { ]); let a = ArrayDataBuilder::new(a_struct_field.data_type().clone()) .len(7) - .add_child_data(b.data()) + .add_child_data(b.data().clone()) .add_child_data(c.clone()) .null_bit_buffer(Buffer::from(vec![0b00111111])) .build(); diff --git a/rust/arrow/src/json/writer.rs b/rust/arrow/src/json/writer.rs index dbb70cf897e..f55496af86a 100644 --- a/rust/arrow/src/json/writer.rs +++ b/rust/arrow/src/json/writer.rs @@ -623,7 +623,7 @@ mod tests { let a_list_data = ArrayData::builder(field_c1.data_type().clone()) .len(5) .add_buffer(a_value_offsets) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00011111])) .build(); let a = 
ListArray::from(a_list_data); @@ -675,7 +675,7 @@ mod tests { .len(3) .add_buffer(a_value_offsets) .null_bit_buffer(Buffer::from(vec![0b00000111])) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .build(); let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice()); @@ -755,7 +755,7 @@ mod tests { let c1_list_data = ArrayData::builder(field_c1.data_type().clone()) .len(3) .add_buffer(c1_value_offsets) - .add_child_data(struct_values.data()) + .add_child_data(struct_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00000101])) .build(); let c1 = ListArray::from(c1_list_data); diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs index 2a09c03f814..93abb909d02 100644 --- a/rust/arrow/src/record_batch.rs +++ b/rust/arrow/src/record_batch.rs @@ -367,7 +367,7 @@ mod tests { DataType::Int8, false, )))) - .add_child_data(a2_child.data()) + .add_child_data(a2_child.data().clone()) .len(2) .add_buffer(Buffer::from(vec![0i32, 3, 4].to_byte_slice())) .build(); @@ -376,8 +376,8 @@ mod tests { Field::new("aa1", DataType::Int32, false), Field::new("a2", a2.data_type().clone(), false), ])) - .add_child_data(a1.data()) - .add_child_data(a2.data()) + .add_child_data(a1.data().clone()) + .add_child_data(a2.data().clone()) .len(2) .build(); let a: ArrayRef = Arc::new(StructArray::from(a)); diff --git a/rust/arrow/src/util/integration_util.rs b/rust/arrow/src/util/integration_util.rs index 3bc43cd261f..ec2c294cb4e 100644 --- a/rust/arrow/src/util/integration_util.rs +++ b/rust/arrow/src/util/integration_util.rs @@ -238,7 +238,7 @@ impl ArrowJsonBatch { arr.equals_json(&json_array.iter().collect::>()[..]) } DataType::Int32 | DataType::Date32 | DataType::Time32(_) => { - let arr = Int32Array::from(arr.data()); + let arr = Int32Array::from(arr.data().clone()); arr.equals_json(&json_array.iter().collect::>()[..]) } DataType::Int64 @@ -246,15 +246,15 @@ impl ArrowJsonBatch { | DataType::Time64(_) | DataType::Timestamp(_, 
_) | DataType::Duration(_) => { - let arr = Int64Array::from(arr.data()); + let arr = Int64Array::from(arr.data().clone()); arr.equals_json(&json_array.iter().collect::>()[..]) } DataType::Interval(IntervalUnit::YearMonth) => { - let arr = IntervalYearMonthArray::from(arr.data()); + let arr = IntervalYearMonthArray::from(arr.data().clone()); arr.equals_json(&json_array.iter().collect::>()[..]) } DataType::Interval(IntervalUnit::DayTime) => { - let arr = IntervalDayTimeArray::from(arr.data()); + let arr = IntervalDayTimeArray::from(arr.data().clone()); let x = json_array .iter() .map(|v| { @@ -892,7 +892,7 @@ mod tests { let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) - .add_child_data(value_data.data()) + .add_child_data(value_data.data().clone()) .build(); let lists = ListArray::from(list_data); diff --git a/rust/datafusion/src/physical_plan/math_expressions.rs b/rust/datafusion/src/physical_plan/math_expressions.rs index 772b80d409a..382a15f8ccf 100644 --- a/rust/datafusion/src/physical_plan/math_expressions.rs +++ b/rust/datafusion/src/physical_plan/math_expressions.rs @@ -17,8 +17,6 @@ //! Math expressions -use std::sync::Arc; - use arrow::array::{make_array, Array, ArrayData, Float32Array, Float64Array}; use arrow::buffer::Buffer; use arrow::datatypes::{DataType, ToByteSlice}; @@ -41,7 +39,7 @@ macro_rules! 
compute_op { vec![Buffer::from(result.to_byte_slice())], vec![], ); - Ok(make_array(Arc::new(data))) + Ok(make_array(data)) }}; } diff --git a/rust/integration-testing/src/lib.rs b/rust/integration-testing/src/lib.rs index 5d35c3f00f8..22eed0395c5 100644 --- a/rust/integration-testing/src/lib.rs +++ b/rust/integration-testing/src/lib.rs @@ -419,7 +419,7 @@ fn array_from_json( .len(json_col.count) .offset(0) .add_buffer(Buffer::from(&offsets.to_byte_slice())) - .add_child_data(child_array.data()) + .add_child_data(child_array.data().clone()) .null_bit_buffer(null_buf) .build(); Ok(Arc::new(ListArray::from(list_data))) @@ -446,7 +446,7 @@ fn array_from_json( .len(json_col.count) .offset(0) .add_buffer(Buffer::from(&offsets.to_byte_slice())) - .add_child_data(child_array.data()) + .add_child_data(child_array.data().clone()) .null_bit_buffer(null_buf) .build(); Ok(Arc::new(LargeListArray::from(list_data))) @@ -461,7 +461,7 @@ fn array_from_json( let null_buf = create_null_buf(&json_col); let list_data = ArrayData::builder(field.data_type().clone()) .len(json_col.count) - .add_child_data(child_array.data()) + .add_child_data(child_array.data().clone()) .null_bit_buffer(null_buf) .build(); Ok(Arc::new(FixedSizeListArray::from(list_data))) @@ -475,7 +475,7 @@ fn array_from_json( for (field, col) in fields.iter().zip(json_col.children.unwrap()) { let array = array_from_json(field, col, dictionaries)?; - array_data = array_data.add_child_data(array.data()); + array_data = array_data.add_child_data(array.data().clone()); } let array = StructArray::from(array_data.build()); @@ -556,7 +556,7 @@ fn dictionary_array_from_json( .len(keys.len()) .add_buffer(keys.data().buffers()[0].clone()) .null_bit_buffer(null_buf) - .add_child_data(values.data()) + .add_child_data(values.data().clone()) .build(); let array = match dict_key { diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index 83ae04215b5..3fae72dcdd7 100644 --- 
a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -24,11 +24,11 @@ use std::sync::Arc; use std::vec::Vec; use arrow::array::{ - new_empty_array, Array, ArrayData, ArrayDataBuilder, ArrayDataRef, ArrayRef, - BinaryArray, BinaryBuilder, BooleanArray, BooleanBufferBuilder, BooleanBuilder, - DecimalBuilder, FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, - Int16BufferBuilder, Int32Array, Int64Array, OffsetSizeTrait, PrimitiveArray, - PrimitiveBuilder, StringArray, StringBuilder, StructArray, + new_empty_array, Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryArray, + BinaryBuilder, BooleanArray, BooleanBufferBuilder, BooleanBuilder, DecimalBuilder, + FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, + Int32Array, Int64Array, OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, + StringArray, StringBuilder, StructArray, }; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::datatypes::{ @@ -890,7 +890,7 @@ impl ArrayReader for ListArrayReader { let list_data = ArrayData::builder(self.get_data_type().clone()) .len(offsets.len() - 1) .add_buffer(value_offsets) - .add_child_data(batch_values.data()) + .add_child_data(batch_values.data().clone()) .null_bit_buffer(null_buf.into()) .offset(next_batch_array.offset()) .build(); @@ -1039,8 +1039,8 @@ impl ArrayReader for StructArrayReader { .child_data( children_array .iter() - .map(|x| x.data()) - .collect::>(), + .map(|x| x.data().clone()) + .collect::>(), ) .build(); diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs index 298ab34e008..1ce907f81c1 100644 --- a/rust/parquet/src/arrow/arrow_writer.rs +++ b/rust/parquet/src/arrow/arrow_writer.rs @@ -208,7 +208,7 @@ fn write_leaf( // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32 let array = if let ArrowDataType::Date64 = column.data_type() { let array = arrow::compute::cast(column, &ArrowDataType::Date32)?; - 
Arc::new(arrow_array::Int32Array::from(array.data())) + arrow::compute::cast(&array, &ArrowDataType::Int32)? } else { arrow::compute::cast(column, &ArrowDataType::Int32)? }; @@ -223,7 +223,10 @@ fn write_leaf( )? } ColumnWriter::BoolColumnWriter(ref mut typed) => { - let array = arrow_array::BooleanArray::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get boolean array"); typed.write_batch( get_bool_array_slice(&array, &indices).as_slice(), Some(levels.definition.as_slice()), @@ -231,9 +234,25 @@ fn write_leaf( )? } ColumnWriter::Int64ColumnWriter(ref mut typed) => { - let array = arrow_array::Int64Array::from(column.data()); + let values = match column.data_type() { + ArrowDataType::Int64 => { + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get i64 array"); + get_numeric_array_slice::(&array, &indices) + } + _ => { + let array = arrow::compute::cast(column, &ArrowDataType::Int64)?; + let array = array + .as_any() + .downcast_ref::() + .expect("Unable to get i64 array"); + get_numeric_array_slice::(&array, &indices) + } + }; typed.write_batch( - get_numeric_array_slice::(&array, &indices).as_slice(), + values.as_slice(), Some(levels.definition.as_slice()), levels.repetition.as_deref(), )? @@ -242,7 +261,10 @@ fn write_leaf( unreachable!("Currently unreachable because data type not supported") } ColumnWriter::FloatColumnWriter(ref mut typed) => { - let array = arrow_array::Float32Array::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get Float32 array"); typed.write_batch( get_numeric_array_slice::(&array, &indices).as_slice(), Some(levels.definition.as_slice()), @@ -250,7 +272,10 @@ fn write_leaf( )? 
} ColumnWriter::DoubleColumnWriter(ref mut typed) => { - let array = arrow_array::Float64Array::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get Float64 array"); typed.write_batch( get_numeric_array_slice::(&array, &indices).as_slice(), Some(levels.definition.as_slice()), @@ -259,7 +284,10 @@ fn write_leaf( } ColumnWriter::ByteArrayColumnWriter(ref mut typed) => match column.data_type() { ArrowDataType::Binary => { - let array = arrow_array::BinaryArray::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get BinaryArray array"); typed.write_batch( get_binary_array(&array).as_slice(), Some(levels.definition.as_slice()), @@ -267,7 +295,10 @@ fn write_leaf( )? } ArrowDataType::Utf8 => { - let array = arrow_array::StringArray::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get LargeBinaryArray array"); typed.write_batch( get_string_array(&array).as_slice(), Some(levels.definition.as_slice()), @@ -275,7 +306,10 @@ fn write_leaf( )? } ArrowDataType::LargeBinary => { - let array = arrow_array::LargeBinaryArray::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get LargeBinaryArray array"); typed.write_batch( get_large_binary_array(&array).as_slice(), Some(levels.definition.as_slice()), @@ -283,7 +317,10 @@ fn write_leaf( )? 
} ArrowDataType::LargeUtf8 => { - let array = arrow_array::LargeStringArray::from(column.data()); + let array = column + .as_any() + .downcast_ref::() + .expect("Unable to get LargeUtf8 array"); typed.write_batch( get_large_string_array(&array).as_slice(), Some(levels.definition.as_slice()), @@ -532,8 +569,8 @@ mod tests { assert_eq!(expected_batch.num_columns(), actual_batch.num_columns()); assert_eq!(expected_batch.num_rows(), actual_batch.num_rows()); for i in 0..expected_batch.num_columns() { - let expected_data = expected_batch.column(i).data(); - let actual_data = actual_batch.column(i).data(); + let expected_data = expected_batch.column(i).data().clone(); + let actual_data = actual_batch.column(i).data().clone(); assert_eq!(expected_data, actual_data); } @@ -582,7 +619,7 @@ mod tests { )))) .len(5) .add_buffer(a_value_offsets) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00011011])) .build(); let a = ListArray::from(a_list_data); @@ -742,7 +779,7 @@ mod tests { let g_list_data = ArrayData::builder(struct_field_g.data_type().clone()) .len(5) .add_buffer(g_value_offsets) - .add_child_data(g_value.data()) + .add_child_data(g_value.data().clone()) // .null_bit_buffer(Buffer::from(vec![0b00011011])) // TODO: add to test after resolving other issues .build(); let g = ListArray::from(g_list_data); @@ -780,13 +817,13 @@ mod tests { let b_data = ArrayDataBuilder::new(field_b.data_type().clone()) .len(6) .null_bit_buffer(Buffer::from(vec![0b00100111])) - .add_child_data(c.data()) + .add_child_data(c.data().clone()) .build(); let b = StructArray::from(b_data); let a_data = ArrayDataBuilder::new(field_a.data_type().clone()) .len(6) .null_bit_buffer(Buffer::from(vec![0b00101111])) - .add_child_data(b.data()) + .add_child_data(b.data().clone()) .build(); let a = StructArray::from(a_data); @@ -811,12 +848,12 @@ mod tests { let c = Int32Array::from(vec![1, 2, 3, 4, 5, 6]); let b_data = 
ArrayDataBuilder::new(field_b.data_type().clone()) .len(6) - .add_child_data(c.data()) + .add_child_data(c.data().clone()) .build(); let b = StructArray::from(b_data); let a_data = ArrayDataBuilder::new(field_a.data_type().clone()) .len(6) - .add_child_data(b.data()) + .add_child_data(b.data().clone()) .build(); let a = StructArray::from(a_data); @@ -843,13 +880,13 @@ mod tests { let b_data = ArrayDataBuilder::new(field_b.data_type().clone()) .len(6) .null_bit_buffer(Buffer::from(vec![0b00100111])) - .add_child_data(c.data()) + .add_child_data(c.data().clone()) .build(); let b = StructArray::from(b_data); // a intentionally has no null buffer, to test that this is handled correctly let a_data = ArrayDataBuilder::new(field_a.data_type().clone()) .len(6) - .add_child_data(b.data()) + .add_child_data(b.data().clone()) .build(); let a = StructArray::from(a_data); @@ -1219,7 +1256,7 @@ mod tests { .len(5) .add_buffer(a_value_offsets) .null_bit_buffer(Buffer::from(vec![0b00011011])) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .build(); assert_eq!(a_list_data.null_count(), 1); @@ -1242,7 +1279,7 @@ mod tests { )))) .len(5) .add_buffer(a_value_offsets) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .null_bit_buffer(Buffer::from(vec![0b00011011])) .build(); diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs index 4ea1811d29b..641e330522d 100644 --- a/rust/parquet/src/arrow/levels.rs +++ b/rust/parquet/src/arrow/levels.rs @@ -1174,7 +1174,7 @@ mod tests { .len(5) .add_buffer(a_value_offsets) .null_bit_buffer(Buffer::from(vec![0b00011011])) - .add_child_data(a_values.data()) + .add_child_data(a_values.data().clone()) .build(); assert_eq!(a_list_data.null_count(), 1); @@ -1278,7 +1278,7 @@ mod tests { let g_list_data = ArrayData::builder(struct_field_g.data_type().clone()) .len(5) .add_buffer(g_value_offsets) - .add_child_data(g_value.data()) + .add_child_data(g_value.data().clone()) 
.build(); let g = ListArray::from(g_list_data);