Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/arrow/examples/dynamic_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn process(batch: &RecordBatch) {
Arc::new(projected_schema),
vec![
id.clone(), // NOTE: this is cloning the Arc not the array data
Arc::new(Float64Array::from(nested_c.data())),
Arc::new(Float64Array::from(nested_c.data().clone())),
],
);
}
82 changes: 41 additions & 41 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use std::fmt;
use std::sync::Arc;
use std::{any::Any, convert::TryFrom};

use super::ArrayDataRef;
use super::*;
use crate::array::equal_json::JsonEqual;
use crate::buffer::{Buffer, MutableBuffer};
Expand Down Expand Up @@ -57,11 +56,13 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
/// ```
fn as_any(&self) -> &Any;

/// Returns a reference-counted pointer to the underlying data of this array.
fn data(&self) -> ArrayDataRef;
/// Returns a reference to the underlying data of this array.
fn data(&self) -> &ArrayData;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if both data() and data_ref() return &ArrayData may be just data_ref() is enough and data() should be removed; or possibly change data() to return ArrayData

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand why we wanted to avoid Array::data().clone() to get ArrayData, but with these 2 functions returning the same reference, I'd opt to make Array::data() -> ArrayData

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yordan-pavlov I think it's safer to deprecate data(), then remove it later. We still use it a lot in the codebase, but it's often more performant to use array_ref(), so returning ArrayData doesn't guide users on a faster path.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, deprecate data() is what I meant, immediate removal is not great for backwards compatibility


/// Returns a borrowed & reference-counted pointer to the underlying data of this array.
fn data_ref(&self) -> &ArrayDataRef;
/// Returns a reference-counted pointer to the underlying data of this array.
fn data_ref(&self) -> &ArrayData {
self.data()
}

/// Returns a reference to the [`DataType`](crate::datatypes::DataType) of this array.
///
Expand Down Expand Up @@ -93,7 +94,7 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
/// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4]));
/// ```
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
make_array(Arc::new(self.data_ref().as_ref().slice(offset, length)))
make_array(self.data_ref().slice(offset, length))
}

/// Returns the length (i.e., number of elements) of this array.
Expand Down Expand Up @@ -206,7 +207,7 @@ pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
fn to_raw(
&self,
) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
let data = self.data().as_ref().clone();
let data = self.data().clone();
let array = ffi::ArrowArray::try_from(data)?;
Ok(ffi::ArrowArray::into_raw(array))
}
Expand All @@ -217,7 +218,7 @@ pub type ArrayRef = Arc<Array>;

/// Constructs an array using the input `data`.
/// Returns a reference-counted `Array` instance.
pub fn make_array(data: ArrayDataRef) -> ArrayRef {
pub fn make_array(data: ArrayData) -> ArrayRef {
match data.data_type() {
DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
Expand Down Expand Up @@ -325,7 +326,7 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef {
/// Creates a new empty array
pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
let data = ArrayData::new_empty(data_type);
make_array(Arc::new(data))
make_array(data)
}
/// Creates a new array of `data_type` of length `length` filled entirely of `NULL` values
pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
Expand All @@ -334,15 +335,15 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::Null => Arc::new(NullArray::new(length)),
DataType::Boolean => {
let null_buf: Buffer = MutableBuffer::new_null(length).into();
make_array(Arc::new(ArrayData::new(
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(null_buf.clone()),
0,
vec![null_buf],
vec![],
)))
))
}
DataType::Int8 => new_null_sized_array::<Int8Type>(data_type, length),
DataType::UInt8 => new_null_sized_array::<UInt8Type>(data_type, length),
Expand Down Expand Up @@ -371,15 +372,15 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
new_null_sized_array::<IntervalDayTimeType>(data_type, length)
}
},
DataType::FixedSizeBinary(value_len) => make_array(Arc::new(ArrayData::new(
DataType::FixedSizeBinary(value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; *value_len as usize * length])],
vec![],
))),
)),
DataType::Binary | DataType::Utf8 => {
new_null_binary_array::<i32>(data_type, length)
}
Expand All @@ -392,21 +393,20 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::LargeList(field) => {
new_null_list_array::<i64>(data_type, field.data_type(), length)
}
DataType::FixedSizeList(field, value_len) => {
make_array(Arc::new(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data(),
],
)))
}
DataType::Struct(fields) => make_array(Arc::new(ArrayData::new(
DataType::FixedSizeList(field, value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data()
.clone(),
],
)),
DataType::Struct(fields) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Expand All @@ -415,22 +415,22 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
vec![],
fields
.iter()
.map(|field| Arc::new(ArrayData::new_empty(field.data_type())))
.map(|field| ArrayData::new_empty(field.data_type()))
.collect(),
))),
)),
DataType::Union(_) => {
unimplemented!("Creating null Union array not yet supported")
}
DataType::Dictionary(_, value) => {
make_array(Arc::new(ArrayData::new(
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![MutableBuffer::new(0).into()], // values are empty
vec![new_empty_array(value.as_ref()).data()],
)))
vec![new_empty_array(value.as_ref()).data().clone()],
))
}
DataType::Decimal(_, _) => {
unimplemented!("Creating null Decimal array not yet supported")
Expand All @@ -444,7 +444,7 @@ fn new_null_list_array<OffsetSize: OffsetSizeTrait>(
child_data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(Arc::new(ArrayData::new(
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Expand All @@ -453,16 +453,16 @@ fn new_null_list_array<OffsetSize: OffsetSizeTrait>(
vec![Buffer::from(
vec![OffsetSize::zero(); length + 1].to_byte_slice(),
)],
vec![Arc::new(ArrayData::new_empty(child_data_type))],
)))
vec![ArrayData::new_empty(child_data_type)],
))
}

#[inline]
fn new_null_binary_array<OffsetSize: OffsetSizeTrait>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(Arc::new(ArrayData::new(
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Expand All @@ -473,23 +473,23 @@ fn new_null_binary_array<OffsetSize: OffsetSizeTrait>(
MutableBuffer::new(0).into(),
],
vec![],
)))
))
}

#[inline]
fn new_null_sized_array<T: ArrowPrimitiveType>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(Arc::new(ArrayData::new(
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
vec![],
)))
))
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
Expand All @@ -501,7 +501,7 @@ pub unsafe fn make_array_from_raw(
schema: *const ffi::FFI_ArrowSchema,
) -> Result<ArrayRef> {
let array = ffi::ArrowArray::try_from_raw(array, schema)?;
let data = Arc::new(ArrayData::try_from(array)?);
let data = ArrayData::try_from(array)?;
Ok(make_array(data))
}
// Helper function for printing potentially long arrays.
Expand Down
45 changes: 15 additions & 30 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,13 @@
// specific language governing permissions and limitations
// under the License.

use std::convert::{From, TryInto};
use std::fmt;
use std::mem;
use std::{any::Any, iter::FromIterator};
use std::{
convert::{From, TryInto},
sync::Arc,
};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef,
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData,
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::buffer::Buffer;
Expand All @@ -47,7 +44,7 @@ impl BinaryOffsetSizeTrait for i64 {
}

pub struct GenericBinaryArray<OffsetSize: BinaryOffsetSizeTrait> {
data: ArrayDataRef,
data: ArrayData,
value_offsets: RawPtrBox<OffsetSize>,
value_data: RawPtrBox<u8>,
}
Expand Down Expand Up @@ -199,11 +196,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> Array for GenericBinaryArray<OffsetSize>
self
}

fn data(&self) -> ArrayDataRef {
self.data.clone()
}

fn data_ref(&self) -> &ArrayDataRef {
fn data(&self) -> &ArrayData {
&self.data
}

Expand All @@ -218,10 +211,10 @@ impl<OffsetSize: BinaryOffsetSizeTrait> Array for GenericBinaryArray<OffsetSize>
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayDataRef>
impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayData>
for GenericBinaryArray<OffsetSize>
{
fn from(data: ArrayDataRef) -> Self {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
&<OffsetSize as BinaryOffsetSizeTrait>::DATA_TYPE,
Expand Down Expand Up @@ -324,7 +317,7 @@ impl<T: BinaryOffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<

/// A type of `FixedSizeListArray` whose elements are binaries.
pub struct FixedSizeBinaryArray {
data: ArrayDataRef,
data: ArrayData,
value_data: RawPtrBox<u8>,
length: i32,
}
Expand Down Expand Up @@ -453,7 +446,7 @@ impl FixedSizeBinaryArray {
vec![buffer.into()],
vec![],
);
Ok(FixedSizeBinaryArray::from(Arc::new(array_data)))
Ok(FixedSizeBinaryArray::from(array_data))
}

/// Create an array from an iterable argument of byte slices.
Expand Down Expand Up @@ -521,8 +514,8 @@ impl FixedSizeBinaryArray {
}
}

impl From<ArrayDataRef> for FixedSizeBinaryArray {
fn from(data: ArrayDataRef) -> Self {
impl From<ArrayData> for FixedSizeBinaryArray {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.buffers().len(),
1,
Expand Down Expand Up @@ -583,11 +576,7 @@ impl Array for FixedSizeBinaryArray {
self
}

fn data(&self) -> ArrayDataRef {
self.data.clone()
}

fn data_ref(&self) -> &ArrayDataRef {
fn data(&self) -> &ArrayData {
&self.data
}

Expand All @@ -604,7 +593,7 @@ impl Array for FixedSizeBinaryArray {

/// A type of `DecimalArray` whose elements are binaries.
pub struct DecimalArray {
data: ArrayDataRef,
data: ArrayData,
value_data: RawPtrBox<u8>,
precision: usize,
scale: usize,
Expand Down Expand Up @@ -692,8 +681,8 @@ impl DecimalArray {
}
}

impl From<ArrayDataRef> for DecimalArray {
fn from(data: ArrayDataRef) -> Self {
impl From<ArrayData> for DecimalArray {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.buffers().len(),
1,
Expand Down Expand Up @@ -730,11 +719,7 @@ impl Array for DecimalArray {
self
}

fn data(&self) -> ArrayDataRef {
self.data.clone()
}

fn data_ref(&self) -> &ArrayDataRef {
fn data(&self) -> &ArrayData {
&self.data
}

Expand Down
Loading