Skip to content

Commit cdbbbf7

Browse files
emilkirenjj
andauthored
Improve Display for DataType and Field (#8290)
This is part of an attempt to improve the error reporting of `arrow-rs`, `datafusion`, and any other 3rd party crates. I believe that error messages should be as readable as possible. Aim for `rustc` more than `gcc`. Here's an example of how this PR improves some existing error messages: Before: > Casting from Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Interval(DayTime), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) to Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Duration(Second), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, true) not supported After: > Casting from Map(Field { "entries": Struct(key Utf8, value nullable Interval(DayTime)) }, false) to Map(Field { "entries": Struct(key Utf8, value Duration(Second)) }, true) not supported # Which issue does this PR close? - Closes #7048 - Continues and closes #7051 - Continues #7469 - More improvements coming in #8291 - Sibling PR: apache/datafusion#17565 - Part of #8351 # Rationale for this change DataType:s are often shown in error messages. Making these error messages readable is _very important_. # What changes are included in this PR? ## ~Unify `Debug` and `Display`~ ~The `Display` and `Debug` of `DataType` are now the SAME.~ ~Why? Both are frequently used in error messages (both in arrow, and `datafusion`), and both benefit from being readable yet reversible.~ Reverted based on PR feedback. I will try to improve the `Debug` formatting in a future PR, with clever use of https://doc.rust-lang.org/std/fmt/struct.Formatter.html#method.debug_struct ## Improve `Display` of lists Improve the `Display` formatting of * `DataType::List` * `DataType::LargeList` * `DataType::FixedSizeList` **Before**: `List(Field { name: \"item\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })` **After**: `List(nullable Int32)` **Before**: `FixedSizeList(Field { name: \"item\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, 5)` **After**: `FixedSizeList(5 x Int32)` ### Better formatting of `DataType::Struct` The formatting of `Struct` is now reversible, including nullability and metadata. - Continues #7469 ### ~Improve `Debug` format of `Field`~ ~Best understood with this diff for an existing test:~ <img width="1140" height="499" alt="Screenshot 2025-09-07 at 18 30 44" src="https://github.com/user-attachments/assets/794b4de9-8459-4ee7-82d2-8f5ae248614c" /> **EDIT**: reverted # Are these changes tested? Yes - new tests cover them # Are there any user-facing changes? `Display/to_string` has changed, and so this is a **BREAKING CHANGE**. Care has been taken that the formatting contains all necessary information (i.e. is reversible), though the actual `FromStr` implementation is still not written (it is missing on `main`, and missing in this PR - so no change). ---- Let me know if I went to far… or not far enough 😆 --------- Co-authored-by: irenjj <[email protected]>
1 parent 010d0e7 commit cdbbbf7

File tree

29 files changed

+387
-116
lines changed

29 files changed

+387
-116
lines changed

arrow-array/src/array/fixed_size_list_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ impl From<ArrayData> for FixedSizeListArray {
350350
let value_length = match data.data_type() {
351351
DataType::FixedSizeList(_, len) => *len,
352352
data_type => {
353-
panic!("FixedSizeListArray data should contain a FixedSizeList data type, got {data_type:?}")
353+
panic!("FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}")
354354
}
355355
};
356356

arrow-array/src/array/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -824,20 +824,20 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
824824
DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
825825
DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
826826
DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
827-
dt => panic!("Unexpected dictionary key type {dt:?}"),
827+
dt => panic!("Unexpected dictionary key type {dt}"),
828828
},
829829
DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
830830
DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
831831
DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
832832
DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
833-
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
833+
dt => panic!("Unexpected data type for run_ends array {dt}"),
834834
},
835835
DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
836836
DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
837837
DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
838838
DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
839839
DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
840-
dt => panic!("Unexpected data type {dt:?}"),
840+
dt => panic!("Unexpected data type {dt}"),
841841
}
842842
}
843843

arrow-array/src/array/primitive_array.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,7 +1290,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
12901290
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
12911291
let data_type = self.data_type();
12921292

1293-
write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?;
1293+
write!(f, "PrimitiveArray<{data_type}>\n[\n")?;
12941294
print_long_array(self, f, |array, index, f| match data_type {
12951295
DataType::Date32 | DataType::Date64 => {
12961296
let v = self.value(index).to_i64().unwrap();
@@ -1299,7 +1299,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
12991299
None => {
13001300
write!(
13011301
f,
1302-
"Cast error: Failed to convert {v} to temporal for {data_type:?}"
1302+
"Cast error: Failed to convert {v} to temporal for {data_type}"
13031303
)
13041304
}
13051305
}
@@ -1311,7 +1311,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
13111311
None => {
13121312
write!(
13131313
f,
1314-
"Cast error: Failed to convert {v} to temporal for {data_type:?}"
1314+
"Cast error: Failed to convert {v} to temporal for {data_type}"
13151315
)
13161316
}
13171317
}

arrow-array/src/builder/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
567567
.with_values_field(fields[1].clone()),
568568
)
569569
}
570-
t => panic!("The field of Map data type {t:?} should have a child Struct field"),
570+
t => panic!("The field of Map data type {t} should have a child Struct field"),
571571
},
572572
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
573573
t @ DataType::Dictionary(key_type, value_type) => {
@@ -594,7 +594,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
594594
LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
595595
Box::new(dict_builder)
596596
}
597-
t => panic!("Dictionary value type {t:?} is not currently supported"),
597+
t => panic!("Dictionary value type {t} is not currently supported"),
598598
}
599599
};
600600
}
@@ -604,10 +604,10 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
604604
DataType::Int32 => dict_builder!(Int32Type),
605605
DataType::Int64 => dict_builder!(Int64Type),
606606
_ => {
607-
panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
607+
panic!("Data type {t} with key type {key_type} is not currently supported")
608608
}
609609
}
610610
}
611-
t => panic!("Data type {t:?} is not currently supported"),
611+
t => panic!("Data type {t} is not currently supported"),
612612
}
613613
}

arrow-array/src/builder/struct_builder.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ use std::sync::Arc;
6262
///
6363
/// // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood
6464
/// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
65-
///
65+
///
6666
/// // This panics in runtime, even though we know that the builder is a ListBuilder<StructBuilder>.
6767
/// // let sb = col_struct_builder
6868
/// // .field_builder::<ListBuilder<StructBuilder>>(0)
@@ -267,7 +267,7 @@ impl StructBuilder {
267267
let schema = builder.finish();
268268

269269
panic!("{}", format!(
270-
"StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
270+
"StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
271271
schema,
272272
idx,
273273
self.fields[idx].data_type(),
@@ -648,7 +648,7 @@ mod tests {
648648

649649
#[test]
650650
#[should_panic(
651-
expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
651+
expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
652652
)]
653653
fn test_struct_array_builder_unequal_field_builders_lengths() {
654654
let mut int_builder = Int32Builder::with_capacity(10);

arrow-array/src/ffi.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,11 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146146
if let Some(primitive) = data_type.primitive_width() {
147147
return match i {
148148
0 => Err(ArrowError::CDataInterface(format!(
149-
"The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
149+
"The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150150
))),
151151
1 => Ok(primitive * 8),
152152
i => Err(ArrowError::CDataInterface(format!(
153-
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
153+
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154154
))),
155155
};
156156
}
@@ -159,7 +159,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
159159
(DataType::Boolean, 1) => 1,
160160
(DataType::Boolean, _) => {
161161
return Err(ArrowError::CDataInterface(format!(
162-
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
162+
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163163
)))
164164
}
165165
(DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
@@ -169,7 +169,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
169169
},
170170
(DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171171
return Err(ArrowError::CDataInterface(format!(
172-
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
172+
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173173
)))
174174
},
175175
// Variable-size list and map have one i32 buffer.
@@ -179,12 +179,12 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
179179
(DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
180180
(DataType::List(_), _) | (DataType::Map(_, _), _) => {
181181
return Err(ArrowError::CDataInterface(format!(
182-
"The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
182+
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
183183
)))
184184
}
185185
(DataType::Utf8, _) | (DataType::Binary, _) => {
186186
return Err(ArrowError::CDataInterface(format!(
187-
"The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
187+
"The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
188188
)))
189189
}
190190
// Variable-sized binaries: have two buffers.
@@ -193,7 +193,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
193193
(DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _,
194194
(DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
195195
return Err(ArrowError::CDataInterface(format!(
196-
"The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
196+
"The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
197197
)))
198198
}
199199
// Variable-sized views: have 3 or more buffers.
@@ -209,24 +209,24 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
209209
(DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
210210
(DataType::Union(_, UnionMode::Sparse), _) => {
211211
return Err(ArrowError::CDataInterface(format!(
212-
"The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
212+
"The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
213213
)))
214214
}
215215
(DataType::Union(_, UnionMode::Dense), _) => {
216216
return Err(ArrowError::CDataInterface(format!(
217-
"The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
217+
"The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
218218
)))
219219
}
220220
(_, 0) => {
221221
// We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
222222
// UnionArray, they should be handled above.
223223
return Err(ArrowError::CDataInterface(format!(
224-
"The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
224+
"The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
225225
)))
226226
}
227227
_ => {
228228
return Err(ArrowError::CDataInterface(format!(
229-
"The datatype \"{data_type:?}\" is still not supported in Rust implementation"
229+
"The datatype \"{data_type}\" is still not supported in Rust implementation"
230230
)))
231231
}
232232
})

arrow-array/src/record_batch.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ impl RecordBatch {
360360

361361
if let Some((i, (col_type, field_type))) = not_match {
362362
return Err(ArrowError::InvalidArgumentError(format!(
363-
"column types must match schema types, expected {field_type:?} but found {col_type:?} at column index {i}")));
363+
"column types must match schema types, expected {field_type} but found {col_type} at column index {i}")));
364364
}
365365

366366
Ok(RecordBatch {
@@ -422,7 +422,7 @@ impl RecordBatch {
422422
/// // Insert a key-value pair into the metadata
423423
/// batch.schema_metadata_mut().insert("key".into(), "value".into());
424424
/// assert_eq!(batch.schema().metadata().get("key"), Some(&String::from("value")));
425-
/// ```
425+
/// ```
426426
pub fn schema_metadata_mut(&mut self) -> &mut std::collections::HashMap<String, String> {
427427
let schema = Arc::make_mut(&mut self.schema);
428428
&mut schema.metadata

arrow-cast/src/cast/dictionary.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
7878
UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
7979
_ => {
8080
return Err(ArrowError::CastError(format!(
81-
"Unsupported type {to_index_type:?} for dictionary index"
81+
"Unsupported type {to_index_type} for dictionary index"
8282
)));
8383
}
8484
};
@@ -313,7 +313,7 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
313313
pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
314314
}
315315
_ => Err(ArrowError::CastError(format!(
316-
"Unsupported output type for dictionary packing: {dict_value_type:?}"
316+
"Unsupported output type for dictionary packing: {dict_value_type}"
317317
))),
318318
}
319319
}

0 commit comments

Comments
 (0)