Skip to content

Commit c8bdedd

Browse files
committed
Update to arrow-57
1 parent ec3d20b commit c8bdedd

File tree

72 files changed

+1082
-1209
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+1082
-1209
lines changed

Cargo.lock

Lines changed: 104 additions & 243 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ half = { version = "2.7.0", default-features = false }
155155
hashbrown = { version = "0.14.5", features = ["raw"] }
156156
hex = { version = "0.4.3" }
157157
indexmap = "2.11.4"
158+
insta = { version = "1.43.2", features = ["glob", "filters"] }
158159
itertools = "0.14"
159160
log = "^0.4"
160161
object_store = { version = "0.12.4", default-features = false }
@@ -164,11 +165,10 @@ parquet = { version = "56.2.0", default-features = false, features = [
164165
"async",
165166
"object_store",
166167
] }
167-
pbjson = { version = "0.7.0" }
168-
pbjson-types = "0.7"
168+
pbjson = { version = "0.8.0" }
169+
pbjson-types = "0.8"
169170
# Should match arrow-flight's version of prost.
170-
insta = { version = "1.43.2", features = ["glob", "filters"] }
171-
prost = "0.13.1"
171+
prost = "0.14.1"
172172
rand = "0.9"
173173
recursive = "0.1.1"
174174
regex = "1.11"
@@ -260,3 +260,32 @@ incremental = false
260260
inherits = "release"
261261
debug = true
262262
strip = false
263+
264+
[patch.crates-io]
265+
# Local pin
266+
# arrow = { path = "/Users/andrewlamb/Software/arrow-rs/arrow" }
267+
# arrow-array = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-array" }
268+
# arrow-buffer = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-buffer" }
269+
# arrow-cast = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-cast" }
270+
# arrow-data = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-data" }
271+
# arrow-ipc = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-ipc" }
272+
# arrow-schema = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-schema" }
273+
# arrow-select = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-select" }
274+
# arrow-string = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-string" }
275+
# arrow-ord = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-ord" }
276+
# arrow-flight = { path = "/Users/andrewlamb/Software/arrow-rs/arrow-flight" }
277+
# parquet = { path = "/Users/andrewlamb/Software/arrow-rs/parquet" }
278+
279+
# latest arrow main: https://github.com/apache/arrow-rs
280+
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
281+
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
282+
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
283+
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
284+
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
285+
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
286+
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
287+
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
288+
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
289+
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
290+
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }
291+
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d49f017fe1c6712ba32e2222c6f031278b588ca5" }

datafusion-cli/src/functions.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,9 @@ impl TableFunctionImpl for ParquetMetadataFunc {
419419
stats_max_value_arr.push(None);
420420
};
421421
compression_arr.push(format!("{:?}", column.compression()));
422-
encodings_arr.push(format!("{:?}", column.encodings()));
422+
// need to collect into Vec to format
423+
let encodings: Vec<_> = column.encodings().collect();
424+
encodings_arr.push(format!("{:?}", encodings));
423425
index_page_offset_arr.push(column.index_page_offset());
424426
dictionary_page_offset_arr.push(column.dictionary_page_offset());
425427
data_page_offset_arr.push(column.data_page_offset());

datafusion-cli/src/main.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ mod tests {
497497
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
498498
| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
499499
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
500-
| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |
500+
| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 |
501501
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
502502
"#);
503503

@@ -510,7 +510,7 @@ mod tests {
510510
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
511511
| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
512512
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
513-
| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |
513+
| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 |
514514
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
515515
"#);
516516

@@ -532,7 +532,7 @@ mod tests {
532532
+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
533533
| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
534534
+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
535-
| ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 |
535+
| ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [PLAIN, RLE, BIT_PACKED] | | | 4 | 152 | 163 |
536536
+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
537537
"#);
538538

@@ -592,9 +592,9 @@ mod tests {
592592
+-----------------------------------+-----------------+---------------------+------+------------------+
593593
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
594594
+-----------------------------------+-----------------+---------------------+------+------------------+
595-
| alltypes_plain.parquet | 1851 | 10181 | 2 | page_index=false |
596-
| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
597-
| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 2 | page_index=false |
595+
| alltypes_plain.parquet | 1851 | 6957 | 2 | page_index=false |
596+
| alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true |
597+
| lz4_raw_compressed_larger.parquet | 380836 | 996 | 2 | page_index=false |
598598
+-----------------------------------+-----------------+---------------------+------+------------------+
599599
");
600600

@@ -623,9 +623,9 @@ mod tests {
623623
+-----------------------------------+-----------------+---------------------+------+------------------+
624624
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
625625
+-----------------------------------+-----------------+---------------------+------+------------------+
626-
| alltypes_plain.parquet | 1851 | 10181 | 5 | page_index=false |
627-
| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
628-
| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 3 | page_index=false |
626+
| alltypes_plain.parquet | 1851 | 6957 | 5 | page_index=false |
627+
| alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true |
628+
| lz4_raw_compressed_larger.parquet | 380836 | 996 | 3 | page_index=false |
629629
+-----------------------------------+-----------------+---------------------+------+------------------+
630630
");
631631

datafusion-examples/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ serde_json = { workspace = true }
8181
tempfile = { workspace = true }
8282
test-utils = { path = "../test-utils" }
8383
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
84-
tonic = "0.13.1"
84+
tonic = "0.14"
8585
tracing = { version = "0.1" }
8686
tracing-subscriber = { version = "0.3" }
8787
url = { workspace = true }

datafusion-examples/examples/flight/flight_client.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use std::collections::HashMap;
1919
use std::sync::Arc;
20+
use tonic::transport::Endpoint;
2021

2122
use datafusion::arrow::datatypes::Schema;
2223

@@ -34,7 +35,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
3435
let testdata = datafusion::test_util::parquet_test_data();
3536

3637
// Create Flight client
37-
let mut client = FlightServiceClient::connect("http://localhost:50051").await?;
38+
let endpoint = Endpoint::new("http://localhost:50051")?;
39+
let channel = endpoint.connect().await?;
40+
let mut client = FlightServiceClient::new(channel);
3841

3942
// Call get_schema to get the schema of a Parquet file
4043
let request = tonic::Request::new(FlightDescriptor {

datafusion-examples/examples/flight/flight_server.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator};
18+
use arrow::ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator};
1919
use std::sync::Arc;
2020

2121
use arrow_flight::{PollInfo, SchemaAsIpc};
@@ -106,6 +106,7 @@ impl FlightService for FlightServiceImpl {
106106

107107
// add an initial FlightData message that sends schema
108108
let options = arrow::ipc::writer::IpcWriteOptions::default();
109+
let mut compression_context = CompressionContext::default();
109110
let schema_flight_data = SchemaAsIpc::new(&schema, &options);
110111

111112
let mut flights = vec![FlightData::from(schema_flight_data)];
@@ -115,7 +116,7 @@ impl FlightService for FlightServiceImpl {
115116

116117
for batch in &results {
117118
let (flight_dictionaries, flight_batch) = encoder
118-
.encoded_batch(batch, &mut tracker, &options)
119+
.encode(batch, &mut tracker, &options, &mut compression_context)
119120
.map_err(|e: ArrowError| Status::internal(e.to_string()))?;
120121

121122
flights.extend(flight_dictionaries.into_iter().map(Into::into));

datafusion-examples/examples/parquet_encrypted.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
// under the License.
1717

1818
use datafusion::common::DataFusionError;
19-
use datafusion::config::TableParquetOptions;
19+
use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions};
2020
use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
2121
use datafusion::logical_expr::{col, lit};
2222
use datafusion::parquet::encryption::decrypt::FileDecryptionProperties;
2323
use datafusion::parquet::encryption::encrypt::FileEncryptionProperties;
2424
use datafusion::prelude::{ParquetReadOptions, SessionContext};
25+
use std::sync::Arc;
2526
use tempfile::TempDir;
2627

2728
#[tokio::main]
@@ -55,7 +56,7 @@ async fn main() -> datafusion::common::Result<()> {
5556

5657
// Write encrypted parquet
5758
let mut options = TableParquetOptions::default();
58-
options.crypto.file_encryption = Some((&encrypt).into());
59+
options.crypto.file_encryption = Some(ConfigFileEncryptionProperties::from(&encrypt));
5960
parquet_df
6061
.write_parquet(
6162
tempfile_str.as_str(),
@@ -100,7 +101,8 @@ async fn query_dataframe(df: &DataFrame) -> Result<(), DataFusionError> {
100101
// Setup encryption and decryption properties
101102
fn setup_encryption(
102103
parquet_df: &DataFrame,
103-
) -> Result<(FileEncryptionProperties, FileDecryptionProperties), DataFusionError> {
104+
) -> Result<(Arc<FileEncryptionProperties>, Arc<FileDecryptionProperties>), DataFusionError>
105+
{
104106
let schema = parquet_df.schema();
105107
let footer_key = b"0123456789012345".to_vec(); // 128bit/16
106108
let column_key = b"1234567890123450".to_vec(); // 128bit/16

datafusion-examples/examples/parquet_encrypted_with_kms.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ impl EncryptionFactory for TestEncryptionFactory {
226226
options: &EncryptionFactoryOptions,
227227
schema: &SchemaRef,
228228
_file_path: &Path,
229-
) -> Result<Option<FileEncryptionProperties>> {
229+
) -> Result<Option<Arc<FileEncryptionProperties>>> {
230230
let config: EncryptionConfig = options.to_extension_options()?;
231231

232232
// Generate a random encryption key for this file.
@@ -268,7 +268,7 @@ impl EncryptionFactory for TestEncryptionFactory {
268268
&self,
269269
_options: &EncryptionFactoryOptions,
270270
_file_path: &Path,
271-
) -> Result<Option<FileDecryptionProperties>> {
271+
) -> Result<Option<Arc<FileDecryptionProperties>>> {
272272
let decryption_properties =
273273
FileDecryptionProperties::with_key_retriever(Arc::new(TestKeyRetriever {}))
274274
.build()?;

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ log = { workspace = true }
7171
object_store = { workspace = true, optional = true }
7272
parquet = { workspace = true, optional = true, default-features = true }
7373
paste = "1.0.15"
74-
pyo3 = { version = "0.25", optional = true }
74+
pyo3 = { version = "0.26", optional = true }
7575
recursive = { workspace = true, optional = true }
7676
sqlparser = { workspace = true, optional = true }
7777
tokio = { workspace = true }

0 commit comments

Comments
 (0)