@@ -1313,6 +1313,7 @@ impl FileSink for ParquetSink {
1313
1313
. build ( ) ?;
1314
1314
let schema = get_writer_schema ( & self . config ) ;
1315
1315
let props = parquet_props. clone ( ) ;
1316
+ let skip_arrow_metadata = self . parquet_options . global . skip_arrow_metadata ;
1316
1317
let parallel_options_clone = parallel_options. clone ( ) ;
1317
1318
let pool = Arc :: clone ( context. memory_pool ( ) ) ;
1318
1319
file_write_tasks. spawn ( async move {
@@ -1321,6 +1322,7 @@ impl FileSink for ParquetSink {
1321
1322
rx,
1322
1323
schema,
1323
1324
& props,
1325
+ skip_arrow_metadata,
1324
1326
parallel_options_clone,
1325
1327
pool,
1326
1328
)
@@ -1647,7 +1649,8 @@ async fn output_single_parquet_file_parallelized(
1647
1649
object_store_writer : Box < dyn AsyncWrite + Send + Unpin > ,
1648
1650
data : Receiver < RecordBatch > ,
1649
1651
output_schema : Arc < Schema > ,
1650
- parquet_props : & WriterProperties ,
1652
+ writer_properties : & WriterProperties ,
1653
+ skip_arrow_metadata : bool ,
1651
1654
parallel_options : ParallelParquetWriterOptions ,
1652
1655
pool : Arc < dyn MemoryPool > ,
1653
1656
) -> Result < FileMetaData > {
@@ -1657,20 +1660,22 @@ async fn output_single_parquet_file_parallelized(
1657
1660
mpsc:: channel :: < SpawnedTask < RBStreamSerializeResult > > ( max_rowgroups) ;
1658
1661
1659
1662
let merged_buff = SharedBuffer :: new ( INITIAL_BUFFER_BYTES ) ;
1660
- let writer = ArrowWriter :: try_new (
1663
+ let options = ArrowWriterOptions :: new ( )
1664
+ . with_properties ( writer_properties. clone ( ) )
1665
+ . with_skip_arrow_metadata ( skip_arrow_metadata) ;
1666
+ let writer = ArrowWriter :: try_new_with_options (
1661
1667
merged_buff. clone ( ) ,
1662
1668
Arc :: clone ( & output_schema) ,
1663
- Some ( parquet_props . clone ( ) ) ,
1669
+ options ,
1664
1670
) ?;
1665
1671
let ( writer, row_group_writer_factory) = writer. into_serialized_writer ( ) ?;
1666
1672
1667
- let arc_props = Arc :: new ( parquet_props. clone ( ) ) ;
1668
1673
let launch_serialization_task = spawn_parquet_parallel_serialization_task (
1669
1674
row_group_writer_factory,
1670
1675
data,
1671
1676
serialize_tx,
1672
1677
Arc :: clone ( & output_schema) ,
1673
- Arc :: clone ( & arc_props ) ,
1678
+ Arc :: new ( writer_properties . clone ( ) ) ,
1674
1679
parallel_options,
1675
1680
Arc :: clone ( & pool) ,
1676
1681
) ;
0 commit comments