@@ -77,10 +77,10 @@ use object_store::{ObjectMeta, ObjectStore};
 use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
 use parquet::arrow::arrow_writer::{
     compute_leaves, ArrowColumnChunk, ArrowColumnWriter, ArrowLeafColumn,
-    ArrowRowGroupWriterFactory, ArrowWriterOptions,
+    ArrowWriterOptions,
 };
 use parquet::arrow::async_reader::MetadataFetch;
-use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, AsyncArrowWriter};
+use parquet::arrow::{parquet_to_arrow_schema, ArrowSchemaConverter, ArrowWriter, AsyncArrowWriter};
 use parquet::basic::Type;
 
 use parquet::errors::ParquetError;
@@ -1463,13 +1463,10 @@ type ColSender = Sender<ArrowLeafColumn>;
 /// Returns join handles for each column's serialization task along with a send channel
 /// to send arrow arrays to each serialization task.
 fn spawn_column_parallel_row_group_writer(
-    arrow_row_group_writer_factory: Arc<ArrowRowGroupWriterFactory>,
+    col_writers: Vec<ArrowColumnWriter>,
     max_buffer_size: usize,
     pool: &Arc<dyn MemoryPool>,
 ) -> Result<(Vec<ColumnWriterTask>, Vec<ColSender>)> {
-    let arrow_row_group_writer =
-        arrow_row_group_writer_factory.create_row_group_writer(0)?;
-    let col_writers = arrow_row_group_writer.into_column_writers();
     let num_columns = col_writers.len();
 
     let mut col_writer_tasks = Vec::with_capacity(num_columns);
@@ -1564,7 +1561,7 @@ fn spawn_rg_join_and_finalize_task(
 /// across both columns and row_groups, with a theoretical max number of parallel tasks
 /// given by n_columns * num_row_groups.
 fn spawn_parquet_parallel_serialization_task(
-    arrow_row_group_writer_factory: Arc<ArrowRowGroupWriterFactory>,
+    arrow_writer: ArrowWriter<SerializedFileWriter<SharedBuffer>>,
     mut data: Receiver<RecordBatch>,
     serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
     schema: Arc<Schema>,
@@ -1575,9 +1572,10 @@ fn spawn_parquet_parallel_serialization_task(
     SpawnedTask::spawn(async move {
         let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream;
         let max_row_group_rows = writer_props.max_row_group_size();
+        let col_writers = arrow_writer.get_column_writers().unwrap();
         let (mut column_writer_handles, mut col_array_channels) =
             spawn_column_parallel_row_group_writer(
-                Arc::clone(&arrow_row_group_writer_factory),
+                col_writers,
                 max_buffer_rb,
                 &pool,
             )?;
@@ -1631,7 +1629,7 @@ fn spawn_parquet_parallel_serialization_task(
 
                 (column_writer_handles, col_array_channels) =
                     spawn_column_parallel_row_group_writer(
-                        Arc::clone(&arrow_row_group_writer_factory),
+                        col_writers,
                         max_buffer_rb,
                         &pool,
                     )?;
@@ -1730,16 +1728,12 @@ async fn output_single_parquet_file_parallelized(
         parquet_schema.root_schema_ptr(),
         parquet_props.clone().into(),
     )?;
-    let arrow_row_group_writer_factory = ArrowRowGroupWriterFactory::new(
-        &parquet_writer,
-        parquet_schema,
-        Arc::clone(&output_schema),
-        parquet_props.clone().into(),
-    );
+    let writer = ArrowWriter::try_new(
+        parquet_writer, Arc::clone(&output_schema), Some(parquet_props.clone()))?;
 
     let arc_props = Arc::new(parquet_props.clone());
     let launch_serialization_task = spawn_parquet_parallel_serialization_task(
-        Arc::new(arrow_row_group_writer_factory),
+        writer,
         data,
         serialize_tx,
         Arc::clone(&output_schema),
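
For context, below is a minimal sketch of the column-parallel encoding pattern this change builds on, following the parquet crate's documented low-level `arrow_writer` API: per-column `ArrowColumnWriter`s are created up front, arrow arrays are split into leaf columns with `compute_leaves` and fanned out to the encoders, and the finished `ArrowColumnChunk`s are appended to a row group in column order. It deliberately uses plain threads, std channels, and a flat single-column schema in place of DataFusion's `SpawnedTask`, memory pool, and `SharedBuffer` plumbing, so treat it as an illustration of the pattern rather than the patch's actual code.

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use parquet::arrow::arrow_writer::{compute_leaves, get_column_writers, ArrowLeafColumn};
use parquet::arrow::ArrowSchemaConverter;
use parquet::file::properties::WriterProperties;
use parquet::file::writer::SerializedFileWriter;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int64, false)]));
    let props = Arc::new(WriterProperties::default());
    let parquet_schema = ArrowSchemaConverter::new().convert(&schema)?;

    // One encoder per parquet leaf column, each driven by its own thread.
    let col_writers = get_column_writers(&parquet_schema, &props, &schema)?;
    let workers: Vec<_> = col_writers
        .into_iter()
        .map(|mut writer| {
            let (tx, rx) = std::sync::mpsc::channel::<ArrowLeafColumn>();
            let handle = std::thread::spawn(move || {
                for col in rx {
                    writer.write(&col)?;
                }
                writer.close() // yields the encoded ArrowColumnChunk
            });
            (handle, tx)
        })
        .collect();

    // Fan a batch's arrays out to the per-column encoders.
    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef],
    )?;
    let mut worker_txs = workers.iter().map(|(_, tx)| tx);
    for (field, column) in schema.fields().iter().zip(batch.columns()) {
        // Nested arrow fields expand to several parquet leaves, hence the iterator.
        for leaf in compute_leaves(field, column)? {
            worker_txs.next().unwrap().send(leaf)?;
        }
    }

    // Stitch the finished chunks into one row group, in column order.
    let mut buffer = Vec::new();
    let mut file_writer =
        SerializedFileWriter::new(&mut buffer, parquet_schema.root_schema_ptr(), props)?;
    let mut row_group = file_writer.next_row_group()?;
    for (handle, tx) in workers {
        drop(tx); // closing the channel ends the worker's receive loop
        let chunk = handle.join().unwrap()?;
        chunk.append_to_row_group(&mut row_group)?;
    }
    row_group.close()?;
    file_writer.close()?;
    Ok(())
}
```

The patch itself sources its column writers from the high-level `ArrowWriter` (`arrow_writer.get_column_writers()`) rather than calling `get_column_writers` directly, which keeps the parallel path's encoder configuration in step with the writer that finalizes the file.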