Skip to content

Commit a9f316b

Browse files
authored
Use in-memory buffer for arrow_writer benchmark (#7823)
# Which issue does this PR close?

Prerequisite for investigating parquet writing performance (#7822).

# Rationale for this change

The benchmark should measure the CPU overhead of parquet writing, not the OS or filesystem parts of it. Running the benchmark showed that writing to the file adds nearly 50% overhead, which makes profiling more difficult by hiding the bottlenecks inside the parquet code itself.

# What changes are included in this PR?

Use an in-memory `Vec` instead of an unbuffered `File` as the sink.

# Are these changes tested?

Tested by running the benchmark.

# Are there any user-facing changes?

No.
1 parent 19a14dc commit a9f316b

File tree

1 file changed

+2
-5
lines changed

1 file changed

+2
-5
lines changed

parquet/benches/arrow_writer.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@
1919
extern crate criterion;
2020

2121
use criterion::{Criterion, Throughput};
22-
use std::env;
23-
use std::fs::File;
2422

2523
extern crate arrow;
2624
extern crate parquet;
@@ -349,9 +347,8 @@ fn write_batch_enable_bloom_filter(batch: &RecordBatch) -> Result<()> {
349347

350348
#[inline]
351349
fn write_batch_with_option(batch: &RecordBatch, props: Option<WriterProperties>) -> Result<()> {
352-
let path = env::temp_dir().join("arrow_writer.temp");
353-
let file = File::create(path).unwrap();
354-
let mut writer = ArrowWriter::try_new(file, batch.schema(), props)?;
350+
let mut file = vec![];
351+
let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), props)?;
355352

356353
writer.write(batch)?;
357354
writer.close()?;

0 commit comments

Comments
 (0)