Skip to content

Commit 2a39622

Browse files
committed
bench: add ArrayIter benchmarks
Needed for: - #8697
1 parent 87154eb commit 2a39622

File tree

2 files changed

+311
-0
lines changed

2 files changed

+311
-0
lines changed

arrow/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ required-features = ["test_utils"]
123123
name = "array_from"
124124
harness = false
125125

126+
[[bench]]
127+
name = "array_iter"
128+
harness = false
129+
required-features = ["test_utils"]
130+
131+
126132
[[bench]]
127133
name = "builder"
128134
harness = false

arrow/benches/array_iter.rs

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate arrow;
19+
#[macro_use]
20+
extern crate criterion;
21+
22+
use criterion::{Criterion, Throughput};
23+
use std::hint;
24+
25+
use arrow::array::*;
26+
use arrow::util::bench_util::*;
27+
use arrow_array::types::{Int8Type, Int16Type, Int32Type, Int64Type};
28+
29+
const BATCH_SIZE: usize = 64 * 1024;
30+
31+
/// Run [`ArrayIter::fold`] while using black_box on each item and the result of the cb to prevent compiler optimizations.
32+
fn fold_black_box_item_and_cb_res<ArrayAcc, F, B>(array: ArrayAcc, init: B, mut f: F)
33+
where
34+
ArrayAcc: ArrayAccessor,
35+
F: FnMut(B, Option<ArrayAcc::Item>) -> B,
36+
{
37+
let result = ArrayIter::new(array).fold(hint::black_box(init), |acc, item| {
38+
let res = f(acc, hint::black_box(item));
39+
hint::black_box(res)
40+
});
41+
42+
hint::black_box(result);
43+
}
44+
/// Run [`ArrayIter::fold`] while using black_box on each item to prevent compiler optimizations.
45+
fn fold_black_box_item<ArrayAcc, F, B>(array: ArrayAcc, init: B, mut f: F)
46+
where
47+
ArrayAcc: ArrayAccessor,
48+
F: FnMut(B, Option<ArrayAcc::Item>) -> B,
49+
{
50+
let result = ArrayIter::new(array).fold(hint::black_box(init), |acc, item| {
51+
f(acc, hint::black_box(item))
52+
});
53+
54+
hint::black_box(result);
55+
}
56+
57+
/// Run [`ArrayIter::fold`] without using black_box on each item, but only on the result
58+
/// to see if the compiler can do more optimizations.
59+
fn fold_black_box_result<ArrayAcc, F, B>(array: ArrayAcc, init: B, f: F)
60+
where
61+
ArrayAcc: ArrayAccessor,
62+
F: FnMut(B, Option<ArrayAcc::Item>) -> B,
63+
{
64+
let result = ArrayIter::new(array).fold(hint::black_box(init), f);
65+
66+
hint::black_box(result);
67+
}
68+
69+
/// Run [`ArrayIter::any`] while using black_box on each item and the predicate return value to prevent compiler optimizations.
70+
fn any_black_box_item_and_predicate<ArrayAcc>(
71+
array: ArrayAcc,
72+
mut any_predicate: impl FnMut(Option<ArrayAcc::Item>) -> bool,
73+
) where
74+
ArrayAcc: ArrayAccessor,
75+
{
76+
let any_res = ArrayIter::new(array).any(|item| {
77+
let item = hint::black_box(item);
78+
let res = any_predicate(item);
79+
hint::black_box(res)
80+
});
81+
82+
hint::black_box(any_res);
83+
}
84+
85+
/// Run [`ArrayIter::any`] without using black_box in the loop, but only on the result
86+
/// to see if the compiler can do more optimizations.
87+
fn any_black_box_result<ArrayAcc>(
88+
array: ArrayAcc,
89+
any_predicate: impl FnMut(Option<ArrayAcc::Item>) -> bool,
90+
) where
91+
ArrayAcc: ArrayAccessor,
92+
{
93+
let any_res = ArrayIter::new(array).any(any_predicate);
94+
95+
hint::black_box(any_res);
96+
}
97+
98+
/// Benchmark [`ArrayIter`] functions,
99+
///
100+
/// The passed `predicate_that_will_always_evaluate_to_false` function should be a predicate
101+
/// that always returns `false` to ensure that the full array is always iterated over.
102+
///
103+
/// The predicate function should:
104+
/// 1. always return false
105+
/// 2. be impossible for the compiler to optimize away
106+
/// 3. not use `hint::black_box` internally (unless impossible) to allow for more compiler optimizations
107+
///
108+
/// the way to achieve this is to make the predicate check for a value that is not presented in the array.
109+
///
110+
/// The reason for these requirements is that we want to iterate over the entire array while
111+
/// letting the compiler have room for optimizations so it will be more representative of real world usage.
112+
fn benchmark_array_iter<ArrayAcc, FoldFn, FoldInit>(
113+
c: &mut Criterion,
114+
name: &str,
115+
nonnull_array: ArrayAcc,
116+
nullable_array: ArrayAcc,
117+
fold_init: FoldInit,
118+
fold_fn: FoldFn,
119+
predicate_that_will_always_evaluate_to_false: impl Fn(Option<ArrayAcc::Item>) -> bool,
120+
) where
121+
ArrayAcc: ArrayAccessor + Copy,
122+
FoldInit: Copy,
123+
FoldFn: Fn(FoldInit, Option<ArrayAcc::Item>) -> FoldInit,
124+
{
125+
let predicate_that_will_always_evaluate_to_false =
126+
&predicate_that_will_always_evaluate_to_false;
127+
let fold_fn = &fold_fn;
128+
129+
// Assert always false return false
130+
{
131+
let found = ArrayIter::new(nonnull_array).any(predicate_that_will_always_evaluate_to_false);
132+
assert!(!found, "The predicate must always evaluate to false");
133+
}
134+
{
135+
let found =
136+
ArrayIter::new(nullable_array).any(predicate_that_will_always_evaluate_to_false);
137+
assert!(!found, "The predicate must always evaluate to false");
138+
}
139+
140+
c.benchmark_group(name)
141+
.throughput(Throughput::Elements(BATCH_SIZE as u64))
142+
// Most of the Rust default iterator functions are implemented on top of 2 functions:
143+
// `fold` and `try_fold`
144+
// so we are benchmarking `fold` first
145+
.bench_function("nonnull fold black box item and fold result", |b| {
146+
b.iter(|| fold_black_box_item_and_cb_res(nonnull_array, fold_init, fold_fn))
147+
})
148+
.bench_function("nonnull fold black box item", |b| {
149+
b.iter(|| fold_black_box_item(nonnull_array, fold_init, fold_fn))
150+
})
151+
.bench_function("nonnull fold black box only result", |b| {
152+
b.iter(|| fold_black_box_result(nonnull_array, fold_init, fold_fn))
153+
})
154+
.bench_function("null fold black box item and fold result", |b| {
155+
b.iter(|| fold_black_box_item_and_cb_res(nullable_array, fold_init, fold_fn))
156+
})
157+
.bench_function("null fold black box item", |b| {
158+
b.iter(|| fold_black_box_item(nullable_array, fold_init, fold_fn))
159+
})
160+
.bench_function("null fold black box only result", |b| {
161+
b.iter(|| fold_black_box_result(nullable_array, fold_init, fold_fn))
162+
})
163+
// Due to `try_fold` not being available in stable Rust,
164+
// we are benchmarking `any` instead which the default Rust implementation
165+
// uses `try_fold` under the hood.
166+
.bench_function("nonnull any black box item and predicate", |b| {
167+
b.iter(|| {
168+
any_black_box_item_and_predicate(
169+
nonnull_array,
170+
predicate_that_will_always_evaluate_to_false,
171+
)
172+
})
173+
})
174+
.bench_function("nonnull any black box only result", |b| {
175+
b.iter(|| {
176+
any_black_box_result(nonnull_array, predicate_that_will_always_evaluate_to_false)
177+
})
178+
})
179+
.bench_function("null any black box item and predicate", |b| {
180+
b.iter(|| {
181+
any_black_box_item_and_predicate(
182+
nullable_array,
183+
predicate_that_will_always_evaluate_to_false,
184+
)
185+
})
186+
})
187+
.bench_function("null any black box only result", |b| {
188+
b.iter(|| {
189+
any_black_box_result(nullable_array, predicate_that_will_always_evaluate_to_false)
190+
})
191+
});
192+
}
193+
194+
/// Replace all occurrences of `item_to_replace` with `replace_with` in the given `PrimitiveArray`.
195+
/// will make it so we can filter by missing value
196+
fn replace_primitive_value<T>(
197+
array: PrimitiveArray<T>,
198+
item_to_replace: T::Native,
199+
replace_with: T::Native,
200+
) -> PrimitiveArray<T>
201+
where
202+
T: ArrowPrimitiveType,
203+
<T as ArrowPrimitiveType>::Native: Eq,
204+
{
205+
array.unary(|item| {
206+
if item == item_to_replace {
207+
replace_with
208+
} else {
209+
item
210+
}
211+
})
212+
}
213+
214+
fn add_benchmark(c: &mut Criterion) {
215+
benchmark_array_iter(
216+
c,
217+
"int8",
218+
&replace_primitive_value(create_primitive_array::<Int8Type>(BATCH_SIZE, 0.0), 42, 1),
219+
&replace_primitive_value(create_primitive_array::<Int8Type>(BATCH_SIZE, 0.5), 42, 1),
220+
// fold init
221+
0i8,
222+
// fold function
223+
|acc, item| acc.wrapping_add(item.unwrap_or_default()),
224+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
225+
|item| item == Some(42),
226+
);
227+
benchmark_array_iter(
228+
c,
229+
"int16",
230+
&replace_primitive_value(create_primitive_array::<Int16Type>(BATCH_SIZE, 0.0), 42, 1),
231+
&replace_primitive_value(create_primitive_array::<Int16Type>(BATCH_SIZE, 0.5), 42, 1),
232+
// fold init
233+
0i16,
234+
// fold function
235+
|acc, item| acc.wrapping_add(item.unwrap_or_default()),
236+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
237+
|item| item == Some(42),
238+
);
239+
benchmark_array_iter(
240+
c,
241+
"int32",
242+
&replace_primitive_value(create_primitive_array::<Int32Type>(BATCH_SIZE, 0.0), 42, 1),
243+
&replace_primitive_value(create_primitive_array::<Int32Type>(BATCH_SIZE, 0.5), 42, 1),
244+
// fold init
245+
0i32,
246+
// fold function
247+
|acc, item| acc.wrapping_add(item.unwrap_or_default()),
248+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
249+
|item| item == Some(42),
250+
);
251+
benchmark_array_iter(
252+
c,
253+
"int64",
254+
&replace_primitive_value(create_primitive_array::<Int64Type>(BATCH_SIZE, 0.0), 42, 1),
255+
&replace_primitive_value(create_primitive_array::<Int64Type>(BATCH_SIZE, 0.5), 42, 1),
256+
// fold init
257+
0i64,
258+
// fold function
259+
|acc, item| acc.wrapping_add(item.unwrap_or_default()),
260+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
261+
|item| item == Some(42),
262+
);
263+
264+
benchmark_array_iter(
265+
c,
266+
"string with len 16",
267+
&create_string_array_with_len::<i32>(BATCH_SIZE, 0.0, 16),
268+
&create_string_array_with_len::<i32>(BATCH_SIZE, 0.5, 16),
269+
// fold init
270+
0_usize,
271+
// fold function
272+
|acc, item| acc.wrapping_add(item.map(|item| item.len()).unwrap_or_default()),
273+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
274+
|item| item.is_some_and(|item| item.is_empty()),
275+
);
276+
277+
benchmark_array_iter(
278+
c,
279+
"string view with len 16",
280+
&create_string_view_array_with_len(BATCH_SIZE, 0.0, 16, false),
281+
&create_string_view_array_with_len(BATCH_SIZE, 0.5, 16, false),
282+
// fold init
283+
0_usize,
284+
// fold function
285+
|acc, item| acc.wrapping_add(item.map(|item| item.len()).unwrap_or_default()),
286+
// predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
287+
|item| item.is_some_and(|item| item.is_empty()),
288+
);
289+
290+
benchmark_array_iter(
291+
c,
292+
"boolean mixed true and false",
293+
&create_boolean_array(BATCH_SIZE, 0.0, 0.5),
294+
&create_boolean_array(BATCH_SIZE, 0.5, 0.5),
295+
// fold init
296+
0_usize,
297+
// fold function
298+
|acc, item| acc.wrapping_add(item.unwrap_or_default() as usize),
299+
// Must use black_box here as this can be optimized away
300+
|_item| hint::black_box(false),
301+
);
302+
}
303+
304+
// Criterion entry point: `add_benchmark` is collected into the `benches` group, which is
// then handed to `criterion_main!` (the bench target is declared with `harness = false`).
criterion_group!(benches, add_benchmark);
305+
criterion_main!(benches);

0 commit comments

Comments
 (0)