Skip to content

Commit 40ec044

Browse files
Take perf.
1 parent 009d419 commit 40ec044

File tree

4 files changed

+60
-0
lines changed

4 files changed

+60
-0
lines changed

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ harness = false
2121
[[bench]]
2222
name = "sum_nulls_bitmap"
2323
harness = false
24+
25+
[[bench]]
26+
name = "take"
27+
harness = false

benches/take.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use criterion::{criterion_group, criterion_main, Criterion};
2+
3+
use simd_benches::take::*;
4+
5+
/// Asserts that `l` and `r` have the same length and agree element-wise within
/// a relative tolerance of 0.1% of the magnitude of the element taken from `l`.
///
/// # Panics
///
/// Panics if the slices differ in length, or if any pair of elements is
/// further apart than the tolerance (NaN never compares as close).
fn close(l: &[f32], r: &[f32]) {
    // `zip` stops at the shorter slice, so without this check a truncated
    // result would be accepted as correct.
    assert_eq!(l.len(), r.len(), "slices differ in length");
    for (l, r) in l.iter().zip(r.iter()) {
        // Scale by the magnitude (not the signed value) and use `<=` so that
        // negative and zero entries can satisfy the bound when they match.
        assert!((l - r).abs() <= l.abs() * 0.001);
    }
}
10+
11+
fn add_benchmark(c: &mut Criterion) {
12+
let name = "";
13+
(10..=20).step_by(2).for_each(|log2_size| {
14+
let size = 2usize.pow(log2_size);
15+
let array = (0..size).map(|x| 1.0 + x as f32).collect::<Vec<_>>();
16+
let indices = (0..size).collect::<Vec<_>>();
17+
let result = naive_take(&array, &indices);
18+
19+
c.bench_function(
20+
&format!("core_simd_take{} 2^{} f32", name, log2_size),
21+
|b| b.iter(|| close(&core_simd_take(&array, &indices), &result)),
22+
);
23+
c.bench_function(&format!("naive_take{} 2^{} f32", name, log2_size), |b| {
24+
b.iter(|| close(&naive_take(&array, &indices), &result))
25+
});
26+
});
27+
}
28+
29+
criterion_group!(benches, add_benchmark);
30+
criterion_main!(benches);

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pub mod bitmap_ops;
44
pub mod sum;
55
pub mod sum_nulls;
66
pub mod sum_nulls_bitmap;
7+
pub mod take;

src/take.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use core_simd::*;
2+
3+
/// Gathers `values[i]` for every `i` in `indices`, in order, using plain
/// scalar indexing.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut taken = Vec::with_capacity(indices.len());
    for &index in indices {
        taken.push(values[index]);
    }
    taken
}
6+
7+
pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
8+
let chunks = indices.chunks_exact(8);
9+
// todo handle remainder
10+
11+
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit
12+
let result_chunks = result.chunks_exact_mut(8);
13+
chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| {
14+
let idxs: [usize; 8] = chunk.try_into().unwrap();
15+
let idxs: usizex8 = usizex8::from_array(idxs);
16+
17+
let r = Simd::gather_or_default(&values, idxs);
18+
let r: [f32; 8] = r.to_array();
19+
20+
let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap();
21+
*r_chunk = r;
22+
});
23+
24+
result
25+
}

0 commit comments

Comments
 (0)