Skip to content

Commit 77960c3

Browse files
Take perf.
1 parent 009d419 commit 77960c3

File tree

5 files changed

+71
-0
lines changed

5 files changed

+71
-0
lines changed

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ harness = false
2121
[[bench]]
2222
name = "sum_nulls_bitmap"
2323
harness = false
24+
25+
[[bench]]
26+
name = "take"
27+
harness = false

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ nonsimd_sum bitmap 2^20 f32 [541.78 us 545.16 us 549.09 us]
5151
naive_sum bitmap 2^20 f32 [1.6740 ms 1.6922 ms 1.7149 ms]
5252
```
5353

54+
### Take of values
55+
56+
```
57+
core_simd_take 2^20 f32 time: [911.13 us 912.21 us 913.33 us]
58+
naive_take 2^20 f32 time: [912.39 us 915.22 us 918.41 us]
59+
```
60+
5461
## Bench results on default
5562

5663
Command:

benches/take.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
use criterion::{criterion_group, criterion_main, Criterion};
2+
3+
use simd_benches::take::*;
4+
5+
/// Asserts that `l` and `r` are element-wise equal within a 0.1% relative tolerance.
///
/// The tolerance is taken relative to the magnitude of the left-hand element, so
/// zero and negative values compare correctly (an exact match always passes).
///
/// # Panics
///
/// Panics if the slices differ in length or if any pair of elements differs by
/// more than `|l| * 0.001`.
fn close(l: &[f32], r: &[f32]) {
    // `zip` silently stops at the shorter slice, which would hide missing output.
    assert_eq!(l.len(), r.len(), "slices differ in length");
    for (l, r) in l.iter().zip(r.iter()) {
        // Use the magnitude of `l` and `<=` so that 0.0 == 0.0 and negative
        // values are accepted; a signed tolerance with `<` rejects them.
        assert!((l - r).abs() <= l.abs() * 0.001);
    }
}
10+
11+
fn add_benchmark(c: &mut Criterion) {
12+
let name = "";
13+
(10..=20).step_by(2).for_each(|log2_size| {
14+
let size = 2usize.pow(log2_size);
15+
let array = (0..size).map(|x| 1.0 + x as f32).collect::<Vec<_>>();
16+
let indices = (0..size).collect::<Vec<_>>();
17+
// check that they are equal...
18+
close(
19+
&core_simd_take(&array, &indices),
20+
&naive_take(&array, &indices),
21+
);
22+
23+
c.bench_function(
24+
&format!("core_simd_take{} 2^{} f32", name, log2_size),
25+
|b| b.iter(|| core_simd_take(&array, &indices)),
26+
);
27+
c.bench_function(&format!("naive_take{} 2^{} f32", name, log2_size), |b| {
28+
b.iter(|| naive_take(&array, &indices))
29+
});
30+
});
31+
}
32+
33+
// Collect `add_benchmark` into a criterion benchmark group named `benches`.
criterion_group!(benches, add_benchmark);
34+
// Entry point for this bench target (declared with `harness = false` in Cargo.toml).
criterion_main!(benches);

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pub mod bitmap_ops;
44
pub mod sum;
55
pub mod sum_nulls;
66
pub mod sum_nulls_bitmap;
7+
pub mod take;

src/take.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use core_simd::*;
2+
3+
/// Scalar reference implementation of "take": returns `values[i]` for every
/// `i` in `indices`, preserving the order of `indices`.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut taken = Vec::with_capacity(indices.len());
    for &index in indices {
        taken.push(values[index]);
    }
    taken
}
6+
7+
pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
8+
let chunks = indices.chunks_exact(8);
9+
// todo handle remainder
10+
11+
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit
12+
let result_chunks = result.chunks_exact_mut(8);
13+
chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| {
14+
let idxs: [usize; 8] = chunk.try_into().unwrap();
15+
let idxs: usizex8 = usizex8::from_array(idxs);
16+
17+
let r = Simd::gather_or_default(&values, idxs);
18+
let r: [f32; 8] = r.to_array();
19+
20+
let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap();
21+
*r_chunk = r;
22+
});
23+
24+
result
25+
}

0 commit comments

Comments
 (0)