Skip to content

Commit d1eda54

Browse files
Added take with nulls.
1 parent 77960c3 commit d1eda54

File tree

4 files changed

+101
-5
lines changed

4 files changed

+101
-5
lines changed

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,7 @@ harness = false
2525
[[bench]]
2626
name = "take"
2727
harness = false
28+
29+
[[bench]]
30+
name = "take_nulls_bitmap"
31+
harness = false

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ core_simd_take 2^20 f32 time: [911.13 us 912.21 us 913.33 us]
5858
naive_take 2^20 f32 time: [912.39 us 915.22 us 918.41 us]
5959
```
6060

61+
### Nullable take of values (`Bitmap`)
62+
63+
```
64+
core_simd_take_nulls 2^20 f32 time: [950.40 us 954.08 us 958.88 us]
65+
naive_take_nulls 2^20 f32 time: [2.3714 ms 2.3968 ms 2.4296 ms]
66+
```
67+
6168
## Bench results on default
6269

6370
Command:

benches/take_nulls_bitmap.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use criterion::{criterion_group, criterion_main, Criterion};
2+
3+
use simd_benches::bitmap_ops;
4+
use simd_benches::take::*;
5+
6+
/// Asserts that two `f32` slices are element-wise approximately equal.
///
/// A pair `(l, r)` passes when `|l - r|` is below 0.1% of `|l|`, or when both
/// values are within `1e-6` of zero.
///
/// # Panics
///
/// Panics if the slices differ in length or if any pair is out of tolerance.
fn close(l: &[f32], r: &[f32]) {
    // `zip` stops at the shorter slice, so a length mismatch would otherwise go unnoticed.
    assert_eq!(l.len(), r.len(), "slices differ in length");
    for (l, r) in l.iter().zip(r.iter()) {
        // The tolerance is scaled by the magnitude of `l`; a signed `l` would give a
        // negative bound that no difference can satisfy when `l` is negative.
        assert!((l - r).abs() < l.abs() * 0.001 || (l.abs() < 0.000001 && r.abs() < 0.000001));
    }
}
11+
12+
fn add_benchmark(c: &mut Criterion) {
13+
let name = "";
14+
(10..=20).step_by(2).for_each(|log2_size| {
15+
let size = 2usize.pow(log2_size);
16+
let array = (0..size).map(|x| 1.0 + x as f32).collect::<Vec<_>>();
17+
let mut mask = vec![0u8; size / 8];
18+
// 10% nulls
19+
(0..size).for_each(|x| bitmap_ops::set_bit(&mut mask, x, (1 + x) % 10 != 0));
20+
let mask = (mask, size);
21+
let indices = (0..size).collect::<Vec<_>>();
22+
// check that they are equal...
23+
close(
24+
&core_simd_take_nulls(&array, &indices, &mask),
25+
&naive_take_nulls(&array, &indices, &mask),
26+
);
27+
28+
c.bench_function(
29+
&format!("core_simd_take_nulls{} 2^{} f32", name, log2_size),
30+
|b| b.iter(|| core_simd_take_nulls(&array, &indices, &mask)),
31+
);
32+
c.bench_function(
33+
&format!("naive_take_nulls{} 2^{} f32", name, log2_size),
34+
|b| b.iter(|| naive_take_nulls(&array, &indices, &mask)),
35+
);
36+
});
37+
}
38+
39+
criterion_group!(benches, add_benchmark);
40+
criterion_main!(benches);

src/take.rs

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,70 @@
11
use core_simd::*;
22

3+
use super::bitmap_ops::*;
4+
35
/// Gathers `values[i]` for every `i` in `indices`, in the order given by `indices`.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut taken = Vec::with_capacity(indices.len());
    for &index in indices {
        taken.push(values[index]);
    }
    taken
}
68

9+
/// Number of elements processed per SIMD vector.
const LANES: usize = 8;
/// Number of bitmap bytes that cover one SIMD vector (one validity bit per lane).
const MASK_LANES: usize = LANES / 8;
11+
712
pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
8-
let chunks = indices.chunks_exact(8);
13+
let chunks = indices.chunks_exact(LANES);
914
// todo handle remainder
1015

1116
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit
12-
let result_chunks = result.chunks_exact_mut(8);
17+
let result_chunks = result.chunks_exact_mut(LANES);
1318
chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| {
14-
let idxs: [usize; 8] = chunk.try_into().unwrap();
19+
let idxs: [usize; LANES] = chunk.try_into().unwrap();
1520
let idxs: usizex8 = usizex8::from_array(idxs);
1621

1722
let r = Simd::gather_or_default(&values, idxs);
18-
let r: [f32; 8] = r.to_array();
23+
let r: [f32; LANES] = r.to_array();
1924

20-
let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap();
25+
let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap();
2126
*r_chunk = r;
2227
});
2328

2429
result
2530
}
31+
32+
/// Validity bitmap as `(packed bits, number of bits)`; bits are read with `get_bit`,
/// and a set bit means the slot is taken while a cleared bit yields `0.0`.
type Bitmap = (Vec<u8>, usize);
33+
34+
/// Scalar reference implementation of a nullable take.
///
/// Output slot `p` is `values[indices[p]]` when bit `p` of `mask` is set and `0.0`
/// otherwise. The output length is the shorter of `indices.len()` and the bitmap
/// length `mask.1`.
///
/// # Panics
///
/// Panics if an index whose bit is set is out of bounds for `values`.
pub fn naive_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> {
    let bit_count = mask.1;
    let mut taken = Vec::with_capacity(indices.len().min(bit_count));
    for (position, &index) in indices.iter().enumerate().take(bit_count) {
        let value = if get_bit(&mask.0, position) {
            values[index]
        } else {
            0.0f32
        };
        taken.push(value);
    }
    taken
}
43+
44+
pub fn core_simd_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> {
45+
assert_eq!(mask.1 % 16, 0); // todo: handle remainders
46+
let chunks = indices.chunks_exact(LANES);
47+
let mask_chunks = mask.0.chunks_exact(MASK_LANES);
48+
//let remainder = chunks.remainder();
49+
//let mask_remainder = mask_chunks.remainder();
50+
51+
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit
52+
let result_chunks = result.chunks_exact_mut(LANES);
53+
chunks
54+
.zip(mask_chunks)
55+
.zip(result_chunks)
56+
.for_each(|((chunk, mask_chunk), r_chunk)| {
57+
let idxs: [usize; LANES] = chunk.try_into().unwrap();
58+
let idxs: usizex8 = usizex8::from_array(idxs);
59+
60+
let mask: [u8; MASK_LANES] = mask_chunk.try_into().unwrap();
61+
let mask = masksizex8::from_bitmask(mask);
62+
63+
let r = Simd::gather_select(&values, mask, idxs, Simd::splat(f32::default()));
64+
let r: [f32; LANES] = r.to_array();
65+
66+
let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap();
67+
*r_chunk = r;
68+
});
69+
result
70+
}

0 commit comments

Comments
 (0)