|
1 | 1 | use core_simd::*;
|
2 | 2 |
|
| 3 | +use super::bitmap_ops::*; |
| 4 | + |
/// Scalar reference implementation of `take`: gathers `values[i]` for every
/// `i` in `indices`, in order.
///
/// The returned vector has exactly `indices.len()` elements.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    indices.iter().map(|&i| values[i]).collect()
}
|
6 | 8 |
|
/// Number of elements handled per SIMD iteration; used as the chunk size for
/// the index and result slices.
const LANES: usize = 8;
/// Number of bitmap bytes consumed per SIMD iteration (one validity bit per
/// lane). Derived from `LANES` so the two cannot drift apart.
const MASK_LANES: usize = LANES / 8;
| 11 | + |
7 | 12 | pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
|
8 |
| - let chunks = indices.chunks_exact(8); |
| 13 | + let chunks = indices.chunks_exact(LANES); |
9 | 14 | // todo handle remainder
|
10 | 15 |
|
11 | 16 | let mut result = vec![0.0; indices.len()]; // todo: maybeUninit
|
12 |
| - let result_chunks = result.chunks_exact_mut(8); |
| 17 | + let result_chunks = result.chunks_exact_mut(LANES); |
13 | 18 | chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| {
|
14 |
| - let idxs: [usize; 8] = chunk.try_into().unwrap(); |
| 19 | + let idxs: [usize; LANES] = chunk.try_into().unwrap(); |
15 | 20 | let idxs: usizex8 = usizex8::from_array(idxs);
|
16 | 21 |
|
17 | 22 | let r = Simd::gather_or_default(&values, idxs);
|
18 |
| - let r: [f32; 8] = r.to_array(); |
| 23 | + let r: [f32; LANES] = r.to_array(); |
19 | 24 |
|
20 |
| - let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap(); |
| 25 | + let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); |
21 | 26 | *r_chunk = r;
|
22 | 27 | });
|
23 | 28 |
|
24 | 29 | result
|
25 | 30 | }
|
| 31 | + |
/// Validity bitmap used by the `*_take_nulls` functions: `.0` holds the packed
/// bits (one bit per element) and `.1` is the number of bits in the bitmap.
type Bitmap = (Vec<u8>, usize);
| 33 | + |
| 34 | +pub fn naive_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> { |
| 35 | + let mask = (0..mask.1).map(|x| get_bit(&mask.0, x)); |
| 36 | + |
| 37 | + indices |
| 38 | + .iter() |
| 39 | + .zip(mask) |
| 40 | + .map(|(x, m)| if m { values[*x] } else { 0.0f32 }) |
| 41 | + .collect() |
| 42 | +} |
| 43 | + |
| 44 | +pub fn core_simd_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> { |
| 45 | + assert_eq!(mask.1 % 16, 0); // todo: handle remainders |
| 46 | + let chunks = indices.chunks_exact(LANES); |
| 47 | + let mask_chunks = mask.0.chunks_exact(MASK_LANES); |
| 48 | + //let remainder = chunks.remainder(); |
| 49 | + //let mask_remainder = mask_chunks.remainder(); |
| 50 | + |
| 51 | + let mut result = vec![0.0; indices.len()]; // todo: maybeUninit |
| 52 | + let result_chunks = result.chunks_exact_mut(LANES); |
| 53 | + chunks |
| 54 | + .zip(mask_chunks) |
| 55 | + .zip(result_chunks) |
| 56 | + .for_each(|((chunk, mask_chunk), r_chunk)| { |
| 57 | + let idxs: [usize; LANES] = chunk.try_into().unwrap(); |
| 58 | + let idxs: usizex8 = usizex8::from_array(idxs); |
| 59 | + |
| 60 | + let mask: [u8; MASK_LANES] = mask_chunk.try_into().unwrap(); |
| 61 | + let mask = masksizex8::from_bitmask(mask); |
| 62 | + |
| 63 | + let r = Simd::gather_select(&values, mask, idxs, Simd::splat(f32::default())); |
| 64 | + let r: [f32; LANES] = r.to_array(); |
| 65 | + |
| 66 | + let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); |
| 67 | + *r_chunk = r; |
| 68 | + }); |
| 69 | + result |
| 70 | +} |
0 commit comments