@@ -44,6 +44,15 @@ namespace arrow::internal {
// - Inspect how swizzles across lanes are handled: is _mm256_shuffle_epi8 not used?
// - Investigate AVX2 with 128-bit registers
4646
// Build an array-like value in which every element equals `val`.
//
// Template parameter:
//   Arr - container type exposing a nested `value_type` and supporting
//         range-based iteration over mutable elements (e.g. a std::array).
// Parameter:
//   val - value assigned to every element of the result.
// Returns a new `Arr`, value-initialized and then filled with `val`.
//
// Declared constexpr so it can be called while building plans at compile time.
template <typename Arr>
constexpr Arr BuildConstantArray(typename Arr::value_type val) {
  // Value-initialize first: a constexpr function may not leave elements
  // uninitialized before they are assigned.
  Arr out = {};
  for (auto& v : out) {
    v = val;
  }
  return out;
}
55+
4756constexpr bool PackedIsOversizedForSimd (int simd_bit_size, int unpacked_bit_size,
4857 int packed_bit_size) {
4958 const int unpacked_per_simd = simd_bit_size / unpacked_bit_size;
@@ -219,6 +228,8 @@ constexpr MediumKernelPlan<UnpackedUint, kPackedBitSize, kSimdBitSize> BuildMedi
219228 plan.reads .at (r) = read_start_byte;
220229
221230 for (int sw = 0 ; sw < kPlanSize .swizzles_per_read (); ++sw) {
231+ constexpr int kUndefined = -1 ;
232+ plan.swizzles .at (r).at (sw) = BuildConstantArray<typename Plan::Swizzle>(kUndefined );
222233 for (int sh = 0 ; sh < kPlanSize .shifts_per_swizzle (); ++sh) {
223234 const int sh_offset_bytes = sh * kShape .packed_max_spread_bytes ();
224235 const int sh_offset_bits = 8 * sh_offset_bytes;
@@ -443,6 +454,10 @@ constexpr LargeKernelPlan<UnpackedUint, kPackedBitSize, kSimdBitSize> BuildLarge
443454 const int read_start_byte = packed_start_bit / 8 ;
444455 plan.reads .at (r) = read_start_byte;
445456
457+ constexpr int kUndefined = -1 ;
458+ plan.low_swizzles .at (r) = BuildConstantArray<typename Plan::Swizzle>(kUndefined );
459+ plan.high_swizzles .at (r) = BuildConstantArray<typename Plan::Swizzle>(kUndefined );
460+
446461 for (int u = 0 ; u < kShape .unpacked_per_simd (); ++u) {
447462 const int packed_start_byte = packed_start_bit / 8 ;
448463 const int packed_byte_in_read = packed_start_byte - read_start_byte;
0 commit comments