@@ -57,7 +57,7 @@ struct avx2_vector<int64_t> {
     static const uint8_t numlanes = 4;
     static constexpr int network_sort_threshold = 64;
     static constexpr int partition_unroll_factor = 4;
-
+
     using swizzle_ops = avx2_64bit_swizzle_ops;
 
     static type_t type_max()
@@ -85,14 +85,6 @@ struct avx2_vector<int64_t> {
     {
         return _mm256_xor_si256(x, y);
     }
-    static opmask_t knot_opmask(opmask_t x)
-    {
-        return ~x;
-    }
-    static opmask_t le(reg_t x, reg_t y)
-    {
-        return ~_mm256_cmpgt_epi64(x, y);
-    }
     static opmask_t ge(reg_t x, reg_t y)
     {
         opmask_t equal = eq(x, y);
@@ -113,8 +105,7 @@ struct avx2_vector<int64_t> {
     template <int scale>
     static reg_t i64gather(__m256i index, void const *base)
     {
-        return _mm256_i64gather_epi64(
-                (long long int const *)base, index, scale);
+        return _mm256_i64gather_epi64((int64_t const *)base, index, scale);
     }
     static reg_t loadu(void const *mem)
     {
@@ -205,10 +196,12 @@ struct avx2_vector<int64_t> {
     {
         return sort_ymm_64bit<avx2_vector<type_t>>(x);
     }
-    static reg_t cast_from(__m256i v){
+    static reg_t cast_from(__m256i v)
+    {
         return v;
     }
-    static __m256i cast_to(reg_t v){
+    static __m256i cast_to(reg_t v)
+    {
         return v;
     }
 };
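A note on the int64_t comparison helpers this patch touches: AVX2 has no dedicated 64-bit greater-or-equal compare, so ge is composed from eq and the signed _mm256_cmpgt_epi64, while the removed le and knot_opmask merely negated those results. A minimal standalone sketch of the ge composition (the wrapper name avx2_ge_epi64 is ours, for illustration only):

#include <immintrin.h>

// Sketch: ge(x, y) == (x == y) | (x > y), per signed 64-bit lane.
static inline __m256i avx2_ge_epi64(__m256i x, __m256i y)
{
    __m256i equal = _mm256_cmpeq_epi64(x, y);
    __m256i greater = _mm256_cmpgt_epi64(x, y);
    return _mm256_or_si256(equal, greater);
}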
@@ -221,7 +214,7 @@ struct avx2_vector<uint64_t> {
     static const uint8_t numlanes = 4;
     static constexpr int network_sort_threshold = 64;
     static constexpr int partition_unroll_factor = 4;
-
+
     using swizzle_ops = avx2_64bit_swizzle_ops;
 
     static type_t type_max()
@@ -257,10 +250,6 @@ struct avx2_vector<uint64_t> {
         return _mm256_i64gather_epi64(
                 (long long int const *)base, index, scale);
     }
-    static opmask_t knot_opmask(opmask_t x)
-    {
-        return ~x;
-    }
     static opmask_t ge(reg_t x, reg_t y)
     {
         opmask_t equal = eq(x, y);
@@ -362,10 +351,12 @@ struct avx2_vector<uint64_t> {
     {
         return sort_ymm_64bit<avx2_vector<type_t>>(x);
     }
-    static reg_t cast_from(__m256i v){
+    static reg_t cast_from(__m256i v)
+    {
         return v;
     }
-    static __m256i cast_to(reg_t v){
+    static __m256i cast_to(reg_t v)
+    {
         return v;
     }
 };
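The uint64_t variant keeps the same eq-plus-greater-than structure, but _mm256_cmpgt_epi64 is signed-only, so an unsigned compare is conventionally obtained by flipping the sign bit of both operands first. The hunk above cuts off before that part of the body; a sketch of the standard trick (the helper avx2_ge_epu64 and its exact formulation are ours, not taken from the patch):

#include <immintrin.h>
#include <cstdint>

// Sketch: unsigned 64-bit ge via the sign-bit flip, which maps
// unsigned order onto the signed order that cmpgt understands.
static inline __m256i avx2_ge_epu64(__m256i x, __m256i y)
{
    const __m256i bias = _mm256_set1_epi64x((int64_t)UINT64_C(0x8000000000000000));
    __m256i gt = _mm256_cmpgt_epi64(_mm256_xor_si256(x, bias),
                                    _mm256_xor_si256(y, bias));
    return _mm256_or_si256(gt, _mm256_cmpeq_epi64(x, y));
}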
@@ -378,7 +369,7 @@ struct avx2_vector<double> {
     static const uint8_t numlanes = 4;
     static constexpr int network_sort_threshold = 64;
     static constexpr int partition_unroll_factor = 4;
-
+
     using swizzle_ops = avx2_64bit_swizzle_ops;
 
     static type_t type_max()
@@ -421,10 +412,6 @@ struct avx2_vector<double> {
     {
         return _mm256_maskload_pd((const double *)mem, mask);
     }
-    static opmask_t knot_opmask(opmask_t x)
-    {
-        return ~x;
-    }
     static opmask_t ge(reg_t x, reg_t y)
     {
         return _mm256_castpd_si256(_mm256_cmp_pd(x, y, _CMP_GE_OQ));
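For double, ge maps directly onto _mm256_cmp_pd with _CMP_GE_OQ, an ordered, quiet (non-signaling) greater-or-equal: any lane containing a NaN compares false. A small self-contained check of that NaN behavior (assumed usage, not part of the patch):

#include <immintrin.h>
#include <cmath>
#include <cstdio>

int main()
{
    // Lanes, low to high: x = {NaN, 1, 2, 3}, y = {0, 0, 5, 3}.
    __m256d x = _mm256_set_pd(3.0, 2.0, 1.0, std::nan(""));
    __m256d y = _mm256_set_pd(3.0, 5.0, 0.0, 0.0);
    int mask = _mm256_movemask_pd(_mm256_cmp_pd(x, y, _CMP_GE_OQ));
    std::printf("%d\n", mask); // prints 10 (0b1010): NaN>=0 and 2>=5 are false
}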
@@ -531,55 +518,64 @@ struct avx2_vector<double> {
     {
         return sort_ymm_64bit<avx2_vector<type_t>>(x);
     }
-    static reg_t cast_from(__m256i v){
+    static reg_t cast_from(__m256i v)
+    {
         return _mm256_castsi256_pd(v);
     }
-    static __m256i cast_to(reg_t v){
+    static __m256i cast_to(reg_t v)
+    {
         return _mm256_castpd_si256(v);
     }
 };
 
-struct avx2_64bit_swizzle_ops{
+struct avx2_64bit_swizzle_ops {
     template <typename vtype, int scale>
-    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg){
+    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg)
+    {
         __m256i v = vtype::cast_to(reg);
 
-        if constexpr (scale == 2){
+        if constexpr (scale == 2) {
             v = _mm256_permute4x64_epi64(v, 0b10110001);
-        }else if constexpr (scale == 4){
+        }
+        else if constexpr (scale == 4) {
             v = _mm256_permute4x64_epi64(v, 0b01001110);
-        }else{
+        }
+        else {
             static_assert(scale == -1, "should not be reached");
         }
 
         return vtype::cast_from(v);
     }
 
     template <typename vtype, int scale>
-    X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(typename vtype::reg_t reg){
+    X86_SIMD_SORT_INLINE typename vtype::reg_t
+    reverse_n(typename vtype::reg_t reg)
+    {
         __m256i v = vtype::cast_to(reg);
 
-        if constexpr (scale == 2){
-            return swap_n<vtype, 2>(reg);
-        }else if constexpr (scale == 4){
+        if constexpr (scale == 2) { return swap_n<vtype, 2>(reg); }
+        else if constexpr (scale == 4) {
             return vtype::reverse(reg);
-        }else{
+        }
+        else {
             static_assert(scale == -1, "should not be reached");
         }
 
         return vtype::cast_from(v);
     }
 
     template <typename vtype, int scale>
-    X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(typename vtype::reg_t reg, typename vtype::reg_t other){
+    X86_SIMD_SORT_INLINE typename vtype::reg_t
+    merge_n(typename vtype::reg_t reg, typename vtype::reg_t other)
+    {
         __m256d v1 = _mm256_castsi256_pd(vtype::cast_to(reg));
         __m256d v2 = _mm256_castsi256_pd(vtype::cast_to(other));
 
-        if constexpr (scale == 2){
-            v1 = _mm256_blend_pd(v1, v2, 0b0101);
-        }else if constexpr (scale == 4){
+        if constexpr (scale == 2) { v1 = _mm256_blend_pd(v1, v2, 0b0101); }
+        else if constexpr (scale == 4) {
             v1 = _mm256_blend_pd(v1, v2, 0b0011);
-        }else{
+        }
+        else {
             static_assert(scale == -1, "should not be reached");
         }
 
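The immediates in the swizzle ops encode the lane movements directly: 0b10110001 selects lanes {1,0,3,2} (swap within each pair, scale 2), 0b01001110 selects {2,3,0,1} (swap the 128-bit halves, scale 4), and the blend masks 0b0101 and 0b0011 take lanes {0,2} and {0,1} respectively from the other vector. A quick standalone check of the two permutes (illustration only; compile with -mavx2):

#include <immintrin.h>
#include <cstdio>

int main()
{
    __m256i v = _mm256_set_epi64x(3, 2, 1, 0); // lanes, low to high: 0,1,2,3
    long long a[4], b[4];
    _mm256_storeu_si256((__m256i *)a, _mm256_permute4x64_epi64(v, 0b10110001));
    _mm256_storeu_si256((__m256i *)b, _mm256_permute4x64_epi64(v, 0b01001110));
    std::printf("%lld %lld %lld %lld\n", a[0], a[1], a[2], a[3]); // 1 0 3 2
    std::printf("%lld %lld %lld %lld\n", b[0], b[1], b[2], b[3]); // 2 3 0 1
}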