From d93e58781e6b83982cd628cf2b6ceed474f1fedf Mon Sep 17 00:00:00 2001 From: Jonathan Marler Date: Wed, 25 Sep 2024 13:22:52 -0600 Subject: [PATCH] fixes for non-avx targets When emulating AVX for non-AVX targets, the __m256i type is being typedef'd; however, this type is defined in avxintrin.h, which is only included via immintrin.h if AVX is enabled. I've added an inline definition that seems to work on my x86_64 machine with clang when __AVX__ is not defined. Also, mm256_maddubs_epi16 was unable to compile for non-AVX targets; I've addressed this by only defining this function when either __AVX2__ or __SSSE3__ is defined. --- src/vec_avx.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/vec_avx.h b/src/vec_avx.h index a5040b45..cbab3303 100644 --- a/src/vec_avx.h +++ b/src/vec_avx.h @@ -168,6 +168,11 @@ typedef struct { __m128i lo; __m128i hi; } mm256i_emu; + +#ifndef __AVX__ +typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32))); +#endif + typedef __m256i real_m256i; #define __m256i mm256i_emu @@ -237,6 +242,8 @@ static inline mm256i_emu mm256_madd_epi16(mm256i_emu a, mm256i_emu b) { } #define _mm256_madd_epi16(a,b) mm256_madd_epi16(a,b) +#if defined(__AVX2__) || defined(__SSSE3__) + static inline mm256i_emu mm256_maddubs_epi16(mm256i_emu a, mm256i_emu b) { mm256i_emu ret; ret.lo = _mm_maddubs_epi16(a.lo, b.lo); @@ -245,7 +252,7 @@ static inline mm256i_emu mm256_maddubs_epi16(mm256i_emu a, mm256i_emu b) { } #define _mm256_maddubs_epi16(a,b) mm256_maddubs_epi16(a,b) - +#endif /* Emulating the conversion functions is tricky because they use __m256i but are defined in AVX. So we need to make a special when only AVX is available. */