@@ -49,10 +49,10 @@ struct zmm_vector<float16> {
4949
5050 static opmask_t ge (reg_t x, reg_t y)
5151 {
52- reg_t sign_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x8000 ));
53- reg_t sign_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x8000 ));
54- reg_t exp_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x7c00 ));
55- reg_t exp_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x7c00 ));
52+ reg_t sign_x = _mm512_and_si512 (x, _mm512_set1_epi16 (static_cast < short >( 0x8000 ) ));
53+ reg_t sign_y = _mm512_and_si512 (y, _mm512_set1_epi16 (static_cast < short >( 0x8000 ) ));
54+ reg_t exp_x = _mm512_and_si512 (x, _mm512_set1_epi16 (static_cast < short >( 0x7c00 ) ));
55+ reg_t exp_y = _mm512_and_si512 (y, _mm512_set1_epi16 (static_cast < short >( 0x7c00 ) ));
5656 reg_t mant_x = _mm512_and_si512 (x, _mm512_set1_epi16 (0x3ff ));
5757 reg_t mant_y = _mm512_and_si512 (y, _mm512_set1_epi16 (0x3ff ));
5858
@@ -62,7 +62,7 @@ struct zmm_vector<float16> {
6262 __mmask32 neg = _mm512_mask_cmpeq_epu16_mask (
6363 sign_eq,
6464 sign_x,
65- _mm512_set1_epi16 (0x8000 )); // both numbers are -ve
65+ _mm512_set1_epi16 (static_cast < short >( 0x8000 ) )); // both numbers are -ve
6666
6767 // compare exponents only if signs are equal:
6868 mask_ge = mask_ge
@@ -136,7 +136,7 @@ struct zmm_vector<float16> {
136136 static type_t float_to_uint16 (float val)
137137 {
138138 __m128 xmm = _mm_load_ss (&val);
139- __m128i xmm2 = _mm_cvtps_ph (xmm, _MM_FROUND_NO_EXC);
139+ __m128i xmm2 = _mm_cvtps_ph (xmm, 0 ); // Use 0 (round to nearest) instead of _MM_FROUND_NO_EXC
140140 return _mm_extract_epi16 (xmm2, 0 );
141141 }
142142 static type_t reducemax (reg_t v)
0 commit comments