5
5
#include < cstdint>
6
6
#include < cstring>
7
7
#include < iterator>
8
+ #include < type_traits>
8
9
9
10
#include " float_common.h"
10
11
12
+ #ifdef FASTFLOAT_SSE2
13
+ #include < emmintrin.h>
14
+ #endif
15
+
16
+
11
17
namespace fast_float {
12
18
19
+ template <typename UC>
20
+ fastfloat_really_inline constexpr bool has_simd_opt () {
21
+ #ifdef FASTFLOAT_HAS_SIMD
22
+ return std::is_same<UC, char16_t >::value;
23
+ #else
24
+ return false ;
25
+ #endif
26
+ }
27
+
13
28
// Next function can be micro-optimized, but compilers are entirely
14
29
// able to optimize it well.
15
30
template <typename UC>
@@ -28,12 +43,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
28
43
| (val & 0x00000000000000FF ) << 56 ;
29
44
}
30
45
46
+ // Read 8 UC into a u64. Truncates UC if not char.
47
+ template <typename UC>
31
48
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
32
- uint64_t read_u64 (const char *chars) {
33
- if (cpp20_and_in_constexpr ()) {
49
+ uint64_t read8_to_u64 (const UC *chars) {
50
+ if (cpp20_and_in_constexpr () || !std::is_same<UC, char >::value ) {
34
51
uint64_t val = 0 ;
35
52
for (int i = 0 ; i < 8 ; ++i) {
36
- val |= uint64_t (*chars) << (i*8 );
53
+ val |= uint64_t (uint8_t ( *chars) ) << (i*8 );
37
54
++chars;
38
55
}
39
56
return val;
@@ -47,6 +64,39 @@ uint64_t read_u64(const char *chars) {
47
64
return val;
48
65
}
49
66
67
+ #ifdef FASTFLOAT_SSE2
68
+
69
+ fastfloat_really_inline
70
+ uint64_t simd_read8_to_u64 (const __m128i data) {
71
+ FASTFLOAT_SIMD_DISABLE_WARNINGS
72
+ const __m128i packed = _mm_packus_epi16 (data, data);
73
+ #ifdef FASTFLOAT_64BIT
74
+ return uint64_t (_mm_cvtsi128_si64 (packed));
75
+ #else
76
+ uint64_t value;
77
+ // Visual Studio + older versions of GCC don't support _mm_storeu_si64
78
+ _mm_storel_epi64 (reinterpret_cast <__m128i*>(&value), packed);
79
+ return value;
80
+ #endif
81
+ FASTFLOAT_SIMD_RESTORE_WARNINGS
82
+ }
83
+
84
+ fastfloat_really_inline
85
+ uint64_t simd_read8_to_u64 (const char16_t * chars) {
86
+ FASTFLOAT_SIMD_DISABLE_WARNINGS
87
+ return simd_read8_to_u64 (_mm_loadu_si128 (reinterpret_cast <const __m128i*>(chars)));
88
+ FASTFLOAT_SIMD_RESTORE_WARNINGS
89
+ }
90
+
91
+ #endif
92
+
93
+ // dummy for compile
94
+ template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
95
+ uint64_t simd_read8_to_u64 (UC const *) {
96
+ return 0 ;
97
+ }
98
+
99
+
50
100
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
51
101
void write_u64 (uint8_t *chars, uint64_t val) {
52
102
if (cpp20_and_in_constexpr ()) {
@@ -76,40 +126,80 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
76
126
return uint32_t (val);
77
127
}
78
128
79
- fastfloat_really_inline constexpr
80
- uint32_t parse_eight_digits_unrolled (const char16_t *) noexcept {
81
- return 0 ;
82
- }
83
-
84
- fastfloat_really_inline constexpr
85
- uint32_t parse_eight_digits_unrolled (const char32_t *) noexcept {
86
- return 0 ;
87
- }
88
129
130
+ // Call this if chars are definitely 8 digits.
131
+ template <typename UC>
89
132
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
90
- uint32_t parse_eight_digits_unrolled (const char *chars) noexcept {
91
- return parse_eight_digits_unrolled (read_u64 (chars));
133
+ uint32_t parse_eight_digits_unrolled (UC const * chars) noexcept {
134
+ if (cpp20_and_in_constexpr () || !has_simd_opt<UC>()) {
135
+ return parse_eight_digits_unrolled (read8_to_u64 (chars)); // truncation okay
136
+ }
137
+ return parse_eight_digits_unrolled (simd_read8_to_u64 (chars));
92
138
}
93
139
140
+
94
141
// credit @aqrit
95
- fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast (uint64_t val) noexcept {
142
+ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast (uint64_t val) noexcept {
96
143
return !((((val + 0x4646464646464646 ) | (val - 0x3030303030303030 )) &
97
144
0x8080808080808080 ));
98
145
}
99
146
100
- fastfloat_really_inline constexpr
101
- bool is_made_of_eight_digits_fast (const char16_t *) noexcept {
102
- return false ;
147
+
148
+ #ifdef FASTFLOAT_HAS_SIMD
149
+
150
+ // Call this if chars might not be 8 digits.
151
+ // Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
152
+ // ensures we don't load SIMD registers twice.
153
+ fastfloat_really_inline FASTFLOAT_CONSTEXPR20
154
+ bool simd_parse_if_eight_digits_unrolled (const char16_t * chars, uint64_t & i) noexcept {
155
+ if (cpp20_and_in_constexpr ()) {
156
+ return false ;
157
+ }
158
+ #ifdef FASTFLOAT_SSE2
159
+ FASTFLOAT_SIMD_DISABLE_WARNINGS
160
+ const __m128i data = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(chars));
161
+
162
+ // (x - '0') <= 9
163
+ // http://0x80.pl/articles/simd-parsing-int-sequences.html
164
+ const __m128i t0 = _mm_add_epi16 (data, _mm_set1_epi16 (32720 ));
165
+ const __m128i t1 = _mm_cmpgt_epi16 (t0, _mm_set1_epi16 (-32759 ));
166
+
167
+ if (_mm_movemask_epi8 (t1) == 0 ) {
168
+ i = i * 100000000 + parse_eight_digits_unrolled (simd_read8_to_u64 (data));
169
+ return true ;
170
+ }
171
+ else return false ;
172
+ FASTFLOAT_SIMD_RESTORE_WARNINGS
173
+ #endif
103
174
}
104
175
105
- fastfloat_really_inline constexpr
106
- bool is_made_of_eight_digits_fast (const char32_t *) noexcept {
107
- return false ;
176
+ #endif
177
+
178
+ // dummy for compile
179
+ template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
180
+ uint64_t simd_parse_if_eight_digits_unrolled (UC const *, uint64_t &) {
181
+ return 0 ;
182
+ }
183
+
184
+
185
+ template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char >::value)>
186
+ fastfloat_really_inline FASTFLOAT_CONSTEXPR20
187
+ void loop_parse_if_eight_digits (const UC*& p, const UC* const pend, uint64_t & i) {
188
+ if (!has_simd_opt<UC>()) {
189
+ return ;
190
+ }
191
+ while ((std::distance (p, pend) >= 8 ) && simd_parse_if_eight_digits_unrolled (p, i)) { // in rare cases, this will overflow, but that's ok
192
+ p += 8 ;
193
+ }
108
194
}
109
195
110
196
// char input: consume the text eight digits at a time. While at least eight
// characters remain in [p, pend) and the next eight are all digits, fold them
// into i (i = i * 100000000 + value) and advance p by eight.
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
  // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
  while (std::distance(p, pend) >= 8) {
    if (!is_made_of_eight_digits_fast(read8_to_u64(p))) {
      break;
    }
    // in rare cases, this will overflow, but that's ok
    i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p));
    p += 8;
  }
}
114
204
115
205
template <typename UC>
@@ -124,8 +214,10 @@ struct parsed_number_string_t {
124
214
span<const UC> integer{}; // non-nullable
125
215
span<const UC> fraction{}; // nullable
126
216
};
127
- using byte_span = span<char >;
217
+
218
+ using byte_span = span<const char >;
128
219
using parsed_number_string = parsed_number_string_t <char >;
220
+
129
221
// Assuming that you use no more than 19 digits, this will
130
222
// parse an ASCII string.
131
223
template <typename UC>
@@ -171,12 +263,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
171
263
UC const * before = p;
172
264
// can occur at most twice without overflowing, but let it occur more, since
173
265
// for integers with many digits, digit parsing is the primary bottleneck.
174
- if (std::is_same<UC,char >::value) {
175
- while ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
176
- i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
177
- p += 8 ;
178
- }
179
- }
266
+ loop_parse_if_eight_digits (p, pend, i);
267
+
180
268
while ((p != pend) && is_integer (*p)) {
181
269
uint8_t digit = uint8_t (*p - UC (' 0' ));
182
270
++p;
@@ -241,29 +329,31 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
241
329
if (*start == UC (' 0' )) { digit_count --; }
242
330
start++;
243
331
}
332
+
244
333
if (digit_count > 19 ) {
245
334
answer.too_many_digits = true ;
246
335
// Let us start again, this time, avoiding overflows.
247
336
// We don't need to check if is_integer, since we use the
248
337
// pre-tokenized spans from above.
249
338
i = 0 ;
250
339
p = answer.integer .ptr ;
251
- UC const * int_end = p + answer.integer .len ();
252
- const uint64_t minimal_nineteen_digit_integer{1000000000000000000 };
253
- while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
340
+ UC const * int_end = p + answer.integer .len ();
341
+ const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
342
+ while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
254
343
i = i * 10 + uint64_t (*p - UC (' 0' ));
255
344
++p;
256
345
}
257
346
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
258
347
exponent = end_of_integer_part - p + exp_number;
259
- } else { // We have a value with a fractional component.
260
- p = answer.fraction .ptr ;
261
- UC const * frac_end = p + answer.fraction .len ();
262
- while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
263
- i = i * 10 + uint64_t (*p - UC (' 0' ));
264
- ++p;
265
- }
266
- exponent = answer.fraction .ptr - p + exp_number;
348
+ }
349
+ else { // We have a value with a fractional component.
350
+ p = answer.fraction .ptr ;
351
+ UC const * frac_end = p + answer.fraction .len ();
352
+ while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
353
+ i = i * 10 + uint64_t (*p - UC (' 0' ));
354
+ ++p;
355
+ }
356
+ exponent = answer.fraction .ptr - p + exp_number;
267
357
}
268
358
// We have now corrected both exponent and i, to a truncated value
269
359
}
0 commit comments