
Commit 079911c

Merge remote-tracking branch 'remotes/rth/tags/pull-fpu-20181005' into staging
Testing infrastructure for softfpu (not run by default).
Drop countLeadingZeros.
Fix div_floats.
Add udiv_qrnnd specializations for x86_64, s390x, ppc64 hosts.

# gpg: Signature made Fri 05 Oct 2018 19:00:09 BST
# gpg: using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <[email protected]>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-fpu-20181005:
  softfloat: Specialize udiv_qrnnd for ppc64
  softfloat: Specialize udiv_qrnnd for s390x
  softfloat: Specialize udiv_qrnnd for x86_64
  softfloat: Fix division
  softfloat: Replace countLeadingZeros32/64 with clz32/64
  tests/fp/fp-test: add floating point tests
  gitmodules: add berkeley's softfloat + testfloat version 3
  softfloat: remove float64_trunc_to_int

Signed-off-by: Peter Maydell <[email protected]>
2 parents bb94c11 + 27ae510 commit 079911c

13 files changed, 2392 insertions(+), 125 deletions(-)
.gitmodules

Lines changed: 6 additions & 0 deletions
@@ -43,3 +43,9 @@
 [submodule "roms/u-boot-sam460ex"]
 	path = roms/u-boot-sam460ex
 	url = git://git.qemu.org/u-boot-sam460ex.git
+[submodule "tests/fp/berkeley-testfloat-3"]
+	path = tests/fp/berkeley-testfloat-3
+	url = git://github.com/cota/berkeley-testfloat-3
+[submodule "tests/fp/berkeley-softfloat-3"]
+	path = tests/fp/berkeley-softfloat-3
+	url = git://github.com/cota/berkeley-softfloat-3

configure

Lines changed: 4 additions & 0 deletions
@@ -296,6 +296,8 @@ if test -e "$source_path/.git"
 then
     git_update=yes
     git_submodules="ui/keycodemapdb"
+    git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
+    git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
 else
     git_update=no
     git_submodules=""
@@ -7449,12 +7451,14 @@ fi
 
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests tests/vm"
+DIRS="$DIRS tests/fp"
 DIRS="$DIRS docs docs/interop fsdev scsi"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 FILES="Makefile tests/tcg/Makefile qdict-test-data.txt"
 FILES="$FILES tests/tcg/cris/Makefile tests/tcg/cris/.gdbinit"
 FILES="$FILES tests/tcg/lm32/Makefile tests/tcg/xtensa/Makefile po/Makefile"
+FILES="$FILES tests/fp/Makefile"
 FILES="$FILES pc-bios/optionrom/Makefile pc-bios/keymaps"
 FILES="$FILES pc-bios/spapr-rtas/Makefile"
 FILES="$FILES pc-bios/s390-ccw/Makefile"

fpu/softfloat.c

Lines changed: 40 additions & 28 deletions
@@ -1112,19 +1112,38 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
     bool sign = a.sign ^ b.sign;
 
     if (a.cls == float_class_normal && b.cls == float_class_normal) {
-        uint64_t temp_lo, temp_hi;
+        uint64_t n0, n1, q, r;
         int exp = a.exp - b.exp;
+
+        /*
+         * We want a 2*N / N-bit division to produce exactly an N-bit
+         * result, so that we do not lose any precision and so that we
+         * do not have to renormalize afterward. If A.frac < B.frac,
+         * then division would produce an (N-1)-bit result; shift A left
+         * by one to produce the an N-bit result, and decrement the
+         * exponent to match.
+         *
+         * The udiv_qrnnd algorithm that we're using requires normalization,
+         * i.e. the msb of the denominator must be set. Since we know that
+         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
+         * by one (more), and the remainder must be shifted right by one.
+         */
         if (a.frac < b.frac) {
             exp -= 1;
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
         } else {
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
         }
-        /* LSB of quot is set if inexact which roundandpack will use
-         * to set flags. Yet again we re-use a for the result */
-        a.frac = div128To64(temp_lo, temp_hi, b.frac);
+        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
+
+        /*
+         * Set lsb if there is a remainder, to set inexact.
+         * As mentioned above, to find the actual value of the remainder we
+         * would need to shift right, but (1) we are only concerned about
+         * non-zero-ness, and (2) the remainder will always be even because
+         * both inputs to the division primitive are even.
+         */
+        a.frac = q | (r != 0);
         a.sign = sign;
         a.exp = exp;
         return a;
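The comment block above is the heart of the fix: arrange a 2*N / N-bit division whose quotient is exactly N bits, then fold "remainder != 0" into the quotient's lsb so the rounding code sees the inexact condition. Below is a minimal standalone sketch of the same steps (not QEMU's code): unsigned __int128 stands in for udiv_qrnnd(), DECOMPOSED_BINARY_POINT is assumed to be 62 (fraction msb one below bit 63), and frac_div_sketch() is a hypothetical helper name.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DBP 62  /* assumed value of DECOMPOSED_BINARY_POINT (msb - 1) */

    /* Hypothetical helper: divide two decomposed fractions whose msb is at
     * bit DBP, returning a quotient with its msb at bit DBP and its lsb
     * doubling as the "inexact" sticky bit.  *exp_adj reports the exponent
     * decrement taken when a_frac < b_frac. */
    static uint64_t frac_div_sketch(uint64_t a_frac, uint64_t b_frac, int *exp_adj)
    {
        uint64_t d = b_frac << 1;            /* normalize divisor: msb set */
        unsigned __int128 n;
        uint64_t q, r;

        if (a_frac < b_frac) {
            *exp_adj = -1;                   /* pre-shift by one more bit */
            n = (unsigned __int128)a_frac << (DBP + 2);
        } else {
            *exp_adj = 0;
            n = (unsigned __int128)a_frac << (DBP + 1);
        }
        q = (uint64_t)(n / d);               /* stands in for udiv_qrnnd() */
        r = (uint64_t)(n % d);
        return q | (r != 0);                 /* nonzero remainder -> inexact */
    }

    int main(void)
    {
        int adj;
        /* 1.5 / 1.25 = 1.2 (inexact in binary); both fractions have msb at DBP */
        uint64_t q = frac_div_sketch(3ull << (DBP - 1), 5ull << (DBP - 2), &adj);
        printf("quot = 0x%016" PRIx64 ", exp adjust = %d\n", q, adj);
        return 0;
    }

With 1.5 / 1.25 the quotient 1.2 is not exactly representable, so the sticky lsb comes back set and a later round-and-pack step would raise inexact.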
@@ -1409,13 +1428,6 @@ float64 float64_round_to_int(float64 a, float_status *s)
     return float64_round_pack_canonical(pr, s);
 }
 
-float64 float64_trunc_to_int(float64 a, float_status *s)
-{
-    FloatParts pa = float64_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, float_round_to_zero, 0, s);
-    return float64_round_pack_canonical(pr, s);
-}
-
 /*
  * Returns the result of converting the floating-point value `a' to
  * the two's complement integer format. The conversion is performed
@@ -2690,7 +2702,7 @@ static void
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros32( aSig ) - 8;
+    shiftCount = clz32(aSig) - 8;
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 
@@ -2798,7 +2810,7 @@ static float32
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros32( zSig ) - 1;
+    shiftCount = clz32(zSig) - 1;
     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
                                status);
 
@@ -2831,7 +2843,7 @@ static void
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( aSig ) - 11;
+    shiftCount = clz64(aSig) - 11;
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 
@@ -2969,7 +2981,7 @@ static float64
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( zSig ) - 1;
+    shiftCount = clz64(zSig) - 1;
     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
                                status);
 
@@ -2987,7 +2999,7 @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( aSig );
+    shiftCount = clz64(aSig);
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 }
@@ -3226,7 +3238,7 @@ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
         zSig1 = 0;
         zExp -= 64;
     }
-    shiftCount = countLeadingZeros64( zSig0 );
+    shiftCount = clz64(zSig0);
     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
     zExp -= shiftCount;
     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
@@ -3303,7 +3315,7 @@ static void
     int8_t shiftCount;
 
     if ( aSig0 == 0 ) {
-        shiftCount = countLeadingZeros64( aSig1 ) - 15;
+        shiftCount = clz64(aSig1) - 15;
         if ( shiftCount < 0 ) {
             *zSig0Ptr = aSig1>>( - shiftCount );
             *zSig1Ptr = aSig1<<( shiftCount & 63 );
@@ -3315,7 +3327,7 @@ static void
         *zExpPtr = - shiftCount - 63;
     }
     else {
-        shiftCount = countLeadingZeros64( aSig0 ) - 15;
+        shiftCount = clz64(aSig0) - 15;
         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
         *zExpPtr = 1 - shiftCount;
     }
@@ -3504,7 +3516,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
         zSig1 = 0;
         zExp -= 64;
     }
-    shiftCount = countLeadingZeros64( zSig0 ) - 15;
+    shiftCount = clz64(zSig0) - 15;
     if ( 0 <= shiftCount ) {
         zSig2 = 0;
         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
@@ -3536,7 +3548,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status)
     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros32( absA ) + 32;
+    shiftCount = clz32(absA) + 32;
     zSig = absA;
     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
 
@@ -3558,7 +3570,7 @@ float128 int32_to_float128(int32_t a, float_status *status)
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros32( absA ) + 17;
+    shiftCount = clz32(absA) + 17;
     zSig0 = absA;
     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
 
@@ -3580,7 +3592,7 @@ floatx80 int64_to_floatx80(int64_t a, float_status *status)
     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
     zSign = ( a < 0 );
    absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros64( absA );
+    shiftCount = clz64(absA);
     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
 
 }
@@ -3602,7 +3614,7 @@ float128 int64_to_float128(int64_t a, float_status *status)
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros64( absA ) + 49;
+    shiftCount = clz64(absA) + 49;
     zExp = 0x406E - shiftCount;
     if ( 64 <= shiftCount ) {
         zSig1 = 0;
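The integer-conversion hunks above all follow the same pattern: count leading zeros of the magnitude, shift so the msb lands at bit 63 of the significand, and subtract the shift from a fixed starting exponent (0x403E is the floatx80 exponent of a value whose msb is bit 63, i.e. bias 16383 + 63; 0x402E and 0x406E play the same role in the float128 cases). A rough, self-contained illustration of the int64 -> floatx80 field computation, using hypothetical names rather than QEMU's packFloatx80():

    #include <stdint.h>
    #include <stdio.h>

    /* Expected clz64() contract: leading-zero count, 64 for a zero input. */
    static int clz64_like(uint64_t x)
    {
        return x ? __builtin_clzll(x) : 64;
    }

    int main(void)
    {
        int64_t a = 1000;                    /* any nonzero example input */
        int sign = a < 0;
        uint64_t abs_a = sign ? -(uint64_t)a : (uint64_t)a;
        int shift = clz64_like(abs_a);       /* a == 0 is special-cased upstream */
        uint64_t sig = abs_a << shift;       /* explicit integer bit lands at bit 63 */
        int exp = 0x403E - shift;            /* 0x403E = 16383 (bias) + 63 */

        printf("sign=%d exp=0x%04x sig=0x%016llx\n",
               sign, exp, (unsigned long long)sig);
        return 0;
    }

For a = 1000 the msb is bit 9, so shift = 54 and exp = 16383 + 9, which reproduces the value exactly as sig * 2^(exp - 16383 - 63).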

include/fpu/softfloat-macros.h

Lines changed: 53 additions & 96 deletions
@@ -79,17 +79,6 @@ this code that are retained.
  * version 2 or later. See the COPYING file in the top-level directory.
  */
 
-/*----------------------------------------------------------------------------
-| This macro tests for minimum version of the GNU C compiler.
-*----------------------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__GNUC_MINOR__)
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
-          ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
-#else
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
-#endif
-
-
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'. If any nonzero
 | bits are shifted off, they are ``jammed'' into the least significant bit of
@@ -340,15 +329,30 @@ static inline void
 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- shortShift128Left(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
+                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
+    *z1Ptr = a1 << count;
+    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+}
 
-    *z1Ptr = a1<<count;
-    *z0Ptr =
-        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' may be greater than 64. The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
 
+static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
+                                uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    if (count < 64) {
+        *z1Ptr = a1 << count;
+        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+    } else {
+        *z1Ptr = 0;
+        *z0Ptr = a1 << (count - 64);
+    }
 }
 
 /*----------------------------------------------------------------------------
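Unlike shortShift128Left(), the new shift128Left() accepts counts of 64 or more, which is what the division fix needs (assuming DECOMPOSED_BINARY_POINT is 62, DECOMPOSED_BINARY_POINT + 2 is exactly 64). A self-contained check of the helper as added above against an unsigned __int128 reference, assuming the compiler provides __int128:

    #include <stdint.h>
    #include <assert.h>

    /* Copy of the new helper as added above. */
    static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
                                    uint64_t *z0Ptr, uint64_t *z1Ptr)
    {
        if (count < 64) {
            *z1Ptr = a1 << count;
            *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
        } else {
            *z1Ptr = 0;
            *z0Ptr = a1 << (count - 64);
        }
    }

    int main(void)
    {
        uint64_t z0, z1;

        for (int count = 0; count < 128; count++) {
            uint64_t a0 = 0x0123456789abcdefull, a1 = 0xfedcba9876543210ull;
            unsigned __int128 ref = ((unsigned __int128)a0 << 64 | a1) << count;

            shift128Left(a0, a1, count, &z0, &z1);
            assert(z0 == (uint64_t)(ref >> 64) && z1 == (uint64_t)ref);
        }
        return 0;
    }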
@@ -630,8 +634,36 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
  *
  * Licensed under the GPLv2/LGPLv3
  */
-static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
+                                  uint64_t n0, uint64_t d)
 {
+#if defined(__x86_64__)
+    uint64_t q;
+    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
+    return q;
+#elif defined(__s390x__)
+    /* Need to use a TImode type to get an even register pair for DLGR. */
+    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
+    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
+    *r = n >> 64;
+    return n;
+#elif defined(_ARCH_PPC64)
+    /* From Power ISA 3.0B, programming note for divdeu. */
+    uint64_t q1, q2, Q, r1, r2, R;
+    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
+        : "=&r"(q1), "=r"(q2)
+        : "r"(n1), "r"(n0), "r"(d));
+    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
+    r2 = n0 - (q2 * d);
+    Q = q1 + q2;
+    R = r1 + r2;
+    if (R >= d || R < r2) { /* overflow implies R > d */
+        Q += 1;
+        R -= d;
+    }
+    *r = R;
+    return Q;
+#else
     uint64_t d0, d1, q0, q1, r1, r0, m;
 
     d0 = (uint32_t)d;
@@ -669,8 +701,9 @@ static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
     }
     r0 -= m;
 
-    /* Return remainder in LSB */
-    return (q1 << 32) | q0 | (r0 != 0);
+    *r = r0;
+    return (q1 << 32) | q0;
+#endif
 }
 
 /*----------------------------------------------------------------------------
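Whichever branch is compiled (x86_64 divq, s390x dlgr, the ppc64 divdeu reconstruction, or the portable fallback above), the contract is the same: divide the 128-bit value n1:n0 by d and return the 64-bit quotient, storing the remainder through r; the callers arrange for d's msb to be set and for n1 < d so the quotient fits in 64 bits. A small sanity check of that contract against an unsigned __int128 reference model (this models the interface, it is not the QEMU implementation):

    #include <stdint.h>
    #include <assert.h>
    #include <stdio.h>

    /* Reference model of the udiv_qrnnd() contract: divide n1:n0 by d,
     * with d's msb set and n1 < d so the quotient fits in 64 bits. */
    static uint64_t udiv_qrnnd_ref(uint64_t *r, uint64_t n1, uint64_t n0, uint64_t d)
    {
        unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
        *r = (uint64_t)(n % d);
        return (uint64_t)(n / d);
    }

    int main(void)
    {
        uint64_t d  = 0x8000000000000001ull;   /* msb set, as required */
        uint64_t n1 = 0x0123456789abcdefull;   /* n1 < d */
        uint64_t n0 = 0xfedcba9876543210ull;
        uint64_t r, q = udiv_qrnnd_ref(&r, n1, n0, d);
        unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;

        /* The defining identity: q * d + r == n1:n0, with r < d. */
        assert((unsigned __int128)q * d + r == n && r < d);
        printf("q=%016llx r=%016llx\n", (unsigned long long)q, (unsigned long long)r);
        return 0;
    }

The ppc64 path is the only one that has to rebuild q and r by hand: divdeu and divdu give two partial quotients whose sum, corrected by at most one, satisfies exactly this identity.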
@@ -712,82 +745,6 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'. If `a' is zero, 32 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros32(uint32_t a)
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clz(a);
-    } else {
-        return 32;
-    }
-#else
-    static const int8_t countLeadingZerosHigh[] = {
-        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-    };
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < 0x10000 ) {
-        shiftCount += 16;
-        a <<= 16;
-    }
-    if ( a < 0x1000000 ) {
-        shiftCount += 8;
-        a <<= 8;
-    }
-    shiftCount += countLeadingZerosHigh[ a>>24 ];
-    return shiftCount;
-#endif
-}
-
-/*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'. If `a' is zero, 64 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros64(uint64_t a)
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clzll(a);
-    } else {
-        return 64;
-    }
-#else
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < ( (uint64_t) 1 )<<32 ) {
-        shiftCount += 32;
-    }
-    else {
-        a >>= 32;
-    }
-    shiftCount += countLeadingZeros32( a );
-    return shiftCount;
-#endif
-}
-
 /*----------------------------------------------------------------------------
 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
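The removed helpers defined countLeadingZeros32(0) == 32 and countLeadingZeros64(0) == 64. The clz32()/clz64() calls that replace them come from QEMU's include/qemu/host-utils.h and are expected to keep that zero-input convention, since __builtin_clz()/__builtin_clzll() alone are undefined for a zero argument. Equivalents written out for reference, assuming a host where unsigned int is 32 bits:

    #include <stdint.h>
    #include <assert.h>

    /* Zero is handled explicitly because the builtins are undefined for 0. */
    static inline int clz32_equiv(uint32_t val)
    {
        return val ? __builtin_clz(val) : 32;
    }

    static inline int clz64_equiv(uint64_t val)
    {
        return val ? __builtin_clzll(val) : 64;
    }

    int main(void)
    {
        assert(clz32_equiv(0) == 32 && clz64_equiv(0) == 64);
        assert(clz32_equiv(1) == 31 && clz64_equiv(1) == 63);
        assert(clz32_equiv(0x80000000u) == 0);
        assert(clz64_equiv(0x8000000000000000ull) == 0);
        return 0;
    }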
