Skip to content

Commit ffe715a

Browse files
committed
implement floor and ceil in assembly on i586
1 parent e8cfc94 commit ffe715a

File tree

2 files changed

+56
-48
lines changed

2 files changed

+56
-48
lines changed

libm-test/src/precision.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
271271

272272
impl MaybeOverride<(f64,)> for SpecialCase {
273273
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
274-
if cfg!(x86_no_sse)
275-
&& ctx.base_name == BaseName::Ceil
276-
&& ctx.basis == CheckBasis::Musl
277-
&& input.0 < 0.0
278-
&& input.0 > -1.0
279-
&& expected == F::ZERO
280-
&& actual == F::ZERO
281-
{
282-
// musl returns -0.0, we return +0.0
283-
return XFAIL("i586 ceil signed zero");
284-
}
285-
286274
if cfg!(x86_no_sse)
287275
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
288276
&& (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
292280
return XFAIL("i586 rint rounding mode");
293281
}
294282

295-
if cfg!(x86_no_sse)
296-
&& (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
297-
&& expected.eq_repr(F::NEG_ZERO)
298-
&& actual.eq_repr(F::ZERO)
299-
{
300-
// FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
301-
// See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
302-
return XFAIL("i586 ceil/floor signed zero");
303-
}
304-
305283
if cfg!(x86_no_sse)
306284
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
307285
{

libm/src/math/arch/i586.rs

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,67 @@
11
//! Architecture-specific support for x86-32 without SSE2
22
3-
use super::super::fabs;
4-
53
/// Use an alternative implementation on x86, because the
64
/// main implementation fails with the x87 FPU used by
75
/// debian i386, probably due to excess precision issues.
8-
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
9-
pub fn ceil(x: f64) -> f64 {
10-
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
11-
let truncated = x as i64 as f64;
12-
if truncated < x {
13-
return truncated + 1.0;
14-
} else {
15-
return truncated;
16-
}
17-
} else {
18-
return x;
19-
}
6+
///
7+
/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_ceil.S
8+
/// (written by J.T. Conklin <jtc@NetBSD.org>).
9+
#[unsafe(naked)]
10+
pub extern "C" fn ceil(_: f64) -> f64 {
11+
core::arch::naked_asm!(
12+
"pushl %ebp",
13+
"movl %esp,%ebp",
14+
"subl $8,%esp",
15+
// Store fpu control word.
16+
"fstcw -4(%ebp)",
17+
"movw -4(%ebp),%dx",
18+
// Round towards +oo.
19+
"orw $0x0800,%dx",
20+
"andw $0xfbff,%dx",
21+
"movw %dx,-8(%ebp)",
22+
// Load modified control word
23+
"fldcw -8(%ebp)",
24+
// Round.
25+
"fldl 8(%ebp)",
26+
"frndint",
27+
// Restore original control word.
28+
"fldcw -4(%ebp)",
29+
// Restore esp and ebp and return
30+
"leave",
31+
"ret",
32+
options(att_syntax)
33+
)
2034
}
2135

2236
/// Use an alternative implementation on x86, because the
2337
/// main implementation fails with the x87 FPU used by
2438
/// debian i386, probably due to excess precision issues.
25-
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
26-
pub fn floor(x: f64) -> f64 {
27-
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
28-
let truncated = x as i64 as f64;
29-
if truncated > x {
30-
return truncated - 1.0;
31-
} else {
32-
return truncated;
33-
}
34-
} else {
35-
return x;
36-
}
39+
///
40+
/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_floor.S
41+
/// (written by J.T. Conklin <jtc@NetBSD.org>).
42+
#[unsafe(naked)]
43+
pub extern "C" fn floor(_: f64) -> f64 {
44+
core::arch::naked_asm!(
45+
"pushl %ebp",
46+
"movl %esp,%ebp",
47+
"subl $8,%esp",
48+
// Store fpu control word.
49+
"fstcw -4(%ebp)",
50+
"movw -4(%ebp),%dx",
51+
// Round towards -oo.
52+
"orw $0x0400,%dx",
53+
"andw $0xf7ff,%dx",
54+
"movw %dx,-8(%ebp)",
55+
// Load modified control word
56+
"fldcw -8(%ebp)",
57+
// Round.
58+
"fldl 8(%ebp)",
59+
"frndint",
60+
// Restore original control word.
61+
"fldcw -4(%ebp)",
62+
// Restore esp and ebp and return
63+
"leave",
64+
"ret",
65+
options(att_syntax)
66+
)
3767
}

0 commit comments

Comments
 (0)