Skip to content

Commit 94e3ee3

Browse files
committed
Recompiler improvements:
* cpu/drcbearm64.cpp, cpu/drcbex64.cpp: Optimised MULU/MULS flag calculation.
* cpu/drcbearm64.cpp: Handle degenerate forms of ROLAND/ROLINS when simplifier is disabled.
* cpu/drcbearm64.cpp, cpu/drcbex64.cpp, cpu/drcbex86.cpp: Removed assert that made SETFLGS impossible to use in debug builds.
1 parent bb3568a commit 94e3ee3

File tree

3 files changed

+147
-124
lines changed

3 files changed

+147
-124
lines changed

src/devices/cpu/drcbearm64.cpp

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2396,7 +2396,6 @@ void drcbe_arm64::op_getflgs(a64::Assembler &a, const uml::instruction &inst)
23962396
void drcbe_arm64::op_setflgs(a64::Assembler &a, const uml::instruction &inst)
23972397
{
23982398
assert(inst.size() == 4);
2399-
assert_no_condition(inst);
24002399

24012400
be_parameter flagsp(*this, inst.param(0), PTYPE_MRI);
24022401

@@ -3254,7 +3253,11 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst)
32543253
optimized = true;
32553254
}
32563255

3257-
if (is_right_aligned)
3256+
if (maskp.is_immediate_value(0))
3257+
{
3258+
a.mov(output, select_register(a64::xzr, inst.size()));
3259+
}
3260+
else if (is_right_aligned)
32583261
{
32593262
// Optimize a contiguous right-aligned mask
32603263
const auto s2 = -int(s) & (instbits - 1);
@@ -3372,7 +3375,13 @@ void drcbe_arm64::op_rolins(a64::Assembler &a, const uml::instruction &inst)
33723375
const bool is_contiguous = (invlamask & (invlamask + 1)) == 0;
33733376
const auto s = shiftp.immediate() & (instbits - 1);
33743377

3375-
if (is_right_aligned || is_contiguous)
3378+
if (maskp.is_immediate_value(0))
3379+
{
3380+
mov_reg_param(a, inst.size(), dst, dstp);
3381+
3382+
optimized = true;
3383+
}
3384+
else if (is_right_aligned || is_contiguous)
33763385
{
33773386
mov_reg_param(a, inst.size(), dst, dstp);
33783387

@@ -3836,12 +3845,14 @@ void drcbe_arm64::op_mulu(a64::Assembler &a, const uml::instruction &inst)
38363845
if (inst.size() == 8)
38373846
{
38383847
a.mul(lo, src1, src2);
3839-
a.umulh(hi, src1, src2);
3848+
if (compute_hi || inst.flags())
3849+
a.umulh(hi, src1, src2);
38403850
}
38413851
else
38423852
{
38433853
a.umull(lo, src1, src2);
3844-
a.lsr(hi, lo, 32);
3854+
if (compute_hi || inst.flags())
3855+
a.lsr(hi, lo, 32);
38453856
}
38463857
}
38473858

@@ -3851,23 +3862,31 @@ void drcbe_arm64::op_mulu(a64::Assembler &a, const uml::instruction &inst)
38513862

38523863
if (inst.flags())
38533864
{
3854-
a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV);
3865+
if (inst.flags() & uml::FLAG_Z)
3866+
{
3867+
a.cmp(lo, 0);
3868+
a.ccmp(hi, 0, 0, a64::CondCode::kEQ);
3869+
}
38553870

3856-
a.tst(lo, lo);
3857-
a.cset(TEMP_REG1, a64::CondCode::kEQ);
3858-
a.tst(hi, hi);
3859-
a.cset(TEMP_REG3, a64::CondCode::kEQ);
3860-
a.and_(TEMP_REG1, TEMP_REG1, TEMP_REG3);
3861-
a.bfi(SCRATCH_REG1, TEMP_REG1, 30, 1); // zero flag
3871+
if (inst.flags() & (uml::FLAG_V | uml::FLAG_S))
3872+
{
3873+
a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV);
38623874

3863-
a.tst(hi, hi); // overflow check
3864-
a.cset(TEMP_REG3, a64::CondCode::kNE);
3865-
a.bfi(SCRATCH_REG1, TEMP_REG3, 28, 1); // overflow flag
3875+
if (inst.flags() & uml::FLAG_V)
3876+
{
3877+
a.tst(hi, hi); // overflow check
3878+
a.cset(TEMP_REG3, a64::CondCode::kNE);
3879+
a.bfi(SCRATCH_REG1, TEMP_REG3, 28, 1); // overflow flag
3880+
}
38663881

3867-
a.lsr(TEMP_REG3, hi, inst.size() * 8 - 1); // take top bit of result as sign flag
3868-
a.bfi(SCRATCH_REG1, TEMP_REG3, 31, 1); // sign flag
3882+
if (inst.flags() & uml::FLAG_S)
3883+
{
3884+
a.lsr(TEMP_REG3, hi, inst.size() * 8 - 1); // take top bit of result as sign flag
3885+
a.bfi(SCRATCH_REG1, TEMP_REG3, 31, 1); // sign flag
3886+
}
38693887

3870-
a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1);
3888+
a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1);
3889+
}
38713890

38723891
m_carry_state = carry_state::POISON;
38733892
}
@@ -3963,12 +3982,14 @@ void drcbe_arm64::op_muls(a64::Assembler &a, const uml::instruction &inst)
39633982
if (inst.size() == 8)
39643983
{
39653984
a.mul(lo, src1, src2);
3966-
a.smulh(hi, src1, src2);
3985+
if (compute_hi || inst.flags())
3986+
a.smulh(hi, src1, src2);
39673987
}
39683988
else
39693989
{
39703990
a.smull(lo, src1, src2);
3971-
a.lsr(hi, lo, 32);
3991+
if (compute_hi || inst.flags())
3992+
a.lsr(hi, lo, 32);
39723993
}
39733994
}
39743995

@@ -3978,33 +3999,41 @@ void drcbe_arm64::op_muls(a64::Assembler &a, const uml::instruction &inst)
39783999

39794000
if (inst.flags())
39804001
{
3981-
a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV);
3982-
3983-
a.tst(lo, lo);
3984-
a.cset(TEMP_REG1, a64::CondCode::kEQ);
3985-
a.tst(hi, hi);
3986-
a.cset(SCRATCH_REG2, a64::CondCode::kEQ);
3987-
a.and_(TEMP_REG1, TEMP_REG1, SCRATCH_REG2);
3988-
a.bfi(SCRATCH_REG1, TEMP_REG1, 30, 1); // zero flag
3989-
3990-
if (inst.size() == 4)
4002+
if (inst.flags() & uml::FLAG_Z)
39914003
{
3992-
a.sxtw(TEMP_REG1, lo.w());
3993-
a.cmp(TEMP_REG1, lo);
4004+
a.cmp(lo, 0);
4005+
a.ccmp(hi, 0, 0, a64::CondCode::kEQ);
39944006
}
3995-
else
4007+
4008+
if (inst.flags() & (uml::FLAG_V | uml::FLAG_S))
39964009
{
3997-
a.asr(TEMP_REG1, lo, 63);
3998-
a.cmp(TEMP_REG1, hi);
3999-
}
4010+
a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV);
40004011

4001-
a.cset(TEMP_REG1, a64::CondCode::kNE);
4002-
a.bfi(SCRATCH_REG1, TEMP_REG1, 28, 1); // overflow flag
4012+
if (inst.flags() & uml::FLAG_V)
4013+
{
4014+
if (inst.size() == 4)
4015+
{
4016+
a.sxtw(TEMP_REG1, lo.w());
4017+
a.cmp(TEMP_REG1, lo);
4018+
}
4019+
else
4020+
{
4021+
a.asr(TEMP_REG1, lo, 63);
4022+
a.cmp(TEMP_REG1, hi);
4023+
}
40034024

4004-
a.lsr(TEMP_REG1, hi, inst.size() * 8 - 1); // take top bit of result as sign flag
4005-
a.bfi(SCRATCH_REG1, TEMP_REG1, 31, 1); // sign flag
4025+
a.cset(TEMP_REG1, a64::CondCode::kNE);
4026+
a.bfi(SCRATCH_REG1, TEMP_REG1, 28, 1); // overflow flag
4027+
}
40064028

4007-
a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1);
4029+
if (inst.flags() & uml::FLAG_S)
4030+
{
4031+
a.lsr(TEMP_REG1, hi, inst.size() * 8 - 1); // take top bit of result as sign flag
4032+
a.bfi(SCRATCH_REG1, TEMP_REG1, 31, 1); // sign flag
4033+
}
4034+
4035+
a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1);
4036+
}
40084037

40094038
m_carry_state = carry_state::POISON;
40104039
}

0 commit comments

Comments
 (0)