@@ -2396,7 +2396,6 @@ void drcbe_arm64::op_getflgs(a64::Assembler &a, const uml::instruction &inst)
2396
2396
void drcbe_arm64::op_setflgs (a64::Assembler &a, const uml::instruction &inst)
2397
2397
{
2398
2398
assert (inst.size () == 4 );
2399
- assert_no_condition (inst);
2400
2399
2401
2400
be_parameter flagsp (*this , inst.param (0 ), PTYPE_MRI);
2402
2401
@@ -3254,7 +3253,11 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst)
3254
3253
optimized = true ;
3255
3254
}
3256
3255
3257
- if (is_right_aligned)
3256
+ if (maskp.is_immediate_value (0 ))
3257
+ {
3258
+ a.mov (output, select_register (a64::xzr, inst.size ()));
3259
+ }
3260
+ else if (is_right_aligned)
3258
3261
{
3259
3262
// Optimize a contiguous right-aligned mask
3260
3263
const auto s2 = -int (s) & (instbits - 1 );
@@ -3372,7 +3375,13 @@ void drcbe_arm64::op_rolins(a64::Assembler &a, const uml::instruction &inst)
3372
3375
const bool is_contiguous = (invlamask & (invlamask + 1 )) == 0 ;
3373
3376
const auto s = shiftp.immediate () & (instbits - 1 );
3374
3377
3375
- if (is_right_aligned || is_contiguous)
3378
+ if (maskp.is_immediate_value (0 ))
3379
+ {
3380
+ mov_reg_param (a, inst.size (), dst, dstp);
3381
+
3382
+ optimized = true ;
3383
+ }
3384
+ else if (is_right_aligned || is_contiguous)
3376
3385
{
3377
3386
mov_reg_param (a, inst.size (), dst, dstp);
3378
3387
@@ -3836,12 +3845,14 @@ void drcbe_arm64::op_mulu(a64::Assembler &a, const uml::instruction &inst)
3836
3845
if (inst.size () == 8 )
3837
3846
{
3838
3847
a.mul (lo, src1, src2);
3839
- a.umulh (hi, src1, src2);
3848
+ if (compute_hi || inst.flags ())
3849
+ a.umulh (hi, src1, src2);
3840
3850
}
3841
3851
else
3842
3852
{
3843
3853
a.umull (lo, src1, src2);
3844
- a.lsr (hi, lo, 32 );
3854
+ if (compute_hi || inst.flags ())
3855
+ a.lsr (hi, lo, 32 );
3845
3856
}
3846
3857
}
3847
3858
@@ -3851,23 +3862,31 @@ void drcbe_arm64::op_mulu(a64::Assembler &a, const uml::instruction &inst)
3851
3862
3852
3863
if (inst.flags ())
3853
3864
{
3854
- a.mrs (SCRATCH_REG1, a64::Predicate::SysReg::kNZCV );
3865
+ if (inst.flags () & uml::FLAG_Z)
3866
+ {
3867
+ a.cmp (lo, 0 );
3868
+ a.ccmp (hi, 0 , 0 , a64::CondCode::kEQ );
3869
+ }
3855
3870
3856
- a.tst (lo, lo);
3857
- a.cset (TEMP_REG1, a64::CondCode::kEQ );
3858
- a.tst (hi, hi);
3859
- a.cset (TEMP_REG3, a64::CondCode::kEQ );
3860
- a.and_ (TEMP_REG1, TEMP_REG1, TEMP_REG3);
3861
- a.bfi (SCRATCH_REG1, TEMP_REG1, 30 , 1 ); // zero flag
3871
+ if (inst.flags () & (uml::FLAG_V | uml::FLAG_S))
3872
+ {
3873
+ a.mrs (SCRATCH_REG1, a64::Predicate::SysReg::kNZCV );
3862
3874
3863
- a.tst (hi, hi); // overflow check
3864
- a.cset (TEMP_REG3, a64::CondCode::kNE );
3865
- a.bfi (SCRATCH_REG1, TEMP_REG3, 28 , 1 ); // overflow flag
3875
+ if (inst.flags () & uml::FLAG_V)
3876
+ {
3877
+ a.tst (hi, hi); // overflow check
3878
+ a.cset (TEMP_REG3, a64::CondCode::kNE );
3879
+ a.bfi (SCRATCH_REG1, TEMP_REG3, 28 , 1 ); // overflow flag
3880
+ }
3866
3881
3867
- a.lsr (TEMP_REG3, hi, inst.size () * 8 - 1 ); // take top bit of result as sign flag
3868
- a.bfi (SCRATCH_REG1, TEMP_REG3, 31 , 1 ); // sign flag
3882
+ if (inst.flags () & uml::FLAG_S)
3883
+ {
3884
+ a.lsr (TEMP_REG3, hi, inst.size () * 8 - 1 ); // take top bit of result as sign flag
3885
+ a.bfi (SCRATCH_REG1, TEMP_REG3, 31 , 1 ); // sign flag
3886
+ }
3869
3887
3870
- a.msr (a64::Predicate::SysReg::kNZCV , SCRATCH_REG1);
3888
+ a.msr (a64::Predicate::SysReg::kNZCV , SCRATCH_REG1);
3889
+ }
3871
3890
3872
3891
m_carry_state = carry_state::POISON;
3873
3892
}
@@ -3963,12 +3982,14 @@ void drcbe_arm64::op_muls(a64::Assembler &a, const uml::instruction &inst)
3963
3982
if (inst.size () == 8 )
3964
3983
{
3965
3984
a.mul (lo, src1, src2);
3966
- a.smulh (hi, src1, src2);
3985
+ if (compute_hi || inst.flags ())
3986
+ a.smulh (hi, src1, src2);
3967
3987
}
3968
3988
else
3969
3989
{
3970
3990
a.smull (lo, src1, src2);
3971
- a.lsr (hi, lo, 32 );
3991
+ if (compute_hi || inst.flags ())
3992
+ a.lsr (hi, lo, 32 );
3972
3993
}
3973
3994
}
3974
3995
@@ -3978,33 +3999,41 @@ void drcbe_arm64::op_muls(a64::Assembler &a, const uml::instruction &inst)
3978
3999
3979
4000
if (inst.flags ())
3980
4001
{
3981
- a.mrs (SCRATCH_REG1, a64::Predicate::SysReg::kNZCV );
3982
-
3983
- a.tst (lo, lo);
3984
- a.cset (TEMP_REG1, a64::CondCode::kEQ );
3985
- a.tst (hi, hi);
3986
- a.cset (SCRATCH_REG2, a64::CondCode::kEQ );
3987
- a.and_ (TEMP_REG1, TEMP_REG1, SCRATCH_REG2);
3988
- a.bfi (SCRATCH_REG1, TEMP_REG1, 30 , 1 ); // zero flag
3989
-
3990
- if (inst.size () == 4 )
4002
+ if (inst.flags () & uml::FLAG_Z)
3991
4003
{
3992
- a.sxtw (TEMP_REG1, lo. w () );
3993
- a.cmp (TEMP_REG1, lo );
4004
+ a.cmp (lo, 0 );
4005
+ a.ccmp (hi, 0 , 0 , a64::CondCode:: kEQ );
3994
4006
}
3995
- else
4007
+
4008
+ if (inst.flags () & (uml::FLAG_V | uml::FLAG_S))
3996
4009
{
3997
- a.asr (TEMP_REG1, lo, 63 );
3998
- a.cmp (TEMP_REG1, hi);
3999
- }
4010
+ a.mrs (SCRATCH_REG1, a64::Predicate::SysReg::kNZCV );
4000
4011
4001
- a.cset (TEMP_REG1, a64::CondCode::kNE );
4002
- a.bfi (SCRATCH_REG1, TEMP_REG1, 28 , 1 ); // overflow flag
4012
+ if (inst.flags () & uml::FLAG_V)
4013
+ {
4014
+ if (inst.size () == 4 )
4015
+ {
4016
+ a.sxtw (TEMP_REG1, lo.w ());
4017
+ a.cmp (TEMP_REG1, lo);
4018
+ }
4019
+ else
4020
+ {
4021
+ a.asr (TEMP_REG1, lo, 63 );
4022
+ a.cmp (TEMP_REG1, hi);
4023
+ }
4003
4024
4004
- a.lsr (TEMP_REG1, hi, inst.size () * 8 - 1 ); // take top bit of result as sign flag
4005
- a.bfi (SCRATCH_REG1, TEMP_REG1, 31 , 1 ); // sign flag
4025
+ a.cset (TEMP_REG1, a64::CondCode::kNE );
4026
+ a.bfi (SCRATCH_REG1, TEMP_REG1, 28 , 1 ); // overflow flag
4027
+ }
4006
4028
4007
- a.msr (a64::Predicate::SysReg::kNZCV , SCRATCH_REG1);
4029
+ if (inst.flags () & uml::FLAG_S)
4030
+ {
4031
+ a.lsr (TEMP_REG1, hi, inst.size () * 8 - 1 ); // take top bit of result as sign flag
4032
+ a.bfi (SCRATCH_REG1, TEMP_REG1, 31 , 1 ); // sign flag
4033
+ }
4034
+
4035
+ a.msr (a64::Predicate::SysReg::kNZCV , SCRATCH_REG1);
4036
+ }
4008
4037
4009
4038
m_carry_state = carry_state::POISON;
4010
4039
}
0 commit comments