@@ -1372,11 +1372,6 @@ def BREV64 :
1372
1372
// restriction in PTX?
1373
1373
//
1374
1374
// dest and src may be int32 or int64, but start and end are always int32.
1375
- def SDTBFE :
1376
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
1377
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
1378
- def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;
1379
-
1380
1375
def SDTBFI :
1381
1376
SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1382
1377
SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
@@ -1387,22 +1382,13 @@ def SDTPRMT :
1387
1382
SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
1388
1383
def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
1389
1384
1390
- multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
1385
+ // Defines three operand-form variants of a bit-field-extract instruction
+ // whose assembly string is Instr and whose destination / first source use
+ // register class RC:
+ //   rrr - $b and $c are both B32 registers
+ //   rri - $b is a B32 register, $c is an i32 immediate
+ //   rii - $b and $c are both i32 immediates
+ // None of the variants carries a selection pattern; they are plain
+ // instruction definitions.
+ multiclass BFE<string Instr, RegisterClass RC> {
1391
1386
def rrr
1392
- : BasicNVPTXInst<(outs RC:$d),
1393
- (ins RC:$a, B32:$b, B32:$c),
1394
- Instr,
1395
- [(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>;
1387
+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, B32:$c), Instr>;
1396
1388
def rri
1397
- : BasicNVPTXInst<(outs RC:$d),
1398
- (ins RC:$a, B32:$b, i32imm:$c),
1399
- Instr,
1400
- [(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>;
1389
+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, B32:$b, i32imm:$c), Instr>;
1401
1390
def rii
1402
- : BasicNVPTXInst<(outs RC:$d),
1403
- (ins RC:$a, i32imm:$b, i32imm:$c),
1404
- Instr,
1405
- [(set T:$d, (bfe T:$a, imm:$b, imm:$c))]>;
1391
+ : BasicNVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), Instr>;
1406
1392
}
1407
1393
1408
1394
multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
@@ -1447,10 +1433,10 @@ let hasSideEffects = false in {
1447
1433
// the same patterns, so the first one wins. Having unsigned byte extraction
1448
1434
// has the benefit of always having zero in unused bits, which makes some
1449
1435
// optimizations easier (e.g. no need to mask them).
1450
- defm BFE_U32 : BFE<"bfe.u32", i32, B32>;
1451
- defm BFE_S32 : BFE<"bfe.s32", i32, B32>;
1452
- defm BFE_U64 : BFE<"bfe.u64", i64, B64>;
1453
- defm BFE_S64 : BFE<"bfe.s64", i64, B64>;
1436
+ // Instantiate the unsigned/signed 32- and 64-bit bit-field-extract forms.
+ // NOTE(review): the BFE multiclass attaches no selection patterns, so the
+ // pattern-ordering rationale in the comment above may be stale - confirm.
+ defm BFE_U32 : BFE<"bfe.u32", B32>;
1437
+ defm BFE_S32 : BFE<"bfe.s32", B32>;
1438
+ defm BFE_U64 : BFE<"bfe.u64", B64>;
1439
+ defm BFE_S64 : BFE<"bfe.s64", B64>;
1454
1440
1455
1441
defm BFI_B32 : BFI<"bfi.b32", i32, B32, i32imm>;
1456
1442
defm BFI_B64 : BFI<"bfi.b64", i64, B64, i64imm>;
@@ -1487,19 +1473,26 @@ def : Pat<(fshr i32:$hi, i32:$lo, (shl i32:$amt, (i32 3))),
1487
1473
(PRMT_B32rrr $lo, $hi, $amt, PrmtF4E)>;
1488
1474
1489
1475
1476
+ // Immediate predicate accepting exactly the four PRMT selector values
+ // 0x7770, 0x7771, 0x7772 and 0x7773. Per the PTX prmt default mode, the low
+ // nibble picks which source byte lands in result byte 0, and each 0x7 nibble
+ // picks byte 7 of the {b, a} pair - zero when the second source is 0, as in
+ // the patterns that use this leaf.
+ def byte_extract_prmt : ImmLeaf<i32, [{
1477
+ return Imm >= 0x7770 && Imm <= 0x7773;
1478
+ }]>;
1479
+
1480
+ // Rewrites a byte-extract selector into its sign-extending counterpart.
+ // The low nibble (source byte index) is kept for result byte 0; the three
+ // upper nibbles become (index | 8). Per the PTX prmt default mode, bit 3 of a
+ // selector nibble requests replication of the selected byte's sign bit.
+ // Example: 0x7772 -> 0xAAA2. The result is an i32 target constant.
+ def to_sign_extend_selector : SDNodeXForm<imm, [{
1481
+ const uint64_t ByteIdx = N->getZExtValue() & 0xF;
1482
+ const uint64_t SignIdx = ByteIdx | 0x8;
1483
+ const uint64_t Selector =
1484
+ (SignIdx << 12) | (SignIdx << 8) | (SignIdx << 4) | ByteIdx;
1485
+ return CurDAG->getTargetConstant(Selector, SDLoc(N), MVT::i32);
1486
+ }]>;
1487
+
1488
+
1490
1489
// byte extraction + sign extension to i32.
1491
- def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
1492
- (BFE_S32rri $s, $o, 8)>;
1493
- def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)),
1494
- (BFE_S32rii $s, imm:$o, 8)>;
1495
- def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)),
1496
- (BFE_U32rri $s, $o, 8)>;
1497
- def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)),
1498
- (BFE_U32rii $s, imm:$o, 8)>;
1490
+ // A PRMT byte extract (selector accepted by byte_extract_prmt, second source
+ // 0) followed by sext_inreg from i8 is selected as one PRMT_B32rii whose
+ // selector is rewritten by to_sign_extend_selector.
+ def : Pat<(i32 (sext_inreg (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE), i8)),
1491
+ (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE)>;
1499
1492
1500
1493
// byte extraction + signed extension to i16
1501
- def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8 )), i8)),
1502
- (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8 ), CvtNONE)>;
1494
+ // i16 variant: the truncated byte extract with sext_inreg from i8 becomes a
+ // PRMT_B32rii using the rewritten (sign-extending) selector, wrapped in
+ // CVT_u16_u32 with CvtNONE to produce the i16 result.
+ def : Pat<(i16 (sext_inreg (trunc (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE )), i8)),
1495
+ (CVT_u16_u32 (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE ), CvtNONE)>;
1503
1496
1504
1497
1505
1498
// Byte extraction via shift/trunc/sext
@@ -1709,28 +1702,36 @@ def cond_not_signed : PatLeaf<(cond), [{
1709
1702
return !isSignedIntSetCC(N->get());
1710
1703
}]>;
1711
1704
1712
- // comparisons of i8 extracted with BFE as i32
1713
- // It's faster to do comparison directly on i32 extracted by BFE ,
1705
+ // comparisons of i8 extracted with PRMT as i32
1706
+ // It's faster to do comparison directly on i32 extracted by PRMT,
1714
1707
// instead of the long conversion and sign extending.
1715
- def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32 :$a, B32:$oa, 8 ))), i8)),
1716
- (i16 (sext_inreg (i16 (trunc (bfe B32 :$b, B32:$ob, 8 ))), i8)),
1708
+ // Signed comparison of two bytes that were extracted with PRMT, truncated to
+ // i16 and sign-extended from i8. The compare is done directly on i32, so each
+ // operand must be the sign-extended byte: the selector is rewritten with
+ // to_sign_extend_selector. Reusing the zero-extending 0x777N selector would
+ // order bytes with the top bit set incorrectly (0xFF would compare as 255,
+ // not -1).
+ def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE ))), i8)),
1709
+ (i16 (sext_inreg (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE ))), i8)),
1717
1710
cond_signed:$cc),
1718
- (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>;
1711
+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1712
+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1713
+ (cond2cc $cc))>;
1719
1714
1720
- def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32 :$a, imm:$oa, 8 )), i8)),
1721
- (i16 (sext_inreg (trunc (bfe B32 :$b, imm:$ob, 8 )), i8)),
1715
+ // Signed comparison of two PRMT-extracted bytes that were sign-extended from
+ // i8 (trunc written without an explicit i16 type). The i32 operands fed to
+ // SETP must be sign-extended bytes, so the selector is rewritten with
+ // to_sign_extend_selector; the zero-extending selector would give an
+ // unsigned byte ordering under a signed condition code.
+ // NOTE(review): apart from the explicit i16 on trunc, this matches the same
+ // DAG and condition class as the preceding pattern - confirm whether both
+ // are still needed.
+ def: Pat<(setcc (i16 (sext_inreg (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE )), i8)),
1716
+ (i16 (sext_inreg (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE )), i8)),
1722
1717
cond_signed:$cc),
1723
- (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1718
+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1719
+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
1720
+ (cond2cc $cc))>;
1724
1721
1725
- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, B32:$oa, 8)), 255 )),
1726
- (i16 (and ( trunc (bfe B32 :$b, B32:$ob, 8)), 255 )),
1722
+ // Signed comparison of two PRMT-extracted bytes truncated to i16 with no
+ // sign extension. The original selector is reused unchanged and the compare
+ // is done on the i32 PRMT results.
+ // NOTE(review): this relies on the 0x777N selector producing a
+ // zero-extended byte (0..255), so signed i16 and signed i32 ordering agree -
+ // confirm against PTX prmt semantics.
+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1723
+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
1727
1724
cond_signed:$cc),
1728
- (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>;
1725
+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1726
+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1727
+ (cond2cc $cc))>;
1729
1728
1730
- def: Pat<(setcc (i16 (and ( trunc (bfe B32 :$a, imm:$oa, 8)), 255 )),
1731
- (i16 (and ( trunc (bfe B32 :$b, imm:$ob, 8)), 255 )),
1729
+ // Same match as the signed trunc-only pattern, but for condition codes
+ // accepted by cond_not_signed. The original selector is reused unchanged and
+ // the compare is done on the i32 PRMT results.
+ def: Pat<(setcc (i16 (trunc (prmt i32 :$a, 0, byte_extract_prmt:$sel_a, PrmtNONE) )),
1730
+ (i16 (trunc (prmt i32 :$b, 0, byte_extract_prmt:$sel_b, PrmtNONE) )),
1732
1731
cond_not_signed:$cc),
1733
- (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>;
1732
+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1733
+ (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1734
+ (cond2cc $cc))>;
1734
1735
1735
1736
def SDTDeclareArrayParam :
1736
1737
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
0 commit comments