@@ -1476,7 +1476,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
1476
1476
1477
1477
setMinFunctionAlignment(Align(4));
1478
1478
1479
- switch (Subtarget.getCPUDirective()) {
1479
+ auto CPUDirective = Subtarget.getCPUDirective();
1480
+ switch (CPUDirective) {
1480
1481
default: break;
1481
1482
case PPC::DIR_970:
1482
1483
case PPC::DIR_A2:
@@ -1508,15 +1509,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
1508
1509
1509
1510
// The Freescale cores do better with aggressive inlining of memcpy and
1510
1511
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1511
- if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1512
- Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1512
+ if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1513
1513
MaxStoresPerMemset = 32;
1514
1514
MaxStoresPerMemsetOptSize = 16;
1515
1515
MaxStoresPerMemcpy = 32;
1516
1516
MaxStoresPerMemcpyOptSize = 8;
1517
1517
MaxStoresPerMemmove = 32;
1518
1518
MaxStoresPerMemmoveOptSize = 8;
1519
- } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1519
+ } else if (CPUDirective == PPC::DIR_A2) {
1520
1520
// The A2 also benefits from (very) aggressive inlining of memcpy and
1521
1521
// friends. The overhead of the function call, even when warm, can be
1522
1522
// over one hundred cycles.
@@ -1529,6 +1529,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
1529
1529
MaxLoadsPerMemcmpOptSize = 4;
1530
1530
}
1531
1531
1532
+ // Enable STXVP generation by default for -mcpu=future unless overridden.
1533
+ if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1534
+ DisableAutoPairedVecSt.getNumOccurrences() == 0)
1535
+ DisableAutoPairedVecSt = false;
1536
+
1532
1537
IsStrictFPEnabled = true;
1533
1538
1534
1539
// Let the subtarget (CPU) decide if a predictable select is more expensive
0 commit comments