Skip to content

Commit 70bc7d1

Browse files
authored
[AArch64] Corrected Latency Descriptions for NeoverseV2 (#147339)
Update the Neoverse V2 scheduling model to reflect the correct latencies, and update the relevant llvm-mca tests to match.
1 parent fa14361 commit 70bc7d1

File tree

2 files changed

+46
-45
lines changed

2 files changed

+46
-45
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def V2Write_20c_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
157157
def V2Write_2c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
158158
def V2Write_2c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
159159
def V2Write_3c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
160+
def V2Write_3c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 3; }
160161
def V2Write_4c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
161162
def V2Write_4c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
162163
def V2Write_6c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
@@ -256,8 +257,8 @@ def V2Write_4c_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
256257
let NumMicroOps = 2;
257258
}
258259

259-
def V2Write_4c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
260-
let Latency = 4;
260+
def V2Write_5c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
261+
let Latency = 5;
261262
let NumMicroOps = 2;
262263
}
263264

@@ -376,8 +377,8 @@ def V2Write_6c_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
376377
let NumMicroOps = 2;
377378
}
378379

379-
def V2Write_4c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
380-
let Latency = 4;
380+
def V2Write_6c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
381+
let Latency = 6;
381382
let NumMicroOps = 2;
382383
}
383384

@@ -1468,14 +1469,14 @@ def : SchedAlias<WriteVq, V2Write_2c_1V>;
14681469
def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
14691470

14701471
// ASIMD arith, reduce, 4H/4S
1471-
def : InstRW<[V2Write_2c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
1472+
def : InstRW<[V2Write_3c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
14721473

14731474
// ASIMD arith, reduce, 8B/8H
1474-
def : InstRW<[V2Write_4c_1V13_1V],
1475+
def : InstRW<[V2Write_5c_1V13_1V],
14751476
(instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
14761477

14771478
// ASIMD arith, reduce, 16B
1478-
def : InstRW<[V2Write_4c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
1479+
def : InstRW<[V2Write_6c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
14791480

14801481
// ASIMD dot product
14811482
// ASIMD dot product using signed and unsigned integers
@@ -1486,15 +1487,15 @@ def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
14861487
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
14871488

14881489
// ASIMD max/min, reduce, 4H/4S
1489-
def : InstRW<[V2Write_2c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
1490+
def : InstRW<[V2Write_3c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
14901491
"^[SU](MAX|MIN)Vv4i32v$")>;
14911492

14921493
// ASIMD max/min, reduce, 8B/8H
1493-
def : InstRW<[V2Write_4c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1494+
def : InstRW<[V2Write_5c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
14941495
"^[SU](MAX|MIN)Vv8i16v$")>;
14951496

14961497
// ASIMD max/min, reduce, 16B
1497-
def : InstRW<[V2Write_4c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
1498+
def : InstRW<[V2Write_6c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
14981499

14991500
// ASIMD multiply
15001501
def : InstRW<[V2Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;

llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,11 +1257,11 @@ zip2 v0.8h, v0.8h, v0.8h
12571257
# CHECK-NEXT: 1 2 0.25 addhn2 v0.8h, v0.4s, v0.4s
12581258
# CHECK-NEXT: 1 2 0.25 addp v0.2d, v0.2d, v0.2d
12591259
# CHECK-NEXT: 1 2 0.25 addp v0.8b, v0.8b, v0.8b
1260-
# CHECK-NEXT: 1 2 0.50 addv s0, v0.4s
1261-
# CHECK-NEXT: 1 2 0.50 addv h0, v0.4h
1262-
# CHECK-NEXT: 2 4 0.50 addv h0, v0.8h
1263-
# CHECK-NEXT: 2 4 0.50 addv b0, v0.8b
1264-
# CHECK-NEXT: 2 4 1.00 addv b0, v0.16b
1260+
# CHECK-NEXT: 1 3 0.50 addv s0, v0.4s
1261+
# CHECK-NEXT: 1 3 0.50 addv h0, v0.4h
1262+
# CHECK-NEXT: 2 5 0.50 addv h0, v0.8h
1263+
# CHECK-NEXT: 2 5 0.50 addv b0, v0.8b
1264+
# CHECK-NEXT: 2 6 1.00 addv b0, v0.16b
12651265
# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
12661266
# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
12671267
# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
@@ -1779,11 +1779,11 @@ zip2 v0.8h, v0.8h, v0.8h
17791779
# CHECK-NEXT: 1 2 0.25 saddlp v0.4h, v0.8b
17801780
# CHECK-NEXT: 1 2 0.25 saddlp v0.4s, v0.8h
17811781
# CHECK-NEXT: 1 2 0.25 saddlp v0.8h, v0.16b
1782-
# CHECK-NEXT: 1 2 0.50 saddlv d0, v0.4s
1783-
# CHECK-NEXT: 1 2 0.50 saddlv s0, v0.4h
1784-
# CHECK-NEXT: 2 4 0.50 saddlv s0, v0.8h
1785-
# CHECK-NEXT: 2 4 0.50 saddlv h0, v0.8b
1786-
# CHECK-NEXT: 2 4 1.00 saddlv h0, v0.16b
1782+
# CHECK-NEXT: 1 3 0.50 saddlv d0, v0.4s
1783+
# CHECK-NEXT: 1 3 0.50 saddlv s0, v0.4h
1784+
# CHECK-NEXT: 2 5 0.50 saddlv s0, v0.8h
1785+
# CHECK-NEXT: 2 5 0.50 saddlv h0, v0.8b
1786+
# CHECK-NEXT: 2 6 1.00 saddlv h0, v0.16b
17871787
# CHECK-NEXT: 1 2 0.25 saddw v0.2d, v0.2d, v0.2s
17881788
# CHECK-NEXT: 1 2 0.25 saddw v0.4s, v0.4s, v0.4h
17891789
# CHECK-NEXT: 1 2 0.25 saddw v0.8h, v0.8h, v0.8b
@@ -1846,22 +1846,22 @@ zip2 v0.8h, v0.8h, v0.8h
18461846
# CHECK-NEXT: 1 2 0.25 smaxp v0.2s, v0.2s, v0.2s
18471847
# CHECK-NEXT: 1 2 0.25 smaxp v0.4h, v0.4h, v0.4h
18481848
# CHECK-NEXT: 1 2 0.25 smaxp v0.8b, v0.8b, v0.8b
1849-
# CHECK-NEXT: 2 4 0.50 smaxv b0, v0.8b
1850-
# CHECK-NEXT: 2 4 1.00 smaxv b0, v0.16b
1851-
# CHECK-NEXT: 1 2 0.50 smaxv h0, v0.4h
1852-
# CHECK-NEXT: 2 4 0.50 smaxv h0, v0.8h
1853-
# CHECK-NEXT: 1 2 0.50 smaxv s0, v0.4s
1849+
# CHECK-NEXT: 2 5 0.50 smaxv b0, v0.8b
1850+
# CHECK-NEXT: 2 6 1.00 smaxv b0, v0.16b
1851+
# CHECK-NEXT: 1 3 0.50 smaxv h0, v0.4h
1852+
# CHECK-NEXT: 2 5 0.50 smaxv h0, v0.8h
1853+
# CHECK-NEXT: 1 3 0.50 smaxv s0, v0.4s
18541854
# CHECK-NEXT: 1 2 0.25 smin v0.16b, v0.16b, v0.16b
18551855
# CHECK-NEXT: 1 2 0.25 smin v0.4s, v0.4s, v0.4s
18561856
# CHECK-NEXT: 1 2 0.25 smin v0.8h, v0.8h, v0.8h
18571857
# CHECK-NEXT: 1 2 0.25 sminp v0.16b, v0.16b, v0.16b
18581858
# CHECK-NEXT: 1 2 0.25 sminp v0.4s, v0.4s, v0.4s
18591859
# CHECK-NEXT: 1 2 0.25 sminp v0.8h, v0.8h, v0.8h
1860-
# CHECK-NEXT: 2 4 0.50 sminv b0, v0.8b
1861-
# CHECK-NEXT: 2 4 1.00 sminv b0, v0.16b
1862-
# CHECK-NEXT: 1 2 0.50 sminv h0, v0.4h
1863-
# CHECK-NEXT: 2 4 0.50 sminv h0, v0.8h
1864-
# CHECK-NEXT: 1 2 0.50 sminv s0, v0.4s
1860+
# CHECK-NEXT: 2 5 0.50 sminv b0, v0.8b
1861+
# CHECK-NEXT: 2 6 1.00 sminv b0, v0.16b
1862+
# CHECK-NEXT: 1 3 0.50 sminv h0, v0.4h
1863+
# CHECK-NEXT: 2 5 0.50 sminv h0, v0.8h
1864+
# CHECK-NEXT: 1 3 0.50 sminv s0, v0.4s
18651865
# CHECK-NEXT: 1 4 0.50 smlal v0.2d, v0.2s, v0.2s
18661866
# CHECK-NEXT: 1 4 0.50 smlal v0.4s, v0.4h, v0.4h
18671867
# CHECK-NEXT: 1 4 0.50 smlal v0.8h, v0.8b, v0.8b
@@ -2221,11 +2221,11 @@ zip2 v0.8h, v0.8h, v0.8h
22212221
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4h, v0.8b
22222222
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4s, v0.8h
22232223
# CHECK-NEXT: 1 2 0.25 uaddlp v0.8h, v0.16b
2224-
# CHECK-NEXT: 1 2 0.50 uaddlv d0, v0.4s
2225-
# CHECK-NEXT: 1 2 0.50 uaddlv s0, v0.4h
2226-
# CHECK-NEXT: 2 4 0.50 uaddlv s0, v0.8h
2227-
# CHECK-NEXT: 2 4 0.50 uaddlv h0, v0.8b
2228-
# CHECK-NEXT: 2 4 1.00 uaddlv h0, v0.16b
2224+
# CHECK-NEXT: 1 3 0.50 uaddlv d0, v0.4s
2225+
# CHECK-NEXT: 1 3 0.50 uaddlv s0, v0.4h
2226+
# CHECK-NEXT: 2 5 0.50 uaddlv s0, v0.8h
2227+
# CHECK-NEXT: 2 5 0.50 uaddlv h0, v0.8b
2228+
# CHECK-NEXT: 2 6 1.00 uaddlv h0, v0.16b
22292229
# CHECK-NEXT: 1 2 0.25 uaddw v0.2d, v0.2d, v0.2s
22302230
# CHECK-NEXT: 1 2 0.25 uaddw v0.4s, v0.4s, v0.4h
22312231
# CHECK-NEXT: 1 2 0.25 uaddw v0.8h, v0.8h, v0.8b
@@ -2257,22 +2257,22 @@ zip2 v0.8h, v0.8h, v0.8h
22572257
# CHECK-NEXT: 1 2 0.25 umaxp v0.16b, v0.16b, v0.16b
22582258
# CHECK-NEXT: 1 2 0.25 umaxp v0.4s, v0.4s, v0.4s
22592259
# CHECK-NEXT: 1 2 0.25 umaxp v0.8h, v0.8h, v0.8h
2260-
# CHECK-NEXT: 2 4 0.50 umaxv b0, v0.8b
2261-
# CHECK-NEXT: 2 4 1.00 umaxv b0, v0.16b
2262-
# CHECK-NEXT: 1 2 0.50 umaxv h0, v0.4h
2263-
# CHECK-NEXT: 2 4 0.50 umaxv h0, v0.8h
2264-
# CHECK-NEXT: 1 2 0.50 umaxv s0, v0.4s
2260+
# CHECK-NEXT: 2 5 0.50 umaxv b0, v0.8b
2261+
# CHECK-NEXT: 2 6 1.00 umaxv b0, v0.16b
2262+
# CHECK-NEXT: 1 3 0.50 umaxv h0, v0.4h
2263+
# CHECK-NEXT: 2 5 0.50 umaxv h0, v0.8h
2264+
# CHECK-NEXT: 1 3 0.50 umaxv s0, v0.4s
22652265
# CHECK-NEXT: 1 2 0.25 umin v0.2s, v0.2s, v0.2s
22662266
# CHECK-NEXT: 1 2 0.25 umin v0.4h, v0.4h, v0.4h
22672267
# CHECK-NEXT: 1 2 0.25 umin v0.8b, v0.8b, v0.8b
22682268
# CHECK-NEXT: 1 2 0.25 uminp v0.2s, v0.2s, v0.2s
22692269
# CHECK-NEXT: 1 2 0.25 uminp v0.4h, v0.4h, v0.4h
22702270
# CHECK-NEXT: 1 2 0.25 uminp v0.8b, v0.8b, v0.8b
2271-
# CHECK-NEXT: 2 4 0.50 uminv b0, v0.8b
2272-
# CHECK-NEXT: 2 4 1.00 uminv b0, v0.16b
2273-
# CHECK-NEXT: 1 2 0.50 uminv h0, v0.4h
2274-
# CHECK-NEXT: 2 4 0.50 uminv h0, v0.8h
2275-
# CHECK-NEXT: 1 2 0.50 uminv s0, v0.4s
2271+
# CHECK-NEXT: 2 5 0.50 uminv b0, v0.8b
2272+
# CHECK-NEXT: 2 6 1.00 uminv b0, v0.16b
2273+
# CHECK-NEXT: 1 3 0.50 uminv h0, v0.4h
2274+
# CHECK-NEXT: 2 5 0.50 uminv h0, v0.8h
2275+
# CHECK-NEXT: 1 3 0.50 uminv s0, v0.4s
22762276
# CHECK-NEXT: 1 4 0.50 umlal v0.2d, v0.2s, v0.2s
22772277
# CHECK-NEXT: 1 4 0.50 umlal v0.4s, v0.4h, v0.4h
22782278
# CHECK-NEXT: 1 4 0.50 umlal v0.8h, v0.8b, v0.8b

0 commit comments

Comments
 (0)