-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[AMDGPU] gfx1250 MC support for v_mov_b64 #147859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] gfx1250 MC support for v_mov_b64 #147859
Conversation
It is incomplete in terms of the DPP diagnostics; that is a much more involved change.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Stanislav Mekhanoshin (rampitec) Changes: It is incomplete in terms of the DPP diagnostics; that is a much more involved change. Full diff: https://github.com/llvm/llvm-project/pull/147859.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5797a2ad0b4ec..55077a94f09a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2305,6 +2305,10 @@ def isNotGFX1250Plus :
Predicate<"!Subtarget->hasGFX1250Insts()">,
AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
+def isGFX940orGFX1250 :
+ Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
+ AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
+
def HasIEEEMinimumMaximumInsts :
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5e88684c102ce..53db9e3162e45 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -140,10 +140,14 @@ multiclass VOP1Inst <string opName, VOPProfile P,
if P.HasExtDPP then
def _dpp : VOP1_DPP_Pseudo <opName, P>;
- let SubtargetPredicate = isGFX11Plus in {
- if P.HasExtVOP3DPP then
- def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
- } // End SubtargetPredicate = isGFX11Plus
+ if P.HasExtVOP3DPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
+ let SubtargetPredicate = isGFX11Plus;
+ }
+ else if P.HasExt64BitDPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
+ let OtherPredicates = [HasDPALU_DPP];
+ }
def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;
@@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
@@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
+defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
+
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 591c590bf3781..63a13062069de 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -1,6 +1,30 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 7b16c22b47acc..b0a879a6cd726 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,6 +1,30 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index af583fe9697e4..1a87993fb8e0b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -2,6 +2,30 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+0xc1,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+0xf0,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
+
+0x7e,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+0x7c,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+0x02,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e]
+
+0xfe,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+0x02,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
+
+0x6a,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
+
0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/198/builds/6026 Here is the relevant piece of the build log for the reference
|
It is incomplete in terms of the DPP diagnostics; that is a much
more involved change.