[AMDGPU] gfx1250 MC support for v_mov_b64 #147859

rampitec · 2025-07-09T23:57:59Z

It is incomplete in terms of the DPP diagnistics, that is much
more involved change.

It is incomplete in terms of the DPP diagnistics, that is much more involved change.

rampitec · 2025-07-09T23:58:34Z

[AMDGPU] gfx1250: MC support for 64-bit literals #147861
[AMDGPU] gfx1250 MC support for v_mov_b64 #147859 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-07-09T23:59:36Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

It is incomplete in terms of the DPP diagnistics, that is much
more involved change.

Full diff: https://github.com/llvm/llvm-project/pull/147859.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+4)
(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+11-5)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+24)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+24)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+24)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5797a2ad0b4ec..55077a94f09a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2305,6 +2305,10 @@ def isNotGFX1250Plus :
   Predicate<"!Subtarget->hasGFX1250Insts()">,
   AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
 
+def isGFX940orGFX1250 :
+  Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
+  AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
+
 def HasIEEEMinimumMaximumInsts :
   Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
   AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5e88684c102ce..53db9e3162e45 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -140,10 +140,14 @@ multiclass VOP1Inst <string opName, VOPProfile P,
   if P.HasExtDPP then
     def _dpp : VOP1_DPP_Pseudo <opName, P>;
 
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-  } // End SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let OtherPredicates = [HasDPALU_DPP];
+    }
 
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;
@@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
 
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in
 defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
 } // End isMoveImm = 1
 
@@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16      : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
 defm V_CVT_F16_F32           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
 defm V_CVT_F32_F16           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
 
+defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
+
 defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 591c590bf3781..63a13062069de 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 7b16c22b47acc..b0a879a6cd726 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index af583fe9697e4..1a87993fb8e0b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -2,6 +2,30 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
+0xc1,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+0xf0,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
+0x7e,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+0x7c,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+0x02,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+0xfe,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+0x02,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+0x6a,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000         ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]

llvm-ci · 2025-07-10T05:22:26Z

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve2-vla running on linaro-g4-01 while building llvm at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/198/builds/6026

Here is the relevant piece of the build log for the reference

Step 7 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'LLVM :: tools/llvm-exegesis/RISCV/rvv/filter.test' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
/home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/stage1/bin/llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK     --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=10 | /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/stage1/bin/FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/llvm/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test # RUN: at line 1
+ /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/stage1/bin/FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/llvm/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
+ /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/stage1/bin/llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK '--riscv-filter-config=vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=10
PseudoVNCLIPU_WX_M1_MASK: Failed to produce any snippet via: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, one unique register for each position
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/stage1/bin/FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve2-vla/llvm/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test

--

********************

[AMDGPU] gfx1250 MC support for v_mov_b64

59b44d5

It is incomplete in terms of the DPP diagnistics, that is much more involved change.

rampitec requested review from shiltian and searlmc1 July 9, 2025 23:58

rampitec marked this pull request as ready for review July 9, 2025 23:59

llvmbot added backend:AMDGPU mc Machine (object) code labels Jul 9, 2025

shiltian approved these changes Jul 10, 2025

View reviewed changes

rampitec mentioned this pull request Jul 10, 2025

[AMDGPU] gfx1250: MC support for 64-bit literals #147861

Merged

rampitec merged commit fd894f6 into main Jul 10, 2025
14 checks passed

rampitec deleted the users/rampitec/07-09-_amdgpu_gfx1250_mc_support_for_v_mov_b64 branch July 10, 2025 04:31

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] gfx1250 MC support for v_mov_b64 #147859

[AMDGPU] gfx1250 MC support for v_mov_b64 #147859

Uh oh!

rampitec commented Jul 9, 2025

Uh oh!

rampitec commented Jul 9, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 9, 2025 •

edited

Loading

Uh oh!

Uh oh!

llvm-ci commented Jul 10, 2025

Uh oh!

Uh oh!

[AMDGPU] gfx1250 MC support for v_mov_b64 #147859

[AMDGPU] gfx1250 MC support for v_mov_b64 #147859

Uh oh!

Conversation

rampitec commented Jul 9, 2025

Uh oh!

rampitec commented Jul 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

llvm-ci commented Jul 10, 2025

Uh oh!

Uh oh!

rampitec commented Jul 9, 2025 •

edited

Loading

llvmbot commented Jul 9, 2025 •

edited

Loading