From 59b44d5e7892ddbc68f806dd4b1382e0717ba20f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Wed, 9 Jul 2025 16:56:54 -0700
Subject: [PATCH] [AMDGPU] gfx1250 MC support for v_mov_b64

It is incomplete in terms of the DPP diagnistics, that is much
more involved change.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  4 ++++
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    | 16 +++++++++----
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 24 +++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s        | 24 +++++++++++++++++++
 .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 24 +++++++++++++++++++
 5 files changed, 87 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5797a2ad0b4ec..55077a94f09a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2305,6 +2305,10 @@ def isNotGFX1250Plus :
   Predicate<"!Subtarget->hasGFX1250Insts()">,
   AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
 
+def isGFX940orGFX1250 :
+  Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
+  AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
+
 def HasIEEEMinimumMaximumInsts :
   Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
   AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5e88684c102ce..53db9e3162e45 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -140,10 +140,14 @@ multiclass VOP1Inst <string opName, VOPProfile P,
   if P.HasExtDPP then
     def _dpp : VOP1_DPP_Pseudo <opName, P>;
 
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-  } // End SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let OtherPredicates = [HasDPALU_DPP];
+    }
 
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;
@@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
 
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in
 defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
 } // End isMoveImm = 1
 
@@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16      : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
 defm V_CVT_F16_F32           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
 defm V_CVT_F32_F16           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
 
+defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
+
 defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 591c590bf3781..63a13062069de 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 7b16c22b47acc..b0a879a6cd726 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index af583fe9697e4..1a87993fb8e0b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -2,6 +2,30 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
+0xc1,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+0xf0,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
+0x7e,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+0x7c,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+0x02,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+0xfe,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+0x02,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+0x6a,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000         ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]