llvm · rampitec · Jul 10, 2025 · Jul 9, 2025
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2305,6 +2305,10 @@ def isNotGFX1250Plus :
   Predicate<"!Subtarget->hasGFX1250Insts()">,
   AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
 
+def isGFX940orGFX1250 :
+  Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
+  AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
+
 def HasIEEEMinimumMaximumInsts :
   Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
   AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;

diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -140,10 +140,14 @@ multiclass VOP1Inst <string opName, VOPProfile P,
   if P.HasExtDPP then
     def _dpp : VOP1_DPP_Pseudo <opName, P>;
 
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
-  } // End SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <opName, P> {
+      let OtherPredicates = [HasDPALU_DPP];
+    }
 
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
   def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;
@@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
 
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in
 defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
 } // End isMoveImm = 1
 
@@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16      : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
 defm V_CVT_F16_F32           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
 defm V_CVT_F32_F16           : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
 
+defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
+
 defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;

diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 

diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,6 +1,30 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 
+v_mov_b64_e32 v[4:5], v[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], v[254:255]
+// GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+v_mov_b64 v[4:5], s[2:3]
+// GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], vcc
+// GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], exec
+// GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], null
+// GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], -1
+// GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+v_mov_b64 v[4:5], 0.5
+// GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 

diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -2,6 +2,30 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
+0xc1,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
+
+0xf0,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
+
+0x7e,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], exec              ; encoding: [0x7e,0x3a,0x08,0x7e]
+
+0x7c,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], null              ; encoding: [0x7c,0x3a,0x08,0x7e]
+
+0x02,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], s[2:3]            ; encoding: [0x02,0x3a,0x08,0x7e]
+
+0xfe,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[254:255]        ; encoding: [0xfe,0x3b,0x08,0x7e]
+
+0x02,0x3b,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
+
+0x6a,0x3a,0x08,0x7e
+# GFX1250: v_mov_b64_e32 v[4:5], vcc               ; encoding: [0x6a,0x3a,0x08,0x7e]
+
 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000         ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]