From 59b44d5e7892ddbc68f806dd4b1382e0717ba20f Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 9 Jul 2025 16:56:54 -0700 Subject: [PATCH] [AMDGPU] gfx1250 MC support for v_mov_b64 It is incomplete in terms of the DPP diagnostics; that is a much more involved change. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++++ llvm/lib/Target/AMDGPU/VOP1Instructions.td | 16 +++++++++---- llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 24 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 24 +++++++++++++++++++ .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 24 +++++++++++++++++++ 5 files changed, 87 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5797a2ad0b4ec..55077a94f09a1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2305,6 +2305,10 @@ def isNotGFX1250Plus : Predicate<"!Subtarget->hasGFX1250Insts()">, AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>; +def isGFX940orGFX1250 : + Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">, + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>; + def HasIEEEMinimumMaximumInsts : Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">, AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 5e88684c102ce..53db9e3162e45 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -140,10 +140,14 @@ multiclass VOP1Inst ; - let SubtargetPredicate = isGFX11Plus in { - if P.HasExtVOP3DPP then - def _e64_dpp : VOP3_DPP_Pseudo ; - } // End SubtargetPredicate = isGFX11Plus + if P.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let SubtargetPredicate = isGFX11Plus; + } + else if P.HasExt64BitDPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let OtherPredicates = [HasDPALU_DPP]; + } def : LetDummies, 
AMDGPUMnemonicAlias; def : LetDummies, AMDGPUMnemonicAlias; @@ -236,7 +240,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; -let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in +let SubtargetPredicate = isGFX940orGFX1250, SchedRW = [Write64Bit] in defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 @@ -1117,6 +1121,8 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>; defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>; defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>; +defm V_MOV_B64 : VOP1_Real_FULL ; + defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">; defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index 591c590bf3781..63a13062069de 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -1,6 +1,30 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +v_mov_b64_e32 v[4:5], v[2:3] +// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e] + +v_mov_b64 v[4:5], v[254:255] +// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e] + +v_mov_b64 v[4:5], s[2:3] +// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], vcc +// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], exec +// GFX1250: v_mov_b64_e32 v[4:5], exec ; 
encoding: [0x7e,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], null +// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], -1 +// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], 0.5 +// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 7b16c22b47acc..b0a879a6cd726 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -1,6 +1,30 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +v_mov_b64_e32 v[4:5], v[2:3] +// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e] + +v_mov_b64 v[4:5], v[254:255] +// GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e] + +v_mov_b64 v[4:5], s[2:3] +// GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], vcc +// GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], exec +// GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], null +// GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], -1 +// GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e] + +v_mov_b64 v[4:5], 0.5 +// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index af583fe9697e4..1a87993fb8e0b 100644 --- 
a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -2,6 +2,30 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xc1,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e] + +0xf0,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] + +0x7e,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], exec ; encoding: [0x7e,0x3a,0x08,0x7e] + +0x7c,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], null ; encoding: [0x7c,0x3a,0x08,0x7e] + +0x02,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], s[2:3] ; encoding: [0x02,0x3a,0x08,0x7e] + +0xfe,0x3b,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], v[254:255] ; encoding: [0xfe,0x3b,0x08,0x7e] + +0x02,0x3b,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e] + +0x6a,0x3a,0x08,0x7e +# GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e] + 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]