Skip to content

[AMDGPU] gfx1250: MC support for 64-bit literals #147861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,12 @@ def FeaturePointSampleAccel : SubtargetFeature<"point-sample-accel",
"Has point sample acceleration feature"
>;

// Subtarget feature "64-bit-literals". Enabling it sets the subtarget's
// Has64BitLiterals field to "true"; the final string is the feature's
// human-readable description.
def Feature64BitLiterals : SubtargetFeature<"64-bit-literals",
"Has64BitLiterals",
"true",
"Can use 64-bit literals with single DWORD instructions"
>;

def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt",
"HasWaitXcnt",
"true",
Expand Down Expand Up @@ -1931,6 +1937,7 @@ def FeatureISAVersion12_50 : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureCuMode,
Feature64BitLiterals,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureFmacF64Inst,
Expand Down Expand Up @@ -2305,6 +2312,10 @@ def isNotGFX1250Plus :
Predicate<"!Subtarget->hasGFX1250Insts()">,
AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;

// Predicate satisfied when the subtarget has the GFX940 instructions or the
// GFX1250 instructions. The C++ condition ORs the two subtarget queries; the
// assembler predicate accepts either of the two features.
def isGFX940orGFX1250 :
Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;

def HasIEEEMinimumMaximumInsts :
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
Expand Down Expand Up @@ -2674,6 +2685,9 @@ def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
AssemblerPredicate<(all_of FeatureBVHDualAndBVH8Insts)>;

// Predicate for targets with 64-bit literal support: the C++ condition checks
// Subtarget->has64BitLiterals(), and the assembler predicate requires
// Feature64BitLiterals.
def Has64BitLiterals : Predicate<"Subtarget->has64BitLiterals()">,
AssemblerPredicate<(all_of Feature64BitLiterals)>;

def HasWaitXcnt : Predicate<"Subtarget->hasWaitXcnt()">,
AssemblerPredicate<(all_of FeatureWaitXcnt)>;

Expand Down
126 changes: 95 additions & 31 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool Neg = false;
bool Sext = false;
bool Lit = false;
bool Lit64 = false;

bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
Expand Down Expand Up @@ -480,7 +481,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isSSrc_b64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
return isSCSrc_b64() || isLiteralImm(MVT::i64);
return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
(((const MCTargetAsmParser *)AsmParser)
->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
isExpr());
}

bool isSSrc_f32() const {
Expand Down Expand Up @@ -1537,6 +1541,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}

// Returns true if the Feature64BitLiterals bit is set in this parser's
// feature bits.
bool has64BitLiterals() const {
return getFeatureBits()[AMDGPU::Feature64BitLiterals];
}

bool hasFlatOffsets() const {
return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
}
Expand Down Expand Up @@ -1663,10 +1671,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
bool HasLit = false);
bool HasLit = false, bool HasLit64 = false);
ParseStatus parseReg(OperandVector &Operands);
ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
bool HasLit = false);
bool HasLit = false, bool HasLit64 = false);
ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm = true);
ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
Expand Down Expand Up @@ -2123,6 +2131,9 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
return false;
}

bool Allow64Bit =
(type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

if (!Imm.IsFPImm) {
// We got int literal token.

Expand All @@ -2134,8 +2145,11 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
}

unsigned Size = type.getSizeInBits();
if (Size == 64)
if (Size == 64) {
if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
return true;
Size = 32;
}

// FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
// types.
Expand Down Expand Up @@ -2287,12 +2301,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
}

// Non-inlineable
if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
if (AMDGPU::isSISrcFPOperand(InstDesc,
OpNum)) { // Expected 64-bit fp operand
bool HasMandatoryLiteral =
AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
// For fp operands we check if low 32 bits are zeros
if (Literal.getLoBits(32) != 0) {
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
"Can't encode literal as exact 64-bit floating-point operand. "
"Low 32-bits will be set to zero");
if (Literal.getLoBits(32) != 0 &&
(InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
!HasMandatoryLiteral) {
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
Inst.getLoc(),
"Can't encode literal as exact 64-bit floating-point operand. "
"Low 32-bits will be set to zero");
Val &= 0xffffffff00000000u;
}

Expand Down Expand Up @@ -2392,8 +2412,25 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;

case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
}

// When the 32 MSBs are not zero (effectively means it can't be safely
// truncated to uint32_t), if the target doesn't support 64-bit literals, or
// the lit modifier is explicitly used, we need to truncate it to the 32
// LSBs.
if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
Val = Lo_32(Val);

Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
return;

case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Expand All @@ -2402,8 +2439,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
}

Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
: Lo_32(Val);
// If the target doesn't support 64-bit literals, we need to use the
// constant as the high 32 MSBs of a double-precision floating point value.
if (!AsmParser->has64BitLiterals()) {
Val = static_cast<uint64_t>(Val) << 32;
} else {
// Now the target does support 64-bit literals, there are two cases
// where we still want to use src_literal encoding:
// 1) explicitly forced by using lit modifier;
// 2) the value is a valid 32-bit representation (signed or unsigned),
// meanwhile not forced by lit64 modifier.
if (getModifiers().Lit ||
(!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
Val = static_cast<uint64_t>(Val) << 32;
}

Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
Expand Down Expand Up @@ -3151,19 +3200,20 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
}

ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
bool HasSP3AbsModifier, bool HasLit) {
bool HasSP3AbsModifier, bool HasLit,
bool HasLit64) {
// TODO: add syntactic sugar for 1/(2*PI)

if (isRegister())
if (isRegister() || isModifier())
return ParseStatus::NoMatch;
assert(!isModifier());

if (!HasLit) {
HasLit = trySkipId("lit");
if (HasLit) {
if (!HasLit && !HasLit64) {
HasLit64 = trySkipId("lit64");
HasLit = !HasLit64 && trySkipId("lit");
if (HasLit || HasLit64) {
if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
return ParseStatus::Failure;
ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
if (S.isSuccess() &&
!skipToken(AsmToken::RParen, "expected closing parentheses"))
return ParseStatus::Failure;
Expand All @@ -3185,6 +3235,7 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,

AMDGPUOperand::Modifiers Mods;
Mods.Lit = HasLit;
Mods.Lit64 = HasLit64;

if (IsReal) {
// Floating-point expressions are not supported.
Expand Down Expand Up @@ -3235,7 +3286,7 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
} else {
if (HasLit)
if (HasLit || HasLit64)
return ParseStatus::NoMatch;
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
}
Expand All @@ -3259,13 +3310,14 @@ ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
}

ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
bool HasSP3AbsMod, bool HasLit) {
bool HasSP3AbsMod, bool HasLit,
bool HasLit64) {
ParseStatus Res = parseReg(Operands);
if (!Res.isNoMatch())
return Res;
if (isModifier())
return ParseStatus::NoMatch;
return parseImm(Operands, HasSP3AbsMod, HasLit);
return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
}

bool
Expand Down Expand Up @@ -3361,7 +3413,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm) {
bool Neg, SP3Neg;
bool Abs, SP3Abs;
bool Lit;
bool Lit64, Lit;
SMLoc Loc;

// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
Expand All @@ -3381,7 +3433,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
return ParseStatus::Failure;

Lit = trySkipId("lit");
Lit64 = trySkipId("lit64");
if (Lit64) {
if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
return ParseStatus::Failure;
if (!has64BitLiterals())
return Error(Loc, "lit64 is not supported on this GPU");
}

Lit = !Lit64 && trySkipId("lit");
if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
return ParseStatus::Failure;

Expand All @@ -3392,14 +3452,16 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,

ParseStatus Res;
if (AllowImm) {
Res = parseRegOrImm(Operands, SP3Abs, Lit);
Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
} else {
Res = parseReg(Operands);
}
if (!Res.isSuccess())
return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
? ParseStatus::Failure
: Res;

if (Lit && !Operands.back()->isImm())
if ((Lit || Lit64) && !Operands.back()->isImm())
Error(Loc, "expected immediate with lit modifier");

if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
Expand All @@ -3408,15 +3470,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
return ParseStatus::Failure;
if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
return ParseStatus::Failure;
if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
if ((Lit || Lit64) &&
!skipToken(AsmToken::RParen, "expected closing parentheses"))
return ParseStatus::Failure;

AMDGPUOperand::Modifiers Mods;
Mods.Abs = Abs || SP3Abs;
Mods.Neg = Neg || SP3Neg;
Mods.Lit = Lit;
Mods.Lit64 = Lit64;

if (Mods.hasFPModifiers() || Lit) {
if (Mods.hasFPModifiers() || Lit || Lit64) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
if (Op.isExpr())
return Error(Op.getStartLoc(), "expected an absolute expression");
Expand Down Expand Up @@ -4588,7 +4652,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {

unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;
uint64_t LiteralValue;

for (int OpIdx : OpIndices) {
if (OpIdx == -1) break;
Expand All @@ -4597,7 +4661,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
// Exclude special imm operands (like that used by s_set_gpr_idx_on)
if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
uint32_t Value = static_cast<uint32_t>(MO.getImm());
uint64_t Value = static_cast<uint64_t>(MO.getImm());
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,6 +1484,20 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

/// Decode a 64-bit literal constant operand.
///
/// Asserts that the subtarget has Feature64BitLiterals. If a literal has
/// already been recorded (HasLiteral is set), the cached Literal64 is returned
/// and no further bytes are read. Otherwise at least 8 bytes must remain in
/// Bytes; if not, an error operand is returned.
MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  // A literal was recorded earlier: reuse it instead of reading again.
  if (HasLiteral)
    return MCOperand::createImm(Literal64);

  // Not enough encoded bytes remain to hold a 64-bit literal.
  if (Bytes.size() < 8)
    return errOperand(0, "cannot read literal64, inst bytes left " +
                             Twine(Bytes.size()));

  // Record the literal so later calls take the cached path above.
  // NOTE(review): eatBytes presumably advances Bytes past the value it
  // returns — confirm against its definition.
  HasLiteral = true;
  Literal64 = eatBytes<uint64_t>(Bytes);
  return MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
using namespace AMDGPU::EncValues;

Expand Down Expand Up @@ -1767,6 +1781,10 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
Val == LITERAL_CONST)
return MCOperand::createImm(Val);

if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
return decodeLiteral64Constant();
}

switch (Width) {
case 32:
case 16:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ class AMDGPUDisassembler : public MCDisassembler {

MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
MCOperand decodeLiteral64Constant() const;

MCOperand decodeSrcOp(unsigned Width, unsigned Val) const;

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSALUFloatInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;
bool Has64BitLiterals = false;
bool HasBitOp3Insts = false;
bool HasTransposeLoadF4F6Insts = false;
bool HasPrngInst = false;
Expand Down Expand Up @@ -1384,6 +1385,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// GFX1250.
bool hasWaitXCnt() const { return HasWaitXcnt; }

// Returns Has64BitLiterals: whether a single-DWORD instruction can use a
// 64-bit literal.
bool has64BitLiterals() const { return Has64BitLiterals; }

bool hasPointSampleAccel() const { return HasPointSampleAccel; }

bool hasLdsBarrierArriveAtomic() const { return HasLdsBarrierArriveAtomic; }
Expand Down
Loading
Loading