Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10635,30 +10635,30 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!Def || Def->getParent() != CmpInstr.getParent())
return false;

bool CanOptimize = false;
// Returns true when Def is an S_CSELECT_B32/S_CSELECT_B64 whose operand 1 is
// a non-zero immediate and whose operand 2 is the immediate 0.
const auto foldableSelect = [](MachineInstr *Def) -> bool {
  const auto Opc = Def->getOpcode();
  if (Opc != AMDGPU::S_CSELECT_B32 && Opc != AMDGPU::S_CSELECT_B64)
    return false;

  // Operand 1 must be a non-zero immediate, operand 2 the immediate 0.
  const auto &TrueSrc = Def->getOperand(1);
  const auto &FalseSrc = Def->getOperand(2);
  const bool TrueIsNonZeroImm = TrueSrc.isImm() && TrueSrc.getImm() != 0;
  const bool FalseIsZeroImm = FalseSrc.isImm() && FalseSrc.getImm() == 0;
  return TrueIsNonZeroImm && FalseIsZeroImm;
};

// For S_OP that set SCC = DST!=0, do the transformation
//
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
if (setsSCCifResultIsNonZero(*Def))
CanOptimize = true;

// s_cmp_lg_* is redundant because the SCC input value for S_CSELECT* has
// the same value that will be calculated by s_cmp_lg_*
// If foldableSelect, s_cmp_lg_* is redundant because the SCC input value
// for S_CSELECT* already has the same value that will be calculated by
// s_cmp_lg_*
//
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
// imm), 0)
if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
bool Op1IsNonZeroImm =
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
bool Op2IsZeroImm =
Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
if (Op1IsNonZeroImm && Op2IsZeroImm)
CanOptimize = true;
}

if (!CanOptimize)
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
return false;

MachineInstr *KillsSCC = nullptr;
Expand Down
44 changes: 23 additions & 21 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -711,42 +711,44 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

static bool setsSCCifResultIsNonZero(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::S_ABS_I32:
case AMDGPU::S_ABSDIFF_I32:
case AMDGPU::S_ABS_I32:
case AMDGPU::S_AND_B32:
case AMDGPU::S_AND_B64:
case AMDGPU::S_ANDN2_B32:
case AMDGPU::S_ANDN2_B64:
case AMDGPU::S_ASHR_I32:
case AMDGPU::S_ASHR_I64:
case AMDGPU::S_BCNT0_I32_B32:
case AMDGPU::S_BCNT0_I32_B64:
case AMDGPU::S_BCNT1_I32_B32:
case AMDGPU::S_BCNT1_I32_B64:
case AMDGPU::S_BFE_I32:
case AMDGPU::S_BFE_I64:
case AMDGPU::S_BFE_U32:
case AMDGPU::S_BFE_U64:
case AMDGPU::S_LSHL_B32:
case AMDGPU::S_LSHL_B64:
case AMDGPU::S_LSHR_B32:
case AMDGPU::S_LSHR_B64:
case AMDGPU::S_AND_B32:
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B32:
case AMDGPU::S_OR_B64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64:
case AMDGPU::S_NOT_B32:
case AMDGPU::S_NOT_B64:
case AMDGPU::S_NAND_B32:
case AMDGPU::S_NAND_B64:
case AMDGPU::S_NOR_B32:
case AMDGPU::S_NOR_B64:
case AMDGPU::S_XNOR_B32:
case AMDGPU::S_XNOR_B64:
case AMDGPU::S_ANDN2_B32:
case AMDGPU::S_ANDN2_B64:
case AMDGPU::S_NOT_B32:
case AMDGPU::S_NOT_B64:
case AMDGPU::S_OR_B32:
case AMDGPU::S_OR_B64:
case AMDGPU::S_ORN2_B32:
case AMDGPU::S_ORN2_B64:
case AMDGPU::S_BFE_I32:
case AMDGPU::S_BFE_I64:
case AMDGPU::S_BFE_U32:
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BCNT0_I32_B32:
case AMDGPU::S_BCNT0_I32_B64:
case AMDGPU::S_BCNT1_I32_B32:
case AMDGPU::S_BCNT1_I32_B64:
case AMDGPU::S_QUADMASK_B32:
case AMDGPU::S_QUADMASK_B64:
case AMDGPU::S_WQM_B32:
case AMDGPU::S_WQM_B64:
case AMDGPU::S_XNOR_B32:
case AMDGPU::S_XNOR_B64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64:
return true;
default:
return false;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ define amdgpu_ps i32 @not64(i64 inreg %val0) {
; Negative tests
; --------------------------------------------------------------------------------

@1 = external dso_local addrspace(4) constant i32
@1 = weak dso_local addrspace(4) constant i32 zeroinitializer

define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
; CHECK-LABEL: si_pc_add_rel_offset_must_not_optimize:
Expand Down