File Manager

003 File Manager

Current Path: /usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU

usr / src / contrib / llvm-project / llvm / lib / Target / AMDGPU /

📁 ..
📄 AMDGPU.h(11.46 KB)
📄 AMDGPU.td(36.97 KB)
📄 AMDGPUAliasAnalysis.cpp(5.58 KB)
📄 AMDGPUAliasAnalysis.h(3.32 KB)
📄 AMDGPUAlwaysInlinePass.cpp(4.83 KB)
📄 AMDGPUAnnotateKernelFeatures.cpp(11.94 KB)
📄 AMDGPUAnnotateUniformValues.cpp(6.13 KB)
📄 AMDGPUArgumentUsageInfo.cpp(7.66 KB)
📄 AMDGPUArgumentUsageInfo.h(4.81 KB)
📄 AMDGPUAsmPrinter.cpp(50.42 KB)
📄 AMDGPUAsmPrinter.h(5.13 KB)
📄 AMDGPUAtomicOptimizer.cpp(23.79 KB)
📄 AMDGPUCallLowering.cpp(28.66 KB)
📄 AMDGPUCallLowering.h(2.37 KB)
📄 AMDGPUCallingConv.td(7.33 KB)
📄 AMDGPUCodeGenPrepare.cpp(46.42 KB)
📄 AMDGPUCombine.td(2.79 KB)
📄 AMDGPUExportClustering.cpp(4.52 KB)
📄 AMDGPUExportClustering.h(533 B)
📄 AMDGPUFeatures.td(1.81 KB)
📄 AMDGPUFixFunctionBitcasts.cpp(1.87 KB)
📄 AMDGPUFrameLowering.cpp(1.98 KB)
📄 AMDGPUFrameLowering.h(1.39 KB)
📄 AMDGPUGISel.td(11.57 KB)
📄 AMDGPUGenRegisterBankInfo.def(5.83 KB)
📄 AMDGPUGlobalISelUtils.cpp(1.77 KB)
📄 AMDGPUGlobalISelUtils.h(2.07 KB)
📄 AMDGPUHSAMetadataStreamer.cpp(31.21 KB)
📄 AMDGPUHSAMetadataStreamer.h(5.46 KB)
📄 AMDGPUISelDAGToDAG.cpp(101.59 KB)
📄 AMDGPUISelLowering.cpp(168.65 KB)
📄 AMDGPUISelLowering.h(19.23 KB)
📄 AMDGPUInline.cpp(7.97 KB)
📄 AMDGPUInstrInfo.cpp(1.71 KB)
📄 AMDGPUInstrInfo.h(1.66 KB)
📄 AMDGPUInstrInfo.td(17.18 KB)
📄 AMDGPUInstructionSelector.cpp(128.53 KB)
📄 AMDGPUInstructionSelector.h(11.04 KB)
📄 AMDGPUInstructions.td(25.36 KB)
📄 AMDGPULegalizerInfo.cpp(149.32 KB)
📄 AMDGPULegalizerInfo.h(8.49 KB)
📄 AMDGPULibCalls.cpp(53.89 KB)
📄 AMDGPULibFunc.cpp(37.85 KB)
📄 AMDGPULibFunc.h(10.99 KB)
📄 AMDGPULowerIntrinsics.cpp(4.55 KB)
📄 AMDGPULowerKernelArguments.cpp(8.89 KB)
📄 AMDGPULowerKernelAttributes.cpp(7.78 KB)
📄 AMDGPUMCInstLower.cpp(14.27 KB)
📄 AMDGPUMachineCFGStructurizer.cpp(101.97 KB)
📄 AMDGPUMachineFunction.cpp(2.24 KB)
📄 AMDGPUMachineFunction.h(2.13 KB)
📄 AMDGPUMachineModuleInfo.cpp(1.34 KB)
📄 AMDGPUMachineModuleInfo.h(5.46 KB)
📄 AMDGPUMacroFusion.cpp(2.28 KB)
📄 AMDGPUMacroFusion.h(679 B)
📄 AMDGPUOpenCLEnqueuedBlockLowering.cpp(5.31 KB)
📄 AMDGPUPTNote.h(1.29 KB)
📄 AMDGPUPerfHintAnalysis.cpp(12.17 KB)
📄 AMDGPUPerfHintAnalysis.h(1.67 KB)
📄 AMDGPUPostLegalizerCombiner.cpp(12.02 KB)
📄 AMDGPUPreLegalizerCombiner.cpp(5.45 KB)
📄 AMDGPUPrintfRuntimeBinding.cpp(21.7 KB)
📄 AMDGPUPromoteAlloca.cpp(35.24 KB)
📄 AMDGPUPropagateAttributes.cpp(11.76 KB)
📄 AMDGPURegBankCombiner.cpp(5.36 KB)
📄 AMDGPURegisterBankInfo.cpp(161.67 KB)
📄 AMDGPURegisterBankInfo.h(7.41 KB)
📄 AMDGPURegisterBanks.td(921 B)
📄 AMDGPURewriteOutArguments.cpp(15.82 KB)
📄 AMDGPUSearchableTables.td(21.04 KB)
📄 AMDGPUSubtarget.cpp(29.62 KB)
📄 AMDGPUSubtarget.h(35.82 KB)
📄 AMDGPUTargetMachine.cpp(42.67 KB)
📄 AMDGPUTargetMachine.h(4.52 KB)
📄 AMDGPUTargetObjectFile.cpp(1.54 KB)
📄 AMDGPUTargetObjectFile.h(1.14 KB)
📄 AMDGPUTargetTransformInfo.cpp(39.07 KB)
📄 AMDGPUTargetTransformInfo.h(11.11 KB)
📄 AMDGPUUnifyDivergentExitNodes.cpp(13.84 KB)
📄 AMDGPUUnifyMetadata.cpp(4.46 KB)
📄 AMDILCFGStructurizer.cpp(56.32 KB)
📄 AMDKernelCodeT.h(32.84 KB)
📁 AsmParser
📄 BUFInstructions.td(110.75 KB)
📄 CaymanInstructions.td(7.93 KB)
📄 DSInstructions.td(52.37 KB)
📁 Disassembler
📄 EvergreenInstructions.td(28.24 KB)
📄 FLATInstructions.td(66.93 KB)
📄 GCNDPPCombine.cpp(19.92 KB)
📄 GCNHazardRecognizer.cpp(45.3 KB)
📄 GCNHazardRecognizer.h(3.96 KB)
📄 GCNILPSched.cpp(11.3 KB)
📄 GCNIterativeScheduler.cpp(20.62 KB)
📄 GCNIterativeScheduler.h(4.16 KB)
📄 GCNMinRegStrategy.cpp(8.47 KB)
📄 GCNNSAReassign.cpp(10.92 KB)
📄 GCNProcessors.td(4.84 KB)
📄 GCNRegBankReassign.cpp(26.68 KB)
📄 GCNRegPressure.cpp(16.27 KB)
📄 GCNRegPressure.h(9.15 KB)
📄 GCNSchedStrategy.cpp(21.67 KB)
📄 GCNSchedStrategy.h(3.77 KB)
📁 MCTargetDesc
📄 MIMGInstructions.td(39.85 KB)
📄 R600.td(1.51 KB)
📄 R600AsmPrinter.cpp(4.46 KB)
📄 R600AsmPrinter.h(1.5 KB)
📄 R600ClauseMergePass.cpp(7.38 KB)
📄 R600ControlFlowFinalizer.cpp(23.4 KB)
📄 R600Defines.h(4.25 KB)
📄 R600EmitClauseMarkers.cpp(12.1 KB)
📄 R600ExpandSpecialInstrs.cpp(10.11 KB)
📄 R600FrameLowering.cpp(1.83 KB)
📄 R600FrameLowering.h(1.25 KB)
📄 R600ISelLowering.cpp(81.88 KB)
📄 R600ISelLowering.h(4.8 KB)
📄 R600InstrFormats.td(11.58 KB)
📄 R600InstrInfo.cpp(49.47 KB)
📄 R600InstrInfo.h(13.7 KB)
📄 R600Instructions.td(55.13 KB)
📄 R600MachineFunctionInfo.cpp(551 B)
📄 R600MachineFunctionInfo.h(824 B)
📄 R600MachineScheduler.cpp(13.57 KB)
📄 R600MachineScheduler.h(2.53 KB)
📄 R600OpenCLImageTypeLoweringPass.cpp(11.75 KB)
📄 R600OptimizeVectorRegisters.cpp(13.4 KB)
📄 R600Packetizer.cpp(13.4 KB)
📄 R600Processors.td(4.42 KB)
📄 R600RegisterInfo.cpp(3.95 KB)
📄 R600RegisterInfo.h(2 KB)
📄 R600RegisterInfo.td(9.75 KB)
📄 R600Schedule.td(1.62 KB)
📄 R700Instructions.td(783 B)
📄 SIAddIMGInit.cpp(6.24 KB)
📄 SIAnnotateControlFlow.cpp(11.18 KB)
📄 SIDefines.h(20.86 KB)
📄 SIFixSGPRCopies.cpp(29.46 KB)
📄 SIFixVGPRCopies.cpp(2 KB)
📄 SIFixupVectorISel.cpp(8.75 KB)
📄 SIFoldOperands.cpp(54.56 KB)
📄 SIFormMemoryClauses.cpp(12.76 KB)
📄 SIFrameLowering.cpp(48.08 KB)
📄 SIFrameLowering.h(2.98 KB)
📄 SIISelLowering.cpp(423.43 KB)
📄 SIISelLowering.h(22.13 KB)
📄 SIInsertHardClauses.cpp(7.01 KB)
📄 SIInsertSkips.cpp(15.29 KB)
📄 SIInsertWaitcnts.cpp(58.33 KB)
📄 SIInstrFormats.td(9.44 KB)
📄 SIInstrInfo.cpp(247.15 KB)
📄 SIInstrInfo.h(41.24 KB)
📄 SIInstrInfo.td(90.7 KB)
📄 SIInstructions.td(77.7 KB)
📄 SILoadStoreOptimizer.cpp(76.21 KB)
📄 SILowerControlFlow.cpp(22.66 KB)
📄 SILowerI1Copies.cpp(27.83 KB)
📄 SILowerSGPRSpills.cpp(12.68 KB)
📄 SIMachineFunctionInfo.cpp(20.01 KB)
📄 SIMachineFunctionInfo.h(26.91 KB)
📄 SIMachineScheduler.cpp(69.44 KB)
📄 SIMachineScheduler.h(15.65 KB)
📄 SIMemoryLegalizer.cpp(45.84 KB)
📄 SIModeRegister.cpp(17.43 KB)
📄 SIOptimizeExecMasking.cpp(12.81 KB)
📄 SIOptimizeExecMaskingPreRA.cpp(11.13 KB)
📄 SIPeepholeSDWA.cpp(42.84 KB)
📄 SIPostRABundler.cpp(3.6 KB)
📄 SIPreAllocateWWMRegs.cpp(6.09 KB)
📄 SIPreEmitPeephole.cpp(10.51 KB)
📄 SIProgramInfo.h(2.04 KB)
📄 SIRegisterInfo.cpp(71.51 KB)
📄 SIRegisterInfo.h(13.04 KB)
📄 SIRegisterInfo.td(37.28 KB)
📄 SIRemoveShortExecBranches.cpp(4.96 KB)
📄 SISchedule.td(7.58 KB)
📄 SIShrinkInstructions.cpp(26.86 KB)
📄 SIWholeQuadMode.cpp(30.22 KB)
📄 SMInstructions.td(48.14 KB)
📄 SOPInstructions.td(60.51 KB)
📁 TargetInfo
📁 Utils
📄 VIInstrFormats.td(645 B)
📄 VOP1Instructions.td(35.53 KB)
📄 VOP2Instructions.td(65.04 KB)
📄 VOP3Instructions.td(53.14 KB)
📄 VOP3PInstructions.td(26.47 KB)
📄 VOPCInstructions.td(63.31 KB)
📄 VOPInstructions.td(23.76 KB)

Editing: GCNILPSched.cpp

//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
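/// \file
/// Bottom-up list scheduler for GCN. Candidates are compared by depth/height
/// spread, Sethi-Ullman number, successor height, data-predecessor count and
/// latency; the entry point is makeGCNILPScheduler().
//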
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

/// Bottom-up list scheduler over the SUnits of a ScheduleDAG.
///
/// A node is wrapped in a Candidate and placed in PendingQueue once its
/// NumSuccsLeft counter reaches zero (see releasePredecessors). Pending
/// candidates move to AvailQueue when the current cycle reaches their height
/// (see releasePending), and pickCandidate() chooses the next node to schedule
/// from AvailQueue.
class GCNILPScheduler {
  /// Intrusive list node referring to one schedulable unit.
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    explicit Candidate(SUnit *SU_) : SU(SU_) {}
  };

  /// Storage for every Candidate linked into either queue.
  SpecificBumpPtrAllocator<Candidate> Alloc;
  using Queue = simple_ilist<Candidate>;
  /// Candidates whose height is still above CurCycle.
  Queue PendingQueue;
  /// Candidates that may be picked right now.
  Queue AvailQueue;
  /// Next value stored into SUnit::NodeQueueId when a candidate becomes
  /// available.
  unsigned CurQueueId = 0;

  /// Sethi-Ullman number of each node, indexed by SUnit::NodeNum. Populated by
  /// schedule() before any candidate is compared.
  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// Priority key of \p SU used by pickBest().
  unsigned getNodePriority(const SUnit *SU) const;

  /// Return whichever of \p left and \p right should be scheduled next.
  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  /// Return the preferred entry of AvailQueue, or nullptr if it is empty.
  Candidate *pickCandidate();

  /// Move every pending candidate whose height is <= CurCycle to AvailQueue.
  void releasePending();
  /// Advance CurCycle to \p NextCycle (never backwards) and release pending
  /// candidates.
  void advanceToCycle(unsigned NextCycle);
  /// Update the predecessors of the just-scheduled \p SU and queue those that
  /// have no unscheduled successors left.
  void releasePredecessors(const SUnit *SU);

public:
  /// Produce a schedule for \p DAG.
  ///
  /// \param BotRoots nodes that are available at the bottom of the region;
  ///        scheduling proceeds bottom-up from them.
  /// \param DAG the DAG whose SUnits are scheduled. Its SUnits are modified
  ///        while scheduling and restored before returning.
  /// \returns the scheduled nodes in top-down order.
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> BotRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

SethiUllmanNumber += Extra;

if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

return SethiUllmanNumber;
}

// Priority key consumed by pickBest(). Lower priority means schedule further
// down: in bottom-up scheduling, lower priority SUs are scheduled before
// higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());

  const bool HasSuccs = SU->NumSuccs != 0;
  const bool HasPreds = SU->NumPreds != 0;

  if (!HasSuccs && HasPreds) {
    // Nothing consumes a value from this node (e.g. a store), so it ends a
    // chain of computation. Hand it a large Sethi-Ullman number so that it is
    // scheduled right before its predecessors and does not lengthen their
    // live ranges.
    return 0xffff;
  }

  if (!HasPreds && HasSuccs) {
    // The node has no register def feeding it; schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
  }

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the largest height among the data successors of
/// \p SU, or 0 when it has none. Control (chain) successors are skipped.
static unsigned closestSucc(const SUnit *SU) {
  unsigned Tallest = 0;
  for (const SDep &Edge : SU->Succs) {
    if (!Edge.isCtrl()) {
      // Taking the maximum means a bunch of stacked-up CopyToRegs are treated
      // as being at the same position.
      const unsigned SuccHeight = Edge.getSUnit()->getHeight();
      Tallest = std::max(Tallest, SuccHeight);
    }
  }
  return Tallest;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data (non-control) predecessors
/// of \p SU.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned DataPreds = 0;
  for (const SDep &Edge : SU->Preds)
    if (!Edge.isCtrl()) // chain preds are not counted
      ++DataPreds;
  return DataPreds;
}

// Latency-based comparison of two candidates.
// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
//
// The keys are examined in order: the smaller height wins, then the larger
// depth, then the smaller Latency.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  const int LHeight = static_cast<int>(left->getHeight());
  const int RHeight = static_cast<int>(right->getHeight());
  if (LHeight > RHeight)
    return 1;
  if (LHeight < RHeight)
    return -1;

  // Heights are equal, so only depth and then latency can separate the nodes.
  const int LDepth = left->getDepth();
  const int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }

  if (left->Latency == right->Latency)
    return 0;
  return left->Latency > right->Latency ? 1 : -1;
}

/// Choose which of \p left and \p right to schedule next (bottom-up).
///
/// The criteria are applied in order and the first one that separates the two
/// nodes decides:
///   1. depth, when the depths differ by more than MaxReorderWindow;
///   2. height, when the heights differ by more than MaxReorderWindow;
///   3. getNodePriority() (smaller value is picked);
///   4. closestSucc() (larger value is picked);
///   5. calcMaxScratches() (smaller value is picked);
///   6. BUCompareLatency(), with \p left winning a tie.
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  const bool DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    const int DepthSpread = static_cast<int>(left->getDepth()) -
                            static_cast<int>(right->getDepth());
    if (std::abs(DepthSpread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      // The deeper node is preferred.
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  const bool DisableSchedHeight = false;
  if (!DisableSchedHeight) {
    const unsigned LHeight = left->getHeight();
    const unsigned RHeight = right->getHeight();
    if (LHeight != RHeight) {
      const int HeightSpread =
          static_cast<int>(LHeight) - static_cast<int>(RHeight);
      // The node with the smaller height is preferred.
      if (std::abs(HeightSpread) > MaxReorderWindow)
        return LHeight > RHeight ? right : left;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  const unsigned LPriority = getNodePriority(left);
  const unsigned RPriority = getNodePriority(right);
  if (LPriority > RPriority)
    return right;
  if (LPriority < RPriority)
    return left;

  // With equal Sethi-Ullman numbers, try to keep a def close to its use.
  // For example, given
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  // with both of
  //   t2 = op c3
  //   t4 = op c4
  // ready, pick "t2 = op c3" first, which yields the final order
  //   t4 = op c4
  //   t2 = op c3
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  // and thereby creates more short live intervals.
  const unsigned LDist = closestSucc(left);
  const unsigned RDist = closestSucc(right);
  if (LDist < RDist)
    return right;
  if (LDist > RDist)
    return left;

  // How many registers become live when the node is scheduled.
  const unsigned LScratch = calcMaxScratches(left);
  const unsigned RScratch = calcMaxScratches(right);
  if (LScratch > RScratch)
    return right;
  if (LScratch < RScratch)
    return left;

  const bool DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    // Only a strictly positive result favours right; a tie keeps left.
    return BUCompareLatency(left, right) > 0 ? right : left;
  }

  // Everything below runs only when DisableSchedCycles is true; with the
  // constant above set to false it is not reached.
  if (left->getHeight() != right->getHeight())
    return (left->getHeight() > right->getHeight()) ? right : left;

  if (left->getDepth() != right->getDepth())
    return (left->getDepth() < right->getDepth()) ? right : left;

  assert(left->NodeQueueId && right->NodeQueueId &&
        "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

/// Scan AvailQueue and return the candidate that pickBest() prefers over all
/// others, or nullptr when the queue is empty. The queue itself is not
/// modified.
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;

  auto Best = AvailQueue.begin();
  const auto End = AvailQueue.end();
  auto I = std::next(Best);
  while (I != End) {
    const SUnit *NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      // pickBest must hand back one of its two arguments.
      assert(NewBestSU == I->SU);
      Best = I;
    }
    ++I;
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Advance the scheduler to \p NextCycle and release any pending candidates
/// that become ready. Does nothing unless \p NextCycle is later than CurCycle.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle > CurCycle) {
    CurCycle = NextCycle;
    releasePending();
  }
}

void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

/// Schedule the SUnits of \p DAG bottom-up, starting from \p BotRoots, and
/// return them in top-down order.
///
/// The SUnits of \p DAG are mutated while scheduling (heights, NumSuccsLeft,
/// NodeQueueId, isScheduled); a verbatim copy is taken on entry and written
/// back before returning.
std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
  const auto NumSUnits = SUnits.size();

  // Some of the fields touched below are private, so they cannot be saved
  // individually. Snapshot whole units instead; this assumes SUnit has value
  // semantics.
  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(NumSUnits);
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  // Zero marks "not computed" for CalcNodeSethiUllmanNumber.
  SUNumbers.assign(NumSUnits, 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  // Seed the ready list with the bottom roots.
  for (const SUnit *Root : BotRoots) {
    Candidate *Seed =
        new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(Root));
    AvailQueue.push_back(*Seed);
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(NumSUnits);
  for (;;) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      // Nothing is ready: jump ahead to the pending candidate with the
      // smallest height, moving forward by at least one cycle.
      const auto HeightLess = [](const Candidate &C1, const Candidate &C2) {
        return C1.SU->getHeight() < C2.SU->getHeight();
      };
      auto EarliestSU = std::min_element(PendingQueue.begin(),
                                         PendingQueue.end(), HeightLess)->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break; // Both queues are drained.

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    SUnit *SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  // Nodes were appended bottom-up; flip to top-down order.
  std::reverse(Schedule.begin(), Schedule.end());

  // Put the DAG's units back the way they were on entry.
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
/// Run a fresh GCNILPScheduler over \p DAG starting from \p BotRoots and
/// return the schedule it produces.
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  return GCNILPScheduler().schedule(BotRoots, DAG);
}
} // namespace llvm

003 File Manager

Editing: GCNILPSched.cpp

Upload File

Create Folder