003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Target
/
AMDGPU
/
📁
..
📄
AMDGPU.h
(11.46 KB)
📄
AMDGPU.td
(36.97 KB)
📄
AMDGPUAliasAnalysis.cpp
(5.58 KB)
📄
AMDGPUAliasAnalysis.h
(3.32 KB)
📄
AMDGPUAlwaysInlinePass.cpp
(4.83 KB)
📄
AMDGPUAnnotateKernelFeatures.cpp
(11.94 KB)
📄
AMDGPUAnnotateUniformValues.cpp
(6.13 KB)
📄
AMDGPUArgumentUsageInfo.cpp
(7.66 KB)
📄
AMDGPUArgumentUsageInfo.h
(4.81 KB)
📄
AMDGPUAsmPrinter.cpp
(50.42 KB)
📄
AMDGPUAsmPrinter.h
(5.13 KB)
📄
AMDGPUAtomicOptimizer.cpp
(23.79 KB)
📄
AMDGPUCallLowering.cpp
(28.66 KB)
📄
AMDGPUCallLowering.h
(2.37 KB)
📄
AMDGPUCallingConv.td
(7.33 KB)
📄
AMDGPUCodeGenPrepare.cpp
(46.42 KB)
📄
AMDGPUCombine.td
(2.79 KB)
📄
AMDGPUExportClustering.cpp
(4.52 KB)
📄
AMDGPUExportClustering.h
(533 B)
📄
AMDGPUFeatures.td
(1.81 KB)
📄
AMDGPUFixFunctionBitcasts.cpp
(1.87 KB)
📄
AMDGPUFrameLowering.cpp
(1.98 KB)
📄
AMDGPUFrameLowering.h
(1.39 KB)
📄
AMDGPUGISel.td
(11.57 KB)
📄
AMDGPUGenRegisterBankInfo.def
(5.83 KB)
📄
AMDGPUGlobalISelUtils.cpp
(1.77 KB)
📄
AMDGPUGlobalISelUtils.h
(2.07 KB)
📄
AMDGPUHSAMetadataStreamer.cpp
(31.21 KB)
📄
AMDGPUHSAMetadataStreamer.h
(5.46 KB)
📄
AMDGPUISelDAGToDAG.cpp
(101.59 KB)
📄
AMDGPUISelLowering.cpp
(168.65 KB)
📄
AMDGPUISelLowering.h
(19.23 KB)
📄
AMDGPUInline.cpp
(7.97 KB)
📄
AMDGPUInstrInfo.cpp
(1.71 KB)
📄
AMDGPUInstrInfo.h
(1.66 KB)
📄
AMDGPUInstrInfo.td
(17.18 KB)
📄
AMDGPUInstructionSelector.cpp
(128.53 KB)
📄
AMDGPUInstructionSelector.h
(11.04 KB)
📄
AMDGPUInstructions.td
(25.36 KB)
📄
AMDGPULegalizerInfo.cpp
(149.32 KB)
📄
AMDGPULegalizerInfo.h
(8.49 KB)
📄
AMDGPULibCalls.cpp
(53.89 KB)
📄
AMDGPULibFunc.cpp
(37.85 KB)
📄
AMDGPULibFunc.h
(10.99 KB)
📄
AMDGPULowerIntrinsics.cpp
(4.55 KB)
📄
AMDGPULowerKernelArguments.cpp
(8.89 KB)
📄
AMDGPULowerKernelAttributes.cpp
(7.78 KB)
📄
AMDGPUMCInstLower.cpp
(14.27 KB)
📄
AMDGPUMachineCFGStructurizer.cpp
(101.97 KB)
📄
AMDGPUMachineFunction.cpp
(2.24 KB)
📄
AMDGPUMachineFunction.h
(2.13 KB)
📄
AMDGPUMachineModuleInfo.cpp
(1.34 KB)
📄
AMDGPUMachineModuleInfo.h
(5.46 KB)
📄
AMDGPUMacroFusion.cpp
(2.28 KB)
📄
AMDGPUMacroFusion.h
(679 B)
📄
AMDGPUOpenCLEnqueuedBlockLowering.cpp
(5.31 KB)
📄
AMDGPUPTNote.h
(1.29 KB)
📄
AMDGPUPerfHintAnalysis.cpp
(12.17 KB)
📄
AMDGPUPerfHintAnalysis.h
(1.67 KB)
📄
AMDGPUPostLegalizerCombiner.cpp
(12.02 KB)
📄
AMDGPUPreLegalizerCombiner.cpp
(5.45 KB)
📄
AMDGPUPrintfRuntimeBinding.cpp
(21.7 KB)
📄
AMDGPUPromoteAlloca.cpp
(35.24 KB)
📄
AMDGPUPropagateAttributes.cpp
(11.76 KB)
📄
AMDGPURegBankCombiner.cpp
(5.36 KB)
📄
AMDGPURegisterBankInfo.cpp
(161.67 KB)
📄
AMDGPURegisterBankInfo.h
(7.41 KB)
📄
AMDGPURegisterBanks.td
(921 B)
📄
AMDGPURewriteOutArguments.cpp
(15.82 KB)
📄
AMDGPUSearchableTables.td
(21.04 KB)
📄
AMDGPUSubtarget.cpp
(29.62 KB)
📄
AMDGPUSubtarget.h
(35.82 KB)
📄
AMDGPUTargetMachine.cpp
(42.67 KB)
📄
AMDGPUTargetMachine.h
(4.52 KB)
📄
AMDGPUTargetObjectFile.cpp
(1.54 KB)
📄
AMDGPUTargetObjectFile.h
(1.14 KB)
📄
AMDGPUTargetTransformInfo.cpp
(39.07 KB)
📄
AMDGPUTargetTransformInfo.h
(11.11 KB)
📄
AMDGPUUnifyDivergentExitNodes.cpp
(13.84 KB)
📄
AMDGPUUnifyMetadata.cpp
(4.46 KB)
📄
AMDILCFGStructurizer.cpp
(56.32 KB)
📄
AMDKernelCodeT.h
(32.84 KB)
📁
AsmParser
📄
BUFInstructions.td
(110.75 KB)
📄
CaymanInstructions.td
(7.93 KB)
📄
DSInstructions.td
(52.37 KB)
📁
Disassembler
📄
EvergreenInstructions.td
(28.24 KB)
📄
FLATInstructions.td
(66.93 KB)
📄
GCNDPPCombine.cpp
(19.92 KB)
📄
GCNHazardRecognizer.cpp
(45.3 KB)
📄
GCNHazardRecognizer.h
(3.96 KB)
📄
GCNILPSched.cpp
(11.3 KB)
📄
GCNIterativeScheduler.cpp
(20.62 KB)
📄
GCNIterativeScheduler.h
(4.16 KB)
📄
GCNMinRegStrategy.cpp
(8.47 KB)
📄
GCNNSAReassign.cpp
(10.92 KB)
📄
GCNProcessors.td
(4.84 KB)
📄
GCNRegBankReassign.cpp
(26.68 KB)
📄
GCNRegPressure.cpp
(16.27 KB)
📄
GCNRegPressure.h
(9.15 KB)
📄
GCNSchedStrategy.cpp
(21.67 KB)
📄
GCNSchedStrategy.h
(3.77 KB)
📁
MCTargetDesc
📄
MIMGInstructions.td
(39.85 KB)
📄
R600.td
(1.51 KB)
📄
R600AsmPrinter.cpp
(4.46 KB)
📄
R600AsmPrinter.h
(1.5 KB)
📄
R600ClauseMergePass.cpp
(7.38 KB)
📄
R600ControlFlowFinalizer.cpp
(23.4 KB)
📄
R600Defines.h
(4.25 KB)
📄
R600EmitClauseMarkers.cpp
(12.1 KB)
📄
R600ExpandSpecialInstrs.cpp
(10.11 KB)
📄
R600FrameLowering.cpp
(1.83 KB)
📄
R600FrameLowering.h
(1.25 KB)
📄
R600ISelLowering.cpp
(81.88 KB)
📄
R600ISelLowering.h
(4.8 KB)
📄
R600InstrFormats.td
(11.58 KB)
📄
R600InstrInfo.cpp
(49.47 KB)
📄
R600InstrInfo.h
(13.7 KB)
📄
R600Instructions.td
(55.13 KB)
📄
R600MachineFunctionInfo.cpp
(551 B)
📄
R600MachineFunctionInfo.h
(824 B)
📄
R600MachineScheduler.cpp
(13.57 KB)
📄
R600MachineScheduler.h
(2.53 KB)
📄
R600OpenCLImageTypeLoweringPass.cpp
(11.75 KB)
📄
R600OptimizeVectorRegisters.cpp
(13.4 KB)
📄
R600Packetizer.cpp
(13.4 KB)
📄
R600Processors.td
(4.42 KB)
📄
R600RegisterInfo.cpp
(3.95 KB)
📄
R600RegisterInfo.h
(2 KB)
📄
R600RegisterInfo.td
(9.75 KB)
📄
R600Schedule.td
(1.62 KB)
📄
R700Instructions.td
(783 B)
📄
SIAddIMGInit.cpp
(6.24 KB)
📄
SIAnnotateControlFlow.cpp
(11.18 KB)
📄
SIDefines.h
(20.86 KB)
📄
SIFixSGPRCopies.cpp
(29.46 KB)
📄
SIFixVGPRCopies.cpp
(2 KB)
📄
SIFixupVectorISel.cpp
(8.75 KB)
📄
SIFoldOperands.cpp
(54.56 KB)
📄
SIFormMemoryClauses.cpp
(12.76 KB)
📄
SIFrameLowering.cpp
(48.08 KB)
📄
SIFrameLowering.h
(2.98 KB)
📄
SIISelLowering.cpp
(423.43 KB)
📄
SIISelLowering.h
(22.13 KB)
📄
SIInsertHardClauses.cpp
(7.01 KB)
📄
SIInsertSkips.cpp
(15.29 KB)
📄
SIInsertWaitcnts.cpp
(58.33 KB)
📄
SIInstrFormats.td
(9.44 KB)
📄
SIInstrInfo.cpp
(247.15 KB)
📄
SIInstrInfo.h
(41.24 KB)
📄
SIInstrInfo.td
(90.7 KB)
📄
SIInstructions.td
(77.7 KB)
📄
SILoadStoreOptimizer.cpp
(76.21 KB)
📄
SILowerControlFlow.cpp
(22.66 KB)
📄
SILowerI1Copies.cpp
(27.83 KB)
📄
SILowerSGPRSpills.cpp
(12.68 KB)
📄
SIMachineFunctionInfo.cpp
(20.01 KB)
📄
SIMachineFunctionInfo.h
(26.91 KB)
📄
SIMachineScheduler.cpp
(69.44 KB)
📄
SIMachineScheduler.h
(15.65 KB)
📄
SIMemoryLegalizer.cpp
(45.84 KB)
📄
SIModeRegister.cpp
(17.43 KB)
📄
SIOptimizeExecMasking.cpp
(12.81 KB)
📄
SIOptimizeExecMaskingPreRA.cpp
(11.13 KB)
📄
SIPeepholeSDWA.cpp
(42.84 KB)
📄
SIPostRABundler.cpp
(3.6 KB)
📄
SIPreAllocateWWMRegs.cpp
(6.09 KB)
📄
SIPreEmitPeephole.cpp
(10.51 KB)
📄
SIProgramInfo.h
(2.04 KB)
📄
SIRegisterInfo.cpp
(71.51 KB)
📄
SIRegisterInfo.h
(13.04 KB)
📄
SIRegisterInfo.td
(37.28 KB)
📄
SIRemoveShortExecBranches.cpp
(4.96 KB)
📄
SISchedule.td
(7.58 KB)
📄
SIShrinkInstructions.cpp
(26.86 KB)
📄
SIWholeQuadMode.cpp
(30.22 KB)
📄
SMInstructions.td
(48.14 KB)
📄
SOPInstructions.td
(60.51 KB)
📁
TargetInfo
📁
Utils
📄
VIInstrFormats.td
(645 B)
📄
VOP1Instructions.td
(35.53 KB)
📄
VOP2Instructions.td
(65.04 KB)
📄
VOP3Instructions.td
(53.14 KB)
📄
VOP3PInstructions.td
(26.47 KB)
📄
VOPCInstructions.td
(63.31 KB)
📄
VOPInstructions.td
(23.76 KB)
Editing: GCNILPSched.cpp
//===---------------------------- GCNILPSched.cpp - -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "machine-scheduler" namespace { class GCNILPScheduler { struct Candidate : ilist_node<Candidate> { SUnit *SU; Candidate(SUnit *SU_) : SU(SU_) {} }; SpecificBumpPtrAllocator<Candidate> Alloc; typedef simple_ilist<Candidate> Queue; Queue PendingQueue; Queue AvailQueue; unsigned CurQueueId = 0; std::vector<unsigned> SUNumbers; /// CurCycle - The current scheduler state corresponds to this cycle. unsigned CurCycle = 0; unsigned getNodePriority(const SUnit *SU) const; const SUnit *pickBest(const SUnit *left, const SUnit *right); Candidate* pickCandidate(); void releasePending(); void advanceToCycle(unsigned NextCycle); void releasePredecessors(const SUnit* SU); public: std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, const ScheduleDAG &DAG); }; } // namespace /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; if (SethiUllmanNumber != 0) return SethiUllmanNumber; unsigned Extra = 0; for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) continue; // ignore chain preds SUnit *PredSU = Pred.getSUnit(); unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); if (PredSethiUllman > SethiUllmanNumber) { SethiUllmanNumber = PredSethiUllman; Extra = 0; } else if (PredSethiUllman == SethiUllmanNumber) ++Extra; } SethiUllmanNumber += Extra; if (SethiUllmanNumber == 0) SethiUllmanNumber = 1; return SethiUllmanNumber; } // Lower priority means schedule further down. For bottom-up scheduling, lower // priority SUs are scheduled before higher priority SUs. unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const { assert(SU->NodeNum < SUNumbers.size()); if (SU->NumSuccs == 0 && SU->NumPreds != 0) // If SU does not have a register use, i.e. it doesn't produce a value // that would be consumed (e.g. store), then it terminates a chain of // computation. Give it a large SethiUllman number so it will be // scheduled right before its predecessors that it doesn't lengthen // their live ranges. return 0xffff; if (SU->NumPreds == 0 && SU->NumSuccs != 0) // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; return SUNumbers[SU->NodeNum]; } /// closestSucc - Returns the scheduled cycle of the successor which is /// closest to the current cycle. static unsigned closestSucc(const SUnit *SU) { unsigned MaxHeight = 0; for (const SDep &Succ : SU->Succs) { if (Succ.isCtrl()) continue; // ignore chain succs unsigned Height = Succ.getSUnit()->getHeight(); // If there are bunch of CopyToRegs stacked up, they should be considered // to be at the same position. if (Height > MaxHeight) MaxHeight = Height; } return MaxHeight; } /// calcMaxScratches - Returns an cost estimate of the worse case requirement /// for scratch registers, i.e. number of data dependencies. static unsigned calcMaxScratches(const SUnit *SU) { unsigned Scratches = 0; for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) continue; // ignore chain preds Scratches++; } return Scratches; } // Return -1 if left has higher priority, 1 if right has higher priority. // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(const SUnit *left, const SUnit *right) { // Scheduling an instruction that uses a VReg whose postincrement has not yet // been scheduled will induce a copy. Model this as an extra cycle of latency. int LHeight = (int)left->getHeight(); int RHeight = (int)right->getHeight(); // If either node is scheduling for latency, sort them by height/depth // and latency. // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer // is enabled, grouping instructions by cycle, then its height is already // covered so only its depth matters. We also reach this point if both stall // but have the same height. if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; int LDepth = left->getDepth(); int RDepth = right->getDepth(); if (LDepth != RDepth) { LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; return 0; } const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) { // TODO: add register pressure lowering checks bool const DisableSchedCriticalPath = false; int MaxReorderWindow = 6; if (!DisableSchedCriticalPath) { int spread = (int)left->getDepth() - (int)right->getDepth(); if (std::abs(spread) > MaxReorderWindow) { LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); return left->getDepth() < right->getDepth() ? right : left; } } bool const DisableSchedHeight = false; if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); if (std::abs(spread) > MaxReorderWindow) return left->getHeight() > right->getHeight() ? right : left; } // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. unsigned LPriority = getNodePriority(left); unsigned RPriority = getNodePriority(right); if (LPriority != RPriority) return LPriority > RPriority ? right : left; // Try schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. // t1 = op t2, c1 // t3 = op t4, c2 // // and the following instructions are both ready. // t2 = op c3 // t4 = op c4 // // Then schedule t2 = op first. // i.e. // t4 = op c4 // t2 = op c3 // t1 = op t2, c1 // t3 = op t4, c2 // // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); if (LDist != RDist) return LDist < RDist ? right : left; // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); if (LScratch != RScratch) return LScratch > RScratch ? right : left; bool const DisableSchedCycles = false; if (!DisableSchedCycles) { int result = BUCompareLatency(left, right); if (result != 0) return result > 0 ? right : left; return left; } else { if (left->getHeight() != right->getHeight()) return (left->getHeight() > right->getHeight()) ? right : left; if (left->getDepth() != right->getDepth()) return (left->getDepth() < right->getDepth()) ? right : left; } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId) ? right : left; } GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() { if (AvailQueue.empty()) return nullptr; auto Best = AvailQueue.begin(); for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) { auto NewBestSU = pickBest(Best->SU, I->SU); if (NewBestSU != Best->SU) { assert(NewBestSU == I->SU); Best = I; } } return &*Best; } void GCNILPScheduler::releasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) { auto &C = *I++; if (C.SU->getHeight() <= CurCycle) { PendingQueue.remove(C); AvailQueue.push_back(C); C.SU->NodeQueueId = CurQueueId++; } } } /// Move the scheduler state forward by the specified number of Cycles. void GCNILPScheduler::advanceToCycle(unsigned NextCycle) { if (NextCycle <= CurCycle) return; CurCycle = NextCycle; releasePending(); } void GCNILPScheduler::releasePredecessors(const SUnit* SU) { for (const auto &PredEdge : SU->Preds) { auto PredSU = PredEdge.getSUnit(); if (PredEdge.isWeak()) continue; assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0); PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency()); if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0) PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU)); } } std::vector<const SUnit*> GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, const ScheduleDAG &DAG) { auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits; std::vector<SUnit> SUSavedCopy; SUSavedCopy.resize(SUnits.size()); // we cannot save only those fields we touch: some of them are private // so save units verbatim: this assumes SUnit should have value semantics for (const SUnit &SU : SUnits) SUSavedCopy[SU.NodeNum] = SU; SUNumbers.assign(SUnits.size(), 0); for (const SUnit &SU : SUnits) CalcNodeSethiUllmanNumber(&SU, SUNumbers); for (auto SU : BotRoots) { AvailQueue.push_back( *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU))); } releasePredecessors(&DAG.ExitSU); std::vector<const SUnit*> Schedule; Schedule.reserve(SUnits.size()); while (true) { if (AvailQueue.empty() && !PendingQueue.empty()) { auto EarliestSU = std::min_element( PendingQueue.begin(), PendingQueue.end(), [=](const Candidate& C1, const Candidate& C2) { return C1.SU->getHeight() < C2.SU->getHeight(); })->SU; advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight())); } if (AvailQueue.empty()) break; LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" "Ready queue:"; for (auto &C : AvailQueue) dbgs() << ' ' << C.SU->NodeNum; dbgs() << '\n';); auto C = pickCandidate(); assert(C); AvailQueue.remove(*C); auto SU = C->SU; LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); advanceToCycle(SU->getHeight()); releasePredecessors(SU); Schedule.push_back(SU); SU->isScheduled = true; } assert(SUnits.size() == Schedule.size()); std::reverse(Schedule.begin(), Schedule.end()); // restore units for (auto &SU : SUnits) SU = SUSavedCopy[SU.NodeNum]; return Schedule; } namespace llvm { std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots, const ScheduleDAG &DAG) { GCNILPScheduler S; return S.schedule(BotRoots, DAG); } }
Upload File
Create Folder