003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Target
/
AMDGPU
/
📁
..
📄
AMDGPU.h
(11.46 KB)
📄
AMDGPU.td
(36.97 KB)
📄
AMDGPUAliasAnalysis.cpp
(5.58 KB)
📄
AMDGPUAliasAnalysis.h
(3.32 KB)
📄
AMDGPUAlwaysInlinePass.cpp
(4.83 KB)
📄
AMDGPUAnnotateKernelFeatures.cpp
(11.94 KB)
📄
AMDGPUAnnotateUniformValues.cpp
(6.13 KB)
📄
AMDGPUArgumentUsageInfo.cpp
(7.66 KB)
📄
AMDGPUArgumentUsageInfo.h
(4.81 KB)
📄
AMDGPUAsmPrinter.cpp
(50.42 KB)
📄
AMDGPUAsmPrinter.h
(5.13 KB)
📄
AMDGPUAtomicOptimizer.cpp
(23.79 KB)
📄
AMDGPUCallLowering.cpp
(28.66 KB)
📄
AMDGPUCallLowering.h
(2.37 KB)
📄
AMDGPUCallingConv.td
(7.33 KB)
📄
AMDGPUCodeGenPrepare.cpp
(46.42 KB)
📄
AMDGPUCombine.td
(2.79 KB)
📄
AMDGPUExportClustering.cpp
(4.52 KB)
📄
AMDGPUExportClustering.h
(533 B)
📄
AMDGPUFeatures.td
(1.81 KB)
📄
AMDGPUFixFunctionBitcasts.cpp
(1.87 KB)
📄
AMDGPUFrameLowering.cpp
(1.98 KB)
📄
AMDGPUFrameLowering.h
(1.39 KB)
📄
AMDGPUGISel.td
(11.57 KB)
📄
AMDGPUGenRegisterBankInfo.def
(5.83 KB)
📄
AMDGPUGlobalISelUtils.cpp
(1.77 KB)
📄
AMDGPUGlobalISelUtils.h
(2.07 KB)
📄
AMDGPUHSAMetadataStreamer.cpp
(31.21 KB)
📄
AMDGPUHSAMetadataStreamer.h
(5.46 KB)
📄
AMDGPUISelDAGToDAG.cpp
(101.59 KB)
📄
AMDGPUISelLowering.cpp
(168.65 KB)
📄
AMDGPUISelLowering.h
(19.23 KB)
📄
AMDGPUInline.cpp
(7.97 KB)
📄
AMDGPUInstrInfo.cpp
(1.71 KB)
📄
AMDGPUInstrInfo.h
(1.66 KB)
📄
AMDGPUInstrInfo.td
(17.18 KB)
📄
AMDGPUInstructionSelector.cpp
(128.53 KB)
📄
AMDGPUInstructionSelector.h
(11.04 KB)
📄
AMDGPUInstructions.td
(25.36 KB)
📄
AMDGPULegalizerInfo.cpp
(149.32 KB)
📄
AMDGPULegalizerInfo.h
(8.49 KB)
📄
AMDGPULibCalls.cpp
(53.89 KB)
📄
AMDGPULibFunc.cpp
(37.85 KB)
📄
AMDGPULibFunc.h
(10.99 KB)
📄
AMDGPULowerIntrinsics.cpp
(4.55 KB)
📄
AMDGPULowerKernelArguments.cpp
(8.89 KB)
📄
AMDGPULowerKernelAttributes.cpp
(7.78 KB)
📄
AMDGPUMCInstLower.cpp
(14.27 KB)
📄
AMDGPUMachineCFGStructurizer.cpp
(101.97 KB)
📄
AMDGPUMachineFunction.cpp
(2.24 KB)
📄
AMDGPUMachineFunction.h
(2.13 KB)
📄
AMDGPUMachineModuleInfo.cpp
(1.34 KB)
📄
AMDGPUMachineModuleInfo.h
(5.46 KB)
📄
AMDGPUMacroFusion.cpp
(2.28 KB)
📄
AMDGPUMacroFusion.h
(679 B)
📄
AMDGPUOpenCLEnqueuedBlockLowering.cpp
(5.31 KB)
📄
AMDGPUPTNote.h
(1.29 KB)
📄
AMDGPUPerfHintAnalysis.cpp
(12.17 KB)
📄
AMDGPUPerfHintAnalysis.h
(1.67 KB)
📄
AMDGPUPostLegalizerCombiner.cpp
(12.02 KB)
📄
AMDGPUPreLegalizerCombiner.cpp
(5.45 KB)
📄
AMDGPUPrintfRuntimeBinding.cpp
(21.7 KB)
📄
AMDGPUPromoteAlloca.cpp
(35.24 KB)
📄
AMDGPUPropagateAttributes.cpp
(11.76 KB)
📄
AMDGPURegBankCombiner.cpp
(5.36 KB)
📄
AMDGPURegisterBankInfo.cpp
(161.67 KB)
📄
AMDGPURegisterBankInfo.h
(7.41 KB)
📄
AMDGPURegisterBanks.td
(921 B)
📄
AMDGPURewriteOutArguments.cpp
(15.82 KB)
📄
AMDGPUSearchableTables.td
(21.04 KB)
📄
AMDGPUSubtarget.cpp
(29.62 KB)
📄
AMDGPUSubtarget.h
(35.82 KB)
📄
AMDGPUTargetMachine.cpp
(42.67 KB)
📄
AMDGPUTargetMachine.h
(4.52 KB)
📄
AMDGPUTargetObjectFile.cpp
(1.54 KB)
📄
AMDGPUTargetObjectFile.h
(1.14 KB)
📄
AMDGPUTargetTransformInfo.cpp
(39.07 KB)
📄
AMDGPUTargetTransformInfo.h
(11.11 KB)
📄
AMDGPUUnifyDivergentExitNodes.cpp
(13.84 KB)
📄
AMDGPUUnifyMetadata.cpp
(4.46 KB)
📄
AMDILCFGStructurizer.cpp
(56.32 KB)
📄
AMDKernelCodeT.h
(32.84 KB)
📁
AsmParser
📄
BUFInstructions.td
(110.75 KB)
📄
CaymanInstructions.td
(7.93 KB)
📄
DSInstructions.td
(52.37 KB)
📁
Disassembler
📄
EvergreenInstructions.td
(28.24 KB)
📄
FLATInstructions.td
(66.93 KB)
📄
GCNDPPCombine.cpp
(19.92 KB)
📄
GCNHazardRecognizer.cpp
(45.3 KB)
📄
GCNHazardRecognizer.h
(3.96 KB)
📄
GCNILPSched.cpp
(11.3 KB)
📄
GCNIterativeScheduler.cpp
(20.62 KB)
📄
GCNIterativeScheduler.h
(4.16 KB)
📄
GCNMinRegStrategy.cpp
(8.47 KB)
📄
GCNNSAReassign.cpp
(10.92 KB)
📄
GCNProcessors.td
(4.84 KB)
📄
GCNRegBankReassign.cpp
(26.68 KB)
📄
GCNRegPressure.cpp
(16.27 KB)
📄
GCNRegPressure.h
(9.15 KB)
📄
GCNSchedStrategy.cpp
(21.67 KB)
📄
GCNSchedStrategy.h
(3.77 KB)
📁
MCTargetDesc
📄
MIMGInstructions.td
(39.85 KB)
📄
R600.td
(1.51 KB)
📄
R600AsmPrinter.cpp
(4.46 KB)
📄
R600AsmPrinter.h
(1.5 KB)
📄
R600ClauseMergePass.cpp
(7.38 KB)
📄
R600ControlFlowFinalizer.cpp
(23.4 KB)
📄
R600Defines.h
(4.25 KB)
📄
R600EmitClauseMarkers.cpp
(12.1 KB)
📄
R600ExpandSpecialInstrs.cpp
(10.11 KB)
📄
R600FrameLowering.cpp
(1.83 KB)
📄
R600FrameLowering.h
(1.25 KB)
📄
R600ISelLowering.cpp
(81.88 KB)
📄
R600ISelLowering.h
(4.8 KB)
📄
R600InstrFormats.td
(11.58 KB)
📄
R600InstrInfo.cpp
(49.47 KB)
📄
R600InstrInfo.h
(13.7 KB)
📄
R600Instructions.td
(55.13 KB)
📄
R600MachineFunctionInfo.cpp
(551 B)
📄
R600MachineFunctionInfo.h
(824 B)
📄
R600MachineScheduler.cpp
(13.57 KB)
📄
R600MachineScheduler.h
(2.53 KB)
📄
R600OpenCLImageTypeLoweringPass.cpp
(11.75 KB)
📄
R600OptimizeVectorRegisters.cpp
(13.4 KB)
📄
R600Packetizer.cpp
(13.4 KB)
📄
R600Processors.td
(4.42 KB)
📄
R600RegisterInfo.cpp
(3.95 KB)
📄
R600RegisterInfo.h
(2 KB)
📄
R600RegisterInfo.td
(9.75 KB)
📄
R600Schedule.td
(1.62 KB)
📄
R700Instructions.td
(783 B)
📄
SIAddIMGInit.cpp
(6.24 KB)
📄
SIAnnotateControlFlow.cpp
(11.18 KB)
📄
SIDefines.h
(20.86 KB)
📄
SIFixSGPRCopies.cpp
(29.46 KB)
📄
SIFixVGPRCopies.cpp
(2 KB)
📄
SIFixupVectorISel.cpp
(8.75 KB)
📄
SIFoldOperands.cpp
(54.56 KB)
📄
SIFormMemoryClauses.cpp
(12.76 KB)
📄
SIFrameLowering.cpp
(48.08 KB)
📄
SIFrameLowering.h
(2.98 KB)
📄
SIISelLowering.cpp
(423.43 KB)
📄
SIISelLowering.h
(22.13 KB)
📄
SIInsertHardClauses.cpp
(7.01 KB)
📄
SIInsertSkips.cpp
(15.29 KB)
📄
SIInsertWaitcnts.cpp
(58.33 KB)
📄
SIInstrFormats.td
(9.44 KB)
📄
SIInstrInfo.cpp
(247.15 KB)
📄
SIInstrInfo.h
(41.24 KB)
📄
SIInstrInfo.td
(90.7 KB)
📄
SIInstructions.td
(77.7 KB)
📄
SILoadStoreOptimizer.cpp
(76.21 KB)
📄
SILowerControlFlow.cpp
(22.66 KB)
📄
SILowerI1Copies.cpp
(27.83 KB)
📄
SILowerSGPRSpills.cpp
(12.68 KB)
📄
SIMachineFunctionInfo.cpp
(20.01 KB)
📄
SIMachineFunctionInfo.h
(26.91 KB)
📄
SIMachineScheduler.cpp
(69.44 KB)
📄
SIMachineScheduler.h
(15.65 KB)
📄
SIMemoryLegalizer.cpp
(45.84 KB)
📄
SIModeRegister.cpp
(17.43 KB)
📄
SIOptimizeExecMasking.cpp
(12.81 KB)
📄
SIOptimizeExecMaskingPreRA.cpp
(11.13 KB)
📄
SIPeepholeSDWA.cpp
(42.84 KB)
📄
SIPostRABundler.cpp
(3.6 KB)
📄
SIPreAllocateWWMRegs.cpp
(6.09 KB)
📄
SIPreEmitPeephole.cpp
(10.51 KB)
📄
SIProgramInfo.h
(2.04 KB)
📄
SIRegisterInfo.cpp
(71.51 KB)
📄
SIRegisterInfo.h
(13.04 KB)
📄
SIRegisterInfo.td
(37.28 KB)
📄
SIRemoveShortExecBranches.cpp
(4.96 KB)
📄
SISchedule.td
(7.58 KB)
📄
SIShrinkInstructions.cpp
(26.86 KB)
📄
SIWholeQuadMode.cpp
(30.22 KB)
📄
SMInstructions.td
(48.14 KB)
📄
SOPInstructions.td
(60.51 KB)
📁
TargetInfo
📁
Utils
📄
VIInstrFormats.td
(645 B)
📄
VOP1Instructions.td
(35.53 KB)
📄
VOP2Instructions.td
(65.04 KB)
📄
VOP3Instructions.td
(53.14 KB)
📄
VOP3PInstructions.td
(26.47 KB)
📄
VOPCInstructions.td
(63.31 KB)
📄
VOPInstructions.td
(23.76 KB)
Editing: R600MachineScheduler.cpp
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// R600 Machine Scheduler interface // //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "machine-scheduler" void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); DAG = static_cast<ScheduleDAGMILive*>(dag); const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>(); TII = static_cast<const R600InstrInfo*>(DAG->TII); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); VLIW5 = !ST.hasCaymanISA(); MRI = &DAG->MRI; CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 31; InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); InstKindLimit[IDOther] = 32; InstKindLimit[IDFetch] = ST.getTexVTXClauseSize(); AluInstCount = 0; FetchInstCount = 0; } void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, std::vector<SUnit *> &QDst) { QDst.insert(QDst.end(), QSrc.begin(), QSrc.end()); QSrc.clear(); } static unsigned getWFCountLimitedByGPR(unsigned GPRCount) { assert (GPRCount && "GPRCount cannot be 0"); return 248 / GPRCount; } SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { SUnit *SU = nullptr; NextInstKind = IDOther; IsTopNode = false; // check if we might want to switch current clause type bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) || (Available[CurInstKind].empty()); bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && (!Available[IDFetch].empty() || !Available[IDOther].empty()); if (CurInstKind == IDAlu && !Available[IDFetch].empty()) { // We use the heuristic provided by AMD Accelerated Parallel Processing // OpenCL Programming Guide : // The approx. number of WF that allows TEX inst to hide ALU inst is : // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU)) float ALUFetchRationEstimate = (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) / (FetchInstCount + Available[IDFetch].size()); if (ALUFetchRationEstimate == 0) { AllowSwitchFromAlu = true; } else { unsigned NeededWF = 62.5f / ALUFetchRationEstimate; LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n"); // We assume the local GPR requirements to be "dominated" by the requirement // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and // after TEX are indeed likely to consume or generate values from/for the // TEX clause. // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need // one GPR) or TmXYZW = TnXYZW (need 2 GPR). // (TODO : use RegisterPressure) // If we are going too use too many GPR, we flush Fetch instruction to lower // register pressure on 128 bits regs. unsigned NearRegisterRequirement = 2 * Available[IDFetch].size(); if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement)) AllowSwitchFromAlu = true; } } if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) || (!AllowSwitchFromAlu && CurInstKind == IDAlu))) { // try to pick ALU SU = pickAlu(); if (!SU && !PhysicalRegCopy.empty()) { SU = PhysicalRegCopy.front(); PhysicalRegCopy.erase(PhysicalRegCopy.begin()); } if (SU) { if (CurEmitted >= InstKindLimit[IDAlu]) CurEmitted = 0; NextInstKind = IDAlu; } } if (!SU) { // try to pick FETCH SU = pickOther(IDFetch); if (SU) NextInstKind = IDFetch; } // try to pick other if (!SU) { SU = pickOther(IDOther); if (SU) NextInstKind = IDOther; } LLVM_DEBUG(if (SU) { dbgs() << " ** Pick node **\n"; DAG->dumpNode(*SU); } else { dbgs() << "NO NODE \n"; for (unsigned i = 0; i < DAG->SUnits.size(); i++) { const SUnit &S = DAG->SUnits[i]; if (!S.isScheduled) DAG->dumpNode(S); } }); return SU; } void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (NextInstKind != CurInstKind) { LLVM_DEBUG(dbgs() << "Instruction Type Switch\n"); if (NextInstKind != IDAlu) OccupedSlotsMask |= 31; CurEmitted = 0; CurInstKind = NextInstKind; } if (CurInstKind == IDAlu) { AluInstCount ++; switch (getAluKind(SU)) { case AluT_XYZW: CurEmitted += 4; break; case AluDiscarded: break; default: { ++CurEmitted; for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), E = SU->getInstr()->operands_end(); It != E; ++It) { MachineOperand &MO = *It; if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++CurEmitted; } } } } else { ++CurEmitted; } LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); if (CurInstKind != IDFetch) { MoveUnits(Pending[IDFetch], Available[IDFetch]); } else FetchInstCount++; } static bool isPhysicalRegCopy(MachineInstr *MI) { if (MI->getOpcode() != R600::COPY) return false; return !Register::isVirtualRegister(MI->getOperand(1).getReg()); } void R600SchedStrategy::releaseTopNode(SUnit *SU) { LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU)); } void R600SchedStrategy::releaseBottomNode(SUnit *SU) { LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU)); if (isPhysicalRegCopy(SU->getInstr())) { PhysicalRegCopy.push_back(SU); return; } int IK = getInstKind(SU); // There is no export clause, we can schedule one as soon as its ready if (IK == IDOther) Available[IDOther].push_back(SU); else Pending[IK].push_back(SU); } bool R600SchedStrategy::regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const { if (!Register::isVirtualRegister(Reg)) { return RC->contains(Reg); } else { return MRI->getRegClass(Reg) == RC; } } R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { MachineInstr *MI = SU->getInstr(); if (TII->isTransOnly(*MI)) return AluTrans; switch (MI->getOpcode()) { case R600::PRED_X: return AluPredX; case R600::INTERP_PAIR_XY: case R600::INTERP_PAIR_ZW: case R600::INTERP_VEC_LOAD: case R600::DOT_4: return AluT_XYZW; case R600::COPY: if (MI->getOperand(1).isUndef()) { // MI will become a KILL, don't considers it in scheduling return AluDiscarded; } break; default: break; } // Does the instruction take a whole IG ? // XXX: Is it possible to add a helper function in R600InstrInfo that can // be used here and in R600PacketizerList::isSoloInstruction() ? if(TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) || TII->isReductionOp(MI->getOpcode()) || MI->getOpcode() == R600::GROUP_BARRIER) { return AluT_XYZW; } if (TII->isLDSInstr(MI->getOpcode())) { return AluT_X; } // Is the result already assigned to a channel ? unsigned DestSubReg = MI->getOperand(0).getSubReg(); switch (DestSubReg) { case R600::sub0: return AluT_X; case R600::sub1: return AluT_Y; case R600::sub2: return AluT_Z; case R600::sub3: return AluT_W; default: break; } // Is the result already member of a X/Y/Z/W class ? Register DestReg = MI->getOperand(0).getReg(); if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || regBelongsToClass(DestReg, &R600::R600_AddrRegClass)) return AluT_X; if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass)) return AluT_Y; if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass)) return AluT_Z; if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass)) return AluT_W; if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass)) return AluT_XYZW; // LDS src registers cannot be used in the Trans slot. if (TII->readsLDSSrcReg(*MI)) return AluT_XYZW; return AluAny; } int R600SchedStrategy::getInstKind(SUnit* SU) { int Opcode = SU->getInstr()->getOpcode(); if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode)) return IDFetch; if (TII->isALUInstr(Opcode)) { return IDAlu; } switch (Opcode) { case R600::PRED_X: case R600::COPY: case R600::CONST_COPY: case R600::INTERP_PAIR_XY: case R600::INTERP_PAIR_ZW: case R600::INTERP_VEC_LOAD: case R600::DOT_4: return IDAlu; default: return IDOther; } } SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) { if (Q.empty()) return nullptr; for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend(); It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) && (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; } else { InstructionsGroupCandidate.pop_back(); } } return nullptr; } void R600SchedStrategy::LoadAlu() { std::vector<SUnit *> &QSrc = Pending[IDAlu]; for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { AluKind AK = getAluKind(QSrc[i]); AvailableAlus[AK].push_back(QSrc[i]); } QSrc.clear(); } void R600SchedStrategy::PrepareNextSlot() { LLVM_DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; // if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) // OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); } void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst); if (DstIndex == -1) { return; } Register DestReg = MI->getOperand(DstIndex).getReg(); // PressureRegister crashes if an operand is def and used in the same inst // and we try to constraint its regclass for (MachineInstr::mop_iterator It = MI->operands_begin(), E = MI->operands_end(); It != E; ++It) { MachineOperand &MO = *It; if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg) return; } // Constrains the regclass of DestReg to assign it to Slot switch (Slot) { case 0: MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass); break; case 1: MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass); break; case 2: MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass); break; case 3: MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass); break; } } SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) { static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu); if (SlotedSU) return SlotedSU; SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu); if (UnslotedSU) AssignSlot(UnslotedSU->getInstr(), Slot); return UnslotedSU; } unsigned R600SchedStrategy::AvailablesAluCount() const { return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() + AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() + AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() + AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size(); } SUnit* R600SchedStrategy::pickAlu() { while (AvailablesAluCount() || !Pending[IDAlu].empty()) { if (!OccupedSlotsMask) { // Bottom up scheduling : predX must comes first if (!AvailableAlus[AluPredX].empty()) { OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluPredX], false); } // Flush physical reg copies (RA will discard them) if (!AvailableAlus[AluDiscarded].empty()) { OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluDiscarded], false); } // If there is a T_XYZW alu available, use it if (!AvailableAlus[AluT_XYZW].empty()) { OccupedSlotsMask |= 15; return PopInst(AvailableAlus[AluT_XYZW], false); } } bool TransSlotOccuped = OccupedSlotsMask & 16; if (!TransSlotOccuped && VLIW5) { if (!AvailableAlus[AluTrans].empty()) { OccupedSlotsMask |= 16; return PopInst(AvailableAlus[AluTrans], false); } SUnit *SU = AttemptFillSlot(3, true); if (SU) { OccupedSlotsMask |= 16; return SU; } } for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { SUnit *SU = AttemptFillSlot(Chan, false); if (SU) { OccupedSlotsMask |= (1 << Chan); InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } } PrepareNextSlot(); } return nullptr; } SUnit* R600SchedStrategy::pickOther(int QID) { SUnit *SU = nullptr; std::vector<SUnit *> &AQ = Available[QID]; if (AQ.empty()) { MoveUnits(Pending[QID], AQ); } if (!AQ.empty()) { SU = AQ.back(); AQ.pop_back(); } return SU; }
Upload File
Create Folder