003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Target
/
AMDGPU
/
📁
..
📄
AMDGPU.h
(11.46 KB)
📄
AMDGPU.td
(36.97 KB)
📄
AMDGPUAliasAnalysis.cpp
(5.58 KB)
📄
AMDGPUAliasAnalysis.h
(3.32 KB)
📄
AMDGPUAlwaysInlinePass.cpp
(4.83 KB)
📄
AMDGPUAnnotateKernelFeatures.cpp
(11.94 KB)
📄
AMDGPUAnnotateUniformValues.cpp
(6.13 KB)
📄
AMDGPUArgumentUsageInfo.cpp
(7.66 KB)
📄
AMDGPUArgumentUsageInfo.h
(4.81 KB)
📄
AMDGPUAsmPrinter.cpp
(50.42 KB)
📄
AMDGPUAsmPrinter.h
(5.13 KB)
📄
AMDGPUAtomicOptimizer.cpp
(23.79 KB)
📄
AMDGPUCallLowering.cpp
(28.66 KB)
📄
AMDGPUCallLowering.h
(2.37 KB)
📄
AMDGPUCallingConv.td
(7.33 KB)
📄
AMDGPUCodeGenPrepare.cpp
(46.42 KB)
📄
AMDGPUCombine.td
(2.79 KB)
📄
AMDGPUExportClustering.cpp
(4.52 KB)
📄
AMDGPUExportClustering.h
(533 B)
📄
AMDGPUFeatures.td
(1.81 KB)
📄
AMDGPUFixFunctionBitcasts.cpp
(1.87 KB)
📄
AMDGPUFrameLowering.cpp
(1.98 KB)
📄
AMDGPUFrameLowering.h
(1.39 KB)
📄
AMDGPUGISel.td
(11.57 KB)
📄
AMDGPUGenRegisterBankInfo.def
(5.83 KB)
📄
AMDGPUGlobalISelUtils.cpp
(1.77 KB)
📄
AMDGPUGlobalISelUtils.h
(2.07 KB)
📄
AMDGPUHSAMetadataStreamer.cpp
(31.21 KB)
📄
AMDGPUHSAMetadataStreamer.h
(5.46 KB)
📄
AMDGPUISelDAGToDAG.cpp
(101.59 KB)
📄
AMDGPUISelLowering.cpp
(168.65 KB)
📄
AMDGPUISelLowering.h
(19.23 KB)
📄
AMDGPUInline.cpp
(7.97 KB)
📄
AMDGPUInstrInfo.cpp
(1.71 KB)
📄
AMDGPUInstrInfo.h
(1.66 KB)
📄
AMDGPUInstrInfo.td
(17.18 KB)
📄
AMDGPUInstructionSelector.cpp
(128.53 KB)
📄
AMDGPUInstructionSelector.h
(11.04 KB)
📄
AMDGPUInstructions.td
(25.36 KB)
📄
AMDGPULegalizerInfo.cpp
(149.32 KB)
📄
AMDGPULegalizerInfo.h
(8.49 KB)
📄
AMDGPULibCalls.cpp
(53.89 KB)
📄
AMDGPULibFunc.cpp
(37.85 KB)
📄
AMDGPULibFunc.h
(10.99 KB)
📄
AMDGPULowerIntrinsics.cpp
(4.55 KB)
📄
AMDGPULowerKernelArguments.cpp
(8.89 KB)
📄
AMDGPULowerKernelAttributes.cpp
(7.78 KB)
📄
AMDGPUMCInstLower.cpp
(14.27 KB)
📄
AMDGPUMachineCFGStructurizer.cpp
(101.97 KB)
📄
AMDGPUMachineFunction.cpp
(2.24 KB)
📄
AMDGPUMachineFunction.h
(2.13 KB)
📄
AMDGPUMachineModuleInfo.cpp
(1.34 KB)
📄
AMDGPUMachineModuleInfo.h
(5.46 KB)
📄
AMDGPUMacroFusion.cpp
(2.28 KB)
📄
AMDGPUMacroFusion.h
(679 B)
📄
AMDGPUOpenCLEnqueuedBlockLowering.cpp
(5.31 KB)
📄
AMDGPUPTNote.h
(1.29 KB)
📄
AMDGPUPerfHintAnalysis.cpp
(12.17 KB)
📄
AMDGPUPerfHintAnalysis.h
(1.67 KB)
📄
AMDGPUPostLegalizerCombiner.cpp
(12.02 KB)
📄
AMDGPUPreLegalizerCombiner.cpp
(5.45 KB)
📄
AMDGPUPrintfRuntimeBinding.cpp
(21.7 KB)
📄
AMDGPUPromoteAlloca.cpp
(35.24 KB)
📄
AMDGPUPropagateAttributes.cpp
(11.76 KB)
📄
AMDGPURegBankCombiner.cpp
(5.36 KB)
📄
AMDGPURegisterBankInfo.cpp
(161.67 KB)
📄
AMDGPURegisterBankInfo.h
(7.41 KB)
📄
AMDGPURegisterBanks.td
(921 B)
📄
AMDGPURewriteOutArguments.cpp
(15.82 KB)
📄
AMDGPUSearchableTables.td
(21.04 KB)
📄
AMDGPUSubtarget.cpp
(29.62 KB)
📄
AMDGPUSubtarget.h
(35.82 KB)
📄
AMDGPUTargetMachine.cpp
(42.67 KB)
📄
AMDGPUTargetMachine.h
(4.52 KB)
📄
AMDGPUTargetObjectFile.cpp
(1.54 KB)
📄
AMDGPUTargetObjectFile.h
(1.14 KB)
📄
AMDGPUTargetTransformInfo.cpp
(39.07 KB)
📄
AMDGPUTargetTransformInfo.h
(11.11 KB)
📄
AMDGPUUnifyDivergentExitNodes.cpp
(13.84 KB)
📄
AMDGPUUnifyMetadata.cpp
(4.46 KB)
📄
AMDILCFGStructurizer.cpp
(56.32 KB)
📄
AMDKernelCodeT.h
(32.84 KB)
📁
AsmParser
📄
BUFInstructions.td
(110.75 KB)
📄
CaymanInstructions.td
(7.93 KB)
📄
DSInstructions.td
(52.37 KB)
📁
Disassembler
📄
EvergreenInstructions.td
(28.24 KB)
📄
FLATInstructions.td
(66.93 KB)
📄
GCNDPPCombine.cpp
(19.92 KB)
📄
GCNHazardRecognizer.cpp
(45.3 KB)
📄
GCNHazardRecognizer.h
(3.96 KB)
📄
GCNILPSched.cpp
(11.3 KB)
📄
GCNIterativeScheduler.cpp
(20.62 KB)
📄
GCNIterativeScheduler.h
(4.16 KB)
📄
GCNMinRegStrategy.cpp
(8.47 KB)
📄
GCNNSAReassign.cpp
(10.92 KB)
📄
GCNProcessors.td
(4.84 KB)
📄
GCNRegBankReassign.cpp
(26.68 KB)
📄
GCNRegPressure.cpp
(16.27 KB)
📄
GCNRegPressure.h
(9.15 KB)
📄
GCNSchedStrategy.cpp
(21.67 KB)
📄
GCNSchedStrategy.h
(3.77 KB)
📁
MCTargetDesc
📄
MIMGInstructions.td
(39.85 KB)
📄
R600.td
(1.51 KB)
📄
R600AsmPrinter.cpp
(4.46 KB)
📄
R600AsmPrinter.h
(1.5 KB)
📄
R600ClauseMergePass.cpp
(7.38 KB)
📄
R600ControlFlowFinalizer.cpp
(23.4 KB)
📄
R600Defines.h
(4.25 KB)
📄
R600EmitClauseMarkers.cpp
(12.1 KB)
📄
R600ExpandSpecialInstrs.cpp
(10.11 KB)
📄
R600FrameLowering.cpp
(1.83 KB)
📄
R600FrameLowering.h
(1.25 KB)
📄
R600ISelLowering.cpp
(81.88 KB)
📄
R600ISelLowering.h
(4.8 KB)
📄
R600InstrFormats.td
(11.58 KB)
📄
R600InstrInfo.cpp
(49.47 KB)
📄
R600InstrInfo.h
(13.7 KB)
📄
R600Instructions.td
(55.13 KB)
📄
R600MachineFunctionInfo.cpp
(551 B)
📄
R600MachineFunctionInfo.h
(824 B)
📄
R600MachineScheduler.cpp
(13.57 KB)
📄
R600MachineScheduler.h
(2.53 KB)
📄
R600OpenCLImageTypeLoweringPass.cpp
(11.75 KB)
📄
R600OptimizeVectorRegisters.cpp
(13.4 KB)
📄
R600Packetizer.cpp
(13.4 KB)
📄
R600Processors.td
(4.42 KB)
📄
R600RegisterInfo.cpp
(3.95 KB)
📄
R600RegisterInfo.h
(2 KB)
📄
R600RegisterInfo.td
(9.75 KB)
📄
R600Schedule.td
(1.62 KB)
📄
R700Instructions.td
(783 B)
📄
SIAddIMGInit.cpp
(6.24 KB)
📄
SIAnnotateControlFlow.cpp
(11.18 KB)
📄
SIDefines.h
(20.86 KB)
📄
SIFixSGPRCopies.cpp
(29.46 KB)
📄
SIFixVGPRCopies.cpp
(2 KB)
📄
SIFixupVectorISel.cpp
(8.75 KB)
📄
SIFoldOperands.cpp
(54.56 KB)
📄
SIFormMemoryClauses.cpp
(12.76 KB)
📄
SIFrameLowering.cpp
(48.08 KB)
📄
SIFrameLowering.h
(2.98 KB)
📄
SIISelLowering.cpp
(423.43 KB)
📄
SIISelLowering.h
(22.13 KB)
📄
SIInsertHardClauses.cpp
(7.01 KB)
📄
SIInsertSkips.cpp
(15.29 KB)
📄
SIInsertWaitcnts.cpp
(58.33 KB)
📄
SIInstrFormats.td
(9.44 KB)
📄
SIInstrInfo.cpp
(247.15 KB)
📄
SIInstrInfo.h
(41.24 KB)
📄
SIInstrInfo.td
(90.7 KB)
📄
SIInstructions.td
(77.7 KB)
📄
SILoadStoreOptimizer.cpp
(76.21 KB)
📄
SILowerControlFlow.cpp
(22.66 KB)
📄
SILowerI1Copies.cpp
(27.83 KB)
📄
SILowerSGPRSpills.cpp
(12.68 KB)
📄
SIMachineFunctionInfo.cpp
(20.01 KB)
📄
SIMachineFunctionInfo.h
(26.91 KB)
📄
SIMachineScheduler.cpp
(69.44 KB)
📄
SIMachineScheduler.h
(15.65 KB)
📄
SIMemoryLegalizer.cpp
(45.84 KB)
📄
SIModeRegister.cpp
(17.43 KB)
📄
SIOptimizeExecMasking.cpp
(12.81 KB)
📄
SIOptimizeExecMaskingPreRA.cpp
(11.13 KB)
📄
SIPeepholeSDWA.cpp
(42.84 KB)
📄
SIPostRABundler.cpp
(3.6 KB)
📄
SIPreAllocateWWMRegs.cpp
(6.09 KB)
📄
SIPreEmitPeephole.cpp
(10.51 KB)
📄
SIProgramInfo.h
(2.04 KB)
📄
SIRegisterInfo.cpp
(71.51 KB)
📄
SIRegisterInfo.h
(13.04 KB)
📄
SIRegisterInfo.td
(37.28 KB)
📄
SIRemoveShortExecBranches.cpp
(4.96 KB)
📄
SISchedule.td
(7.58 KB)
📄
SIShrinkInstructions.cpp
(26.86 KB)
📄
SIWholeQuadMode.cpp
(30.22 KB)
📄
SMInstructions.td
(48.14 KB)
📄
SOPInstructions.td
(60.51 KB)
📁
TargetInfo
📁
Utils
📄
VIInstrFormats.td
(645 B)
📄
VOP1Instructions.td
(35.53 KB)
📄
VOP2Instructions.td
(65.04 KB)
📄
VOP3Instructions.td
(53.14 KB)
📄
VOP3PInstructions.td
(26.47 KB)
📄
VOPCInstructions.td
(63.31 KB)
📄
VOPInstructions.td
(23.76 KB)
Editing: R600EmitClauseMarkers.cpp
//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold /// 128 Alu instructions ; these instructions can access up to 4 prefetched /// 4 lines of 16 registers from constant buffers. Such ALU clauses are /// initiated by CF_ALU instructions. //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> #include <utility> #include <vector> using namespace llvm; namespace llvm { void initializeR600EmitClauseMarkersPass(PassRegistry&); } // end namespace llvm namespace { class R600EmitClauseMarkers : public MachineFunctionPass { private: const R600InstrInfo *TII = nullptr; int Address = 0; unsigned OccupiedDwords(MachineInstr &MI) const { switch (MI.getOpcode()) { case R600::INTERP_PAIR_XY: case R600::INTERP_PAIR_ZW: case R600::INTERP_VEC_LOAD: case R600::DOT_4: return 4; case R600::KILL: return 0; default: break; } // These will be expanded to two ALU instructions in the // ExpandSpecialInstructions pass. if (TII->isLDSRetInstr(MI.getOpcode())) return 2; if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) || TII->isReductionOp(MI.getOpcode())) return 4; unsigned NumLiteral = 0; for (MachineInstr::mop_iterator It = MI.operands_begin(), E = MI.operands_end(); It != E; ++It) { MachineOperand &MO = *It; if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++NumLiteral; } return 1 + NumLiteral; } bool isALU(const MachineInstr &MI) const { if (TII->isALUInstr(MI.getOpcode())) return true; if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) return true; switch (MI.getOpcode()) { case R600::PRED_X: case R600::INTERP_PAIR_XY: case R600::INTERP_PAIR_ZW: case R600::INTERP_VEC_LOAD: case R600::COPY: case R600::DOT_4: return true; default: return false; } } bool IsTrivialInst(MachineInstr &MI) const { switch (MI.getOpcode()) { case R600::KILL: case R600::RETURN: case R600::IMPLICIT_DEF: return true; default: return false; } } std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 // (See also R600ISelLowering.cpp) // ConstIndex value is in [0, 4095]; return std::pair<unsigned, unsigned>( ((Sel >> 2) - 512) >> 12, // KC_BANK // Line Number of ConstIndex // A line contains 16 constant registers however KCX bank can lock // two line at the same time ; thus we want to get an even line number. // Line number can be retrieved with (>>4), using (>>5) <<1 generates // an even number. ((((Sel >> 2) - 512) & 4095) >> 5) << 1); } bool SubstituteKCacheBank(MachineInstr &MI, std::vector<std::pair<unsigned, unsigned>> &CachedConsts, bool UpdateInstr = true) const { std::vector<std::pair<unsigned, unsigned>> UsedKCache; if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) return true; const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts = TII->getSrcs(MI); assert( (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && "Can't assign Const"); for (unsigned i = 0, n = Consts.size(); i < n; ++i) { if (Consts[i].first->getReg() != R600::ALU_CONST) continue; unsigned Sel = Consts[i].second; unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; unsigned KCacheIndex = Index * 4 + Chan; const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); if (CachedConsts.empty()) { CachedConsts.push_back(BankLine); UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); continue; } if (CachedConsts[0] == BankLine) { UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); continue; } if (CachedConsts.size() == 1) { CachedConsts.push_back(BankLine); UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); continue; } if (CachedConsts[1] == BankLine) { UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); continue; } return false; } if (!UpdateInstr) return true; for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { if (Consts[i].first->getReg() != R600::ALU_CONST) continue; switch(UsedKCache[j].first) { case 0: Consts[i].first->setReg( R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); break; case 1: Consts[i].first->setReg( R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); break; default: llvm_unreachable("Wrong Cache Line"); } j++; } return true; } bool canClauseLocalKillFitInClause( unsigned AluInstCount, std::vector<std::pair<unsigned, unsigned>> KCacheBanks, MachineBasicBlock::iterator Def, MachineBasicBlock::iterator BBEnd) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); //TODO: change this to defs? for (MachineInstr::const_mop_iterator MOI = Def->operands_begin(), MOE = Def->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) continue; // Def defines a clause local register, so check that its use will fit // in the clause. unsigned LastUseCount = 0; for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { AluInstCount += OccupiedDwords(*UseI); // Make sure we won't need to end the clause due to KCache limitations. if (!SubstituteKCacheBank(*UseI, KCacheBanks, false)) return false; // We have reached the maximum instruction limit before finding the // use that kills this register, so we cannot use this def in the // current clause. if (AluInstCount >= TII->getMaxAlusPerClause()) return false; // TODO: Is this true? kill flag appears to work OK below // Register kill flags have been cleared by the time we get to this // pass, but it is safe to assume that all uses of this register // occur in the same basic block as its definition, because // it is illegal for the scheduler to schedule them in // different blocks. if (UseI->readsRegister(MOI->getReg(), &TRI)) LastUseCount = AluInstCount; // Exit early if the current use kills the register if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI)) break; } if (LastUseCount) return LastUseCount <= TII->getMaxAlusPerClause(); llvm_unreachable("Clause local register live at end of clause."); } return true; } MachineBasicBlock::iterator MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator ClauseHead = I; std::vector<std::pair<unsigned, unsigned>> KCacheBanks; bool PushBeforeModifier = false; unsigned AluInstCount = 0; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(*I)) continue; if (!isALU(*I)) break; if (AluInstCount > TII->getMaxAlusPerClause()) break; if (I->getOpcode() == R600::PRED_X) { // We put PRED_X in its own clause to ensure that ifcvt won't create // clauses with more than 128 insts. // IfCvt is indeed checking that "then" and "else" branches of an if // statement have less than ~60 insts thus converted clauses can't be // bigger than ~121 insts (predicate setter needs to be in the same // clause as predicated alus). if (AluInstCount > 0) break; if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH) PushBeforeModifier = true; AluInstCount ++; continue; } // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: // // * KILL or INTERP instructions // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits // * Uses waterfalling (i.e. INDEX_MODE = AR.X) // // XXX: These checks have not been implemented yet. if (TII->mustBeLastInClause(I->getOpcode())) { I++; break; } // If this instruction defines a clause local register, make sure // its use can fit in this clause. if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E)) break; if (!SubstituteKCacheBank(*I, KCacheBanks)) break; AluInstCount += OccupiedDwords(*I); } unsigned Opcode = PushBeforeModifier ? R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) // We don't use the ADDR field until R600ControlFlowFinalizer pass, where // it is safe to assume it is 0. However if we always put 0 here, the ifcvt // pass may assume that identical ALU clause starter at the beginning of a // true and false branch can be factorized which is not the case. .addImm(Address++) // ADDR .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 .addImm(KCacheBanks.empty()?0:2) // KM0 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 .addImm(AluInstCount) // COUNT .addImm(1); // Enabled return I; } public: static char ID; R600EmitClauseMarkers() : MachineFunctionPass(ID) { initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override { const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); TII = ST.getInstrInfo(); for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; MachineBasicBlock::iterator I = MBB.begin(); if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) continue; // BB was already parsed for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { if (isALU(*I)) { auto next = MakeALUClause(MBB, I); assert(next != I); I = next; } else ++I; } } return false; } StringRef getPassName() const override { return "R600 Emit Clause Markers Pass"; } }; char R600EmitClauseMarkers::ID = 0; } // end anonymous namespace INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", "R600 Emit Clause Markters", false, false) INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", "R600 Emit Clause Markters", false, false) FunctionPass *llvm::createR600EmitClauseMarkers() { return new R600EmitClauseMarkers(); }
Upload File
Create Folder