003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Target
/
AMDGPU
/
📁
..
📄
AMDGPU.h
(11.46 KB)
📄
AMDGPU.td
(36.97 KB)
📄
AMDGPUAliasAnalysis.cpp
(5.58 KB)
📄
AMDGPUAliasAnalysis.h
(3.32 KB)
📄
AMDGPUAlwaysInlinePass.cpp
(4.83 KB)
📄
AMDGPUAnnotateKernelFeatures.cpp
(11.94 KB)
📄
AMDGPUAnnotateUniformValues.cpp
(6.13 KB)
📄
AMDGPUArgumentUsageInfo.cpp
(7.66 KB)
📄
AMDGPUArgumentUsageInfo.h
(4.81 KB)
📄
AMDGPUAsmPrinter.cpp
(50.42 KB)
📄
AMDGPUAsmPrinter.h
(5.13 KB)
📄
AMDGPUAtomicOptimizer.cpp
(23.79 KB)
📄
AMDGPUCallLowering.cpp
(28.66 KB)
📄
AMDGPUCallLowering.h
(2.37 KB)
📄
AMDGPUCallingConv.td
(7.33 KB)
📄
AMDGPUCodeGenPrepare.cpp
(46.42 KB)
📄
AMDGPUCombine.td
(2.79 KB)
📄
AMDGPUExportClustering.cpp
(4.52 KB)
📄
AMDGPUExportClustering.h
(533 B)
📄
AMDGPUFeatures.td
(1.81 KB)
📄
AMDGPUFixFunctionBitcasts.cpp
(1.87 KB)
📄
AMDGPUFrameLowering.cpp
(1.98 KB)
📄
AMDGPUFrameLowering.h
(1.39 KB)
📄
AMDGPUGISel.td
(11.57 KB)
📄
AMDGPUGenRegisterBankInfo.def
(5.83 KB)
📄
AMDGPUGlobalISelUtils.cpp
(1.77 KB)
📄
AMDGPUGlobalISelUtils.h
(2.07 KB)
📄
AMDGPUHSAMetadataStreamer.cpp
(31.21 KB)
📄
AMDGPUHSAMetadataStreamer.h
(5.46 KB)
📄
AMDGPUISelDAGToDAG.cpp
(101.59 KB)
📄
AMDGPUISelLowering.cpp
(168.65 KB)
📄
AMDGPUISelLowering.h
(19.23 KB)
📄
AMDGPUInline.cpp
(7.97 KB)
📄
AMDGPUInstrInfo.cpp
(1.71 KB)
📄
AMDGPUInstrInfo.h
(1.66 KB)
📄
AMDGPUInstrInfo.td
(17.18 KB)
📄
AMDGPUInstructionSelector.cpp
(128.53 KB)
📄
AMDGPUInstructionSelector.h
(11.04 KB)
📄
AMDGPUInstructions.td
(25.36 KB)
📄
AMDGPULegalizerInfo.cpp
(149.32 KB)
📄
AMDGPULegalizerInfo.h
(8.49 KB)
📄
AMDGPULibCalls.cpp
(53.89 KB)
📄
AMDGPULibFunc.cpp
(37.85 KB)
📄
AMDGPULibFunc.h
(10.99 KB)
📄
AMDGPULowerIntrinsics.cpp
(4.55 KB)
📄
AMDGPULowerKernelArguments.cpp
(8.89 KB)
📄
AMDGPULowerKernelAttributes.cpp
(7.78 KB)
📄
AMDGPUMCInstLower.cpp
(14.27 KB)
📄
AMDGPUMachineCFGStructurizer.cpp
(101.97 KB)
📄
AMDGPUMachineFunction.cpp
(2.24 KB)
📄
AMDGPUMachineFunction.h
(2.13 KB)
📄
AMDGPUMachineModuleInfo.cpp
(1.34 KB)
📄
AMDGPUMachineModuleInfo.h
(5.46 KB)
📄
AMDGPUMacroFusion.cpp
(2.28 KB)
📄
AMDGPUMacroFusion.h
(679 B)
📄
AMDGPUOpenCLEnqueuedBlockLowering.cpp
(5.31 KB)
📄
AMDGPUPTNote.h
(1.29 KB)
📄
AMDGPUPerfHintAnalysis.cpp
(12.17 KB)
📄
AMDGPUPerfHintAnalysis.h
(1.67 KB)
📄
AMDGPUPostLegalizerCombiner.cpp
(12.02 KB)
📄
AMDGPUPreLegalizerCombiner.cpp
(5.45 KB)
📄
AMDGPUPrintfRuntimeBinding.cpp
(21.7 KB)
📄
AMDGPUPromoteAlloca.cpp
(35.24 KB)
📄
AMDGPUPropagateAttributes.cpp
(11.76 KB)
📄
AMDGPURegBankCombiner.cpp
(5.36 KB)
📄
AMDGPURegisterBankInfo.cpp
(161.67 KB)
📄
AMDGPURegisterBankInfo.h
(7.41 KB)
📄
AMDGPURegisterBanks.td
(921 B)
📄
AMDGPURewriteOutArguments.cpp
(15.82 KB)
📄
AMDGPUSearchableTables.td
(21.04 KB)
📄
AMDGPUSubtarget.cpp
(29.62 KB)
📄
AMDGPUSubtarget.h
(35.82 KB)
📄
AMDGPUTargetMachine.cpp
(42.67 KB)
📄
AMDGPUTargetMachine.h
(4.52 KB)
📄
AMDGPUTargetObjectFile.cpp
(1.54 KB)
📄
AMDGPUTargetObjectFile.h
(1.14 KB)
📄
AMDGPUTargetTransformInfo.cpp
(39.07 KB)
📄
AMDGPUTargetTransformInfo.h
(11.11 KB)
📄
AMDGPUUnifyDivergentExitNodes.cpp
(13.84 KB)
📄
AMDGPUUnifyMetadata.cpp
(4.46 KB)
📄
AMDILCFGStructurizer.cpp
(56.32 KB)
📄
AMDKernelCodeT.h
(32.84 KB)
📁
AsmParser
📄
BUFInstructions.td
(110.75 KB)
📄
CaymanInstructions.td
(7.93 KB)
📄
DSInstructions.td
(52.37 KB)
📁
Disassembler
📄
EvergreenInstructions.td
(28.24 KB)
📄
FLATInstructions.td
(66.93 KB)
📄
GCNDPPCombine.cpp
(19.92 KB)
📄
GCNHazardRecognizer.cpp
(45.3 KB)
📄
GCNHazardRecognizer.h
(3.96 KB)
📄
GCNILPSched.cpp
(11.3 KB)
📄
GCNIterativeScheduler.cpp
(20.62 KB)
📄
GCNIterativeScheduler.h
(4.16 KB)
📄
GCNMinRegStrategy.cpp
(8.47 KB)
📄
GCNNSAReassign.cpp
(10.92 KB)
📄
GCNProcessors.td
(4.84 KB)
📄
GCNRegBankReassign.cpp
(26.68 KB)
📄
GCNRegPressure.cpp
(16.27 KB)
📄
GCNRegPressure.h
(9.15 KB)
📄
GCNSchedStrategy.cpp
(21.67 KB)
📄
GCNSchedStrategy.h
(3.77 KB)
📁
MCTargetDesc
📄
MIMGInstructions.td
(39.85 KB)
📄
R600.td
(1.51 KB)
📄
R600AsmPrinter.cpp
(4.46 KB)
📄
R600AsmPrinter.h
(1.5 KB)
📄
R600ClauseMergePass.cpp
(7.38 KB)
📄
R600ControlFlowFinalizer.cpp
(23.4 KB)
📄
R600Defines.h
(4.25 KB)
📄
R600EmitClauseMarkers.cpp
(12.1 KB)
📄
R600ExpandSpecialInstrs.cpp
(10.11 KB)
📄
R600FrameLowering.cpp
(1.83 KB)
📄
R600FrameLowering.h
(1.25 KB)
📄
R600ISelLowering.cpp
(81.88 KB)
📄
R600ISelLowering.h
(4.8 KB)
📄
R600InstrFormats.td
(11.58 KB)
📄
R600InstrInfo.cpp
(49.47 KB)
📄
R600InstrInfo.h
(13.7 KB)
📄
R600Instructions.td
(55.13 KB)
📄
R600MachineFunctionInfo.cpp
(551 B)
📄
R600MachineFunctionInfo.h
(824 B)
📄
R600MachineScheduler.cpp
(13.57 KB)
📄
R600MachineScheduler.h
(2.53 KB)
📄
R600OpenCLImageTypeLoweringPass.cpp
(11.75 KB)
📄
R600OptimizeVectorRegisters.cpp
(13.4 KB)
📄
R600Packetizer.cpp
(13.4 KB)
📄
R600Processors.td
(4.42 KB)
📄
R600RegisterInfo.cpp
(3.95 KB)
📄
R600RegisterInfo.h
(2 KB)
📄
R600RegisterInfo.td
(9.75 KB)
📄
R600Schedule.td
(1.62 KB)
📄
R700Instructions.td
(783 B)
📄
SIAddIMGInit.cpp
(6.24 KB)
📄
SIAnnotateControlFlow.cpp
(11.18 KB)
📄
SIDefines.h
(20.86 KB)
📄
SIFixSGPRCopies.cpp
(29.46 KB)
📄
SIFixVGPRCopies.cpp
(2 KB)
📄
SIFixupVectorISel.cpp
(8.75 KB)
📄
SIFoldOperands.cpp
(54.56 KB)
📄
SIFormMemoryClauses.cpp
(12.76 KB)
📄
SIFrameLowering.cpp
(48.08 KB)
📄
SIFrameLowering.h
(2.98 KB)
📄
SIISelLowering.cpp
(423.43 KB)
📄
SIISelLowering.h
(22.13 KB)
📄
SIInsertHardClauses.cpp
(7.01 KB)
📄
SIInsertSkips.cpp
(15.29 KB)
📄
SIInsertWaitcnts.cpp
(58.33 KB)
📄
SIInstrFormats.td
(9.44 KB)
📄
SIInstrInfo.cpp
(247.15 KB)
📄
SIInstrInfo.h
(41.24 KB)
📄
SIInstrInfo.td
(90.7 KB)
📄
SIInstructions.td
(77.7 KB)
📄
SILoadStoreOptimizer.cpp
(76.21 KB)
📄
SILowerControlFlow.cpp
(22.66 KB)
📄
SILowerI1Copies.cpp
(27.83 KB)
📄
SILowerSGPRSpills.cpp
(12.68 KB)
📄
SIMachineFunctionInfo.cpp
(20.01 KB)
📄
SIMachineFunctionInfo.h
(26.91 KB)
📄
SIMachineScheduler.cpp
(69.44 KB)
📄
SIMachineScheduler.h
(15.65 KB)
📄
SIMemoryLegalizer.cpp
(45.84 KB)
📄
SIModeRegister.cpp
(17.43 KB)
📄
SIOptimizeExecMasking.cpp
(12.81 KB)
📄
SIOptimizeExecMaskingPreRA.cpp
(11.13 KB)
📄
SIPeepholeSDWA.cpp
(42.84 KB)
📄
SIPostRABundler.cpp
(3.6 KB)
📄
SIPreAllocateWWMRegs.cpp
(6.09 KB)
📄
SIPreEmitPeephole.cpp
(10.51 KB)
📄
SIProgramInfo.h
(2.04 KB)
📄
SIRegisterInfo.cpp
(71.51 KB)
📄
SIRegisterInfo.h
(13.04 KB)
📄
SIRegisterInfo.td
(37.28 KB)
📄
SIRemoveShortExecBranches.cpp
(4.96 KB)
📄
SISchedule.td
(7.58 KB)
📄
SIShrinkInstructions.cpp
(26.86 KB)
📄
SIWholeQuadMode.cpp
(30.22 KB)
📄
SMInstructions.td
(48.14 KB)
📄
SOPInstructions.td
(60.51 KB)
📁
TargetInfo
📁
Utils
📄
VIInstrFormats.td
(645 B)
📄
VOP1Instructions.td
(35.53 KB)
📄
VOP2Instructions.td
(65.04 KB)
📄
VOP3Instructions.td
(53.14 KB)
📄
VOP3PInstructions.td
(26.47 KB)
📄
VOPCInstructions.td
(63.31 KB)
📄
VOPInstructions.td
(23.76 KB)
Editing: AMDGPUPropagateAttributes.cpp
//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <string>

#define DEBUG_TYPE "amdgpu-propagate-attributes"

using namespace llvm;

namespace llvm {
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}

namespace {

// Target features to propagate.
static constexpr const FeatureBitset TargetFeatures = { AMDGPU::FeatureWavefrontSize16, AMDGPU::FeatureWavefrontSize32, AMDGPU::FeatureWavefrontSize64 }; // Attributes to propagate. static constexpr const char* AttributeNames[] = { "amdgpu-waves-per-eu" }; static constexpr unsigned NumAttr = sizeof(AttributeNames) / sizeof(AttributeNames[0]); class AMDGPUPropagateAttributes { class FnProperties { private: explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} public: explicit FnProperties(const TargetMachine &TM, const Function &F) { Features = TM.getSubtargetImpl(F)->getFeatureBits(); for (unsigned I = 0; I < NumAttr; ++I) if (F.hasFnAttribute(AttributeNames[I])) Attributes[I] = F.getFnAttribute(AttributeNames[I]); } bool operator == (const FnProperties &Other) const { if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) return false; for (unsigned I = 0; I < NumAttr; ++I) if (Attributes[I] != Other.Attributes[I]) return false; return true; } FnProperties adjustToCaller(const FnProperties &CallerProps) const { FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); for (unsigned I = 0; I < NumAttr; ++I) New.Attributes[I] = CallerProps.Attributes[I]; return New; } FeatureBitset Features; Optional<Attribute> Attributes[NumAttr]; }; class Clone { public: Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : Properties(Props), OrigF(OrigF), NewF(NewF) {} FnProperties Properties; Function *OrigF; Function *NewF; }; const TargetMachine *TM; // Clone functions as needed or just set attributes. bool AllowClone; // Option propagation roots. SmallSet<Function *, 32> Roots; // Clones of functions with their attributes. SmallVector<Clone, 32> Clones; // Find a clone with required features. Function *findFunction(const FnProperties &PropsNeeded, Function *OrigF); // Clone function \p F and set \p NewProps on the clone. // Cole takes the name of original function. 
Function *cloneWithProperties(Function &F, const FnProperties &NewProps); // Set new function's features in place. void setFeatures(Function &F, const FeatureBitset &NewFeatures); // Set new function's attributes in place. void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); std::string getFeatureString(const FeatureBitset &Features) const; // Propagate attributes from Roots. bool process(); public: AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : TM(TM), AllowClone(AllowClone) {} // Use F as a root and propagate its attributes. bool process(Function &F); // Propagate attributes starting from kernel functions. bool process(Module &M); }; // Allows to propagate attributes early, but no clonning is allowed as it must // be a function pass to run before any optimizations. // TODO: We shall only need a one instance of module pass, but that needs to be // in the linker pipeline which is currently not possible. class AMDGPUPropagateAttributesEarly : public FunctionPass { const TargetMachine *TM; public: static char ID; // Pass identification AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : FunctionPass(ID), TM(TM) { initializeAMDGPUPropagateAttributesEarlyPass( *PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; }; // Allows to propagate attributes with clonning but does that late in the // pipeline. class AMDGPUPropagateAttributesLate : public ModulePass { const TargetMachine *TM; public: static char ID; // Pass identification AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) { initializeAMDGPUPropagateAttributesLatePass( *PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; }; } // end anonymous namespace. 
char AMDGPUPropagateAttributesEarly::ID = 0; char AMDGPUPropagateAttributesLate::ID = 0; INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate, "amdgpu-propagate-attributes-late", "Late propagate attributes from kernels to functions", false, false) Function * AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, Function *OrigF) { // TODO: search for clone's clones. for (Clone &C : Clones) if (C.OrigF == OrigF && PropsNeeded == C.Properties) return C.NewF; return nullptr; } bool AMDGPUPropagateAttributes::process(Module &M) { for (auto &F : M.functions()) if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) Roots.insert(&F); return process(); } bool AMDGPUPropagateAttributes::process(Function &F) { Roots.insert(&F); return process(); } bool AMDGPUPropagateAttributes::process() { bool Changed = false; SmallSet<Function *, 32> NewRoots; SmallSet<Function *, 32> Replaced; if (Roots.empty()) return false; Module &M = *(*Roots.begin())->getParent(); do { Roots.insert(NewRoots.begin(), NewRoots.end()); NewRoots.clear(); for (auto &F : M.functions()) { if (F.isDeclaration()) continue; const FnProperties CalleeProps(*TM, F); SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; SmallSet<CallBase *, 32> Visited; for (User *U : F.users()) { Instruction *I = dyn_cast<Instruction>(U); if (!I) continue; CallBase *CI = dyn_cast<CallBase>(I); if (!CI) continue; Function *Caller = CI->getCaller(); if (!Caller || !Visited.insert(CI).second) continue; if (!Roots.count(Caller) && !NewRoots.count(Caller)) continue; const FnProperties CallerProps(*TM, *Caller); if (CalleeProps == CallerProps) { if (!Roots.count(&F)) NewRoots.insert(&F); continue; } Function *NewF = findFunction(CallerProps, &F); if (!NewF) { const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); if (!AllowClone) { // This may set 
different features on different iteartions if // there is a contradiction in callers' attributes. In this case // we rely on a second pass running on Module, which is allowed // to clone. setFeatures(F, NewProps.Features); setAttributes(F, NewProps.Attributes); NewRoots.insert(&F); Changed = true; break; } NewF = cloneWithProperties(F, NewProps); Clones.push_back(Clone(CallerProps, &F, NewF)); NewRoots.insert(NewF); } ToReplace.push_back(std::make_pair(CI, NewF)); Replaced.insert(&F); Changed = true; } while (!ToReplace.empty()) { auto R = ToReplace.pop_back_val(); R.first->setCalledFunction(R.second); } } } while (!NewRoots.empty()); for (Function *F : Replaced) { if (F->use_empty()) F->eraseFromParent(); } Roots.clear(); Clones.clear(); return Changed; } Function * AMDGPUPropagateAttributes::cloneWithProperties(Function &F, const FnProperties &NewProps) { LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); ValueToValueMapTy dummy; Function *NewF = CloneFunction(&F, dummy); setFeatures(*NewF, NewProps.Features); setAttributes(*NewF, NewProps.Attributes); NewF->setVisibility(GlobalValue::DefaultVisibility); NewF->setLinkage(GlobalValue::InternalLinkage); // Swap names. If that is the only clone it will retain the name of now // dead value. Preserve original name for externally visible functions. 
if (F.hasName() && F.hasLocalLinkage()) { std::string NewName = std::string(NewF->getName()); NewF->takeName(&F); F.setName(NewName); } return NewF; } void AMDGPUPropagateAttributes::setFeatures(Function &F, const FeatureBitset &NewFeatures) { std::string NewFeatureStr = getFeatureString(NewFeatures); LLVM_DEBUG(dbgs() << "Set features " << getFeatureString(NewFeatures & TargetFeatures) << " on " << F.getName() << '\n'); F.removeFnAttr("target-features"); F.addFnAttr("target-features", NewFeatureStr); } void AMDGPUPropagateAttributes::setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs) { LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); for (unsigned I = 0; I < NumAttr; ++I) { F.removeFnAttr(AttributeNames[I]); if (NewAttrs[I]) { LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); F.addFnAttr(*NewAttrs[I]); } } } std::string AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const { std::string Ret; for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { if (Features[KV.Value]) Ret += (StringRef("+") + KV.Key + ",").str(); else if (TargetFeatures[KV.Value]) Ret += (StringRef("-") + KV.Key + ",").str(); } Ret.pop_back(); // Remove last comma. return Ret; } bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; return AMDGPUPropagateAttributes(TM, false).process(F); } bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { if (!TM) return false; return AMDGPUPropagateAttributes(TM, true).process(M); } FunctionPass *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { return new AMDGPUPropagateAttributesEarly(TM); } ModulePass *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { return new AMDGPUPropagateAttributesLate(TM); }
Upload File
Create Folder