003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Analysis
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Analysis
/
📁
..
📄
AliasAnalysis.cpp
(33.55 KB)
📄
AliasAnalysisEvaluator.cpp
(15.64 KB)
📄
AliasAnalysisSummary.cpp
(3.49 KB)
📄
AliasAnalysisSummary.h
(10.17 KB)
📄
AliasSetTracker.cpp
(25.86 KB)
📄
Analysis.cpp
(5.29 KB)
📄
AssumeBundleQueries.cpp
(7.96 KB)
📄
AssumptionCache.cpp
(10.94 KB)
📄
BasicAliasAnalysis.cpp
(85.81 KB)
📄
BlockFrequencyInfo.cpp
(12.39 KB)
📄
BlockFrequencyInfoImpl.cpp
(28.6 KB)
📄
BranchProbabilityInfo.cpp
(43.48 KB)
📄
CFG.cpp
(9.9 KB)
📄
CFGPrinter.cpp
(11.2 KB)
📄
CFLAndersAliasAnalysis.cpp
(33.01 KB)
📄
CFLGraph.h
(21.23 KB)
📄
CFLSteensAliasAnalysis.cpp
(13.24 KB)
📄
CGSCCPassManager.cpp
(31.2 KB)
📄
CallGraph.cpp
(12.86 KB)
📄
CallGraphSCCPass.cpp
(26.31 KB)
📄
CallPrinter.cpp
(9.48 KB)
📄
CaptureTracking.cpp
(15.38 KB)
📄
CmpInstAnalysis.cpp
(4.63 KB)
📄
CodeMetrics.cpp
(6.99 KB)
📄
ConstantFolding.cpp
(105.15 KB)
📄
CostModel.cpp
(3.87 KB)
📄
DDG.cpp
(11.29 KB)
📄
Delinearization.cpp
(4.49 KB)
📄
DemandedBits.cpp
(16.27 KB)
📄
DependenceAnalysis.cpp
(150.78 KB)
📄
DependenceGraphBuilder.cpp
(19.24 KB)
📄
DivergenceAnalysis.cpp
(15.59 KB)
📄
DomPrinter.cpp
(9.67 KB)
📄
DomTreeUpdater.cpp
(15.21 KB)
📄
DominanceFrontier.cpp
(3.2 KB)
📄
EHPersonalities.cpp
(5.89 KB)
📄
GlobalsModRef.cpp
(41 KB)
📄
GuardUtils.cpp
(3.27 KB)
📄
HeatUtils.cpp
(2.85 KB)
📄
IVDescriptors.cpp
(42.28 KB)
📄
IVUsers.cpp
(16.12 KB)
📄
IndirectCallPromotionAnalysis.cpp
(4.33 KB)
📄
InlineAdvisor.cpp
(15.28 KB)
📄
InlineCost.cpp
(99.47 KB)
📄
InlineFeaturesAnalysis.cpp
(1.59 KB)
📄
InlineSizeEstimatorAnalysis.cpp
(10.95 KB)
📄
InstCount.cpp
(2.45 KB)
📄
InstructionPrecedenceTracking.cpp
(4.8 KB)
📄
InstructionSimplify.cpp
(216.91 KB)
📄
Interval.cpp
(1.78 KB)
📄
IntervalPartition.cpp
(4.5 KB)
📄
LazyBlockFrequencyInfo.cpp
(2.81 KB)
📄
LazyBranchProbabilityInfo.cpp
(2.96 KB)
📄
LazyCallGraph.cpp
(67.33 KB)
📄
LazyValueInfo.cpp
(76.38 KB)
📄
LegacyDivergenceAnalysis.cpp
(14.82 KB)
📄
Lint.cpp
(29.07 KB)
📄
Loads.cpp
(20.6 KB)
📄
LoopAccessAnalysis.cpp
(88.02 KB)
📄
LoopAnalysisManager.cpp
(6.6 KB)
📄
LoopCacheAnalysis.cpp
(23.53 KB)
📄
LoopInfo.cpp
(37.15 KB)
📄
LoopNestAnalysis.cpp
(10.62 KB)
📄
LoopPass.cpp
(12.89 KB)
📄
LoopUnrollAnalyzer.cpp
(7.26 KB)
📄
MLInlineAdvisor.cpp
(11.36 KB)
📄
MemDepPrinter.cpp
(5.13 KB)
📄
MemDerefPrinter.cpp
(2.53 KB)
📄
MemoryBuiltins.cpp
(41.14 KB)
📄
MemoryDependenceAnalysis.cpp
(69.89 KB)
📄
MemoryLocation.cpp
(7.92 KB)
📄
MemorySSA.cpp
(90.16 KB)
📄
MemorySSAUpdater.cpp
(57.9 KB)
📄
ModuleDebugInfoPrinter.cpp
(4.02 KB)
📄
ModuleSummaryAnalysis.cpp
(38.13 KB)
📄
MustExecute.cpp
(31.18 KB)
📄
ObjCARCAliasAnalysis.cpp
(5.81 KB)
📄
ObjCARCAnalysisUtils.cpp
(1.07 KB)
📄
ObjCARCInstKind.cpp
(23.15 KB)
📄
OptimizationRemarkEmitter.cpp
(4.23 KB)
📄
PHITransAddr.cpp
(16.05 KB)
📄
PhiValues.cpp
(8.4 KB)
📄
PostDominators.cpp
(3.59 KB)
📄
ProfileSummaryInfo.cpp
(18.07 KB)
📄
PtrUseVisitor.cpp
(1.28 KB)
📄
RegionInfo.cpp
(6.5 KB)
📄
RegionPass.cpp
(9.23 KB)
📄
RegionPrinter.cpp
(8.61 KB)
📄
ReleaseModeModelRunner.cpp
(2.83 KB)
📄
ScalarEvolution.cpp
(475.26 KB)
📄
ScalarEvolutionAliasAnalysis.cpp
(5.96 KB)
📄
ScalarEvolutionDivision.cpp
(7.51 KB)
📄
ScalarEvolutionNormalization.cpp
(4.59 KB)
📄
ScopedNoAliasAA.cpp
(7.38 KB)
📄
StackLifetime.cpp
(12.22 KB)
📄
StackSafetyAnalysis.cpp
(31.81 KB)
📄
StratifiedSets.h
(18.67 KB)
📄
SyncDependenceAnalysis.cpp
(12.97 KB)
📄
SyntheticCountsUtils.cpp
(3.81 KB)
📄
TFUtils.cpp
(8.99 KB)
📄
TargetLibraryInfo.cpp
(58.98 KB)
📄
TargetTransformInfo.cpp
(48.15 KB)
📄
Trace.cpp
(1.8 KB)
📄
TypeBasedAliasAnalysis.cpp
(26.04 KB)
📄
TypeMetadataUtils.cpp
(5.93 KB)
📄
VFABIDemangling.cpp
(16.46 KB)
📄
ValueLattice.cpp
(1.19 KB)
📄
ValueLatticeUtils.cpp
(1.53 KB)
📄
ValueTracking.cpp
(243.08 KB)
📄
VectorUtils.cpp
(48.57 KB)
📁
models
Editing: TargetTransformInfo.cpp
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <utility> using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "tti" static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); namespace { /// No-op implementation of the TTI interface using the utility base /// classes. /// /// This is used when no target specific information is available. struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} }; } // namespace bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { // If the loop has irreducible control flow, it can not be converted to // Hardware loop. LoopBlocksRPO RPOT(L); RPOT.perform(&LI); if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) return false; return true; } IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) : II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) { FunctionType *FTy = I.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end()); if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) FMF = FPMO->getFastMathFlags(); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI) : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) { if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor) : RetTy(CI.getType()), IID(Id), VF(Factor) { if (auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor, unsigned ScalarCost) : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, FastMathFlags Flags) : RetTy(RTy), IID(Id), FMF(Flags) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, FastMathFlags Flags, unsigned ScalarCost) : RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, FastMathFlags Flags, unsigned ScalarCost, const IntrinsicInst *I) : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys) : RetTy(RTy), IID(Id) { ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end()); } IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, ArrayRef<const Value *> Args) : RetTy(Ty), IID(Id) { Arguments.insert(Arguments.begin(), Args.begin(), Args.end()); ParamTys.reserve(Arguments.size()); for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) ParamTys.push_back(Arguments[Idx]->getType()); } bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop, bool ForceHardwareLoopPHI) { SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. if (!L->isLoopLatch(BB)) { if (ForceHardwareLoopPHI || CounterInReg) continue; } const SCEV *EC = SE.getExitCount(L, BB); if (isa<SCEVCouldNotCompute>(EC)) continue; if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { if (ConstEC->getValue()->isZero()) continue; } else if (!SE.isLoopInvariant(EC, L)) continue; if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth()) continue; // If this exiting block is contained in a nested loop, it is not eligible // for insertion of the branch-and-decrement since the inner loop would // end up messing up the value in the CTR. if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop) continue; // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // existing block. // We need to make sure that this block will run on every loop iteration. // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; for (BasicBlock *Pred : predecessors(L->getHeader())) { if (!L->contains(Pred)) continue; if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } } if (NotAlways) continue; // Make sure this blocks ends with a conditional branch. Instruction *TI = BB->getTerminator(); if (!TI) continue; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; ExitBranch = BI; } else continue; // Note that this block may not be the loop latch block, even if the loop // has a latch block. ExitBlock = BB; ExitCount = EC; break; } if (!ExitBlock) return false; return true; } TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} TargetTransformInfo::~TargetTransformInfo() {} TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) : TTIImpl(std::move(Arg.TTIImpl)) {} TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { TTIImpl = std::move(RHS.TTIImpl); return *this; } unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } int TargetTransformInfo::getInlinerVectorBonusPercent() const { return TTIImpl->getInlinerVectorBonusPercent(); } int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind) const { return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const { return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } int TargetTransformInfo::getUserCost(const User *U, ArrayRef<const Value *> Operands, enum TargetCostKind CostKind) const { int Cost = TTIImpl->getUserCost(U, Operands, CostKind); assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } bool TargetTransformInfo::useGPUDivergenceAnalysis() const { return TTIImpl->useGPUDivergenceAnalysis(); } bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { return TTIImpl->isAlwaysUniform(V); } unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } bool TargetTransformInfo::collectFlatAddressOperands( SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); } bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS); } Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace( IntrinsicInst *II, Value *OldV, Value *NewV) const { return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); } bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } bool TargetTransformInfo::isHardwareLoopProfitable( Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const { return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } bool TargetTransformInfo::preferPredicateOverEpilogue( Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const { return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } bool TargetTransformInfo::emitGetActiveLaneMask() const { return TTIImpl->emitGetActiveLaneMask(); } void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); } void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const { return TTIImpl->getPeelingPreferences(L, SE, PP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { return TTIImpl->isLegalICmpImmediate(Imm); } bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, I); } bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { return TTIImpl->isProfitableLSRChainElement(I); } bool TargetTransformInfo::canMacroFuseCmp() const { return TTIImpl->canMacroFuseCmp(); } bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const { return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } bool TargetTransformInfo::shouldFavorPostInc() const { return TTIImpl->shouldFavorPostInc(); } bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedStore(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTStore(DataType, Alignment); } bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedGather(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedScatter(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const { return TTIImpl->isLegalMaskedCompressStore(DataType); } bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const { return TTIImpl->isLegalMaskedExpandLoad(DataType); } bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } bool TargetTransformInfo::hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { return TTIImpl->hasVolatileVariant(I, AddrSpace); } bool TargetTransformInfo::prefersVectorizedAddressing() const { return TTIImpl->prefersVectorizedAddressing(); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::LSRWithInstrQueries() const { return TTIImpl->LSRWithInstrQueries(); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return TTIImpl->isTruncateFree(Ty1, Ty2); } bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { return TTIImpl->isProfitableToHoist(I); } bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); } bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } bool TargetTransformInfo::shouldBuildLookupTablesForConstant( Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); } unsigned TargetTransformInfo::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const { return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } unsigned TargetTransformInfo::getOperandsScalarizationOverhead( ArrayRef<const Value *> Args, unsigned VF) const { return TTIImpl->getOperandsScalarizationOverhead(Args, VF); } bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { return TTIImpl->supportsEfficientVectorElementLoadStore(); } bool TargetTransformInfo::enableAggressiveInterleaving( bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } TargetTransformInfo::MemCmpExpansionOptions TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp); } bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const { return TTIImpl->enableMaskedInterleavedAccessVectorization(); } bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const { return TTIImpl->isFPVectorizationPotentiallyUnsafe(); } bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const { return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); } bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty); } int TargetTransformInfo::getFPOpCost(Type *Ty) const { int Cost = TTIImpl->getFPOpCost(Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { return TTIImpl->getNumberOfRegisters(ClassID); } unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { return TTIImpl->getRegisterClassForType(Vector, Ty); } const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { return TTIImpl->getMinVectorRegisterBitWidth(); } bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const { return TTIImpl->shouldMaximizeVectorBandwidth(OptSize); } unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const { return TTIImpl->getMinimumVF(ElemWidth); } bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level) const { return TTIImpl->getCacheSize(Level); } llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } unsigned TargetTransformInfo::getMinPrefetchStride( unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const { return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall); } unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const { return TTIImpl->getMaxPrefetchIterationsAhead(); } bool TargetTransformInfo::enableWritePrefetching() const { return TTIImpl->enableWritePrefetching(); } unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } TargetTransformInfo::OperandValueKind TargetTransformInfo::getOperandInfo(const Value *V, OperandValueProperties &OpProps) { OperandValueKind OpInfo = OK_AnyValue; OpProps = OP_None; if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; return OK_UniformConstantValue; } // A broadcast shuffle creates a uniform value. // TODO: Add support for non-zero index broadcasts. // TODO: Add support for different source vector width. if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V)) if (ShuffleInst->isZeroEltSplat()) OpInfo = OK_UniformValue; const Value *Splat = getSplatValue(V); // Check for a splat of a constant or for a non uniform vector of constants // and check if the constant(s) are all powers of two. if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { OpInfo = OK_NonUniformConstantValue; if (Splat) { OpInfo = OK_UniformConstantValue; if (auto *CI = dyn_cast<ConstantInt>(Splat)) if (CI->getValue().isPowerOf2()) OpProps = OP_PowerOf2; } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) { OpProps = OP_PowerOf2; for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) if (CI->getValue().isPowerOf2()) continue; OpProps = OP_None; break; } } } // Check for a splat of a uniform value. This is not loop aware, so return // true only for the obviously uniform cases (argument, globalvalue) if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) OpInfo = OK_UniformValue; return OpInfo; } int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) const { int Cost = TTIImpl->getArithmeticInstrCost( Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty, int Index, VectorType *SubTp) const { int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const { int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMaskedMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const { int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { int Cost = TTIImpl->getInterleavedMemoryOpCost( Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } int TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *Ptr) const { int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { int Cost = TTIImpl->getMemcpyCost(I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMinMaxReductionCost( VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind) const { int Cost = TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { return TTIImpl->getCostOfKeepingLiveOverCall(Tys); } bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { return TTIImpl->getTgtMemIntrinsic(Inst, Info); } unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); } Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } Type *TargetTransformInfo::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign); } void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const { TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign); } bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); } bool TargetTransformInfo::areFunctionArgsABICompatible( const Function *Caller, const Function *Callee, SmallPtrSetImpl<Argument *> &Args) const { return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args); } bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const { return TTIImpl->isIndexedLoadLegal(Mode, Ty); } bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const { return TTIImpl->isIndexedStoreLegal(Mode, Ty); } unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { return TTIImpl->getLoadStoreVecRegBitWidth(AS); } bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const { return TTIImpl->isLegalToVectorizeLoad(LI); } bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { return TTIImpl->isLegalToVectorizeStore(SI); } bool TargetTransformInfo::isLegalToVectorizeLoadChain( unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); } bool TargetTransformInfo::isLegalToVectorizeStoreChain( unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); } unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const { return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } unsigned TargetTransformInfo::getGISelRematGlobalCost() const { return TTIImpl->getGISelRematGlobalCost(); } int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of // the vector. if (!SI && Level == 0 && IsLeft) return true; else if (!SI) return false; SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) Mask[i] = val; ArrayRef<int> ActualMask = SI->getShuffleMask(); return Mask == ActualMask; } static Optional<TTI::ReductionData> getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast<SelectInst>(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R); } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R); } } return llvm::None; } static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I, unsigned Level, unsigned NumLevels) { // Match one level of pairwise operations. // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (!I) return TTI::RK_None; assert(I->getType()->isVectorTy() && "Expecting a vector type"); Optional<TTI::ReductionData> RD = getReductionData(I); if (!RD) return TTI::RK_None; ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS); if (!LS && Level) return TTI::RK_None; ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS); if (!RS && Level) return TTI::RK_None; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) return TTI::RK_None; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) return TTI::RK_None; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { // On the first level we can omit the shufflevector <0, undef,...>. So the // input to the other shufflevector <1, undef> must match with one of the // inputs to the current binary operation. // Example: // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != RD->RHS) return TTI::RK_None; else if (NextLevelOpR && NextLevelOpR != RD->LHS) return TTI::RK_None; NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; } else return TTI::RK_None; // Check that the next levels binary operation exists and matches with the // current one. if (Level + 1 != NumLevels) { if (!isa<Instruction>(NextLevelOp)) return TTI::RK_None; Optional<TTI::ReductionData> NextLevelRD = getReductionData(cast<Instruction>(NextLevelOp)); if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) return TTI::RK_None; } // Shuffle mask for pairwise operation must match. if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) return TTI::RK_None; } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) return TTI::RK_None; } else { return TTI::RK_None; } if (++Level == NumLevels) return RD->Kind; // Match next level. return matchPairwiseReductionAtLevel(dyn_cast<Instruction>(NextLevelOp), Level, NumLevels); } TTI::ReductionKind TTI::matchPairwiseReduction( const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return TTI::RK_None; auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); if (!RdxStart) return TTI::RK_None; Optional<TTI::ReductionData> RD = getReductionData(RdxStart); if (!RD) return TTI::RK_None; auto *VecTy = cast<VectorType>(RdxStart->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == TTI::RK_None) return TTI::RK_None; Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } static std::pair<Value *, ShuffleVectorInst *> getShuffleAndOtherOprd(Value *L, Value *R) { ShuffleVectorInst *S = nullptr; if ((S = dyn_cast<ShuffleVectorInst>(L))) return std::make_pair(R, S); S = dyn_cast<ShuffleVectorInst>(R); return std::make_pair(L, S); } TTI::ReductionKind TTI::matchVectorSplittingReduction( const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) { if (!EnableReduxCost) return TTI::RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return TTI::RK_None; auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); if (!RdxStart) return TTI::RK_None; Optional<TTI::ReductionData> RD = getReductionData(RdxStart); if (!RD) return TTI::RK_None; auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType()); unsigned NumVecElems = VecTy->getNumElements(); if (!isPowerOf2_32(NumVecElems)) return TTI::RK_None; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 // %r = extractelement <4 x float> %bin.rdx8, i32 0 unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; SmallVector<int, 32> ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) return TTI::RK_None; Optional<TTI::ReductionData> RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) return TTI::RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); // Check the current reduction operation and the shuffle use the same value. if (Shuffle == nullptr) return TTI::RK_None; if (Shuffle->getOperand(0) != NextRdxOp) return TTI::RK_None; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) ShuffleMask[j] = MaskStart + j; // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); ArrayRef<int> Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) return TTI::RK_None; RdxOp = dyn_cast<Instruction>(NextRdxOp); NumVecElemsRemain /= 2; MaskStart *= 2; } Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (I->getOpcode()) { case Instruction::GetElementPtr: case Instruction::Ret: case Instruction::PHI: case Instruction::Br: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::FNeg: case Instruction::Select: case Instruction::ICmp: case Instruction::FCmp: case Instruction::Store: case Instruction::Load: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::AddrSpaceCast: case Instruction::ExtractElement: case Instruction::InsertElement: case Instruction::ExtractValue: case Instruction::ShuffleVector: case Instruction::Call: return getUserCost(I, CostKind); default: // We don't have any information on this instruction. return -1; } } TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( std::function<Result(const Function &)> TTICallback) : TTICallback(std::move(TTICallback)) {} TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F, FunctionAnalysisManager &) { return TTICallback(F); } AnalysisKey TargetIRAnalysis::Key; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } // Register the basic pass. INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", "Target Transform Information", false, true) char TargetTransformInfoWrapperPass::ID = 0; void TargetTransformInfoWrapperPass::anchor() {} TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() : ImmutablePass(ID) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( TargetIRAnalysis TIRA) : ImmutablePass(ID), TIRA(std::move(TIRA)) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { FunctionAnalysisManager DummyFAM; TTI = TIRA.run(F, DummyFAM); return *TTI; } ImmutablePass * llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { return new TargetTransformInfoWrapperPass(std::move(TIRA)); }
Upload File
Create Folder