File Manager

003 File Manager

Current Path: /usr/src/contrib/llvm-project/llvm/lib/Target/AMDGPU

usr / src / contrib / llvm-project / llvm / lib / Target / AMDGPU /

📁 ..
📄 AMDGPU.h(11.46 KB)
📄 AMDGPU.td(36.97 KB)
📄 AMDGPUAliasAnalysis.cpp(5.58 KB)
📄 AMDGPUAliasAnalysis.h(3.32 KB)
📄 AMDGPUAlwaysInlinePass.cpp(4.83 KB)
📄 AMDGPUAnnotateKernelFeatures.cpp(11.94 KB)
📄 AMDGPUAnnotateUniformValues.cpp(6.13 KB)
📄 AMDGPUArgumentUsageInfo.cpp(7.66 KB)
📄 AMDGPUArgumentUsageInfo.h(4.81 KB)
📄 AMDGPUAsmPrinter.cpp(50.42 KB)
📄 AMDGPUAsmPrinter.h(5.13 KB)
📄 AMDGPUAtomicOptimizer.cpp(23.79 KB)
📄 AMDGPUCallLowering.cpp(28.66 KB)
📄 AMDGPUCallLowering.h(2.37 KB)
📄 AMDGPUCallingConv.td(7.33 KB)
📄 AMDGPUCodeGenPrepare.cpp(46.42 KB)
📄 AMDGPUCombine.td(2.79 KB)
📄 AMDGPUExportClustering.cpp(4.52 KB)
📄 AMDGPUExportClustering.h(533 B)
📄 AMDGPUFeatures.td(1.81 KB)
📄 AMDGPUFixFunctionBitcasts.cpp(1.87 KB)
📄 AMDGPUFrameLowering.cpp(1.98 KB)
📄 AMDGPUFrameLowering.h(1.39 KB)
📄 AMDGPUGISel.td(11.57 KB)
📄 AMDGPUGenRegisterBankInfo.def(5.83 KB)
📄 AMDGPUGlobalISelUtils.cpp(1.77 KB)
📄 AMDGPUGlobalISelUtils.h(2.07 KB)
📄 AMDGPUHSAMetadataStreamer.cpp(31.21 KB)
📄 AMDGPUHSAMetadataStreamer.h(5.46 KB)
📄 AMDGPUISelDAGToDAG.cpp(101.59 KB)
📄 AMDGPUISelLowering.cpp(168.65 KB)
📄 AMDGPUISelLowering.h(19.23 KB)
📄 AMDGPUInline.cpp(7.97 KB)
📄 AMDGPUInstrInfo.cpp(1.71 KB)
📄 AMDGPUInstrInfo.h(1.66 KB)
📄 AMDGPUInstrInfo.td(17.18 KB)
📄 AMDGPUInstructionSelector.cpp(128.53 KB)
📄 AMDGPUInstructionSelector.h(11.04 KB)
📄 AMDGPUInstructions.td(25.36 KB)
📄 AMDGPULegalizerInfo.cpp(149.32 KB)
📄 AMDGPULegalizerInfo.h(8.49 KB)
📄 AMDGPULibCalls.cpp(53.89 KB)
📄 AMDGPULibFunc.cpp(37.85 KB)
📄 AMDGPULibFunc.h(10.99 KB)
📄 AMDGPULowerIntrinsics.cpp(4.55 KB)
📄 AMDGPULowerKernelArguments.cpp(8.89 KB)
📄 AMDGPULowerKernelAttributes.cpp(7.78 KB)
📄 AMDGPUMCInstLower.cpp(14.27 KB)
📄 AMDGPUMachineCFGStructurizer.cpp(101.97 KB)
📄 AMDGPUMachineFunction.cpp(2.24 KB)
📄 AMDGPUMachineFunction.h(2.13 KB)
📄 AMDGPUMachineModuleInfo.cpp(1.34 KB)
📄 AMDGPUMachineModuleInfo.h(5.46 KB)
📄 AMDGPUMacroFusion.cpp(2.28 KB)
📄 AMDGPUMacroFusion.h(679 B)
📄 AMDGPUOpenCLEnqueuedBlockLowering.cpp(5.31 KB)
📄 AMDGPUPTNote.h(1.29 KB)
📄 AMDGPUPerfHintAnalysis.cpp(12.17 KB)
📄 AMDGPUPerfHintAnalysis.h(1.67 KB)
📄 AMDGPUPostLegalizerCombiner.cpp(12.02 KB)
📄 AMDGPUPreLegalizerCombiner.cpp(5.45 KB)
📄 AMDGPUPrintfRuntimeBinding.cpp(21.7 KB)
📄 AMDGPUPromoteAlloca.cpp(35.24 KB)
📄 AMDGPUPropagateAttributes.cpp(11.76 KB)
📄 AMDGPURegBankCombiner.cpp(5.36 KB)
📄 AMDGPURegisterBankInfo.cpp(161.67 KB)
📄 AMDGPURegisterBankInfo.h(7.41 KB)
📄 AMDGPURegisterBanks.td(921 B)
📄 AMDGPURewriteOutArguments.cpp(15.82 KB)
📄 AMDGPUSearchableTables.td(21.04 KB)
📄 AMDGPUSubtarget.cpp(29.62 KB)
📄 AMDGPUSubtarget.h(35.82 KB)
📄 AMDGPUTargetMachine.cpp(42.67 KB)
📄 AMDGPUTargetMachine.h(4.52 KB)
📄 AMDGPUTargetObjectFile.cpp(1.54 KB)
📄 AMDGPUTargetObjectFile.h(1.14 KB)
📄 AMDGPUTargetTransformInfo.cpp(39.07 KB)
📄 AMDGPUTargetTransformInfo.h(11.11 KB)
📄 AMDGPUUnifyDivergentExitNodes.cpp(13.84 KB)
📄 AMDGPUUnifyMetadata.cpp(4.46 KB)
📄 AMDILCFGStructurizer.cpp(56.32 KB)
📄 AMDKernelCodeT.h(32.84 KB)
📁 AsmParser
📄 BUFInstructions.td(110.75 KB)
📄 CaymanInstructions.td(7.93 KB)
📄 DSInstructions.td(52.37 KB)
📁 Disassembler
📄 EvergreenInstructions.td(28.24 KB)
📄 FLATInstructions.td(66.93 KB)
📄 GCNDPPCombine.cpp(19.92 KB)
📄 GCNHazardRecognizer.cpp(45.3 KB)
📄 GCNHazardRecognizer.h(3.96 KB)
📄 GCNILPSched.cpp(11.3 KB)
📄 GCNIterativeScheduler.cpp(20.62 KB)
📄 GCNIterativeScheduler.h(4.16 KB)
📄 GCNMinRegStrategy.cpp(8.47 KB)
📄 GCNNSAReassign.cpp(10.92 KB)
📄 GCNProcessors.td(4.84 KB)
📄 GCNRegBankReassign.cpp(26.68 KB)
📄 GCNRegPressure.cpp(16.27 KB)
📄 GCNRegPressure.h(9.15 KB)
📄 GCNSchedStrategy.cpp(21.67 KB)
📄 GCNSchedStrategy.h(3.77 KB)
📁 MCTargetDesc
📄 MIMGInstructions.td(39.85 KB)
📄 R600.td(1.51 KB)
📄 R600AsmPrinter.cpp(4.46 KB)
📄 R600AsmPrinter.h(1.5 KB)
📄 R600ClauseMergePass.cpp(7.38 KB)
📄 R600ControlFlowFinalizer.cpp(23.4 KB)
📄 R600Defines.h(4.25 KB)
📄 R600EmitClauseMarkers.cpp(12.1 KB)
📄 R600ExpandSpecialInstrs.cpp(10.11 KB)
📄 R600FrameLowering.cpp(1.83 KB)
📄 R600FrameLowering.h(1.25 KB)
📄 R600ISelLowering.cpp(81.88 KB)
📄 R600ISelLowering.h(4.8 KB)
📄 R600InstrFormats.td(11.58 KB)
📄 R600InstrInfo.cpp(49.47 KB)
📄 R600InstrInfo.h(13.7 KB)
📄 R600Instructions.td(55.13 KB)
📄 R600MachineFunctionInfo.cpp(551 B)
📄 R600MachineFunctionInfo.h(824 B)
📄 R600MachineScheduler.cpp(13.57 KB)
📄 R600MachineScheduler.h(2.53 KB)
📄 R600OpenCLImageTypeLoweringPass.cpp(11.75 KB)
📄 R600OptimizeVectorRegisters.cpp(13.4 KB)
📄 R600Packetizer.cpp(13.4 KB)
📄 R600Processors.td(4.42 KB)
📄 R600RegisterInfo.cpp(3.95 KB)
📄 R600RegisterInfo.h(2 KB)
📄 R600RegisterInfo.td(9.75 KB)
📄 R600Schedule.td(1.62 KB)
📄 R700Instructions.td(783 B)
📄 SIAddIMGInit.cpp(6.24 KB)
📄 SIAnnotateControlFlow.cpp(11.18 KB)
📄 SIDefines.h(20.86 KB)
📄 SIFixSGPRCopies.cpp(29.46 KB)
📄 SIFixVGPRCopies.cpp(2 KB)
📄 SIFixupVectorISel.cpp(8.75 KB)
📄 SIFoldOperands.cpp(54.56 KB)
📄 SIFormMemoryClauses.cpp(12.76 KB)
📄 SIFrameLowering.cpp(48.08 KB)
📄 SIFrameLowering.h(2.98 KB)
📄 SIISelLowering.cpp(423.43 KB)
📄 SIISelLowering.h(22.13 KB)
📄 SIInsertHardClauses.cpp(7.01 KB)
📄 SIInsertSkips.cpp(15.29 KB)
📄 SIInsertWaitcnts.cpp(58.33 KB)
📄 SIInstrFormats.td(9.44 KB)
📄 SIInstrInfo.cpp(247.15 KB)
📄 SIInstrInfo.h(41.24 KB)
📄 SIInstrInfo.td(90.7 KB)
📄 SIInstructions.td(77.7 KB)
📄 SILoadStoreOptimizer.cpp(76.21 KB)
📄 SILowerControlFlow.cpp(22.66 KB)
📄 SILowerI1Copies.cpp(27.83 KB)
📄 SILowerSGPRSpills.cpp(12.68 KB)
📄 SIMachineFunctionInfo.cpp(20.01 KB)
📄 SIMachineFunctionInfo.h(26.91 KB)
📄 SIMachineScheduler.cpp(69.44 KB)
📄 SIMachineScheduler.h(15.65 KB)
📄 SIMemoryLegalizer.cpp(45.84 KB)
📄 SIModeRegister.cpp(17.43 KB)
📄 SIOptimizeExecMasking.cpp(12.81 KB)
📄 SIOptimizeExecMaskingPreRA.cpp(11.13 KB)
📄 SIPeepholeSDWA.cpp(42.84 KB)
📄 SIPostRABundler.cpp(3.6 KB)
📄 SIPreAllocateWWMRegs.cpp(6.09 KB)
📄 SIPreEmitPeephole.cpp(10.51 KB)
📄 SIProgramInfo.h(2.04 KB)
📄 SIRegisterInfo.cpp(71.51 KB)
📄 SIRegisterInfo.h(13.04 KB)
📄 SIRegisterInfo.td(37.28 KB)
📄 SIRemoveShortExecBranches.cpp(4.96 KB)
📄 SISchedule.td(7.58 KB)
📄 SIShrinkInstructions.cpp(26.86 KB)
📄 SIWholeQuadMode.cpp(30.22 KB)
📄 SMInstructions.td(48.14 KB)
📄 SOPInstructions.td(60.51 KB)
📁 TargetInfo
📁 Utils
📄 VIInstrFormats.td(645 B)
📄 VOP1Instructions.td(35.53 KB)
📄 VOP2Instructions.td(65.04 KB)
📄 VOP3Instructions.td(53.14 KB)
📄 VOP3PInstructions.td(26.47 KB)
📄 VOPCInstructions.td(63.31 KB)
📄 VOPInstructions.td(23.76 KB)

Editing: AMDGPUPerfHintAnalysis.cpp

//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes if a function is potentially memory bound and if a
/// kernel may benefit from limiting number of waves to reduce cache thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-perf-hint"

// Hidden command-line tunables for the heuristics in this file.

// Percentage that the memory-instruction ratio computed in
// AMDGPUPerfHint::isMemBound must exceed for a function to be marked
// memory bound.
static cl::opt<unsigned>
    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                   cl::desc("Function mem bound threshold in %"));

// Percentage that the weighted memory ratio computed in
// AMDGPUPerfHint::needLimitWave must exceed for a kernel to be marked as
// needing the wave limiter.
static cl::opt<unsigned>
    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Kernel limit wave threshold in %"));

// Multiplier applied to the indirect-access instruction count in
// AMDGPUPerfHint::needLimitWave.
static cl::opt<unsigned>
    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
             cl::desc("Indirect access memory instruction weight"));

// Multiplier applied to the large-stride instruction count in
// AMDGPUPerfHint::needLimitWave.
static cl::opt<unsigned>
    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
             cl::desc("Large stride memory access weight"));

// Offset distance between two consecutive accesses to the same base above
// which the later access counts as large stride (see
// MemAccessInfo::isLargeStride).
static cl::opt<unsigned>
    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                      cl::desc("Large stride memory access threshold"));

// Counters bumped in AMDGPUPerfHint::runOnFunction each time the
// corresponding attribute is attached to a function.
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

// Pass identification object, also exported by reference as
// AMDGPUPerfHintAnalysisID.
char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

// Pass registration; DEBUG_TYPE ("amdgpu-perf-hint") doubles as the pass
// argument name.
INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                "Analysis if a function is memory bound", true, true)

namespace {

/// Per-function worker behind AMDGPUPerfHintAnalysis.
///
/// It counts instructions of a function (total, memory, indirect-access and
/// large-stride), stores the counts in the shared FuncInfoMap and, based on
/// them, tags the function with the "amdgpu-memory-bound" and
/// "amdgpu-wave-limiter" attributes.
struct AMDGPUPerfHint {
  friend AMDGPUPerfHintAnalysis;

public:
  /// \param FIM_ map that receives the per-function counters.
  /// \param TLI_ target lowering used to query addressing-mode legality.
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                 const TargetLowering *TLI_)
      : FIM(FIM_), TLI(TLI_) {}

  /// Collect the counters for \p F and attach the hint attributes.
  bool runOnFunction(Function &F);

private:
  /// A memory operand decomposed into a base pointer plus constant offset.
  struct MemAccessInfo {
    const Value *V = nullptr;    // The pointer operand itself.
    const Value *Base = nullptr; // Underlying base; null means "no info".
    int64_t Offset = 0;          // Constant offset of V from Base.

    MemAccessInfo() = default;

    /// True if this access shares its base with \p Reference and the two
    /// offsets are further apart than the large-stride threshold.
    bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const {
      return Printable([this](raw_ostream &OS) {
        OS << "Value: " << *V << '\n'
           << "Base: " << *Base << " Offset: " << Offset << '\n';
      });
    }
#endif
  };

  /// Build the MemAccessInfo for the pointer operand of a memory instruction.
  MemAccessInfo makeMemAccessInfo(Instruction *) const;

  /// Most recent access with a known base; reset at every basic block.
  MemAccessInfo LastAccess;

  /// Counters for every function visited so far.
  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

  /// Data layout of the module being analyzed; set by runOnFunction.
  const DataLayout *DL = nullptr;

  const TargetLowering *TLI;

  /// Walk \p F, fill in its counters and return them.
  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);

  /// Threshold tests applied to a set of counters.
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  /// True if the address of a global/flat access is itself derived from a
  /// value loaded from global, local or constant memory.
  bool isIndirectAccess(const Instruction *Inst) const;

  /// Check if the instruction is large stride.
  /// The purpose is to identify memory access pattern like:
  /// x = a[i];
  /// y = a[i+1000];
  /// z = a[i+2000];
  /// In the above example, the second and third memory access will be marked
  /// large stride memory access.
  bool isLargeStride(const Instruction *Inst);

  /// Address-space classification of a (possibly non-pointer) value.
  bool isGlobalAddr(const Value *V) const;
  bool isLocalAddr(const Value *V) const;
  bool isConstantAddr(const Value *V) const;
};

/// Return the pointer operand of a memory-accessing instruction: the address
/// of a load, store, cmpxchg or atomicrmw, or the raw destination of a memory
/// intrinsic. Returns nullptr for every other instruction.
static const Value *getMemoryInstrPtr(const Instruction *Inst) {
  if (const auto *Load = dyn_cast<LoadInst>(Inst))
    return Load->getPointerOperand();

  if (const auto *Store = dyn_cast<StoreInst>(Inst))
    return Store->getPointerOperand();

  if (const auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Inst))
    return CmpXchg->getPointerOperand();

  if (const auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return RMW->getPointerOperand();

  if (const auto *MemIntr = dyn_cast<AnyMemIntrinsic>(Inst))
    return MemIntr->getRawDest();

  return nullptr;
}

bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
  SmallSet<const Value *, 32> WorkSet;
  SmallSet<const Value *, 32> Visited;
  if (const Value *MO = getMemoryInstrPtr(Inst)) {
    if (isGlobalAddr(MO))
      WorkSet.insert(MO);
  }

while (!WorkSet.empty()) {
    const Value *V = *WorkSet.begin();
    WorkSet.erase(*WorkSet.begin());
    if (!Visited.insert(V).second)
      continue;
    LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');

if (auto LD = dyn_cast<LoadInst>(V)) {
      auto M = LD->getPointerOperand();
      if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
        LLVM_DEBUG(dbgs() << "    is IA\n");
        return true;
      }
      continue;
    }

if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
      auto P = GEP->getPointerOperand();
      WorkSet.insert(P);
      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
        WorkSet.insert(GEP->getOperand(I));
      continue;
    }

if (auto U = dyn_cast<UnaryInstruction>(V)) {
      WorkSet.insert(U->getOperand(0));
      continue;
    }

if (auto BO = dyn_cast<BinaryOperator>(V)) {
      WorkSet.insert(BO->getOperand(0));
      WorkSet.insert(BO->getOperand(1));
      continue;
    }

if (auto S = dyn_cast<SelectInst>(V)) {
      WorkSet.insert(S->getFalseValue());
      WorkSet.insert(S->getTrueValue());
      continue;
    }

if (auto E = dyn_cast<ExtractElementInst>(V)) {
      WorkSet.insert(E->getVectorOperand());
      continue;
    }

LLVM_DEBUG(dbgs() << "    dropped\n");
  }

LLVM_DEBUG(dbgs() << "  is not IA\n");
  return false;
}

/// Count the instructions of \p F and record the result in the FuncInfoMap.
///
/// - Memory instructions bump the memory and total counters, plus the
///   indirect-access / large-stride counters when they qualify.
/// - Calls to a defined callee that is already in the map add the callee's
///   counters instead of counting the call itself. Directly recursive calls
///   and calls to defined callees not yet in the map contribute nothing.
/// - Indirect calls and calls to declarations count as one instruction.
/// - A GEP whose constant offset forms a legal addressing mode is not counted.
/// - Everything else counts as one instruction.
///
/// \returns pointer to the map entry for \p F.
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &Counts = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (auto &Block : F) {
    // Large-stride tracking starts afresh in every basic block.
    LastAccess = MemAccessInfo();

    for (auto &Inst : Block) {
      if (getMemoryInstrPtr(&Inst)) {
        if (isIndirectAccess(&Inst))
          ++Counts.IAMInstCount;
        if (isLargeStride(&Inst))
          ++Counts.LSMInstCount;
        ++Counts.MemInstCount;
        ++Counts.InstCount;
      } else if (auto *Call = dyn_cast<CallBase>(&Inst)) {
        Function *Callee = Call->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++Counts.InstCount;
        } else if (Callee != &F) { // Immediate recursion is ignored.
          auto CalleeIt = FIM.find(Callee);
          if (CalleeIt != FIM.end()) {
            const auto &CalleeCounts = CalleeIt->second;
            Counts.MemInstCount += CalleeCounts.MemInstCount;
            Counts.InstCount += CalleeCounts.InstCount;
            Counts.IAMInstCount += CalleeCounts.IAMInstCount;
            Counts.LSMInstCount += CalleeCounts.LSMInstCount;
          }
        }
      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&Inst)) {
        TargetLoweringBase::AddrMode AM;
        auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
        AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
        AM.HasBaseReg = !AM.BaseGV;
        // A legal addressing mode means the offset will likely be folded
        // into the load or store, so the GEP is not counted.
        const bool LikelyFolded = TLI->isLegalAddressingMode(
            *DL, AM, GEP->getResultElementType(),
            GEP->getPointerAddressSpace());
        if (!LikelyFolded)
          ++Counts.InstCount;
      } else {
        ++Counts.InstCount;
      }
    }
  }

  return &Counts;
}

/// Analyze \p F and annotate it with performance-hint attributes.
///
/// The function is skipped entirely when it already carries both
/// "amdgpu-wave-limiter" and "amdgpu-memory-bound". Otherwise its counters
/// are collected via visit() and:
///  - "amdgpu-memory-bound"="true" is added if isMemBound() holds;
///  - "amdgpu-wave-limiter"="true" is added if \p F uses an entry-function
///    calling convention and needLimitWave() holds.
///
/// \returns true only if an attribute was added to \p F, so that callers do
/// not report a modification when nothing was changed.
bool AMDGPUPerfHint::runOnFunction(Function &F) {
  const Module &M = *F.getParent();
  DL = &M.getDataLayout();

  // Both hints are already present: nothing to compute and nothing to change.
  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
      F.hasFnAttribute("amdgpu-memory-bound"))
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                    << '\n'
                    << " IAMInst: " << Info->IAMInstCount << '\n'
                    << " LSMInst: " << Info->LSMInstCount << '\n'
                    << " TotalInst: " << Info->InstCount << '\n');

  // Collecting counters only updates the analysis map; the function itself is
  // modified only when an attribute is attached below.
  bool Changed = false;

  if (isMemBound(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    NumMemBound++;
    F.addFnAttr("amdgpu-memory-bound", "true");
    Changed = true;
  }

  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    NumLimitWave++;
    F.addFnAttr("amdgpu-wave-limiter", "true");
    Changed = true;
  }

  return Changed;
}

/// Return true if the share of memory instructions in \p FI, expressed as a
/// percentage of all counted instructions, exceeds MemBoundThresh.
///
/// The percentage is computed in 64-bit arithmetic so that multiplying a
/// large counter by 100 cannot wrap around (the counters are assumed to be
/// 32-bit; they are declared in the header). A record with no counted
/// instructions is never memory bound, which also avoids dividing by zero.
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  if (FI.InstCount == 0)
    return false;

  const uint64_t MemPercent =
      static_cast<uint64_t>(FI.MemInstCount) * 100 / FI.InstCount;
  return MemPercent > MemBoundThresh;
}

/// Return true if the weighted memory cost of \p FI, expressed as a
/// percentage of all counted instructions, exceeds LimitWaveThresh.
///
/// The cost is MemInstCount + IAMInstCount * IAWeight +
/// LSMInstCount * LSWeight. With the default weights of 1000 and the final
/// multiplication by 100, a 32-bit computation would wrap around once roughly
/// 43 thousand weighted instructions have been accumulated, so the sum is
/// formed in 64-bit arithmetic (the counters are assumed to be 32-bit; they
/// are declared in the header). A record with no counted instructions never
/// needs the limiter, which also avoids dividing by zero.
bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  if (FI.InstCount == 0)
    return false;

  const uint64_t IndirectCost =
      static_cast<uint64_t>(FI.IAMInstCount) * IAWeight;
  const uint64_t LargeStrideCost =
      static_cast<uint64_t>(FI.LSMInstCount) * LSWeight;
  const uint64_t WeightedCost =
      FI.MemInstCount + IndirectCost + LargeStrideCost;

  return WeightedCost * 100 / FI.InstCount > LimitWaveThresh;
}

/// True if \p V is a pointer into the global or the flat address space.
/// Non-pointer values yield false.
bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  const auto *PtrTy = dyn_cast<PointerType>(V->getType());
  if (!PtrTy)
    return false;

  switch (PtrTy->getAddressSpace()) {
  case AMDGPUAS::GLOBAL_ADDRESS:
  case AMDGPUAS::FLAT_ADDRESS: // Flat likely points to global too.
    return true;
  default:
    return false;
  }
}

/// True if \p V is a pointer into the local address space.
/// Non-pointer values yield false.
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  const auto *PtrTy = dyn_cast<PointerType>(V->getType());
  if (!PtrTy)
    return false;
  return PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
  bool IsLargeStride = MAI.isLargeStride(LastAccess);
  if (MAI.Base)
    LastAccess = std::move(MAI);

return IsLargeStride;
}

/// Describe the pointer operand of the memory instruction \p Inst as a base
/// pointer plus constant offset. For local-address pointers an empty
/// (null base) record is returned, so that such accesses are never treated
/// as large stride.
AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  const Value *MO = getMemoryInstrPtr(Inst);
  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');

  MemAccessInfo Info;
  if (!isLocalAddr(MO)) {
    Info.V = MO;
    Info.Base = GetPointerBaseWithConstantOffset(MO, Info.Offset, *DL);
  }
  return Info;
}

/// True if \p V is a pointer into the constant address space or its 32-bit
/// variant. Non-pointer values yield false.
bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
  const auto *PtrTy = dyn_cast<PointerType>(V->getType());
  if (!PtrTy)
    return false;

  switch (PtrTy->getAddressSpace()) {
  case AMDGPUAS::CONSTANT_ADDRESS:
  case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
    return true;
  default:
    return false;
  }
}

/// Two accesses form a large stride when both have the same known base
/// pointer and their constant offsets differ by more than LargeStrideThresh.
bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {
  // A null base on either side, or differing bases, means "not comparable".
  // (If Base is non-null and equals Reference.Base, the latter is non-null
  // as well, so no separate check is needed.)
  if (!Base || Base != Reference.Base)
    return false;

  // Absolute distance between the two offsets.
  uint64_t Distance;
  if (Offset > Reference.Offset)
    Distance = Offset - Reference.Offset;
  else
    Distance = Reference.Offset - Offset;

  bool Result = Distance > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
               << print() << "<=>\n"
               << Reference.print() << "Result:" << Result << '\n');
  return Result;
}
} // namespace

/// Run the perf-hint analysis on every defined function of \p SCC.
///
/// Does nothing when no TargetPassConfig is available, since the target
/// machine is needed to obtain the target lowering for each function.
///
/// \returns true if the analysis reported a change for any function.
bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();

  bool Changed = false;
  for (CallGraphNode *Node : SCC) {
    Function *Fn = Node->getFunction();
    // Skip nodes without a function as well as mere declarations.
    if (!Fn || Fn->isDeclaration())
      continue;

    const TargetSubtargetInfo *Subtarget = TM.getSubtargetImpl(*Fn);
    AMDGPUPerfHint Analyzer(FIM, Subtarget->getTargetLowering());
    Changed |= Analyzer.runOnFunction(*Fn);
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

return AMDGPUPerfHint::needLimitWave(FI->second);
}

003 File Manager

Editing: AMDGPUPerfHintAnalysis.cpp

Upload File

Create Folder