003 File Manager
Current Path:
/usr/src/contrib/llvm-project/llvm/lib/Target/AArch64
usr
/
src
/
contrib
/
llvm-project
/
llvm
/
lib
/
Target
/
AArch64
/
📁
..
📄
AArch64.h
(4.09 KB)
📄
AArch64.td
(49.85 KB)
📄
AArch64A53Fix835769.cpp
(8.3 KB)
📄
AArch64A57FPLoadBalancing.cpp
(25.73 KB)
📄
AArch64AdvSIMDScalarPass.cpp
(16.1 KB)
📄
AArch64AsmPrinter.cpp
(49.26 KB)
📄
AArch64BranchTargets.cpp
(4.91 KB)
📄
AArch64CallingConvention.cpp
(6.63 KB)
📄
AArch64CallingConvention.h
(2.62 KB)
📄
AArch64CallingConvention.td
(23.84 KB)
📄
AArch64CleanupLocalDynamicTLSPass.cpp
(5.53 KB)
📄
AArch64CollectLOH.cpp
(20.07 KB)
📄
AArch64Combine.td
(3.27 KB)
📄
AArch64CompressJumpTables.cpp
(5.06 KB)
📄
AArch64CondBrTuning.cpp
(10.19 KB)
📄
AArch64ConditionOptimizer.cpp
(15.26 KB)
📄
AArch64ConditionalCompares.cpp
(33.26 KB)
📄
AArch64DeadRegisterDefinitionsPass.cpp
(7.76 KB)
📄
AArch64ExpandImm.cpp
(14.25 KB)
📄
AArch64ExpandImm.h
(959 B)
📄
AArch64ExpandPseudoInsts.cpp
(38.08 KB)
📄
AArch64FalkorHWPFFix.cpp
(23.3 KB)
📄
AArch64FastISel.cpp
(171.76 KB)
📄
AArch64FrameLowering.cpp
(124.13 KB)
📄
AArch64FrameLowering.h
(5.54 KB)
📄
AArch64GenRegisterBankInfo.def
(11 KB)
📄
AArch64ISelDAGToDAG.cpp
(180.26 KB)
📄
AArch64ISelLowering.cpp
(578.95 KB)
📄
AArch64ISelLowering.h
(33.88 KB)
📄
AArch64InstrAtomics.td
(20.33 KB)
📄
AArch64InstrFormats.td
(430.93 KB)
📄
AArch64InstrGISel.td
(4.29 KB)
📄
AArch64InstrInfo.cpp
(243.23 KB)
📄
AArch64InstrInfo.h
(19.92 KB)
📄
AArch64InstrInfo.td
(374.84 KB)
📄
AArch64LoadStoreOptimizer.cpp
(77.04 KB)
📄
AArch64MCInstLower.cpp
(11.72 KB)
📄
AArch64MCInstLower.h
(1.69 KB)
📄
AArch64MachineFunctionInfo.cpp
(1.02 KB)
📄
AArch64MachineFunctionInfo.h
(12.9 KB)
📄
AArch64MacroFusion.cpp
(11.47 KB)
📄
AArch64MacroFusion.h
(891 B)
📄
AArch64PBQPRegAlloc.cpp
(11.35 KB)
📄
AArch64PBQPRegAlloc.h
(1.3 KB)
📄
AArch64PerfectShuffle.h
(382.04 KB)
📄
AArch64PfmCounters.td
(713 B)
📄
AArch64PromoteConstant.cpp
(22.43 KB)
📄
AArch64RedundantCopyElimination.cpp
(17.09 KB)
📄
AArch64RegisterBanks.td
(719 B)
📄
AArch64RegisterInfo.cpp
(29.6 KB)
📄
AArch64RegisterInfo.h
(5.5 KB)
📄
AArch64RegisterInfo.td
(51 KB)
📄
AArch64SIMDInstrOpt.cpp
(26.07 KB)
📄
AArch64SLSHardening.cpp
(15.92 KB)
📄
AArch64SVEInstrInfo.td
(169.43 KB)
📄
AArch64SchedA53.td
(15.28 KB)
📄
AArch64SchedA57.td
(34.69 KB)
📄
AArch64SchedA57WriteRes.td
(19.87 KB)
📄
AArch64SchedCyclone.td
(29.82 KB)
📄
AArch64SchedExynosM3.td
(42.57 KB)
📄
AArch64SchedExynosM4.td
(49.81 KB)
📄
AArch64SchedExynosM5.td
(50.74 KB)
📄
AArch64SchedFalkor.td
(5.3 KB)
📄
AArch64SchedFalkorDetails.td
(67.66 KB)
📄
AArch64SchedKryo.td
(6.21 KB)
📄
AArch64SchedKryoDetails.td
(82.63 KB)
📄
AArch64SchedPredExynos.td
(7.5 KB)
📄
AArch64SchedPredicates.td
(27.86 KB)
📄
AArch64SchedThunderX.td
(14.99 KB)
📄
AArch64SchedThunderX2T99.td
(68.58 KB)
📄
AArch64SchedThunderX3T110.td
(68.77 KB)
📄
AArch64Schedule.td
(3.86 KB)
📄
AArch64SelectionDAGInfo.cpp
(5.54 KB)
📄
AArch64SelectionDAGInfo.h
(1.46 KB)
📄
AArch64SpeculationHardening.cpp
(29.6 KB)
📄
AArch64StackOffset.h
(5.01 KB)
📄
AArch64StackTagging.cpp
(24.31 KB)
📄
AArch64StackTaggingPreRA.cpp
(7.25 KB)
📄
AArch64StorePairSuppress.cpp
(6.26 KB)
📄
AArch64Subtarget.cpp
(12.38 KB)
📄
AArch64Subtarget.h
(18.49 KB)
📄
AArch64SystemOperands.td
(81.75 KB)
📄
AArch64TargetMachine.cpp
(26.65 KB)
📄
AArch64TargetMachine.h
(3.25 KB)
📄
AArch64TargetObjectFile.cpp
(3.35 KB)
📄
AArch64TargetObjectFile.h
(2.31 KB)
📄
AArch64TargetTransformInfo.cpp
(42.6 KB)
📄
AArch64TargetTransformInfo.h
(8.68 KB)
📁
AsmParser
📁
Disassembler
📁
GISel
📁
MCTargetDesc
📄
SVEInstrFormats.td
(304.03 KB)
📄
SVEIntrinsicOpts.cpp
(8.13 KB)
📁
TargetInfo
📁
Utils
Editing: AArch64PBQPRegAlloc.cpp
//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file contains the AArch64 / Cortex-A57 specific register allocation // constraints for use by the PBQP register allocator. // // It is essentially a transcription of what is contained in // AArch64A57FPLoadBalancing, which tries to use a balanced // mix of odd and even D-registers when performing a critical sequence of // independent, non-quadword FP/ASIMD floating-point multiply-accumulates. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "aarch64-pbqp" #include "AArch64PBQPRegAlloc.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { #ifndef NDEBUG bool isFPReg(unsigned reg) { return AArch64::FPR32RegClass.contains(reg) || AArch64::FPR64RegClass.contains(reg) || AArch64::FPR128RegClass.contains(reg); } #endif bool isOdd(unsigned reg) { switch (reg) { default: llvm_unreachable("Register is not from the expected class !"); case AArch64::S1: case AArch64::S3: case AArch64::S5: case AArch64::S7: case AArch64::S9: case AArch64::S11: case AArch64::S13: case AArch64::S15: case AArch64::S17: case AArch64::S19: case AArch64::S21: case AArch64::S23: case AArch64::S25: case AArch64::S27: case AArch64::S29: case AArch64::S31: case AArch64::D1: case AArch64::D3: case AArch64::D5: case AArch64::D7: case AArch64::D9: case AArch64::D11: case AArch64::D13: case AArch64::D15: case AArch64::D17: case AArch64::D19: case AArch64::D21: case AArch64::D23: case AArch64::D25: case AArch64::D27: case AArch64::D29: case AArch64::D31: case AArch64::Q1: case AArch64::Q3: case AArch64::Q5: case AArch64::Q7: case AArch64::Q9: case AArch64::Q11: case AArch64::Q13: case AArch64::Q15: case AArch64::Q17: case AArch64::Q19: case AArch64::Q21: case AArch64::Q23: case AArch64::Q25: case AArch64::Q27: case AArch64::Q29: case AArch64::Q31: return true; case AArch64::S0: case AArch64::S2: case AArch64::S4: case AArch64::S6: case AArch64::S8: case AArch64::S10: case AArch64::S12: case AArch64::S14: case AArch64::S16: case AArch64::S18: case AArch64::S20: case AArch64::S22: case AArch64::S24: case AArch64::S26: case AArch64::S28: case AArch64::S30: case AArch64::D0: case AArch64::D2: case AArch64::D4: case AArch64::D6: case AArch64::D8: case AArch64::D10: case AArch64::D12: case AArch64::D14: case AArch64::D16: case AArch64::D18: case AArch64::D20: case AArch64::D22: case AArch64::D24: case AArch64::D26: case AArch64::D28: case AArch64::D30: case AArch64::Q0: case AArch64::Q2: case AArch64::Q4: case AArch64::Q6: case AArch64::Q8: case AArch64::Q10: case AArch64::Q12: case AArch64::Q14: case AArch64::Q16: case AArch64::Q18: case AArch64::Q20: case AArch64::Q22: case AArch64::Q24: case AArch64::Q26: case AArch64::Q28: case AArch64::Q30: return false; } } bool haveSameParity(unsigned reg1, unsigned reg2) { assert(isFPReg(reg1) && "Expecting an FP register for reg1"); assert(isFPReg(reg2) && "Expecting an FP register for reg2"); return isOdd(reg1) == isOdd(reg2); } } bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra) { if (Rd == Ra) return false; LiveIntervals &LIs = G.getMetadata().LIS; if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) { LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << Register::isPhysicalRegister(Rd) << '\n'); LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << Register::isPhysicalRegister(Ra) << '\n'); return false; } PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd); PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra); const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed = &G.getNodeMetadata(node1).getAllowedRegs(); const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed = &G.getNodeMetadata(node2).getAllowedRegs(); PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2); // The edge does not exist. Create one with the appropriate interference // costs. if (edge == G.invalidEdgeId()) { const LiveInterval &ld = LIs.getInterval(Rd); const LiveInterval &la = LIs.getInterval(Ra); bool livesOverlap = ld.overlaps(la); PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1, vRaAllowed->size() + 1, 0); for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { unsigned pRd = (*vRdAllowed)[i]; for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { unsigned pRa = (*vRaAllowed)[j]; if (livesOverlap && TRI->regsOverlap(pRd, pRa)) costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); else costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0; } } G.addEdge(node1, node2, std::move(costs)); return true; } if (G.getEdgeNode1Id(edge) == node2) { std::swap(node1, node2); std::swap(vRdAllowed, vRaAllowed); } // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass)) PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge)); for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { unsigned pRd = (*vRdAllowed)[i]; // Get the maximum cost (excluding unallocatable reg) for same parity // registers PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min(); for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { unsigned pRa = (*vRaAllowed)[j]; if (haveSameParity(pRd, pRa)) if (costs[i + 1][j + 1] != std::numeric_limits<PBQP::PBQPNum>::infinity() && costs[i + 1][j + 1] > sameParityMax) sameParityMax = costs[i + 1][j + 1]; } // Ensure all registers with a different parity have a higher cost // than sameParityMax for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { unsigned pRa = (*vRaAllowed)[j]; if (!haveSameParity(pRd, pRa)) if (sameParityMax > costs[i + 1][j + 1]) costs[i + 1][j + 1] = sameParityMax + 1.0; } } G.updateEdgeCosts(edge, std::move(costs)); return true; } void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra) { LiveIntervals &LIs = G.getMetadata().LIS; // Do some Chain management if (Chains.count(Ra)) { if (Rd != Ra) { LLVM_DEBUG(dbgs() << "Moving acc chain from " << printReg(Ra, TRI) << " to " << printReg(Rd, TRI) << '\n';); Chains.remove(Ra); Chains.insert(Rd); } } else { LLVM_DEBUG(dbgs() << "Creating new acc chain for " << printReg(Rd, TRI) << '\n';); Chains.insert(Rd); } PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd); const LiveInterval &ld = LIs.getInterval(Rd); for (auto r : Chains) { // Skip self if (r == Rd) continue; const LiveInterval &lr = LIs.getInterval(r); if (ld.overlaps(lr)) { const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed = &G.getNodeMetadata(node1).getAllowedRegs(); PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r); const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed = &G.getNodeMetadata(node2).getAllowedRegs(); PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2); assert(edge != G.invalidEdgeId() && "PBQP error ! The edge should exist !"); LLVM_DEBUG(dbgs() << "Refining constraint !\n";); if (G.getEdgeNode1Id(edge) == node2) { std::swap(node1, node2); std::swap(vRdAllowed, vRrAllowed); } // Enforce that cost is higher with all other Chains of the same parity PBQP::Matrix costs(G.getEdgeCosts(edge)); for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { unsigned pRd = (*vRdAllowed)[i]; // Get the maximum cost (excluding unallocatable reg) for all other // parity registers PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min(); for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) { unsigned pRa = (*vRrAllowed)[j]; if (!haveSameParity(pRd, pRa)) if (costs[i + 1][j + 1] != std::numeric_limits<PBQP::PBQPNum>::infinity() && costs[i + 1][j + 1] > sameParityMax) sameParityMax = costs[i + 1][j + 1]; } // Ensure all registers with same parity have a higher cost // than sameParityMax for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) { unsigned pRa = (*vRrAllowed)[j]; if (haveSameParity(pRd, pRa)) if (sameParityMax > costs[i + 1][j + 1]) costs[i + 1][j + 1] = sameParityMax + 1.0; } } G.updateEdgeCosts(edge, std::move(costs)); } } } static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, const MachineInstr &MI) { const LiveInterval &LI = LIs.getInterval(reg); SlotIndex SI = LIs.getInstructionIndex(MI); return LI.expiredAt(SI); } void A57ChainingConstraint::apply(PBQPRAGraph &G) { const MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIs = G.getMetadata().LIS; TRI = MF.getSubtarget().getRegisterInfo(); LLVM_DEBUG(MF.dump()); for (const auto &MBB: MF) { Chains.clear(); // FIXME: really needed ? Could not work at MF level ? for (const auto &MI: MBB) { // Forget Chains which have expired for (auto r : Chains) { SmallVector<unsigned, 8> toDel; if(regJustKilledBefore(LIs, r, MI)) { LLVM_DEBUG(dbgs() << "Killing chain " << printReg(r, TRI) << " at "; MI.print(dbgs());); toDel.push_back(r); } while (!toDel.empty()) { Chains.remove(toDel.back()); toDel.pop_back(); } } switch (MI.getOpcode()) { case AArch64::FMSUBSrrr: case AArch64::FMADDSrrr: case AArch64::FNMSUBSrrr: case AArch64::FNMADDSrrr: case AArch64::FMSUBDrrr: case AArch64::FMADDDrrr: case AArch64::FNMSUBDrrr: case AArch64::FNMADDDrrr: { Register Rd = MI.getOperand(0).getReg(); Register Ra = MI.getOperand(3).getReg(); if (addIntraChainConstraint(G, Rd, Ra)) addInterChainConstraint(G, Rd, Ra); break; } case AArch64::FMLAv2f32: case AArch64::FMLSv2f32: { Register Rd = MI.getOperand(0).getReg(); addInterChainConstraint(G, Rd, Rd); break; } default: break; } } } }
Upload File
Create Folder