//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
/// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" namespace { class GCNPreRAOptimizations : public MachineFunctionPass { private: const SIInstrInfo *TII; MachineRegisterInfo *MRI; LiveIntervals *LIS; bool processReg(Register Reg); public: static char ID; GCNPreRAOptimizations() : MachineFunctionPass(ID) { initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "AMDGPU Pre-RA optimizations"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // End anonymous namespace. 
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, "AMDGPU Pre-RA optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", false, false) char GCNPreRAOptimizations::ID = 0; char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; FunctionPass *llvm::createGCNPreRAOptimizationsPass() { return new GCNPreRAOptimizations(); } bool GCNPreRAOptimizations::processReg(Register Reg) { MachineInstr *Def0 = nullptr; MachineInstr *Def1 = nullptr; uint64_t Init = 0; for (MachineInstr &I : MRI->def_instructions(Reg)) { if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() || I.getNumOperands() != 2) return false; switch (I.getOperand(0).getSubReg()) { default: return false; case AMDGPU::sub0: if (Def0) return false; Def0 = &I; Init |= I.getOperand(1).getImm() & 0xffffffff; break; case AMDGPU::sub1: if (Def1) return false; Def1 = &I; Init |= static_cast(I.getOperand(1).getImm()) << 32; break; } } if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) return false; LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 << " =>\n"); if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), LIS->getInstructionIndex(*Def0))) std::swap(Def0, Def1); LIS->RemoveMachineInstrFromMaps(*Def0); LIS->RemoveMachineInstrFromMaps(*Def1); auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) .addImm(Init); Def0->eraseFromParent(); Def1->eraseFromParent(); LIS->InsertMachineInstrInMaps(*NewI); LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); LLVM_DEBUG(dbgs() << " " << *NewI); return true; } bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); LIS = &getAnalysis(); const SIRegisterInfo 
*TRI = ST.getRegisterInfo(); bool Changed = false; for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { Register Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; const TargetRegisterClass *RC = MRI->getRegClass(Reg); if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) continue; Changed |= processReg(Reg); } return Changed; }