LanaiMemAluCombiner.cpp 13.5 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
//===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Simple pass to combine memory and ALU operations
//
// The Lanai ISA supports instructions where a load/store modifies the base
// register used in the load/store operation. This pass finds suitable
// load/store and ALU instructions and combines them into one instruction.
//
// For example,
//   ld [ %r6 -- ], %r12
// is a supported instruction that is not currently generated by the instruction
// selection pass of this backend. This pass generates these instructions by
// merging
//   add %r6, -4, %r6
// followed by
//   ld [ %r6 ], %r12
// in the same machine basic block into one machine instruction.
//===----------------------------------------------------------------------===//

#include "LanaiAluCode.h"
#include "LanaiTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

#define GET_INSTRMAP_INFO
#include "LanaiGenInstrInfo.inc"

#define DEBUG_TYPE "lanai-mem-alu-combiner"

STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined");

static llvm::cl::opt<bool> DisableMemAluCombiner(
    "disable-lanai-mem-alu-combiner", llvm::cl::init(false),
    llvm::cl::desc("Do not combine ALU and memory operators"),
    llvm::cl::Hidden);

namespace llvm {
void initializeLanaiMemAluCombinerPass(PassRegistry &);
} // namespace llvm

namespace {
typedef MachineBasicBlock::iterator MbbIterator;
typedef MachineFunction::iterator MfIterator;

class LanaiMemAluCombiner : public MachineFunctionPass {
public:
  static char ID;
  explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {
    initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Lanai load / store optimization pass";
  }

  bool runOnMachineFunction(MachineFunction &F) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB,
                                          const MbbIterator &MemInstr,
                                          bool Decrement);
  void insertMergedInstruction(MachineBasicBlock *BB,
                               const MbbIterator &MemInstr,
                               const MbbIterator &AluInstr, bool Before);
  bool combineMemAluInBasicBlock(MachineBasicBlock *BB);

  // Target machine description which we query for register names, data
  // layout, etc.
  const TargetInstrInfo *TII;
};
} // namespace

char LanaiMemAluCombiner::ID = 0;

INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE,
                "Lanai memory ALU combiner pass", false, false)

namespace {
bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; }

// Determine the opcode for the merged instruction created by considering the
// old memory operation's opcode and whether the merged opcode will have an
// immediate offset.
unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) {
  switch (OldOpcode) {
  case Lanai::LDW_RI:
  case Lanai::LDW_RR:
    if (ImmediateOffset)
      return Lanai::LDW_RI;
    return Lanai::LDW_RR;
  case Lanai::LDHs_RI:
  case Lanai::LDHs_RR:
    if (ImmediateOffset)
      return Lanai::LDHs_RI;
    return Lanai::LDHs_RR;
  case Lanai::LDHz_RI:
  case Lanai::LDHz_RR:
    if (ImmediateOffset)
      return Lanai::LDHz_RI;
    return Lanai::LDHz_RR;
  case Lanai::LDBs_RI:
  case Lanai::LDBs_RR:
    if (ImmediateOffset)
      return Lanai::LDBs_RI;
    return Lanai::LDBs_RR;
  case Lanai::LDBz_RI:
  case Lanai::LDBz_RR:
    if (ImmediateOffset)
      return Lanai::LDBz_RI;
    return Lanai::LDBz_RR;
  case Lanai::SW_RI:
  case Lanai::SW_RR:
    if (ImmediateOffset)
      return Lanai::SW_RI;
    return Lanai::SW_RR;
  case Lanai::STB_RI:
  case Lanai::STB_RR:
    if (ImmediateOffset)
      return Lanai::STB_RI;
    return Lanai::STB_RR;
  case Lanai::STH_RI:
  case Lanai::STH_RR:
    if (ImmediateOffset)
      return Lanai::STH_RI;
    return Lanai::STH_RR;
  default:
    return 0;
  }
}

// Check if the machine instruction has non-volatile memory operands of the type
// supported for combining with ALU instructions.
bool isNonVolatileMemoryOp(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  // Determine if the machine instruction is a supported memory operation by
  // testing if the computed merge opcode is a valid memory operation opcode.
  if (mergedOpcode(MI.getOpcode(), false) == 0)
    return false;

  const MachineMemOperand *MemOperand = *MI.memoperands_begin();

  // Don't move volatile memory accesses
  // TODO: unclear if we need to be as conservative about atomics
  if (MemOperand->isVolatile() || MemOperand->isAtomic())
    return false;

  return true;
}

// Test to see if two machine operands are of the same type. This test is less
// strict than the MachineOperand::isIdenticalTo function.
bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) {
  if (Op1.getType() != Op2.getType())
    return false;

  switch (Op1.getType()) {
  case MachineOperand::MO_Register:
    return Op1.getReg() == Op2.getReg();
  case MachineOperand::MO_Immediate:
    return Op1.getImm() == Op2.getImm();
  default:
    return false;
  }
}

bool isZeroOperand(const MachineOperand &Op) {
  return ((Op.isReg() && Op.getReg() == Lanai::R0) ||
          (Op.isImm() && Op.getImm() == 0));
}

// Determines whether a register is used by an instruction.
bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) {
  for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin();
       Mop != Instr->operands_end(); ++Mop) {
    if (isSameOperand(*Mop, *Reg))
      return true;
  }
  return false;
}

// Converts between machine opcode and AluCode.
// Flag using/modifying ALU operations should not be considered for merging and
// are omitted from this list.
LPAC::AluCode mergedAluCode(unsigned AluOpcode) {
  switch (AluOpcode) {
  case Lanai::ADD_I_LO:
  case Lanai::ADD_R:
    return LPAC::ADD;
  case Lanai::SUB_I_LO:
  case Lanai::SUB_R:
    return LPAC::SUB;
  case Lanai::AND_I_LO:
  case Lanai::AND_R:
    return LPAC::AND;
  case Lanai::OR_I_LO:
  case Lanai::OR_R:
    return LPAC::OR;
  case Lanai::XOR_I_LO:
  case Lanai::XOR_R:
    return LPAC::XOR;
  case Lanai::SHL_R:
    return LPAC::SHL;
  case Lanai::SRL_R:
    return LPAC::SRL;
  case Lanai::SRA_R:
    return LPAC::SRA;
  case Lanai::SA_I:
  case Lanai::SL_I:
  default:
    return LPAC::UNKNOWN;
  }
}

// Insert a new combined memory and ALU operation instruction.
//
// This function builds a new machine instruction using the MachineInstrBuilder
// class and inserts it before the memory instruction.
void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
                                                  const MbbIterator &MemInstr,
                                                  const MbbIterator &AluInstr,
                                                  bool Before) {
  // Insert new combined load/store + alu operation
  MachineOperand Dest = MemInstr->getOperand(0);
  MachineOperand Base = MemInstr->getOperand(1);
  MachineOperand MemOffset = MemInstr->getOperand(2);
  MachineOperand AluOffset = AluInstr->getOperand(2);

  // Abort if ALU offset is not a register or immediate
  assert((AluOffset.isReg() || AluOffset.isImm()) &&
         "Unsupported operand type in merge");

  // Determined merged instructions opcode and ALU code
  LPAC::AluCode AluOpcode = mergedAluCode(AluInstr->getOpcode());
  unsigned NewOpc = mergedOpcode(MemInstr->getOpcode(), AluOffset.isImm());

  assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging");
  assert(NewOpc != 0 && "Unknown merged node opcode");

  // Build and insert new machine instruction
  MachineInstrBuilder InstrBuilder =
      BuildMI(*BB, MemInstr, MemInstr->getDebugLoc(), TII->get(NewOpc));
  InstrBuilder.addReg(Dest.getReg(), getDefRegState(true));
  InstrBuilder.addReg(Base.getReg(), getKillRegState(true));

  // Add offset to machine instruction
  if (AluOffset.isReg())
    InstrBuilder.addReg(AluOffset.getReg());
  else if (AluOffset.isImm())
    InstrBuilder.addImm(AluOffset.getImm());
  else
    llvm_unreachable("Unsupported ld/st ALU merge.");

  // Create a pre-op if the ALU operation preceded the memory operation or the
  // MemOffset is non-zero (i.e. the memory value should be adjusted before
  // accessing it), else create a post-op.
  if (Before || !isZeroOperand(MemOffset))
    InstrBuilder.addImm(LPAC::makePreOp(AluOpcode));
  else
    InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));

  // Transfer memory operands.
  InstrBuilder.setMemRefs(MemInstr->memoperands());
}

// Function determines if ALU operation (in alu_iter) can be combined with
// a load/store with base and offset.
bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter,
                        const MachineOperand &Base,
                        const MachineOperand &Offset) {
  // ALU operations have 3 operands
  if (AluIter->getNumOperands() != 3)
    return false;

  MachineOperand &Dest = AluIter->getOperand(0);
  MachineOperand &Op1 = AluIter->getOperand(1);
  MachineOperand &Op2 = AluIter->getOperand(2);

  // Only match instructions using the base register as destination and with the
  // base and first operand equal
  if (!isSameOperand(Dest, Base) || !isSameOperand(Dest, Op1))
    return false;

  if (Op2.isImm()) {
    // It is not a match if the 2nd operand in the ALU operation is an
    // immediate but the ALU operation is not an addition.
    if (AluIter->getOpcode() != Lanai::ADD_I_LO)
      return false;

    if (Offset.isReg() && Offset.getReg() == Lanai::R0)
      return true;

    if (Offset.isImm() &&
        ((Offset.getImm() == 0 &&
          // Check that the Op2 would fit in the immediate field of the
          // memory operation.
          ((IsSpls && isInt<10>(Op2.getImm())) ||
           (!IsSpls && isInt<16>(Op2.getImm())))) ||
         Offset.getImm() == Op2.getImm()))
      return true;
  } else if (Op2.isReg()) {
    // The Offset and 2nd operand are both registers and equal
    if (Offset.isReg() && Op2.getReg() == Offset.getReg())
      return true;
  } else
    // Only consider operations with register or immediate values
    return false;

  return false;
}

MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr(
    MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) {
  MachineOperand *Base = &MemInstr->getOperand(1);
  MachineOperand *Offset = &MemInstr->getOperand(2);
  bool IsSpls = isSpls(MemInstr->getOpcode());

  MbbIterator First = MemInstr;
  MbbIterator Last = Decrement ? BB->begin() : BB->end();

  while (First != Last) {
    Decrement ? --First : ++First;

    if (First == Last)
      break;

    // Skip over debug instructions
    if (First->isDebugInstr())
      continue;

    if (isSuitableAluInstr(IsSpls, First, *Base, *Offset)) {
      return First;
    }

    // Usage of the base or offset register is not a form suitable for merging.
    if (First != Last) {
      if (InstrUsesReg(First, Base))
        break;
      if (Offset->isReg() && InstrUsesReg(First, Offset))
        break;
    }
  }

  return MemInstr;
}

bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) {
  bool Modified = false;

  MbbIterator MBBIter = BB->begin(), End = BB->end();
  while (MBBIter != End) {
    bool IsMemOp = isNonVolatileMemoryOp(*MBBIter);

    if (IsMemOp) {
      MachineOperand AluOperand = MBBIter->getOperand(3);
      unsigned int DestReg = MBBIter->getOperand(0).getReg(),
                   BaseReg = MBBIter->getOperand(1).getReg();
      assert(AluOperand.isImm() && "Unexpected memory operator type");
      LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm());

      // Skip memory operations that already modify the base register or if
      // the destination and base register are the same
      if (!LPAC::modifiesOp(AluOpcode) && DestReg != BaseReg) {
        for (int Inc = 0; Inc <= 1; ++Inc) {
          MbbIterator AluIter =
              findClosestSuitableAluInstr(BB, MBBIter, Inc == 0);
          if (AluIter != MBBIter) {
            insertMergedInstruction(BB, MBBIter, AluIter, Inc == 0);

            ++NumLdStAluCombined;
            Modified = true;

            // Erase the matching ALU instruction
            BB->erase(AluIter);
            // Erase old load/store instruction
            BB->erase(MBBIter++);
            break;
          }
        }
      }
    }
    if (MBBIter == End)
      break;
    ++MBBIter;
  }

  return Modified;
}

// Driver function that iterates over the machine basic building blocks of a
// machine function
bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (DisableMemAluCombiner)
    return false;

  TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo();
  bool Modified = false;
  for (MfIterator MFI = MF.begin(); MFI != MF.end(); ++MFI) {
    Modified |= combineMemAluInBasicBlock(&*MFI);
  }
  return Modified;
}
} // namespace

FunctionPass *llvm::createLanaiMemAluCombinerPass() {
  return new LanaiMemAluCombiner();
}