LanaiMemAluCombiner.cpp
13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
//===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Simple pass to combine memory and ALU operations
//
// The Lanai ISA supports instructions where a load/store modifies the base
// register used in the load/store operation. This pass finds suitable
// load/store and ALU instructions and combines them into one instruction.
//
// For example,
// ld [ %r6 -- ], %r12
// is a supported instruction that is not currently generated by the instruction
// selection pass of this backend. This pass generates these instructions by
// merging
// add %r6, -4, %r6
// followed by
// ld [ %r6 ], %r12
// in the same machine basic block into one machine instruction.
//===----------------------------------------------------------------------===//
#include "LanaiAluCode.h"
#include "LanaiTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define GET_INSTRMAP_INFO
#include "LanaiGenInstrInfo.inc"
#define DEBUG_TYPE "lanai-mem-alu-combiner"
STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined");
static llvm::cl::opt<bool> DisableMemAluCombiner(
"disable-lanai-mem-alu-combiner", llvm::cl::init(false),
llvm::cl::desc("Do not combine ALU and memory operators"),
llvm::cl::Hidden);
namespace llvm {
void initializeLanaiMemAluCombinerPass(PassRegistry &);
} // namespace llvm
namespace {
typedef MachineBasicBlock::iterator MbbIterator;
typedef MachineFunction::iterator MfIterator;
class LanaiMemAluCombiner : public MachineFunctionPass {
public:
static char ID;
explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {
initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override {
return "Lanai load / store optimization pass";
}
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
private:
MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB,
const MbbIterator &MemInstr,
bool Decrement);
void insertMergedInstruction(MachineBasicBlock *BB,
const MbbIterator &MemInstr,
const MbbIterator &AluInstr, bool Before);
bool combineMemAluInBasicBlock(MachineBasicBlock *BB);
// Target machine description which we query for register names, data
// layout, etc.
const TargetInstrInfo *TII;
};
} // namespace
char LanaiMemAluCombiner::ID = 0;
INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE,
"Lanai memory ALU combiner pass", false, false)
namespace {
bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; }
// Determine the opcode for the merged instruction created by considering the
// old memory operation's opcode and whether the merged opcode will have an
// immediate offset.
unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) {
switch (OldOpcode) {
case Lanai::LDW_RI:
case Lanai::LDW_RR:
if (ImmediateOffset)
return Lanai::LDW_RI;
return Lanai::LDW_RR;
case Lanai::LDHs_RI:
case Lanai::LDHs_RR:
if (ImmediateOffset)
return Lanai::LDHs_RI;
return Lanai::LDHs_RR;
case Lanai::LDHz_RI:
case Lanai::LDHz_RR:
if (ImmediateOffset)
return Lanai::LDHz_RI;
return Lanai::LDHz_RR;
case Lanai::LDBs_RI:
case Lanai::LDBs_RR:
if (ImmediateOffset)
return Lanai::LDBs_RI;
return Lanai::LDBs_RR;
case Lanai::LDBz_RI:
case Lanai::LDBz_RR:
if (ImmediateOffset)
return Lanai::LDBz_RI;
return Lanai::LDBz_RR;
case Lanai::SW_RI:
case Lanai::SW_RR:
if (ImmediateOffset)
return Lanai::SW_RI;
return Lanai::SW_RR;
case Lanai::STB_RI:
case Lanai::STB_RR:
if (ImmediateOffset)
return Lanai::STB_RI;
return Lanai::STB_RR;
case Lanai::STH_RI:
case Lanai::STH_RR:
if (ImmediateOffset)
return Lanai::STH_RI;
return Lanai::STH_RR;
default:
return 0;
}
}
// Check if the machine instruction has non-volatile memory operands of the type
// supported for combining with ALU instructions.
bool isNonVolatileMemoryOp(const MachineInstr &MI) {
if (!MI.hasOneMemOperand())
return false;
// Determine if the machine instruction is a supported memory operation by
// testing if the computed merge opcode is a valid memory operation opcode.
if (mergedOpcode(MI.getOpcode(), false) == 0)
return false;
const MachineMemOperand *MemOperand = *MI.memoperands_begin();
// Don't move volatile memory accesses
// TODO: unclear if we need to be as conservative about atomics
if (MemOperand->isVolatile() || MemOperand->isAtomic())
return false;
return true;
}
// Test to see if two machine operands are of the same type. This test is less
// strict than the MachineOperand::isIdenticalTo function.
bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) {
if (Op1.getType() != Op2.getType())
return false;
switch (Op1.getType()) {
case MachineOperand::MO_Register:
return Op1.getReg() == Op2.getReg();
case MachineOperand::MO_Immediate:
return Op1.getImm() == Op2.getImm();
default:
return false;
}
}
bool isZeroOperand(const MachineOperand &Op) {
return ((Op.isReg() && Op.getReg() == Lanai::R0) ||
(Op.isImm() && Op.getImm() == 0));
}
// Determines whether a register is used by an instruction.
bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) {
for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin();
Mop != Instr->operands_end(); ++Mop) {
if (isSameOperand(*Mop, *Reg))
return true;
}
return false;
}
// Converts between machine opcode and AluCode.
// Flag using/modifying ALU operations should not be considered for merging and
// are omitted from this list.
LPAC::AluCode mergedAluCode(unsigned AluOpcode) {
switch (AluOpcode) {
case Lanai::ADD_I_LO:
case Lanai::ADD_R:
return LPAC::ADD;
case Lanai::SUB_I_LO:
case Lanai::SUB_R:
return LPAC::SUB;
case Lanai::AND_I_LO:
case Lanai::AND_R:
return LPAC::AND;
case Lanai::OR_I_LO:
case Lanai::OR_R:
return LPAC::OR;
case Lanai::XOR_I_LO:
case Lanai::XOR_R:
return LPAC::XOR;
case Lanai::SHL_R:
return LPAC::SHL;
case Lanai::SRL_R:
return LPAC::SRL;
case Lanai::SRA_R:
return LPAC::SRA;
case Lanai::SA_I:
case Lanai::SL_I:
default:
return LPAC::UNKNOWN;
}
}
// Insert a new combined memory and ALU operation instruction.
//
// This function builds a new machine instruction using the MachineInstrBuilder
// class and inserts it before the memory instruction.
void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
const MbbIterator &MemInstr,
const MbbIterator &AluInstr,
bool Before) {
// Insert new combined load/store + alu operation
MachineOperand Dest = MemInstr->getOperand(0);
MachineOperand Base = MemInstr->getOperand(1);
MachineOperand MemOffset = MemInstr->getOperand(2);
MachineOperand AluOffset = AluInstr->getOperand(2);
// Abort if ALU offset is not a register or immediate
assert((AluOffset.isReg() || AluOffset.isImm()) &&
"Unsupported operand type in merge");
// Determined merged instructions opcode and ALU code
LPAC::AluCode AluOpcode = mergedAluCode(AluInstr->getOpcode());
unsigned NewOpc = mergedOpcode(MemInstr->getOpcode(), AluOffset.isImm());
assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging");
assert(NewOpc != 0 && "Unknown merged node opcode");
// Build and insert new machine instruction
MachineInstrBuilder InstrBuilder =
BuildMI(*BB, MemInstr, MemInstr->getDebugLoc(), TII->get(NewOpc));
InstrBuilder.addReg(Dest.getReg(), getDefRegState(true));
InstrBuilder.addReg(Base.getReg(), getKillRegState(true));
// Add offset to machine instruction
if (AluOffset.isReg())
InstrBuilder.addReg(AluOffset.getReg());
else if (AluOffset.isImm())
InstrBuilder.addImm(AluOffset.getImm());
else
llvm_unreachable("Unsupported ld/st ALU merge.");
// Create a pre-op if the ALU operation preceded the memory operation or the
// MemOffset is non-zero (i.e. the memory value should be adjusted before
// accessing it), else create a post-op.
if (Before || !isZeroOperand(MemOffset))
InstrBuilder.addImm(LPAC::makePreOp(AluOpcode));
else
InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
// Transfer memory operands.
InstrBuilder.setMemRefs(MemInstr->memoperands());
}
// Function determines if ALU operation (in alu_iter) can be combined with
// a load/store with base and offset.
bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter,
const MachineOperand &Base,
const MachineOperand &Offset) {
// ALU operations have 3 operands
if (AluIter->getNumOperands() != 3)
return false;
MachineOperand &Dest = AluIter->getOperand(0);
MachineOperand &Op1 = AluIter->getOperand(1);
MachineOperand &Op2 = AluIter->getOperand(2);
// Only match instructions using the base register as destination and with the
// base and first operand equal
if (!isSameOperand(Dest, Base) || !isSameOperand(Dest, Op1))
return false;
if (Op2.isImm()) {
// It is not a match if the 2nd operand in the ALU operation is an
// immediate but the ALU operation is not an addition.
if (AluIter->getOpcode() != Lanai::ADD_I_LO)
return false;
if (Offset.isReg() && Offset.getReg() == Lanai::R0)
return true;
if (Offset.isImm() &&
((Offset.getImm() == 0 &&
// Check that the Op2 would fit in the immediate field of the
// memory operation.
((IsSpls && isInt<10>(Op2.getImm())) ||
(!IsSpls && isInt<16>(Op2.getImm())))) ||
Offset.getImm() == Op2.getImm()))
return true;
} else if (Op2.isReg()) {
// The Offset and 2nd operand are both registers and equal
if (Offset.isReg() && Op2.getReg() == Offset.getReg())
return true;
} else
// Only consider operations with register or immediate values
return false;
return false;
}
MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr(
MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) {
MachineOperand *Base = &MemInstr->getOperand(1);
MachineOperand *Offset = &MemInstr->getOperand(2);
bool IsSpls = isSpls(MemInstr->getOpcode());
MbbIterator First = MemInstr;
MbbIterator Last = Decrement ? BB->begin() : BB->end();
while (First != Last) {
Decrement ? --First : ++First;
if (First == Last)
break;
// Skip over debug instructions
if (First->isDebugInstr())
continue;
if (isSuitableAluInstr(IsSpls, First, *Base, *Offset)) {
return First;
}
// Usage of the base or offset register is not a form suitable for merging.
if (First != Last) {
if (InstrUsesReg(First, Base))
break;
if (Offset->isReg() && InstrUsesReg(First, Offset))
break;
}
}
return MemInstr;
}
bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) {
bool Modified = false;
MbbIterator MBBIter = BB->begin(), End = BB->end();
while (MBBIter != End) {
bool IsMemOp = isNonVolatileMemoryOp(*MBBIter);
if (IsMemOp) {
MachineOperand AluOperand = MBBIter->getOperand(3);
unsigned int DestReg = MBBIter->getOperand(0).getReg(),
BaseReg = MBBIter->getOperand(1).getReg();
assert(AluOperand.isImm() && "Unexpected memory operator type");
LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm());
// Skip memory operations that already modify the base register or if
// the destination and base register are the same
if (!LPAC::modifiesOp(AluOpcode) && DestReg != BaseReg) {
for (int Inc = 0; Inc <= 1; ++Inc) {
MbbIterator AluIter =
findClosestSuitableAluInstr(BB, MBBIter, Inc == 0);
if (AluIter != MBBIter) {
insertMergedInstruction(BB, MBBIter, AluIter, Inc == 0);
++NumLdStAluCombined;
Modified = true;
// Erase the matching ALU instruction
BB->erase(AluIter);
// Erase old load/store instruction
BB->erase(MBBIter++);
break;
}
}
}
}
if (MBBIter == End)
break;
++MBBIter;
}
return Modified;
}
// Driver function that iterates over the machine basic building blocks of a
// machine function
bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) {
if (DisableMemAluCombiner)
return false;
TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo();
bool Modified = false;
for (MfIterator MFI = MF.begin(); MFI != MF.end(); ++MFI) {
Modified |= combineMemAluInBasicBlock(&*MFI);
}
return Modified;
}
} // namespace
FunctionPass *llvm::createLanaiMemAluCombinerPass() {
return new LanaiMemAluCombiner();
}