; last-callsite.ll
; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
; The 'test1_' prefixed functions test the basic 'last callsite' inline
; threshold adjustment where we specifically inline the last call site of an
; internal function regardless of cost.
define internal void @test1_f() {
; Straight-line body: one i32 stack slot that receives eight volatile zero
; stores. Nothing in here depends on the call site.
body:
  %slot = alloca i32
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  ret void
}
; Identical to @test1_f but doesn't get inlined because there is more than one
; call. If this *does* get inlined, the body used both here and in @test1_f
; isn't a good test for different threshold based on the last call.
define internal void @test1_g() {
; Same instruction sequence as @test1_f: an i32 stack slot followed by eight
; volatile zero stores and a return.
body:
  %slot = alloca i32
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  ret void
}
; Caller for the test1_ group: makes the only call to @test1_f and two calls to
; @test1_g. The FileCheck directives below expect no reference to @test1_f to
; remain in this function, while both calls to @test1_g stay in place.
define void @test1() {
; CHECK-LABEL: define void @test1()
entry:
; Sole call site of @test1_f.
call void @test1_f()
; CHECK-NOT: @test1_f
; Two call sites of @test1_g, so neither one is a last call site.
call void @test1_g()
call void @test1_g()
; CHECK: call void @test1_g()
; CHECK: call void @test1_g()
ret void
}
; The 'test2_' prefixed functions test that we can discover the last callsite
; bonus after having inlined the prior call site. For this to work, we need
; a callsite dependent cost so we have a trivial predicate guarding all the
; cost, and set that in a particular direction.
define internal void @test2_f(i1 %b) {
; The eight volatile stores are only reached when %b is true; with %b false
; control goes straight to the return.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Identical to @test2_f but doesn't get inlined because there is more than one
; call. If this *does* get inlined, the body used both here and in @test2_f
; isn't a good test for different threshold based on the last call.
define internal void @test2_g(i1 %b) {
; Same instructions as @test2_f: %b selects between a block of eight volatile
; zero stores and an immediate return.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Caller for the test2_ group: calls @test2_f first with a false predicate and
; then with a true one, followed by two calls to @test2_g with a true
; predicate. The FileCheck directives expect both @test2_f calls to disappear
; and both @test2_g calls to remain.
define void @test2() {
; CHECK-LABEL: define void @test2()
entry:
; The first call is trivial to inline due to the argument.
call void @test2_f(i1 false)
; CHECK-NOT: @test2_f
; The second call is too expensive to inline unless we update the number of
; calls after inlining the first.
call void @test2_f(i1 true)
; CHECK-NOT: @test2_f
; Sanity check that two calls with the hard predicate remain uninlined.
call void @test2_g(i1 true)
call void @test2_g(i1 true)
; CHECK: call void @test2_g(i1 true)
; CHECK: call void @test2_g(i1 true)
ret void
}
; The 'test3_' prefixed functions are similar to the 'test2_' functions but the
; relative order of the trivial and hard to inline callsites is reversed. This
; checks that the order of calls isn't significant to whether we observe the
; "last callsite" threshold difference because the next-to-last gets inlined.
; FIXME: We don't currently catch this case.
define internal void @test3_f(i1 %b) {
; %b gates the only work in the function: eight volatile zero stores to a
; local i32 slot. A false %b falls through to the return.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Identical to @test3_f but doesn't get inlined because there is more than one
; call. If this *does* get inlined, the body used both here and in @test3_f
; isn't a good test for different threshold based on the last call.
define internal void @test3_g(i1 %b) {
; Same instructions as @test3_f: eight volatile zero stores behind the %b
; branch, otherwise a direct return.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Caller for the test3_ group: the reverse of @test2's ordering. @test3_f is
; called with a true predicate first and a false predicate second, followed by
; two calls to @test3_g with a true predicate. The FileCheck directives record
; the current behavior: the first @test3_f call remains, the second disappears,
; and both @test3_g calls remain.
define void @test3() {
; CHECK-LABEL: define void @test3()
entry:
; The first call is too expensive to inline unless we update the number of
; calls after inlining the second.
call void @test3_f(i1 true)
; FIXME: We should inline this call without iteration.
; CHECK: call void @test3_f(i1 true)
; But the second call is trivial to inline due to the argument.
call void @test3_f(i1 false)
; CHECK-NOT: @test3_f
; Sanity check that two calls with the hard predicate remain uninlined.
call void @test3_g(i1 true)
call void @test3_g(i1 true)
; CHECK: call void @test3_g(i1 true)
; CHECK: call void @test3_g(i1 true)
ret void
}
; The 'test4_' prefixed functions are similar to the 'test2_' prefixed
; functions but include unusual constant expressions that make discovering that
; a function is dead harder.
define internal void @test4_f(i1 %b) {
; Predicate-guarded body: when %b is true, eight volatile zero stores hit a
; local i32 slot; when it is false the function just returns.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Identical to @test4_f but doesn't get inlined because there is more than one
; call. If this *does* get inlined, the body used both here and in @test4_f
; isn't a good test for different threshold based on the last call.
define internal void @test4_g(i1 %b) {
; Same instructions as @test4_f: the %b branch either runs eight volatile zero
; stores or skips directly to the return.
top:
  %slot = alloca i32
  br i1 %b, label %heavy, label %done

heavy:
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  store volatile i32 0, i32* %slot
  br label %done

done:
  ret void
}
; Caller for the test4_ group. @test4_f is called twice: once with a constant
; expression that compares the function's own address against itself, and
; once with a true predicate. @test4_g is called once, but its address is also
; used by a constant expression stored to a local slot. The FileCheck
; directives expect both @test4_f calls to disappear and the @test4_g call to
; remain.
define void @test4() {
; CHECK-LABEL: define void @test4()
entry:
; The first call is trivial to inline due to the argument. However this
; argument also uses the function being called as part of a complex
; constant expression. Merely inlining and deleting the call isn't enough to
; drop the use count here, we need to GC the dead constant expression as
; well.
call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint(void (i1)* @test4_f to i64)))
; CHECK-NOT: @test4_f
; The second call is too expensive to inline unless we update the number of
; calls after inlining the first.
call void @test4_f(i1 true)
; CHECK-NOT: @test4_f
; And check that a single call to a function which is used by a complex
; constant expression cannot be inlined because the constant expression forms
; a second use. If this part starts failing we need to use more complex
; constant expressions to reference a particular function with them.
%sink = alloca i1
; The stored value references @test4_g; the expected output below shows it
; appearing as the constant false.
store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink
call void @test4_g(i1 true)
; CHECK: store volatile i1 false
; CHECK: call void @test4_g(i1 true)
ret void
}