loops_with_profile_info.ll
6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
; RUN: opt < %s -analyze -block-freq | FileCheck %s
; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s
; This code contains three loops. The first is triple-nested, the
; second is double-nested and the third is a single loop. At
; runtime, each of the three loops executes its body 1,000,000 times.
; We used to give different frequencies to each of the loops because
; loop scales were limited to no more than 4,096.
;
; This was penalizing the hotness of the second and third loops
; because BFI was reducing the loop scale for for.cond16 and
; for.cond26 to a max of 4,096.
;
; Without this restriction, all three loops are now given nearly
; identical frequency values.
;
; Original C code:
;
;
; int g;
; __attribute__((noinline)) void bar() {
; g++;
; }
;
; extern int printf(const char*, ...);
;
; int main()
; {
; int i, j, k;
;
; g = 0;
; for (i = 0; i < 100; i++)
; for (j = 0; j < 100; j++)
; for (k = 0; k < 100; k++)
; bar();
;
; printf ("g = %d\n", g);
; g = 0;
;
; for (i = 0; i < 100; i++)
; for (j = 0; j < 10000; j++)
; bar();
;
; printf ("g = %d\n", g);
; g = 0;
;
;
; for (i = 0; i < 1000000; i++)
; bar();
;
; printf ("g = %d\n", g);
; g = 0;
; }
; Counter from the original C program: a zero-initialized i32 that @main
; resets before each loop nest and passes to printf after it.
@g = common global i32 0, align 4
; printf format string "g = %d\n" plus the terminating NUL (8 bytes total).
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1
; Only declared here (no body in this module); called once per innermost
; loop iteration in @main.
declare void @bar()
; Variadic C printf, used to print @g after each loop nest.
declare i32 @printf(i8*, ...)
; CHECK: Printing analysis {{.*}} for function 'main':
; CHECK-NEXT: block-frequency-info: main
; main: unoptimized (alloca/load/store form) lowering of the C program shown
; in the header comment. It contains three loop nests whose innermost bodies
; each call @bar:
;   1. for.cond  -> for.cond1  -> for.cond4   (bounds 100 x 100 x 100)
;   2. for.cond13 -> for.cond16               (bounds 100 x 10000)
;   3. for.cond26                             (bound 1000000)
; Every loop-header branch carries !prof branch weights, and the FileCheck
; expectations placed before for.body6, for.body18 and for.body28 pin the
; block frequency computed for each innermost body.
; Returns the value loaded from %retval, which is stored as 0 on entry and
; never written again.
define i32 @main() {
entry:
; Stack slots for the return value and the three C loop counters.
%retval = alloca i32, align 4
%i = alloca i32, align 4
%j = alloca i32, align 4
%k = alloca i32, align 4
store i32 0, i32* %retval
; g = 0; i = 0 -- set up for the first (triple-nested) loop nest.
store i32 0, i32* @g, align 4
store i32 0, i32* %i, align 4
br label %for.cond
; --- Loop nest 1, outer loop header: i < 100 (weights !1) ---
for.cond: ; preds = %for.inc10, %entry
%0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 100
br i1 %cmp, label %for.body, label %for.end12, !prof !1
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond1
; Loop nest 1, middle loop header: j < 100 (weights !2).
for.cond1: ; preds = %for.inc7, %for.body
%1 = load i32, i32* %j, align 4
%cmp2 = icmp slt i32 %1, 100
br i1 %cmp2, label %for.body3, label %for.end9, !prof !2
for.body3: ; preds = %for.cond1
store i32 0, i32* %k, align 4
br label %for.cond4
; Loop nest 1, innermost loop header: k < 100 (weights !3).
for.cond4: ; preds = %for.inc, %for.body3
%2 = load i32, i32* %k, align 4
%cmp5 = icmp slt i32 %2, 100
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
; Expected frequency of the innermost body of loop nest 1.
; CHECK: - for.body6: float = 500000.5, int = 4000004
for.body6: ; preds = %for.cond4
call void @bar()
br label %for.inc
; k++
for.inc: ; preds = %for.body6
%3 = load i32, i32* %k, align 4
%inc = add nsw i32 %3, 1
store i32 %inc, i32* %k, align 4
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc7
; j++
for.inc7: ; preds = %for.end
%4 = load i32, i32* %j, align 4
%inc8 = add nsw i32 %4, 1
store i32 %inc8, i32* %j, align 4
br label %for.cond1
for.end9: ; preds = %for.cond1
br label %for.inc10
; i++
for.inc10: ; preds = %for.end9
%5 = load i32, i32* %i, align 4
%inc11 = add nsw i32 %5, 1
store i32 %inc11, i32* %i, align 4
br label %for.cond
; After loop nest 1: print g, then reset g and i for loop nest 2.
for.end12: ; preds = %for.cond
%6 = load i32, i32* @g, align 4
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
store i32 0, i32* @g, align 4
store i32 0, i32* %i, align 4
br label %for.cond13
; --- Loop nest 2, outer loop header: i < 100 (weights !1) ---
for.cond13: ; preds = %for.inc22, %for.end12
%7 = load i32, i32* %i, align 4
%cmp14 = icmp slt i32 %7, 100
br i1 %cmp14, label %for.body15, label %for.end24, !prof !1
for.body15: ; preds = %for.cond13
store i32 0, i32* %j, align 4
br label %for.cond16
; Loop nest 2, inner loop header: j < 10000 (weights !4).
for.cond16: ; preds = %for.inc19, %for.body15
%8 = load i32, i32* %j, align 4
%cmp17 = icmp slt i32 %8, 10000
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
; Expected frequency of the innermost body of loop nest 2.
; CHECK: - for.body18: float = 499999.9, int = 3999998
for.body18: ; preds = %for.cond16
call void @bar()
br label %for.inc19
; j++
for.inc19: ; preds = %for.body18
%9 = load i32, i32* %j, align 4
%inc20 = add nsw i32 %9, 1
store i32 %inc20, i32* %j, align 4
br label %for.cond16
for.end21: ; preds = %for.cond16
br label %for.inc22
; i++
for.inc22: ; preds = %for.end21
%10 = load i32, i32* %i, align 4
%inc23 = add nsw i32 %10, 1
store i32 %inc23, i32* %i, align 4
br label %for.cond13
; After loop nest 2: print g, then reset g and i for the single loop.
for.end24: ; preds = %for.cond13
%11 = load i32, i32* @g, align 4
%call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
store i32 0, i32* @g, align 4
store i32 0, i32* %i, align 4
br label %for.cond26
; --- Loop 3 (not nested), header: i < 1000000 (weights !5) ---
for.cond26: ; preds = %for.inc29, %for.end24
%12 = load i32, i32* %i, align 4
%cmp27 = icmp slt i32 %12, 1000000
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
; Expected frequency of the body of loop 3.
; CHECK: - for.body28: float = 499995.2, int = 3999961
for.body28: ; preds = %for.cond26
call void @bar()
br label %for.inc29
; i++
for.inc29: ; preds = %for.body28
%13 = load i32, i32* %i, align 4
%inc30 = add nsw i32 %13, 1
store i32 %inc30, i32* %i, align 4
br label %for.cond26
; After loop 3: print g, reset it, and return the value held in %retval.
for.end31: ; preds = %for.cond26
%14 = load i32, i32* @g, align 4
%call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
store i32 0, i32* @g, align 4
%15 = load i32, i32* %retval
ret i32 %15
}
; Module metadata. !1..!5 are the branch weights attached via !prof to the
; loop-header branches in @main; in each node the first weight belongs to the
; branch's first successor (the loop body) and the second to the loop exit.
; NOTE(review): each weight looks like the C-level execution count plus one
; (e.g. 100 iterations / 1 exit -> 101 / 2) -- presumably how the profile was
; recorded; confirm before editing the numbers.
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
; !1: outer "i < 100" headers of loop nests 1 and 2 (for.cond, for.cond13).
!1 = !{!"branch_weights", i32 101, i32 2}
; !2: middle "j < 100" header of loop nest 1 (for.cond1).
!2 = !{!"branch_weights", i32 10001, i32 101}
; !3: innermost "k < 100" header of loop nest 1 (for.cond4).
!3 = !{!"branch_weights", i32 1000001, i32 10001}
; !4: inner "j < 10000" header of loop nest 2 (for.cond16).
!4 = !{!"branch_weights", i32 1000001, i32 101}
; !5: "i < 1000000" header of loop 3 (for.cond26).
!5 = !{!"branch_weights", i32 1000001, i32 2}