; RUN: opt < %s -early-cse-memssa -verify-memoryssa -disable-output
; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; The tests below highlight scenarios where EarlyCSE does not preserve MemorySSA's
; optimized accesses. The current MemorySSA verification will accept these.
; Test 1:
; AA cannot tell here that the last load does not alias the only store.
; The first two loads are a common expression; EarlyCSE removes the second one,
; and afterwards AA can see that the last load is a Use(LoE). Hence the Use is
; no longer optimized as it claims to be. Note that if we swap the GEP indices
; 2 and 1, AA sees NoAlias for the last load even before CSE-ing the first two
; loads (a sketch of that variant follows the test).
%struct.ImageParameters = type { i32, i32, i32 }
@img = external global %struct.ImageParameters*, align 8
define void @test1_macroblock() {
entry:
; MemoryUse(LoE)
%0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
%Pos_2 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 2
; 1 = MemoryDef(LoE)
store i32 undef, i32* %Pos_2, align 8
; MemoryUse(LoE)
%1 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
%Pos_1 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %1, i64 0, i32 1
; MemoryUse(1) MayAlias
%2 = load i32, i32* %Pos_1, align 4
unreachable
}
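; Illustrative sketch (not part of the original test; the function name is ours,
; chosen for illustration): the swapped-index variant mentioned above, where the
; store goes through index 1 and the last load through index 2, the order under
; which AA is said to report NoAlias for the last load even before the first two
; loads are CSE'd. MemorySSA annotations are intentionally omitted here, since
; they are not checked by this test.
define void @test1_macroblock_swapped() {
entry:
  %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
  %Pos_1 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 1
  store i32 undef, i32* %Pos_1, align 4
  %1 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
  %Pos_2 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %1, i64 0, i32 2
  %2 = load i32, i32* %Pos_2, align 4
  unreachable
}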
; Test 2:
; EarlyCSE simplifies %string to undef. The Def and Use used to be MustAlias;
; with undef they become NoAlias, and the Use could be optimized further to LoE.
; We can de-optimize the uses of replaced instructions, but in general this is
; not enough (see the next tests). A sketch of the simplified form follows the
; test.
%struct.TermS = type { i32, i32, i32, i32, i32, i8* }
define fastcc void @test2_term_string() {
entry:
%string = getelementptr inbounds %struct.TermS, %struct.TermS* undef, i64 0, i32 5
; 1 = MemoryDef(LoE)
store i8* undef, i8** %string, align 8
; MemoryUse(1) MustAlias
%0 = load i8*, i8** %string, align 8
unreachable
}
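; Illustrative sketch (not part of the original test, and an assumption about
; the transformed shape rather than checked output; the function name is ours):
; roughly what test2 looks like once %string is simplified to undef. The Def and
; Use now go through undef pointers, which AA reports as NoAlias, so the Use
; could be optimized all the way to LoE.
define fastcc void @test2_term_string_simplified() {
entry:
  store i8* undef, i8** undef, align 8
  %0 = load i8*, i8** undef, align 8
  unreachable
}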
; Test 3:
; EarlyCSE simplifies %0 to undef, so the second Def now stores to undef, and
; the second load (Use(2)) could be optimized further to LoE.
; When replacing instructions, we can de-optimize all uses of the replaced
; instruction and all uses of transitive accesses. However, this does not stop
; MemorySSA from being tripped up by AA (see test 4). A sketch of the simplified
; form follows the test.
%struct.Grammar = type { i8*, i8*, %struct.anon }
%struct.anon = type { i32, i32, %struct.Term**, [3 x %struct.Term*] }
%struct.Term = type { i32 }
define fastcc void @test3_term_string(%struct.Grammar* %g) {
entry:
; 1 = MemoryDef(LoE)
store i8* undef, i8** undef, align 8
; MemoryUse(LoE)
%0 = load i8*, i8** undef, align 8
%arrayidx = getelementptr inbounds i8, i8* %0, i64 undef
; 2 = MemoryDef(1)
store i8 0, i8* %arrayidx, align 1
%v = getelementptr inbounds %struct.Grammar, %struct.Grammar* %g, i64 0, i32 2, i32 2
; MemoryUse(2) MayAlias
%1 = load %struct.Term**, %struct.Term*** %v, align 8
unreachable
}
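; Illustrative sketch (not part of the original test, and an assumption about
; the transformed shape rather than checked output; the function name is ours):
; roughly what test3 looks like once %0 is simplified to undef. The second store
; now goes through a pointer derived from undef, and the final load could be
; optimized further to a Use(LoE).
define fastcc void @test3_term_string_simplified(%struct.Grammar* %g) {
entry:
  store i8* undef, i8** undef, align 8
  %arrayidx = getelementptr inbounds i8, i8* undef, i64 undef
  store i8 0, i8* %arrayidx, align 1
  %v = getelementptr inbounds %struct.Grammar, %struct.Grammar* %g, i64 0, i32 2, i32 2
  %0 = load %struct.Term**, %struct.Term*** %v, align 8
  unreachable
}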
; Test 4:
; Removing dead/unused instructions in if.then274 makes AA smarter. Before the
; removal, it finds that %4 MayAliases the store above; after the removal, that
; Use can be optimized to LoE. Hence, after EarlyCSE there is an access that
; claims to be optimized but could be optimized further.
; We cannot escape such cases in general when relying on Alias Analysis.
; The only fail-safe way to actually preserve MemorySSA when removing or
; replacing instructions (i.e. to get the *same* MemorySSA as if it were
; computed for the updated IR) is to recompute it from scratch. What we get now
; is still a correct update, but with accesses that claim to be optimized and
; could be optimized further if MemorySSA were re-run on the updated IR.
; A sketch of the simplified for.cond404 block follows the test.
%struct.gnode.0.1.3.6.9.18.20.79 = type { i32, i32, i32, i32, i32, i32, i32, %struct.gnode.0.1.3.6.9.18.20.79* }
@gnodeArray = external global %struct.gnode.0.1.3.6.9.18.20.79**, align 8
define void @test4_shortest() {
entry:
%exl.i = alloca [5 x i32], align 16
br i1 undef, label %if.then274, label %for.cond404
if.then274:                                       ; preds = %entry
%0 = bitcast [5 x i32]* %exl.i to i8*
%arrayidx.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
%arrayidx1.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
%arrayidx2.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
%arrayidx3.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
%1 = bitcast [5 x i32]* %exl.i to i8*
%arrayidx.i1034 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
%arrayidx1.i1035 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
%arrayidx2.i1036 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
%arrayidx3.i1037 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
unreachable
for.cond404:                                      ; preds = %entry
%2 = bitcast [5 x i32]* %exl.i to i8*
%arrayidx.i960 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
%arrayidx1.i961 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
%arrayidx2.i962 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
; 1 = MemoryDef(LoE)
store i32 undef, i32* %arrayidx2.i962, align 4
%arrayidx3.i963 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
; MemoryUse(LoE)
%3 = load %struct.gnode.0.1.3.6.9.18.20.79**, %struct.gnode.0.1.3.6.9.18.20.79*** @gnodeArray, align 8
%arrayidx6.i968 = getelementptr inbounds %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %3, i64 undef
; MemoryUse(1) MayAlias
%4 = load %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %arrayidx6.i968, align 8
br i1 undef, label %for.cond26.preheader.i974, label %if.then20.for.body_crit_edge.i999
for.cond26.preheader.i974:                        ; preds = %for.cond404
%5 = bitcast [5 x i32]* %exl.i to i8*
%arrayidx.i924 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
%arrayidx1.i925 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
%arrayidx2.i926 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
%arrayidx3.i927 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
unreachable
if.then20.for.body_crit_edge.i999:                ; preds = %for.cond404
%arrayidx9.phi.trans.insert.i997 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 undef
unreachable
}
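; Illustrative sketch (not part of the original test, and an assumption about
; the transformed shape rather than checked output; the function name is ours):
; roughly the for.cond404 block of test4 once the dead bitcasts and GEPs are
; removed. With fewer uses of the alloca, AA is said to get smarter, and the
; load through %arrayidx6.i968 could then be recognized as not aliasing the
; store, i.e. a Use(LoE).
define void @test4_shortest_simplified() {
entry:
  %exl.i = alloca [5 x i32], align 16
  %arrayidx2.i962 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
  store i32 undef, i32* %arrayidx2.i962, align 4
  %0 = load %struct.gnode.0.1.3.6.9.18.20.79**, %struct.gnode.0.1.3.6.9.18.20.79*** @gnodeArray, align 8
  %arrayidx6.i968 = getelementptr inbounds %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %0, i64 undef
  %1 = load %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %arrayidx6.i968, align 8
  unreachable
}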