fmul-sqrt.ll
9.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
declare double @llvm.sqrt.f64(double) nounwind readnone speculatable
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare void @use(double)
; sqrt(a) * sqrt(b) no math flags
define double @sqrt_a_sqrt_b(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b(
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret double [[MUL]]
;
%1 = call double @llvm.sqrt.f64(double %a)
%2 = call double @llvm.sqrt.f64(double %b)
%mul = fmul double %1, %2
ret double %mul
}
; sqrt(a) * sqrt(b) fast-math, multiple uses
define double @sqrt_a_sqrt_b_multiple_uses(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_multiple_uses(
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
; CHECK-NEXT: call void @use(double [[TMP2]])
; CHECK-NEXT: ret double [[MUL]]
;
%1 = call fast double @llvm.sqrt.f64(double %a)
%2 = call fast double @llvm.sqrt.f64(double %b)
%mul = fmul fast double %1, %2
call void @use(double %2)
ret double %mul
}
; sqrt(a) * sqrt(b) => sqrt(a*b) with fast-math
define double @sqrt_a_sqrt_b_reassoc_nnan(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc_nnan(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nnan double @llvm.sqrt.f64(double [[TMP1]])
; CHECK-NEXT: ret double [[TMP2]]
;
%1 = call double @llvm.sqrt.f64(double %a)
%2 = call double @llvm.sqrt.f64(double %b)
%mul = fmul reassoc nnan double %1, %2
ret double %mul
}
; nnan disallows the possibility that both operands are negative,
; so we won't return a number when the answer should be NaN.
define double @sqrt_a_sqrt_b_reassoc(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret double [[MUL]]
;
%1 = call double @llvm.sqrt.f64(double %a)
%2 = call double @llvm.sqrt.f64(double %b)
%mul = fmul reassoc double %1, %2
ret double %mul
}
; sqrt(a) * sqrt(b) * sqrt(c) * sqrt(d) => sqrt(a*b*c*d) with fast-math
; 'reassoc nnan' on the fmuls is all that is required, but check propagation of other FMF.
define double @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan arcp double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nnan double [[TMP1]], [[C:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf double [[TMP2]], [[D:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = call reassoc nnan ninf double @llvm.sqrt.f64(double [[TMP3]])
; CHECK-NEXT: ret double [[TMP4]]
;
%1 = call double @llvm.sqrt.f64(double %a)
%2 = call double @llvm.sqrt.f64(double %b)
%3 = call double @llvm.sqrt.f64(double %c)
%4 = call double @llvm.sqrt.f64(double %d)
%mul = fmul reassoc nnan arcp double %1, %2
%mul1 = fmul reassoc nnan double %mul, %3
%mul2 = fmul reassoc nnan ninf double %mul1, %4
ret double %mul2
}
define double @rsqrt_squared(double %x) {
; CHECK-LABEL: @rsqrt_squared(
; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call fast double @llvm.sqrt.f64(double %x)
%rsqrt = fdiv fast double 1.0, %sqrt
%squared = fmul fast double %rsqrt, %rsqrt
ret double %squared
}
define double @rsqrt_x_reassociate_extra_use(double %x, double * %p) {
; CHECK-LABEL: @rsqrt_x_reassociate_extra_use(
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[RSQRT:%.*]] = fdiv double 1.000000e+00, [[SQRT]]
; CHECK-NEXT: [[RES:%.*]] = fdiv reassoc nsz double [[X:%.*]], [[SQRT]]
; CHECK-NEXT: store double [[RSQRT]], double* [[P:%.*]], align 8
; CHECK-NEXT: ret double [[RES]]
;
%sqrt = call double @llvm.sqrt.f64(double %x)
%rsqrt = fdiv double 1.0, %sqrt
%res = fmul reassoc nsz double %rsqrt, %x
store double %rsqrt, double* %p
ret double %res
}
define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x float> %y, <2 x float>* %p) {
; CHECK-LABEL: @x_add_y_rsqrt_reassociate_extra_use(
; CHECK-NEXT: [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]])
; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT]]
; CHECK-NEXT: [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]]
; CHECK-NEXT: store <2 x float> [[RSQRT]], <2 x float>* [[P:%.*]], align 8
; CHECK-NEXT: ret <2 x float> [[RES]]
;
%add = fadd fast <2 x float> %x, %y ; thwart complexity-based canonicalization
%sqrt = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %add)
%rsqrt = fdiv fast <2 x float> <float 1.0, float 1.0>, %sqrt
%res = fmul fast <2 x float> %add, %rsqrt
store <2 x float> %rsqrt, <2 x float>* %p
ret <2 x float> %res
}
define double @sqrt_divisor_squared(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_squared(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan nsz double [[Y:%.*]], [[Y]]
; CHECK-NEXT: [[SQUARED:%.*]] = fdiv reassoc nnan nsz double [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call double @llvm.sqrt.f64(double %x)
%div = fdiv double %y, %sqrt
%squared = fmul reassoc nnan nsz double %div, %div
ret double %squared
}
define <2 x float> @sqrt_dividend_squared(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @sqrt_dividend_squared(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[Y:%.*]], [[Y]]
; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast <2 x float> [[X:%.*]], [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[SQUARED]]
;
%sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
%div = fdiv fast <2 x float> %sqrt, %y
%squared = fmul fast <2 x float> %div, %div
ret <2 x float> %squared
}
; We do not transform this because it would result in an extra instruction.
; This might still be a good optimization for the backend.
define double @sqrt_divisor_squared_extra_use(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_squared_extra_use(
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
; CHECK-NEXT: call void @use(double [[DIV]])
; CHECK-NEXT: [[SQUARED:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call double @llvm.sqrt.f64(double %x)
%div = fdiv double %y, %sqrt
call void @use(double %div)
%squared = fmul reassoc nnan nsz double %div, %div
ret double %squared
}
define double @sqrt_dividend_squared_extra_use(double %x, double %y) {
; CHECK-LABEL: @sqrt_dividend_squared_extra_use(
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: call void @use(double [[SQRT]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[Y]]
; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast double [[X]], [[TMP1]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call double @llvm.sqrt.f64(double %x)
call void @use(double %sqrt)
%div = fdiv fast double %sqrt, %y
%squared = fmul fast double %div, %div
ret double %squared
}
; Negative test - require 'nsz'.
define double @sqrt_divisor_not_enough_FMF(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_not_enough_FMF(
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
; CHECK-NEXT: [[SQUARED:%.*]] = fmul reassoc nnan double [[DIV]], [[DIV]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call double @llvm.sqrt.f64(double %x)
%div = fdiv double %y, %sqrt
%squared = fmul reassoc nnan double %div, %div
ret double %squared
}
; TODO: This is a special-case of the general pattern. If we have a constant
; operand, the extra use limitation could be eased because this does not
; result in an extra instruction (1.0 * 1.0 is constant folded).
define double @rsqrt_squared_extra_use(double %x) {
; CHECK-LABEL: @rsqrt_squared_extra_use(
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
; CHECK-NEXT: call void @use(double [[RSQRT]])
; CHECK-NEXT: [[SQUARED:%.*]] = fmul fast double [[RSQRT]], [[RSQRT]]
; CHECK-NEXT: ret double [[SQUARED]]
;
%sqrt = call fast double @llvm.sqrt.f64(double %x)
%rsqrt = fdiv fast double 1.0, %sqrt
call void @use(double %rsqrt)
%squared = fmul fast double %rsqrt, %rsqrt
ret double %squared
}