loops.mlir
20.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s
// Test that we can lower all the way to LLVM without crashing; results are not checked here.
// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1
// CHECK-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
// CHECK-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
// CHECK-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
// CHECK-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
// CHECK-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)>
// Lowering test input for linalg.matmul on dynamically sized, strided operands.
// All three operands are f32 views over the same raw i8 buffer %arg0, each
// taken at offset %c0: %A is MxK, %B is KxN and %C is MxN.
func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
%c0 = constant 0 : index
// %c1 is not referenced anywhere in this function.
%c1 = constant 1 : index
%A = view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
%B = view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
%C = view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
linalg.matmul(%A, %B, %C) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>
return
}
// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[N:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32, #[[strided2D]]>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32, #[[strided2D]]>
// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32, #[[strided2D]]>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32, #[[strided2D]]>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32, #[[strided2D]]>
// Lowering test input for linalg.matvec. The operands are f32 views over the
// raw i8 buffer %arg0 at offset %c0: %2 is a 2-D MxN view, %3 is a 1-D view of
// size M and %4 is a 1-D view of size N.
// NOTE(review): for a conventional y = A * x with A of shape MxN one would
// expect the second operand to have size N and the third size M; the sizes
// here look swapped. Presumably harmless because all sizes are dynamic and
// only the generated IR is inspected - confirm before reusing this as an
// executable example.
func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
%c0 = constant 0 : index
// %c1 is not referenced anywhere in this function.
%c1 = constant 1 : index
%2 = view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32, offset: ?, strides: [?, 1]>
%3 = view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32, offset: ?, strides: [1]>
%4 = view %arg0[%c0][%N] : memref<?xi8> to memref<?xf32, offset: ?, strides: [1]>
linalg.matvec(%2, %3, %4) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>
return
}
// CHECK-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32, #[[strided1D]]>
// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32, #[[strided1D]]>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32, #[[strided2D]]>
// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32, #[[strided1D]]>
// Lowering test input for linalg.dot. %1 and %2 are 1-D f32 views of size M
// over the raw i8 buffer %arg0 at offset %c0; %3 is a rank-0 f32 view of the
// same buffer (no offset or size operands).
func @dot(%arg0: memref<?xi8>, %M: index) {
%c0 = constant 0 : index
// %c1 is not referenced anywhere in this function.
%c1 = constant 1 : index
%1 = view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32, offset: ?, strides: [1]>
%2 = view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32, offset: ?, strides: [1]>
%3 = view %arg0[][] : memref<?xi8> to memref<f32>
linalg.dot(%1, %2, %3) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>, memref<f32>
return
}
// CHECK-LABEL: func @dot(%{{.*}}: memref<?xi8>,
// CHECK-SAME: [[K:arg[0-9]+]]: index
// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32, #[[strided1D]]>
// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32, #[[strided1D]]>
// CHECK: %[[C:.*]] = std.view %{{.*}}[][] : memref<?xi8> to memref<f32>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK-DAG: %[[c:.*]] = load %[[C]][] : memref<f32>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
// CHECK: store %[[res]], %[[C]][] : memref<f32>
// Variant of the linalg.dot test in which the two strided 1-D f32 memrefs and
// the rank-0 f32 memref are passed directly as function arguments instead of
// being created with view operations inside the function.
func @dot_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>, %arg2: memref<f32>) {
linalg.dot(%arg0, %arg1, %arg2) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>, memref<f32>
return
}
// CHECK-LABEL: func @dot_view(
// CHECK: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<f32>) {
// CHECK: %[[K:.*]] = dim %arg0, 0 : memref<?xf32, #[[strided1D]]>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK-DAG: %[[c:.*]] = load %{{.*}}[] : memref<f32>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
// CHECK: store %[[res]], %{{.*}}[] : memref<f32>
// Lowering test input for linalg.fill on a strided 1-D f32 memref (%arg0)
// with the f32 scalar %arg1.
func @fill_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: f32) {
linalg.fill(%arg0, %arg1) : memref<?xf32, offset: ?, strides: [1]>, f32
return
}
// CHECK-LABEL: func @fill_view(
// CHECK: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: f32) {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
// Rank-0 variant of the linalg.fill test: %arg0 is a memref<f32> with no
// dimensions, and the expectations that follow contain no loop, only a store.
func @fill_view0(%arg0: memref<f32>, %arg1: f32) {
linalg.fill(%arg0, %arg1) : memref<f32>, f32
return
}
// CHECK-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32>
// 3-D variant of the linalg.fill test: %arg0 is a strided memref<?x?x?xf32>
// with a unit innermost stride, filled with the f32 scalar %arg1.
func @fill_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: f32) {
linalg.fill(%arg0, %arg1) : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, f32
return
}
// CHECK-LABEL: func @fill_view3(
// CHECK: %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: f32) {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// Lowering test input for linalg.copy between two strided 1-D f32 memrefs;
// no permutation attributes are specified.
func @copy_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>) {
linalg.copy(%arg0, %arg1) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>
return
}
// CHECK-LABEL: func @copy_view(
// CHECK: %{{.*}}: memref<?xf32, #[[strided1D]]>, %{{.*}}: memref<?xf32, #[[strided1D]]>) {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[strided1D]]>
// Rank-0 variant of the linalg.copy test: both operands are memref<f32>, and
// the expectations that follow contain a single load/store pair with no loop.
func @copy_view0(%arg0: memref<f32>, %arg1: memref<f32>) {
linalg.copy(%arg0, %arg1) : memref<f32>, memref<f32>
return
}
// CHECK-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) {
// CHECK: %{{.*}} = load %{{.*}}[] : memref<f32>
// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32>
// 3-D variant of the linalg.copy test that also sets both permutation
// attributes: inputPermutation maps (i, j, k) to (i, k, j) and
// outputPermutation maps (i, j, k) to (k, j, i).
func @copy_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>,
outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} :
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: func @copy_view3
// CHECK: (%{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>) {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// Lowering test input for linalg.conv on three strided 3-D f32 memrefs with
// strides = [2]. No dilations attribute is given; the expectations that follow
// use the affine map (d0 * 2 + d1) declared at the top of the file for the
// strided index, i.e. a dilation factor of 1.
func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: func @conv_view3(
// CHECK: %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[strided3D]]>) {
// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[Q:.*]] = dim %arg0, 1 : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[K:.*]] = dim %arg0, 2 : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
// CHECK: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[strided3D]]>
// Lowering test input for linalg.conv on three strided 4-D f32 memrefs with
// explicit dilations = [4, 5] and strides = [2, 3]. The expectations that
// follow use the affine maps (d0 * 2 + d1 * 4) and (d0 * 3 + d1 * 5) declared
// at the top of the file for the two strided indices.
func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
return
}
// CHECK-LABEL: func @conv_view4(
// CHECK: %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>) {
// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[K:.*]] = dim %arg0, 3 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECK: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}})
// CHECK: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}})
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
// Scalar payload function referenced by the `fun` attribute of #trait. It
// ignores its three f32 arguments and returns the constant 0.0 for both
// results.
func @foo(%0: f32, %1: f32, %2: f32) -> (f32, f32) {
%f0 = constant 0.0 : f32
return %f0, %f0 : f32, f32
}
// Indexing maps shared by the generic and indexed_generic traits in this
// file, one per operand, over the iteration space (i, j, k): the first operand
// is accessed at (i, j), the second at (i, j, k) and the third at (i, k, j).
#accesses = [
affine_map<(i, j, k) -> (i, j)>,
affine_map<(i, j, k) -> (i, j, k)>,
affine_map<(i, j, k) -> (i, k, j)>
]
// Trait for the function-based linalg.generic test: one input and two outputs,
// three parallel iterators, the shared #accesses indexing maps, and @foo as
// the scalar payload function.
#trait = {
args_in = 1,
args_out = 2,
iterator_types = ["parallel", "parallel", "parallel"],
indexing_maps = #accesses,
fun = @foo,
library_call = "some_external_function_name_1",
doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))"
}
// Lowering test input for linalg.generic whose payload is supplied as a
// function (#trait names @foo). Operands are a strided 2-D f32 memref and two
// strided 3-D f32 memrefs.
func @generic_function(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.generic #trait %arg0, %arg1, %arg2:
memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: @foo
// CHECK-LABEL: @generic_function
// CHECK: loop.for %[[i:.*]] = {{.*}}
// CHECK: loop.for %[[j:.*]] = {{.*}}
// CHECK: loop.for %[[k:.*]] = {{.*}}
// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32)
// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// Trait for the region-based linalg.generic test. It has the same argument
// counts, iterator types and indexing maps as #trait but carries no `fun`
// attribute; the payload is given as a region at the use site.
#trait2 = {
args_in = 1,
args_out = 2,
iterator_types = ["parallel", "parallel", "parallel"],
indexing_maps = #accesses,
library_call = "some_external_function_name_2",
doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))"
}
// Lowering test input for linalg.generic whose payload is an inline region.
// The region receives one scalar per operand (%a, %b, %c) and yields two
// values: %d = %a * %b and %e = %c + %d.
func @generic_region(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.generic #trait2 %arg0, %arg1, %arg2 {
^bb0(%a: f32, %b: f32, %c: f32):
%d = mulf %a, %b : f32
%e = addf %c, %d : f32
linalg.yield %d, %e : f32, f32
}: memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: @generic_region
// CHECK: loop.for %[[i:.*]] = {{.*}}
// CHECK: loop.for %[[j:.*]] = {{.*}}
// CHECK: loop.for %[[k:.*]] = {{.*}}
// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32
// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// Scalar payload function referenced by the `fun` attribute of #trait3. It
// takes three index arguments followed by three f32 arguments, converts %i to
// i32 and then to f32, and returns that value for both results; %j, %k and
// the f32 arguments are unused.
func @indexed_foo(%i: index, %j: index, %k: index, %0: f32, %1: f32, %2: f32) -> (f32, f32) {
%i_int = index_cast %i: index to i32
%i_float = sitofp %i_int : i32 to f32
return %i_float, %i_float : f32, f32
}
// Trait for the function-based linalg.indexed_generic test: one input and two
// outputs, three parallel iterators, the shared #accesses indexing maps, and
// @indexed_foo as the scalar payload function.
// NOTE(review): library_call reuses the same name as #trait
// ("some_external_function_name_1") - presumably intentional; confirm.
#trait3 = {
args_in = 1,
args_out = 2,
iterator_types = ["parallel", "parallel", "parallel"],
indexing_maps = #accesses,
fun = @indexed_foo,
library_call = "some_external_function_name_1",
doc = "b(i,j,k), c(i,k,j) = foo(a(i, j), b(i,j,k), c(i,k,j))"
}
// Lowering test input for linalg.indexed_generic whose payload is supplied as
// a function (#trait3 names @indexed_foo). Operands are a strided 2-D f32
// memref and two strided 3-D f32 memrefs.
func @indexed_generic_function(
%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>,
%arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.indexed_generic #trait3 %arg0, %arg1, %arg2:
memref<?x?xf32, offset: ?, strides: [?, 1]>,
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>,
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: @indexed_foo
// CHECK-LABEL: @indexed_generic_function
// CHECK: loop.for %[[i:.*]] = {{.*}}
// CHECK: loop.for %[[j:.*]] = {{.*}}
// CHECK: loop.for %[[k:.*]] = {{.*}}
// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[strided2D]]>
// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32)
// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[strided3D]]>
// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[strided3D]]>
// Trait for the region-based linalg.indexed_generic test: one input and two
// outputs, three parallel iterators and the shared #accesses indexing maps.
// There is no `fun` attribute; the payload is the region at the use site.
// The `doc` string describes what that region computes: the first result is
// A(i, j) * B(i,j,k) and the second is i + j + k + C(i,k,j).
#trait4 = {
args_in = 1,
args_out = 2,
iterator_types = ["parallel", "parallel", "parallel"],
indexing_maps = #accesses,
library_call = "some_external_function_name_2",
doc = "B(i,j,k), C(i,k,j) = foo(A(i, j) * B(i,j,k), i + j + k + C(i,k,j))"
}
// Lowering test input for linalg.indexed_generic whose payload is an inline
// region. The region receives the three loop indices (%i, %j, %k) followed by
// one scalar per operand (%a, %b, %c).
func @indexed_generic_region(
%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>,
%arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.indexed_generic #trait4 %arg0, %arg1, %arg2 {
^bb0(%i: index, %j: index, %k: index, %a: f32, %b: f32, %c: f32):
// First yielded value: product of the first two operand scalars.
%result_1 = mulf %a, %b : f32
// Second yielded value: sum of the three indices, converted index -> i32 ->
// f32, added to the third operand scalar.
%ij = addi %i, %j : index
%ijk = addi %ij, %k : index
%ijk_int = index_cast %ijk : index to i32
%ijk_float = sitofp %ijk_int : i32 to f32
%result_2 = addf %c, %ijk_float : f32
linalg.yield %result_1, %result_2 : f32, f32
}: memref<?x?xf32, offset: ?, strides: [?, 1]>,
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>,
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
// CHECK-LABEL: @indexed_generic_region
// CHECK: loop.for %[[i:.*]] = {{.*}}
// CHECK: loop.for %[[j:.*]] = {{.*}}
// CHECK: loop.for %[[k:.*]] = {{.*}}
// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]]
// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]]
// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]]
// CHECK: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index
// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32
// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32
// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32
// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]