x86_64-arguments.c
16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
#include <stdarg.h>
// CHECK-LABEL: define signext i8 @f0()
char f0(void) {
return 0;
}
// CHECK-LABEL: define signext i16 @f1()
short f1(void) {
return 0;
}
// CHECK-LABEL: define i32 @f2()
int f2(void) {
return 0;
}
// CHECK-LABEL: define float @f3()
float f3(void) {
return 0;
}
// CHECK-LABEL: define double @f4()
double f4(void) {
return 0;
}
// CHECK-LABEL: define x86_fp80 @f5()
long double f5(void) {
return 0;
}
// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
void f6(char a0, short a1, int a2, long long a3, void *a4) {
}
// CHECK-LABEL: define void @f7(i32 %a0)
typedef enum { A, B, C } e7;
void f7(e7 a0) {
}
// Test merging/passing of upper eightbyte with X87 class.
//
// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
// CHECK-LABEL: define void @f8_2(%union.u8* byval(%union.u8) align 16 %a0)
union u8 {
long double a;
int b;
};
union u8 f8_1() { while (1) {} }
void f8_2(union u8 a0) {}
// CHECK-LABEL: define i64 @f9()
struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }
// CHECK-LABEL: define void @f10(i64 %a0.coerce)
struct s10 { int a; int b; int : 0; };
void f10(struct s10 a0) {}
// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
union { long double a; float b; } f11() { while (1) {} }
// CHECK-LABEL: define i32 @f12_0()
// CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
struct s12 { int a __attribute__((aligned(16))); };
struct s12 f12_0(void) { while (1) {} }
void f12_1(struct s12 a0) {}
// Check that sret parameter is accounted for when checking available integer
// registers.
// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
struct s13_0 { long long f0[3]; };
struct s13_1 { long long f0[2]; };
struct s13_0 f13(int a, int b, int c, int d,
struct s13_1 e, int f) { while (1) {} }
// CHECK: define void @f14({{.*}}, i8 signext %X)
void f14(int a, int b, int c, int d, int e, int f, char X) {}
// CHECK: define void @f15({{.*}}, i8* %X)
void f15(int a, int b, int c, int d, int e, int f, void *X) {}
// CHECK: define void @f16({{.*}}, float %X)
void f16(float a, float b, float c, float d, float e, float f, float g, float h,
float X) {}
// CHECK: define void @f17({{.*}}, x86_fp80 %X)
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
long double X) {}
// Check for valid coercion. The struct should be passed/returned as i32, not
// as i64 for better code quality.
// rdar://8135035
// CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
struct f18_s0 { int f0; };
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
// Check byval alignment.
// CHECK-LABEL: define void @f19(%struct.s19* byval(%struct.s19) align 16 %x)
struct s19 {
long double a;
};
void f19(struct s19 x) {}
// CHECK-LABEL: define void @f20(%struct.s20* byval(%struct.s20) align 32 %x)
struct __attribute__((aligned(32))) s20 {
int x;
int y;
};
void f20(struct s20 x) {}
struct StringRef {
long x;
const char *Ptr;
};
// rdar://7375902
// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
const char *f21(struct StringRef S) { return S.x+S.Ptr; }
// PR7567
typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
void f22(L x, L y) { }
// CHECK: @f22
// CHECK: %x = alloca{{.*}}, align 16
// CHECK: %y = alloca{{.*}}, align 16
// PR7714
struct f23S {
short f0;
unsigned f1;
int f2;
};
void f23(int A, struct f23S B) {
// CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
}
struct f24s { long a; int b; };
struct f23S f24(struct f23S *X, struct f24s *P2) {
return *X;
// CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
}
// rdar://8248065
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 f25(v4f32 X) {
// CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
// CHECK-NOT: alloca
// CHECK: alloca <4 x float>
// CHECK-NOT: alloca
// CHECK: store <4 x float> %X, <4 x float>*
// CHECK-NOT: store
// CHECK: ret <4 x float>
return X+X;
}
struct foo26 {
int *X;
float *Y;
};
struct foo26 f26(struct foo26 *P) {
// CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
return *P;
}
struct v4f32wrapper {
v4f32 v;
};
struct v4f32wrapper f27(struct v4f32wrapper X) {
// CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
return X;
}
// PR22563 - We should unwrap simple structs and arrays to pass
// and return them in the appropriate vector registers if possible.
typedef float v8f32 __attribute__((__vector_size__(32)));
struct v8f32wrapper {
v8f32 v;
};
struct v8f32wrapper f27a(struct v8f32wrapper X) {
// AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
return X;
}
struct v8f32wrapper_wrapper {
v8f32 v[1];
};
struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
// AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
return X;
}
// rdar://5711709
struct f28c {
double x;
int y;
};
void f28(struct f28c C) {
// CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
}
struct f29a {
struct c {
double x;
int y;
} x[1];
};
void f29a(struct f29a A) {
// CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
}
// rdar://8249586
struct S0 { char f0[8]; char f2; char f3; char f4; };
void f30(struct S0 p_4) {
// CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
}
// Pass the third element as a float when followed by tail padding.
// rdar://8251384
struct f31foo { float a, b, c; };
float f31(struct f31foo X) {
// CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
return X.c;
}
_Complex float f32(_Complex float A, _Complex float B) {
// rdar://6379669
// CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
return A+B;
}
// rdar://8357396
struct f33s { long x; float c,d; };
void f33(va_list X) {
va_arg(X, struct f33s);
}
typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));
// rdar://8359248
// CHECK-LABEL: define double @f34(double %arg.coerce)
v1i64 f34(v1i64 arg) { return arg; }
// rdar://8358475
// CHECK-LABEL: define double @f35(double %arg.coerce)
typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
v1i64_2 f35(v1i64_2 arg) { return arg+arg; }
// rdar://9122143
// CHECK: declare void @func(%struct._str* byval(%struct._str) align 16)
typedef struct _str {
union {
long double a;
long c;
};
} str;
void func(str s);
str ss;
void f9122143()
{
func(ss);
}
// CHECK-LABEL: define double @f36(double %arg.coerce)
typedef unsigned v2i32 __attribute((__vector_size__(8)));
v2i32 f36(v2i32 arg) { return arg; }
// AVX: declare void @f38(<8 x float>)
// AVX: declare void @f37(<8 x float>)
// SSE: declare void @f38(%struct.s256* byval(%struct.s256) align 32)
// SSE: declare void @f37(<8 x float>* byval(<8 x float>) align 32)
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef struct {
__m256 m;
} s256;
s256 x38;
__m256 x37;
void f38(s256 x);
void f37(__m256 x);
void f39() { f38(x38); f37(x37); }
// The two next tests make sure that the struct below is passed
// in the same way regardless of avx being used
// CHECK: declare void @func40(%struct.t128* byval(%struct.t128) align 16)
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef struct t128 {
__m128 m;
__m128 n;
} two128;
extern void func40(two128 s);
void func41(two128 s) {
func40(s);
}
// CHECK: declare void @func42(%struct.t128_2* byval(%struct.t128_2) align 16)
typedef struct xxx {
__m128 array[2];
} Atwo128;
typedef struct t128_2 {
Atwo128 x;
} SA;
extern void func42(SA s);
void func43(SA s) {
func42(s);
}
// CHECK-LABEL: define i32 @f44
// CHECK: ptrtoint
// CHECK-NEXT: add i64 %{{[0-9]+}}, 31
// CHECK-NEXT: and i64 %{{[0-9]+}}, -32
// CHECK-NEXT: inttoptr
typedef int T44 __attribute((vector_size(32)));
struct s44 { T44 x; int y; };
int f44(int i, ...) {
__builtin_va_list ap;
__builtin_va_start(ap, i);
struct s44 s = __builtin_va_arg(ap, struct s44);
__builtin_va_end(ap);
return s.y;
}
// Text that vec3 returns the correct LLVM IR type.
// AVX-LABEL: define i32 @foo(<3 x i64> %X)
typedef long long3 __attribute((ext_vector_type(3)));
int foo(long3 X)
{
return 0;
}
// Make sure we don't use a varargs convention for a function without a
// prototype where AVX types are involved.
// AVX: @test45
// AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
int f45();
__m256 x45;
void test45() { f45(x45); }
// Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
// lowering can't handle this case correctly because it runs after legalization.
// CHECK: @test46
// CHECK: call void @f46({{.*}}<2 x float>* byval(<2 x float>) align 8 {{.*}}, <2 x float>* byval(<2 x float>) align 8 {{.*}})
typedef float v46 __attribute((vector_size(8)));
void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
// Check that we pass the struct below without using byval, which helps out
// codegen.
//
// CHECK: @test47
// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
struct s47 { unsigned a; };
void f47(int,int,int,int,int,int,struct s47);
void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }
// rdar://12723368
// In the following example, there are holes in T4 at the 3rd byte and the 4th
// byte, however, T2 does not have those holes. T4 is chosen to be the
// representing type for union T1, but we can't use load or store of T4 since
// it will skip the 3rd byte and the 4th byte.
// In general, Since we don't accurately represent the data fields of a union,
// do not use load or store of the representing llvm type for the union.
typedef _Complex int T2;
typedef _Complex char T5;
typedef _Complex int T7;
typedef struct T4 { T5 field0; T7 field1; } T4;
typedef union T1 { T2 field0; T4 field1; } T1;
extern T1 T1_retval;
T1 test48(void) {
// CHECK: @test48
// CHECK: memcpy
// CHECK: memcpy
return T1_retval;
}
void test49_helper(double, ...);
void test49(double d, double e) {
test49_helper(d, e);
}
// CHECK-LABEL: define void @test49(
// CHECK: [[T0:%.*]] = load double, double*
// CHECK-NEXT: [[T1:%.*]] = load double, double*
// CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]])
void test50_helper();
void test50(double d, double e) {
test50_helper(d, e);
}
// CHECK-LABEL: define void @test50(
// CHECK: [[T0:%.*]] = load double, double*
// CHECK-NEXT: [[T1:%.*]] = load double, double*
// CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])
struct test51_s { __uint128_t intval; };
void test51(struct test51_s *s, __builtin_va_list argList) {
*s = __builtin_va_arg(argList, struct test51_s);
}
// CHECK-LABEL: define void @test51
// CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
// CHECK: br i1
// CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
// CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
// CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
// CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[CASTED_TMP_ADDR]], i8* align 8 [[RECASTED_VALUE_ADDR]], i64 16, i1 false)
// CHECK-NEXT: add i32 {{.*}}, 16
// CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
// CHECK-NEXT: br label
void test52_helper(int, ...);
__m256 x52;
void test52() {
test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
void test53(__m256 *m, __builtin_va_list argList) {
*m = __builtin_va_arg(argList, __m256);
}
// AVX-LABEL: define void @test53
// AVX-NOT: br i1
// AVX: ret void
void test54_helper(__m256, ...);
__m256 x54;
void test54() {
test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval({ double, double }) align 8 {{%[^)]+}})
typedef float __m512 __attribute__ ((__vector_size__ (64)));
typedef struct {
__m512 m;
} s512;
s512 x55;
__m512 x56;
// On AVX512, aggregates which contain a __m512 type are classified as SSE/SSEUP
// as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1
//
// AVX512: declare void @f55(<16 x float>)
// NO-AVX512: declare void @f55(%struct.s512* byval(%struct.s512) align 64)
void f55(s512 x);
// __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval(<16 x float>) align 64)
void f56(__m512 x);
void f57() { f55(x55); f56(x56); }
// Like for __m128 on AVX, check that the struct below is passed
// in the same way regardless of AVX512 being used.
//
// CHECK: declare void @f58(%struct.t256* byval(%struct.t256) align 32)
typedef struct t256 {
__m256 m;
__m256 n;
} two256;
extern void f58(two256 s);
void f59(two256 s) {
f58(s);
}
// CHECK: declare void @f60(%struct.sat256* byval(%struct.sat256) align 32)
typedef struct at256 {
__m256 array[2];
} Atwo256;
typedef struct sat256 {
Atwo256 x;
} SAtwo256;
extern void f60(SAtwo256 s);
void f61(SAtwo256 s) {
f60(s);
}
// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
void f62_helper(int, ...);
__m512 x62;
void f62() {
f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// Like for __m256 on AVX, we always pass __m512 in memory, and don't
// need to use the register save area.
//
// AVX512-LABEL: define void @f63
// AVX512-NOT: br i1
// AVX512: ret void
void f63(__m512 *m, __builtin_va_list argList) {
*m = __builtin_va_arg(argList, __m512);
}
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval({ double, double }) align 8 {{%[^)]+}})
void f64_helper(__m512, ...);
__m512 x64;
void f64() {
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
struct t65 {
__m256 m;
int : 0;
};
// SSE-LABEL: @f65(%struct.t65* byval(%struct.t65) align 32 %{{[^,)]+}})
// AVX: @f65(<8 x float> %{{[^,)]+}})
void f65(struct t65 a0) {
}