fceil.ll
4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.ceil.f32(float) nounwind readnone
declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f32:
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_f32(float addrspace(1)* %out, float %x) {
%y = call float @llvm.ceil.f32(float %x) nounwind readnone
store float %y, float addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fceil_v2f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
%y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
store <2 x float> %y, <2 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fceil_v3f32:
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
%y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
store <3 x float> %y, <3 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fceil_v4f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
%y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
store <4 x float> %y, <4 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fceil_v8f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
%y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
store <8 x float> %y, <8 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fceil_v16f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
define amdgpu_kernel void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
%y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
store <16 x float> %y, <16 x float> addrspace(1)* %out
ret void
}