; sext-ind-var.ll
; Test driver: opt applies the loop-reduce pass to this file, prints textual
; IR (-S), and FileCheck compares that output with the directives embedded in
; the functions below.
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; The data layout string and triple below apply to every function in this
; module; the triple names a 64-bit NVPTX target.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; LSR previously could not generate a float* induction variable in
; these cases because scalar evolution did not propagate nsw from an
; instruction to its SCEV, which prevented distributing the sext into
; the corresponding addrec.
; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i + offset];
;
define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
; Accumulates the floats loaded from %input at element index (%i + %offset)
; while %i counts up from 0 in steps of 1, and returns the final sum.
;
; Expected opt output, as encoded by the directives below: a sext of %offset
; to i64 appears ahead of the loop label; after the label there is both a
; float* phi and an i32 phi (accepted in either order); no sext follows them.
; CHECK-LABEL: @testadd
; CHECK: sext i32 %offset to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext
entry:
; entry branches unconditionally into the loop, so the body executes once
; before %exitcond is evaluated for the first time.
br label %loop
loop:
; %i is the 32-bit counter (0 on entry); %sum is the running total (0.0 on entry).
%i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
%sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
; The index is computed in i32 with nuw and nsw, then sign-extended to i64
; before it is used as the getelementptr index.
%index32 = add nuw nsw i32 %i, %offset
%index64 = sext i32 %index32 to i64
%ptr = getelementptr inbounds float, float* %input, i64 %index64
%addend = load float, float* %ptr, align 4
%nextsum = fadd float %sum, %addend
; Advance the counter; the loop exits once the incremented value equals
; %numIterations.
%nexti = add nuw nsw i32 %i, 1
%exitcond = icmp eq i32 %nexti, %numIterations
br i1 %exitcond, label %exit, label %loop
exit:
; The returned value is the sum including the last loaded element.
ret float %nextsum
}
; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i - offset];
;
define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
; Accumulates the floats loaded from %input at element index (%i - %offset)
; while %i counts up from 0 in steps of 1, and returns the final sum.
;
; Expected opt output, as encoded by the directives below: a sext of %offset
; to i64 appears ahead of the loop label; after the label there is both a
; float* phi and an i32 phi (accepted in either order); no sext follows them.
; CHECK-LABEL: @testsub
; CHECK: sext i32 %offset to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext
entry:
; entry branches unconditionally into the loop, so the body executes once
; before %exitcond is evaluated for the first time.
br label %loop
loop:
; %i is the 32-bit counter (0 on entry); %sum is the running total (0.0 on entry).
%i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
%sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
; The index is computed in i32 with nuw and nsw, then sign-extended to i64
; before it is used as the getelementptr index.
; NOTE(review): %i is 0 on the first iteration, so the nuw flag on this sub
; yields poison unless %offset is 0 - confirm the flag is intentional here
; and not simply carried over from the add variant.
%index32 = sub nuw nsw i32 %i, %offset
%index64 = sext i32 %index32 to i64
%ptr = getelementptr inbounds float, float* %input, i64 %index64
%addend = load float, float* %ptr, align 4
%nextsum = fadd float %sum, %addend
; Advance the counter; the loop exits once the incremented value equals
; %numIterations.
%nexti = add nuw nsw i32 %i, 1
%exitcond = icmp eq i32 %nexti, %numIterations
br i1 %exitcond, label %exit, label %loop
exit:
; The returned value is the sum including the last loaded element.
ret float %nextsum
}
; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i * stride];
;
define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
; Accumulates the floats loaded from %input at element index (%i * %stride)
; while %i counts up from 0 in steps of 1, and returns the final sum.
;
; Expected opt output, as encoded by the directives below: a sext of %stride
; to i64 appears ahead of the loop label; after the label there is both a
; float* phi and an i32 phi (accepted in either order); no sext follows them.
; CHECK-LABEL: @testmul
; CHECK: sext i32 %stride to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext
entry:
; entry branches unconditionally into the loop, so the body executes once
; before %exitcond is evaluated for the first time.
br label %loop
loop:
; %i is the 32-bit counter (0 on entry); %sum is the running total (0.0 on entry).
%i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
%sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
; The index is computed in i32 with nuw and nsw, then sign-extended to i64
; before it is used as the getelementptr index.
%index32 = mul nuw nsw i32 %i, %stride
%index64 = sext i32 %index32 to i64
%ptr = getelementptr inbounds float, float* %input, i64 %index64
%addend = load float, float* %ptr, align 4
%nextsum = fadd float %sum, %addend
; Advance the counter; the loop exits once the incremented value equals
; %numIterations.
%nexti = add nuw nsw i32 %i, 1
%exitcond = icmp eq i32 %nexti, %numIterations
br i1 %exitcond, label %exit, label %loop
exit:
; The returned value is the sum including the last loaded element.
ret float %nextsum
}
; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[3 * (i << 7)];
;
; The multiplication by 3 is to make the address calculation expensive
; enough to force the introduction of a pointer induction variable.
define float @testshl(float* %input, i32 %numIterations) {
; Accumulates the floats loaded from %input at element index 3 * (%i << 7)
; while %i counts up from 0 in steps of 1, and returns the final sum.
;
; Expected opt output, as encoded by the directives below: after the loop
; label there is both a float* phi and an i32 phi (accepted in either order),
; and no sext follows them.
; CHECK-LABEL: @testshl
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext
entry:
; entry branches unconditionally into the loop, so the body executes once
; before %exitcond is evaluated for the first time.
br label %loop
loop:
; %i is the 32-bit counter (0 on entry); %sum is the running total (0.0 on entry).
%i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
%sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
; The index is built in i32 in two steps, shift left by 7 then multiply by 3,
; both carrying nuw and nsw; the product is sign-extended to i64 before it is
; used as the getelementptr index.
%index32 = shl nuw nsw i32 %i, 7
%index32mul = mul nuw nsw i32 %index32, 3
%index64 = sext i32 %index32mul to i64
%ptr = getelementptr inbounds float, float* %input, i64 %index64
%addend = load float, float* %ptr, align 4
%nextsum = fadd float %sum, %addend
; Advance the counter; the loop exits once the incremented value equals
; %numIterations.
%nexti = add nuw nsw i32 %i, 1
%exitcond = icmp eq i32 %nexti, %numIterations
br i1 %exitcond, label %exit, label %loop
exit:
; The returned value is the sum including the last loaded element.
ret float %nextsum
}