; multiblock-loops.ll
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

; test13: the only memory access is a store to the noalias argument %P, placed
; in a block that branches back to itself on a constant-false condition.
; The expected output keeps that store.
define void @test13(i32* noalias %P) {
; CHECK-LABEL: @test13(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR:%.*]]
; CHECK:       for:
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for
for:
  store i32 0, i32* %P
  br i1 false, label %for, label %end
end:
  ret void
}


; test14: same loop shape as test13, but entry also stores to %P before
; branching into the loop. Nothing reads %P between the two stores, and the
; store in %for writes the same i32 location, so the expected output drops
; the store of 1 from entry and keeps the store of 0 in %for.
define void @test14(i32* noalias %P) {
; CHECK-LABEL: @test14(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR:%.*]]
; CHECK:       for:
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  ; Overwritten by the store in %for before any read; absent from the
  ; expected output.
  store i32 1, i32* %P
  br label %for
for:
  store i32 0, i32* %P
  br i1 false, label %for, label %end
end:
  ret void
}

; test18: %P2 is %P viewed as i8*, so both i8 stores in %for write the byte at
; address %P, inside the i32 that entry initialises to 0. An i32 load of %P
; sits between the two i8 stores, so it reads the byte written by the first
; i8 store together with the remaining bytes written in entry.
; The expected output keeps all three stores and the load.
define void @test18(i32* noalias %P) {
; CHECK-LABEL: @test18(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT:    store i32 0, i32* [[P]]
; CHECK-NEXT:    br label [[FOR:%.*]]
; CHECK:       for:
; CHECK-NEXT:    store i8 1, i8* [[P2]]
; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
; CHECK-NEXT:    store i8 2, i8* [[P2]]
; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %P2 = bitcast i32* %P to i8*
  store i32 0, i32* %P
  br label %for
for:
  store i8 1, i8* %P2
  ; Full-width read between the two narrow stores.
  %x = load i32, i32* %P
  store i8 2, i8* %P2
  br i1 false, label %for, label %end
end:
  ret void
}

; test21: entry zeroes 28 bytes starting at element 1 of %P (byte offset 4)
; with memset; the loop then stores an i32 to that same element, i.e. to the
; first 4 bytes of the memset range. The expected output shortens the memset
; from the front: it starts 4 bytes later (a new i8 GEP of %p3 by 4) and its
; length drops from 28 to 24. The i32 store in %for is kept.
;
; The input uses the five-operand memset form declared at the top of the file
; (with an i32 alignment operand); the expected output shows the four-operand
; form with `align 4` on the destination pointer instead.
; NOTE(review): presumably rewritten by intrinsic auto-upgrade when the file is
; parsed -- confirm.
define void @test21(i32* noalias %P) {
; CHECK-LABEL: @test21(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    br label [[FOR:%.*]]
; CHECK:       for:
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
  %p3 = bitcast i32* %arrayidx0 to i8*
  ; Operands: destination, fill byte 0, length 28, alignment 4, not volatile.
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  br label %for
for:
  ; Same address as %arrayidx0, recomputed inside the loop.
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1
  store i32 1, i32* %arrayidx1, align 4
  br i1 false, label %for, label %end
end:
  ret void
}

; test_loop: two nested counted loops over %N.
;   - for.body4.lr.ph (outer loop header) computes %arrayidx = &b[i], stores 0
;     to it, and computes %mul = i * N.
;   - for.body4 (inner loop) accumulates A[j + i*N] * x[j] into the phi %0
;     (result in %add9); it only loads, and only from %A and %x.
;   - for.cond.cleanup3 (outer latch) stores %add9 to %arrayidx.
; %A, %x and %b are all noalias, and %A and %x are additionally readonly.
; The expected output no longer contains the `store i32 0` in for.body4.lr.ph;
; the store of %add9 in for.cond.cleanup3 is kept.
define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %x, i32* noalias nocapture %b) local_unnamed_addr {
; CHECK-LABEL: @test_loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_BODY4_LR_PH_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body4.lr.ph.preheader:
; CHECK-NEXT:    br label [[FOR_BODY4_LR_PH:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body4.lr.ph:
; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
; CHECK:       for.body4:
; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[ADD9:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT:    [[J_026:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[J_026]], [[MUL]]
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[J_026]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[ADD9]] = add nsw i32 [[MUL7]], [[TMP0]]
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_026]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
; CHECK:       for.cond.cleanup3:
; CHECK-NEXT:    store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC11]] = add nuw nsw i32 [[I_028]], 1
; CHECK-NEXT:    [[EXITCOND29:%.*]] = icmp eq i32 [[INC11]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND29]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_LR_PH]]
;
entry:
  %cmp27 = icmp sgt i32 %N, 0
  br i1 %cmp27, label %for.body4.lr.ph.preheader, label %for.cond.cleanup

for.body4.lr.ph.preheader:                        ; preds = %entry
  br label %for.body4.lr.ph

for.cond.cleanup:                                 ; preds = %for.cond.cleanup3, %entry
  ret void

for.body4.lr.ph:                                  ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
  %i.028 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.028
  ; Zero-initialisation of %arrayidx; this is the store that is absent from
  ; the expected output.
  store i32 0, i32* %arrayidx, align 4
  %mul = mul nsw i32 %i.028, %N
  br label %for.body4

for.body4:                                        ; preds = %for.body4, %for.body4.lr.ph
  %0 = phi i32 [ 0, %for.body4.lr.ph ], [ %add9, %for.body4 ]
  %j.026 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
  %add = add nsw i32 %j.026, %mul
  %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add
  %1 = load i32, i32* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds i32, i32* %x, i32 %j.026
  %2 = load i32, i32* %arrayidx6, align 4
  %mul7 = mul nsw i32 %2, %1
  %add9 = add nsw i32 %mul7, %0
  %inc = add nuw nsw i32 %j.026, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4

for.cond.cleanup3:                                ; preds = %for.body4
  ; Final value for %arrayidx; overwrites the zero stored in for.body4.lr.ph.
  store i32 %add9, i32* %arrayidx, align 4
  %inc11 = add nuw nsw i32 %i.028, 1
  %exitcond29 = icmp eq i32 %inc11, %N
  br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
}

; Opaque branch condition used by the loop_multiple_def_uses* functions below.
; Declared readnone.
declare i1 @cond() readnone

; loop_multiple_def_uses: %P is stored in the loop header, stored again and
; then loaded in the loop body, and stored once more after the loop exits.
; On either path out of for.header the next access to %P is a full i32 store
; (in for.body or in end).
; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi.
; The expected output therefore still contains all three stores.
define void @loop_multiple_def_uses(i32* noalias %P) {
; CHECK-LABEL: @loop_multiple_def_uses(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
; CHECK:       for.header:
; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
; CHECK-NEXT:    br label [[FOR_HEADER]]
; CHECK:       end:
; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  br label %for.header

for.header:
  store i32 1, i32* %P, align 4
  %c1 = call i1 @cond()
  br i1 %c1, label %for.body, label %end

for.body:
  store i32 1, i32* %P, align 4
  %lv = load i32, i32* %P
  br label %for.header

end:
  store i32 3, i32* %P, align 4
  ret void
}

; We cannot eliminate the store in for.header, as it is only partially
; overwritten in for.body and read afterwards.
; for.body writes a single i8 through %c (a bitcast of %p) and then loads the
; whole i32 at %p, so that load still reads bytes written by the i32 store in
; for.header. The expected output keeps every store.
define void @loop_multiple_def_uses_partial_write(i32* noalias %p) {
; CHECK-LABEL: @loop_multiple_def_uses_partial_write(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
; CHECK:       for.header:
; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[C:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT:    store i8 1, i8* [[C]], align 4
; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
; CHECK-NEXT:    br label [[FOR_HEADER]]
; CHECK:       end:
; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  br label %for.header

for.header:
  store i32 1239491, i32* %p, align 4
  %c1 = call i1 @cond()
  br i1 %c1, label %for.body, label %end

for.body:
  %c = bitcast i32* %p to i8*
  ; Narrow (one-byte) write into the i32 stored by for.header.
  store i8 1, i8* %c, align 4
  %lv = load i32, i32* %p
  br label %for.header

end:
  store i32 3, i32* %p, align 4
  ret void
}

; We cannot eliminate the store in for.header, as the location is not overwritten
; in for.body and read afterwards.
; Unlike the functions above, neither %p nor %q is marked noalias. for.body
; stores through %q, not %p, and then loads %p, so that load may read the
; value stored in for.header. The expected output keeps every store.
define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) {
; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
; CHECK:       for.header:
; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
; CHECK-NEXT:    br label [[FOR_HEADER]]
; CHECK:       end:
; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  br label %for.header

for.header:
  store i32 1239491, i32* %p, align 4
  %c1 = call i1 @cond()
  br i1 %c1, label %for.body, label %end

for.body:
  ; Write goes through %q; whether it touches %p's location is unknown.
  store i32 1, i32* %q, align 4
  %lv = load i32, i32* %p
  br label %for.header

end:
  store i32 3, i32* %p, align 4
  ret void
}

; Struct of two i32 fields, used only by @widget below.
%struct.hoge = type { i32, i32 }

; External global holding a pointer to a %struct.hoge.
@global = external local_unnamed_addr global %struct.hoge*, align 8

; widget: bb copies 64 bytes from an undef source pointer into %tmp and then
; enters bb1, whose only successor is itself (the function has no ret).
; Each iteration loads the pointer in @global twice and, through a GEP with an
; undef element index selecting field 1, stores 0 via the first loaded pointer
; and then 10 via the second.
; The expected output is identical to the input: the memcpy and both stores
; are kept.
; NOTE(review): the purpose of this case is not stated in the file (it looks
; like a reduced regression test) -- confirm against the commit history.
define void @widget(i8* %tmp) {
; CHECK-LABEL: @widget(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP:%.*]], i8* nonnull align 16 undef, i64 64, i1 false)
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[TMP2:%.*]] = load %struct.hoge*, %struct.hoge** @global, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[TMP2]], i64 undef, i32 1
; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load %struct.hoge*, %struct.hoge** @global, align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[TMP4]], i64 undef, i32 1
; CHECK-NEXT:    store i32 10, i32* [[TMP5]], align 4
; CHECK-NEXT:    br label [[BB1]]
;
bb:
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %tmp, i8* nonnull align 16 undef, i64 64, i1 false)
  br label %bb1

bb1:                                              ; preds = %bb1, %bb
  %tmp2 = load %struct.hoge*, %struct.hoge** @global, align 8
  %tmp3 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp2, i64 undef, i32 1
  store i32 0, i32* %tmp3, align 4
  ; @global is loaded a second time here instead of reusing %tmp2.
  %tmp4 = load %struct.hoge*, %struct.hoge** @global, align 8
  %tmp5 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp4, i64 undef, i32 1
  store i32 10, i32* %tmp5, align 4
  br label %bb1
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)