@@ -112,6 +112,69 @@ define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
112112 ret <8 x i16 > %1
113113}
114114
115+ ; fold (usub_sat x, 1) -> sub(x, zext(x != 0)) -- scalar i32 case: unsigned saturating subtract of the constant 1, so an input of 0 stays 0
116+ define i32 @combine_dec_i32 (i32 %a0 ) {
117+ ; CHECK-LABEL: combine_dec_i32:
118+ ; CHECK: # %bb.0:
119+ ; CHECK-NEXT: xorl %eax, %eax
120+ ; CHECK-NEXT: subl $1, %edi
121+ ; CHECK-NEXT: cmovael %edi, %eax
122+ ; CHECK-NEXT: retq
123+ %1 = call i32 @llvm.usub.sat.i32 (i32 %a0 , i32 1 )
124+ ret i32 %1
125+ }
126+
127+ ; fold (usub_sat x, 1) -> add(x, sext(x != 0)) -- <4 x i32> case with a splat(1) subtrahend; sext of a true lane is -1, hence add rather than sub
128+ define <4 x i32 > @combine_dec_v4i32 (<4 x i32 > %a0 ) {
129+ ; SSE2-LABEL: combine_dec_v4i32:
130+ ; SSE2: # %bb.0:
131+ ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
132+ ; SSE2-NEXT: paddd %xmm0, %xmm1
133+ ; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
134+ ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
135+ ; SSE2-NEXT: pand %xmm1, %xmm0
136+ ; SSE2-NEXT: retq
137+ ;
138+ ; SSE41-LABEL: combine_dec_v4i32:
139+ ; SSE41: # %bb.0:
140+ ; SSE41-NEXT: pmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
141+ ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
142+ ; SSE41-NEXT: paddd %xmm1, %xmm0
143+ ; SSE41-NEXT: retq
144+ ;
145+ ; SSE42-LABEL: combine_dec_v4i32:
146+ ; SSE42: # %bb.0:
147+ ; SSE42-NEXT: pmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
148+ ; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
149+ ; SSE42-NEXT: paddd %xmm1, %xmm0
150+ ; SSE42-NEXT: retq
151+ ;
152+ ; AVX1-LABEL: combine_dec_v4i32:
153+ ; AVX1: # %bb.0:
154+ ; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
155+ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
156+ ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
157+ ; AVX1-NEXT: retq
158+ ;
159+ ; AVX2-LABEL: combine_dec_v4i32:
160+ ; AVX2: # %bb.0:
161+ ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
162+ ; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
163+ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
164+ ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
165+ ; AVX2-NEXT: retq
166+ ;
167+ ; AVX512-LABEL: combine_dec_v4i32:
168+ ; AVX512: # %bb.0:
169+ ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
170+ ; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
171+ ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
172+ ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
173+ ; AVX512-NEXT: retq
174+ %1 = call <4 x i32 > @llvm.usub.sat.v4i32 (<4 x i32 > %a0 , <4 x i32 > splat (i32 1 ))
175+ ret <4 x i32 > %1
176+ }
177+
115178; fold (usub_sat x, x) -> 0
116179define i32 @combine_self_i32 (i32 %a0 ) {
117180; CHECK-LABEL: combine_self_i32:
0 commit comments