@@ -90,74 +90,9 @@ define <8 x i16> @udiv_exact_v8i16_by_255(<8 x i16> %x) {
9090define <16 x i16 > @udiv_exact_v16i16_by_255 (<16 x i16 > %x ) {
9191; CHECK-LABEL: udiv_exact_v16i16_by_255:
9292; CHECK: // %bb.0:
93- ; CHECK-NEXT: umov w9, v0.h[0]
94- ; CHECK-NEXT: umov w11, v1.h[0]
95- ; CHECK-NEXT: mov w8, #258 // =0x102
96- ; CHECK-NEXT: movk w8, #257, lsl #16
97- ; CHECK-NEXT: umov w10, v0.h[1]
98- ; CHECK-NEXT: umov w12, v1.h[1]
99- ; CHECK-NEXT: umov w13, v0.h[2]
100- ; CHECK-NEXT: umov w14, v1.h[2]
101- ; CHECK-NEXT: umull x9, w9, w8
102- ; CHECK-NEXT: umull x11, w11, w8
103- ; CHECK-NEXT: umull x10, w10, w8
104- ; CHECK-NEXT: umull x12, w12, w8
105- ; CHECK-NEXT: lsr x9, x9, #32
106- ; CHECK-NEXT: lsr x11, x11, #32
107- ; CHECK-NEXT: umull x13, w13, w8
108- ; CHECK-NEXT: fmov s2, w9
109- ; CHECK-NEXT: lsr x10, x10, #32
110- ; CHECK-NEXT: umov w9, v0.h[3]
111- ; CHECK-NEXT: fmov s3, w11
112- ; CHECK-NEXT: lsr x12, x12, #32
113- ; CHECK-NEXT: umull x11, w14, w8
114- ; CHECK-NEXT: umov w14, v1.h[3]
115- ; CHECK-NEXT: mov v2.h[1], w10
116- ; CHECK-NEXT: lsr x10, x13, #32
117- ; CHECK-NEXT: mov v3.h[1], w12
118- ; CHECK-NEXT: umov w12, v0.h[4]
119- ; CHECK-NEXT: lsr x11, x11, #32
120- ; CHECK-NEXT: umull x9, w9, w8
121- ; CHECK-NEXT: umull x13, w14, w8
122- ; CHECK-NEXT: umov w14, v1.h[4]
123- ; CHECK-NEXT: mov v2.h[2], w10
124- ; CHECK-NEXT: mov v3.h[2], w11
125- ; CHECK-NEXT: lsr x9, x9, #32
126- ; CHECK-NEXT: umull x10, w12, w8
127- ; CHECK-NEXT: lsr x12, x13, #32
128- ; CHECK-NEXT: umov w11, v0.h[5]
129- ; CHECK-NEXT: umull x13, w14, w8
130- ; CHECK-NEXT: umov w14, v1.h[5]
131- ; CHECK-NEXT: mov v2.h[3], w9
132- ; CHECK-NEXT: lsr x9, x10, #32
133- ; CHECK-NEXT: mov v3.h[3], w12
134- ; CHECK-NEXT: lsr x12, x13, #32
135- ; CHECK-NEXT: umull x10, w11, w8
136- ; CHECK-NEXT: umov w11, v0.h[6]
137- ; CHECK-NEXT: umull x13, w14, w8
138- ; CHECK-NEXT: umov w14, v1.h[6]
139- ; CHECK-NEXT: mov v2.h[4], w9
140- ; CHECK-NEXT: umov w9, v0.h[7]
141- ; CHECK-NEXT: mov v3.h[4], w12
142- ; CHECK-NEXT: lsr x10, x10, #32
143- ; CHECK-NEXT: lsr x12, x13, #32
144- ; CHECK-NEXT: umull x11, w11, w8
145- ; CHECK-NEXT: umull x13, w14, w8
146- ; CHECK-NEXT: umov w14, v1.h[7]
147- ; CHECK-NEXT: mov v2.h[5], w10
148- ; CHECK-NEXT: umull x9, w9, w8
149- ; CHECK-NEXT: mov v3.h[5], w12
150- ; CHECK-NEXT: lsr x10, x11, #32
151- ; CHECK-NEXT: lsr x11, x13, #32
152- ; CHECK-NEXT: umull x8, w14, w8
153- ; CHECK-NEXT: lsr x9, x9, #32
154- ; CHECK-NEXT: mov v2.h[6], w10
155- ; CHECK-NEXT: mov v3.h[6], w11
156- ; CHECK-NEXT: lsr x8, x8, #32
157- ; CHECK-NEXT: mov v2.h[7], w9
158- ; CHECK-NEXT: mov v3.h[7], w8
159- ; CHECK-NEXT: mov v0.16b, v2.16b
160- ; CHECK-NEXT: mov v1.16b, v3.16b
93+ ; CHECK-NEXT: mvni v2.8h, #1, lsl #8
94+ ; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
95+ ; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
16196; CHECK-NEXT: ret
16297 %div = udiv exact <16 x i16 > %x , splat (i16 255 )
16398 ret <16 x i16 > %div
0 commit comments