Skip to content

Commit 383bbe3

Browse files
[DAGCombiner] Allow promoted constants when lowering vector UDIV exacts
1 parent 503714c commit 383bbe3

File tree

2 files changed

+8
-71
lines changed

2 files changed

+8
-71
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6403,7 +6403,6 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64036403
const SDLoc &dl, SelectionDAG &DAG,
64046404
SmallVectorImpl<SDNode *> &Created) {
64056405
EVT VT = N->getValueType(0);
6406-
EVT SVT = VT.getScalarType();
64076406
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
64086407
EVT ShSVT = ShVT.getScalarType();
64096408

@@ -6413,6 +6412,8 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64136412
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
64146413
if (C->isZero())
64156414
return false;
6415+
6416+
EVT CT = C->getValueType(0);
64166417
APInt Divisor = C->getAPIntValue();
64176418
unsigned Shift = Divisor.countr_zero();
64186419
if (Shift) {
@@ -6422,14 +6423,15 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64226423
// Calculate the multiplicative inverse modulo BW.
64236424
APInt Factor = Divisor.multiplicativeInverse();
64246425
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6425-
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6426+
Factors.push_back(DAG.getConstant(Factor, dl, CT));
64266427
return true;
64276428
};
64286429

64296430
SDValue Op1 = N->getOperand(1);
64306431

64316432
// Collect all magic values from the build vector.
6432-
if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6433+
if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6434+
/*AllowTruncation=*/true))
64336435
return SDValue();
64346436

64356437
SDValue Shift, Factor;

llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll

Lines changed: 3 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -53,74 +53,9 @@ define <8 x i16> @udiv_exact_v8i16_by_255(<8 x i16> %x) {
5353
define <16 x i16> @udiv_exact_v16i16_by_255(<16 x i16> %x) {
5454
; CHECK-LABEL: udiv_exact_v16i16_by_255:
5555
; CHECK: // %bb.0:
56-
; CHECK-NEXT: umov w9, v0.h[0]
57-
; CHECK-NEXT: umov w11, v1.h[0]
58-
; CHECK-NEXT: mov w8, #258 // =0x102
59-
; CHECK-NEXT: movk w8, #257, lsl #16
60-
; CHECK-NEXT: umov w10, v0.h[1]
61-
; CHECK-NEXT: umov w12, v1.h[1]
62-
; CHECK-NEXT: umov w13, v0.h[2]
63-
; CHECK-NEXT: umov w14, v1.h[2]
64-
; CHECK-NEXT: umull x9, w9, w8
65-
; CHECK-NEXT: umull x11, w11, w8
66-
; CHECK-NEXT: umull x10, w10, w8
67-
; CHECK-NEXT: umull x12, w12, w8
68-
; CHECK-NEXT: lsr x9, x9, #32
69-
; CHECK-NEXT: lsr x11, x11, #32
70-
; CHECK-NEXT: umull x13, w13, w8
71-
; CHECK-NEXT: fmov s2, w9
72-
; CHECK-NEXT: lsr x10, x10, #32
73-
; CHECK-NEXT: umov w9, v0.h[3]
74-
; CHECK-NEXT: fmov s3, w11
75-
; CHECK-NEXT: lsr x12, x12, #32
76-
; CHECK-NEXT: umull x11, w14, w8
77-
; CHECK-NEXT: umov w14, v1.h[3]
78-
; CHECK-NEXT: mov v2.h[1], w10
79-
; CHECK-NEXT: lsr x10, x13, #32
80-
; CHECK-NEXT: mov v3.h[1], w12
81-
; CHECK-NEXT: umov w12, v0.h[4]
82-
; CHECK-NEXT: lsr x11, x11, #32
83-
; CHECK-NEXT: umull x9, w9, w8
84-
; CHECK-NEXT: umull x13, w14, w8
85-
; CHECK-NEXT: umov w14, v1.h[4]
86-
; CHECK-NEXT: mov v2.h[2], w10
87-
; CHECK-NEXT: mov v3.h[2], w11
88-
; CHECK-NEXT: lsr x9, x9, #32
89-
; CHECK-NEXT: umull x10, w12, w8
90-
; CHECK-NEXT: lsr x12, x13, #32
91-
; CHECK-NEXT: umov w11, v0.h[5]
92-
; CHECK-NEXT: umull x13, w14, w8
93-
; CHECK-NEXT: umov w14, v1.h[5]
94-
; CHECK-NEXT: mov v2.h[3], w9
95-
; CHECK-NEXT: lsr x9, x10, #32
96-
; CHECK-NEXT: mov v3.h[3], w12
97-
; CHECK-NEXT: lsr x12, x13, #32
98-
; CHECK-NEXT: umull x10, w11, w8
99-
; CHECK-NEXT: umov w11, v0.h[6]
100-
; CHECK-NEXT: umull x13, w14, w8
101-
; CHECK-NEXT: umov w14, v1.h[6]
102-
; CHECK-NEXT: mov v2.h[4], w9
103-
; CHECK-NEXT: umov w9, v0.h[7]
104-
; CHECK-NEXT: mov v3.h[4], w12
105-
; CHECK-NEXT: lsr x10, x10, #32
106-
; CHECK-NEXT: lsr x12, x13, #32
107-
; CHECK-NEXT: umull x11, w11, w8
108-
; CHECK-NEXT: umull x13, w14, w8
109-
; CHECK-NEXT: umov w14, v1.h[7]
110-
; CHECK-NEXT: mov v2.h[5], w10
111-
; CHECK-NEXT: umull x9, w9, w8
112-
; CHECK-NEXT: mov v3.h[5], w12
113-
; CHECK-NEXT: lsr x10, x11, #32
114-
; CHECK-NEXT: lsr x11, x13, #32
115-
; CHECK-NEXT: umull x8, w14, w8
116-
; CHECK-NEXT: lsr x9, x9, #32
117-
; CHECK-NEXT: mov v2.h[6], w10
118-
; CHECK-NEXT: mov v3.h[6], w11
119-
; CHECK-NEXT: lsr x8, x8, #32
120-
; CHECK-NEXT: mov v2.h[7], w9
121-
; CHECK-NEXT: mov v3.h[7], w8
122-
; CHECK-NEXT: mov v0.16b, v2.16b
123-
; CHECK-NEXT: mov v1.16b, v3.16b
56+
; CHECK-NEXT: mvni v2.8h, #1, lsl #8
57+
; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
58+
; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
12459
; CHECK-NEXT: ret
12560
%div = udiv exact <16 x i16> %x, splat (i16 255)
12661
ret <16 x i16> %div

0 commit comments

Comments (0)