Skip to content

Commit e12807a

Browse files
[DAGCombiner] Allow promoted constants when lowering vector UDIV exacts
1 parent 2c34770 commit e12807a

File tree

2 files changed

+8
-71
lines changed

2 files changed

+8
-71
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6403,7 +6403,6 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64036403
const SDLoc &dl, SelectionDAG &DAG,
64046404
SmallVectorImpl<SDNode *> &Created) {
64056405
EVT VT = N->getValueType(0);
6406-
EVT SVT = VT.getScalarType();
64076406
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
64086407
EVT ShSVT = ShVT.getScalarType();
64096408

@@ -6413,6 +6412,8 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64136412
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
64146413
if (C->isZero())
64156414
return false;
6415+
6416+
EVT CT = C->getValueType(0);
64166417
APInt Divisor = C->getAPIntValue();
64176418
unsigned Shift = Divisor.countr_zero();
64186419
if (Shift) {
@@ -6422,14 +6423,15 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
64226423
// Calculate the multiplicative inverse modulo 2^BW.
64236424
APInt Factor = Divisor.multiplicativeInverse();
64246425
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6425-
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6426+
Factors.push_back(DAG.getConstant(Factor, dl, CT));
64266427
return true;
64276428
};
64286429

64296430
SDValue Op1 = N->getOperand(1);
64306431

64316432
// Collect all magic values from the build vector.
6432-
if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6433+
if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6434+
/*AllowTruncation=*/true))
64336435
return SDValue();
64346436

64356437
SDValue Shift, Factor;

llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll

Lines changed: 3 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -90,74 +90,9 @@ define <8 x i16> @udiv_exact_v8i16_by_255(<8 x i16> %x) {
9090
define <16 x i16> @udiv_exact_v16i16_by_255(<16 x i16> %x) {
9191
; CHECK-LABEL: udiv_exact_v16i16_by_255:
9292
; CHECK: // %bb.0:
93-
; CHECK-NEXT: umov w9, v0.h[0]
94-
; CHECK-NEXT: umov w11, v1.h[0]
95-
; CHECK-NEXT: mov w8, #258 // =0x102
96-
; CHECK-NEXT: movk w8, #257, lsl #16
97-
; CHECK-NEXT: umov w10, v0.h[1]
98-
; CHECK-NEXT: umov w12, v1.h[1]
99-
; CHECK-NEXT: umov w13, v0.h[2]
100-
; CHECK-NEXT: umov w14, v1.h[2]
101-
; CHECK-NEXT: umull x9, w9, w8
102-
; CHECK-NEXT: umull x11, w11, w8
103-
; CHECK-NEXT: umull x10, w10, w8
104-
; CHECK-NEXT: umull x12, w12, w8
105-
; CHECK-NEXT: lsr x9, x9, #32
106-
; CHECK-NEXT: lsr x11, x11, #32
107-
; CHECK-NEXT: umull x13, w13, w8
108-
; CHECK-NEXT: fmov s2, w9
109-
; CHECK-NEXT: lsr x10, x10, #32
110-
; CHECK-NEXT: umov w9, v0.h[3]
111-
; CHECK-NEXT: fmov s3, w11
112-
; CHECK-NEXT: lsr x12, x12, #32
113-
; CHECK-NEXT: umull x11, w14, w8
114-
; CHECK-NEXT: umov w14, v1.h[3]
115-
; CHECK-NEXT: mov v2.h[1], w10
116-
; CHECK-NEXT: lsr x10, x13, #32
117-
; CHECK-NEXT: mov v3.h[1], w12
118-
; CHECK-NEXT: umov w12, v0.h[4]
119-
; CHECK-NEXT: lsr x11, x11, #32
120-
; CHECK-NEXT: umull x9, w9, w8
121-
; CHECK-NEXT: umull x13, w14, w8
122-
; CHECK-NEXT: umov w14, v1.h[4]
123-
; CHECK-NEXT: mov v2.h[2], w10
124-
; CHECK-NEXT: mov v3.h[2], w11
125-
; CHECK-NEXT: lsr x9, x9, #32
126-
; CHECK-NEXT: umull x10, w12, w8
127-
; CHECK-NEXT: lsr x12, x13, #32
128-
; CHECK-NEXT: umov w11, v0.h[5]
129-
; CHECK-NEXT: umull x13, w14, w8
130-
; CHECK-NEXT: umov w14, v1.h[5]
131-
; CHECK-NEXT: mov v2.h[3], w9
132-
; CHECK-NEXT: lsr x9, x10, #32
133-
; CHECK-NEXT: mov v3.h[3], w12
134-
; CHECK-NEXT: lsr x12, x13, #32
135-
; CHECK-NEXT: umull x10, w11, w8
136-
; CHECK-NEXT: umov w11, v0.h[6]
137-
; CHECK-NEXT: umull x13, w14, w8
138-
; CHECK-NEXT: umov w14, v1.h[6]
139-
; CHECK-NEXT: mov v2.h[4], w9
140-
; CHECK-NEXT: umov w9, v0.h[7]
141-
; CHECK-NEXT: mov v3.h[4], w12
142-
; CHECK-NEXT: lsr x10, x10, #32
143-
; CHECK-NEXT: lsr x12, x13, #32
144-
; CHECK-NEXT: umull x11, w11, w8
145-
; CHECK-NEXT: umull x13, w14, w8
146-
; CHECK-NEXT: umov w14, v1.h[7]
147-
; CHECK-NEXT: mov v2.h[5], w10
148-
; CHECK-NEXT: umull x9, w9, w8
149-
; CHECK-NEXT: mov v3.h[5], w12
150-
; CHECK-NEXT: lsr x10, x11, #32
151-
; CHECK-NEXT: lsr x11, x13, #32
152-
; CHECK-NEXT: umull x8, w14, w8
153-
; CHECK-NEXT: lsr x9, x9, #32
154-
; CHECK-NEXT: mov v2.h[6], w10
155-
; CHECK-NEXT: mov v3.h[6], w11
156-
; CHECK-NEXT: lsr x8, x8, #32
157-
; CHECK-NEXT: mov v2.h[7], w9
158-
; CHECK-NEXT: mov v3.h[7], w8
159-
; CHECK-NEXT: mov v0.16b, v2.16b
160-
; CHECK-NEXT: mov v1.16b, v3.16b
93+
; CHECK-NEXT: mvni v2.8h, #1, lsl #8
94+
; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
95+
; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
16196
; CHECK-NEXT: ret
16297
%div = udiv exact <16 x i16> %x, splat (i16 255)
16398
ret <16 x i16> %div

0 commit comments

Comments
 (0)