Skip to content

Commit 55aff64

Browse files
authored
[PowerPC] fold i128 equality/inequality compares of two loads into a vectorized compare using vcmpequb.p when Altivec is available (#158657)
This patch adds a 16-byte load size to PPCTTIImpl::enableMemCmpExpansion and folds i128 equality/inequality compares of two loads into a vectorized compare using vcmpequb.p when Altivec is available. Rationale: A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops. On Altivec-capable subtargets, we can instead reinterpret the i128 loads as v16i8 vectors and use the Altivec vcmpequb.p instruction to perform a full 128-bit equality check in a single vector compare. Example Result: This transformation replaces memcmp(a, b, 16) with two vector loads and one vector compare instruction.
1 parent 7129d76 commit 55aff64

File tree

6 files changed

+206
-228
lines changed

6 files changed

+206
-228
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15658,6 +15658,123 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
1565815658
ShiftCst);
1565915659
}
1566015660

15661+
// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15662+
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15663+
15664+
auto isValidForConvert = [](SDValue &Operand) {
15665+
if (!Operand.hasOneUse())
15666+
return false;
15667+
15668+
if (Operand.getValueType() != MVT::i128)
15669+
return false;
15670+
15671+
if (Operand.getOpcode() == ISD::Constant)
15672+
return true;
15673+
15674+
auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15675+
if (!LoadNode)
15676+
return false;
15677+
15678+
// If memory operation is volatile, do not perform any
15679+
// optimization or transformation. Volatile operations must be preserved
15680+
// as written to ensure correct program behavior, so we return an empty
15681+
// SDValue to indicate no action.
15682+
15683+
if (LoadNode->isVolatile())
15684+
return false;
15685+
15686+
// Only combine loads if both use the unindexed addressing mode.
15687+
// PowerPC AltiVec/VMX does not support vector loads or stores with
15688+
// pre/post-increment addressing. Indexed modes may imply implicit
15689+
// pointer updates, which are not compatible with AltiVec vector
15690+
// instructions.
15691+
if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15692+
return false;
15693+
15694+
// Only combine loads if both are non-extending loads
15695+
// (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15696+
// ISD::SEXTLOAD) perform zero or sign extension, which may change the
15697+
// loaded value's semantics and are not compatible with vector loads.
15698+
if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15699+
return false;
15700+
15701+
return true;
15702+
};
15703+
15704+
return (isValidForConvert(LHS) && isValidForConvert(RHS));
15705+
}
15706+
15707+
// Rewrites an i128 equality/inequality SETCC into an Altivec vcmpequb.p
// predicate compare on v16i8 values.
//
// \p N must be an ISD::SETCC node whose condition code is SETEQ or SETNE, and
// whose two value operands are each either a constant or a load (callers are
// expected to have checked this with canConvertToVcmpequb()). \p DL is the
// debug location used for every node created here.
//
// Returns a SETCC of N's result type that compares the i32 predicate result
// against zero.
//
// This helper is file-local, so it is given internal linkage like
// canConvertToVcmpequb().
static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
                                               const SDLoc &DL) {
  assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  // The disjunction is parenthesized explicitly: '&&' binds tighter than
  // '||', so without the parentheses the message string would attach only to
  // the SETEQ test and compilers warn about the mixed operators.
  assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
         "CC mus be ISD::SETNE or ISD::SETEQ");

  // Produces the v16i8 form of one compare operand.
  auto getV16i8Load = [&](const SDValue &Operand) {
    // A constant needs no memory access; just reinterpret its bits.
    if (Operand.getOpcode() == ISD::Constant)
      return DAG.getBitcast(MVT::v16i8, Operand);

    assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");

    // Re-issue the load as v16i8, reusing the original load's chain, base
    // pointer and memory operand.
    auto *LoadNode = cast<LoadSDNode>(Operand);
    return DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
                       LoadNode->getBasePtr(), LoadNode->getMemOperand());
  };

  // Following code transforms the DAG (the intrinsic-ID operand of the
  // vcmpequb.p node is omitted below for brevity)
  //   t0: ch,glue = EntryToken
  //   t2: i64,ch = CopyFromReg t0, Register:i64 %0
  //   t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2, undef:i64
  //   t4: i64,ch = CopyFromReg t0, Register:i64 %1
  //   t5: i128,ch = load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
  //   t6: i1 = setcc t3, t5, setne:ch
  //
  //   ---->
  //
  //   t0: ch,glue = EntryToken
  //   t2: i64,ch = CopyFromReg t0, Register:i64 %0
  //   t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2, undef:i64
  //   t4: i64,ch = CopyFromReg t0, Register:i64 %1
  //   t5: v16i8,ch = load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
  //   t6: i32 = llvm.ppc.altivec.vcmpequb.p Constant:i32<2>, t3, t5
  //   t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
  //
  // Or transforms the DAG
  //   t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
  //   t8: i1 =
  //     setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
  //
  //   --->
  //
  //   t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
  //   t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
  //   t7: i32 = llvm.ppc.altivec.vcmpequb.p Constant:i32<2>, t6, t5
  //   t8: i1 = setcc t7, Constant:i32<0>, seteq:ch

  SDValue LHSVec = getV16i8Load(N->getOperand(0));
  SDValue RHSVec = getV16i8Load(N->getOperand(1));

  SDValue IntrID =
      DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
  SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
  SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                                   IntrID, CRSel, LHSVec, RHSVec);

  // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same, so the
  // condition code has to be inverted when testing the result against zero:
  // "a != b" becomes "pred == 0" and "a == b" becomes "pred != 0".
  return DAG.getSetCC(DL, N->getValueType(0), PredResult,
                      DAG.getConstant(0, DL, MVT::i32),
                      CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
}
15777+
1566115778
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
1566215779
DAGCombinerInfo &DCI) const {
1566315780
assert(N->getOpcode() == ISD::SETCC &&
@@ -15684,6 +15801,22 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
1568415801
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
1568515802
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
1568615803
}
15804+
15805+
// Optimization: Fold i128 equality/inequality compares of two loads into a
15806+
// vectorized compare using vcmpequb.p when Altivec is available.
15807+
//
15808+
// Rationale:
15809+
// A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15810+
// On Altivec-capable subtargets, we can instead reinterpret the i128 loads
15811+
// as v16i8 vectors and use the Altivec vcmpequb.p instruction to
15812+
// perform a full 128-bit equality check in a single vector compare.
15813+
//
15814+
// Example Result:
15815+
// This transformation replaces memcmp(a, b, 16) with two vector loads
15816+
// and one vector compare instruction.
15817+
15818+
if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15819+
return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
1568715820
}
1568815821

1568915822
return DAGCombineTruncBoolExt(N, DCI);

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) const {
443443
// Builds the memcmp expansion options for PowerPC: the list of load sizes
// (in bytes) that may be used and the maximum number of loads.
// NOTE(review): IsZeroCmp is not consulted anywhere in the lines shown, so
// the same load sizes apply to equality-only and ordering memcmp expansions.
PPCTTIImpl::TTI::MemCmpExpansionOptions
444444
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
445445
TTI::MemCmpExpansionOptions Options;
446-
Options.LoadSizes = {8, 4, 2, 1};
446+
// With Altivec available, 16-byte loads are offered in addition to the
// scalar sizes; otherwise only 8/4/2/1-byte loads are offered.
if (getST()->hasAltivec())
447+
Options.LoadSizes = {16, 8, 4, 2, 1};
448+
else
449+
Options.LoadSizes = {8, 4, 2, 1};
450+
447451
// The load-count limit comes from the target lowering and depends on OptSize.
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
448452
return Options;
449453
}

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,13 @@ define signext i32 @zeroEqualityTest02(ptr %x, ptr %y) {
3535
define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) {
3636
; CHECK-LABEL: zeroEqualityTest01:
3737
; CHECK: # %bb.0:
38-
; CHECK-NEXT: ld 5, 0(3)
39-
; CHECK-NEXT: ld 6, 0(4)
40-
; CHECK-NEXT: cmpld 5, 6
41-
; CHECK-NEXT: bne 0, .LBB1_2
42-
; CHECK-NEXT: # %bb.1: # %loadbb1
43-
; CHECK-NEXT: ld 5, 8(3)
44-
; CHECK-NEXT: ld 4, 8(4)
45-
; CHECK-NEXT: li 3, 0
46-
; CHECK-NEXT: cmpld 5, 4
47-
; CHECK-NEXT: beqlr 0
48-
; CHECK-NEXT: .LBB1_2: # %res_block
49-
; CHECK-NEXT: li 3, 1
38+
; CHECK-NEXT: lxvd2x 34, 0, 4
39+
; CHECK-NEXT: lxvd2x 35, 0, 3
40+
; CHECK-NEXT: vcmpequb. 2, 3, 2
41+
; CHECK-NEXT: mfocrf 3, 2
42+
; CHECK-NEXT: rlwinm 3, 3, 25, 31, 31
43+
; CHECK-NEXT: cntlzw 3, 3
44+
; CHECK-NEXT: srwi 3, 3, 5
5045
; CHECK-NEXT: blr
5146
%call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 16)
5247
%not.tobool = icmp ne i32 %call, 0
@@ -85,7 +80,7 @@ define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) {
8580
; Validate with > 0
8681
define signext i32 @zeroEqualityTest04() {
8782
; CHECK-LABEL: zeroEqualityTest04:
88-
; CHECK: # %bb.0: # %loadbb
83+
; CHECK: # %bb.0:
8984
; CHECK-NEXT: li 3, 0
9085
; CHECK-NEXT: blr
9186
%call = tail call signext i32 @memcmp(ptr @zeroEqualityTest02.buffer1, ptr @zeroEqualityTest02.buffer2, i64 16)
@@ -97,7 +92,7 @@ define signext i32 @zeroEqualityTest04() {
9792
; Validate with < 0
9893
define signext i32 @zeroEqualityTest05() {
9994
; CHECK-LABEL: zeroEqualityTest05:
100-
; CHECK: # %bb.0: # %loadbb
95+
; CHECK: # %bb.0:
10196
; CHECK-NEXT: li 3, 0
10297
; CHECK-NEXT: blr
10398
%call = tail call signext i32 @memcmp(ptr @zeroEqualityTest03.buffer1, ptr @zeroEqualityTest03.buffer2, i64 16)
@@ -109,7 +104,7 @@ define signext i32 @zeroEqualityTest05() {
109104
; Validate with memcmp()?:
110105
define signext i32 @equalityFoldTwoConstants() {
111106
; CHECK-LABEL: equalityFoldTwoConstants:
112-
; CHECK: # %bb.0: # %loadbb
107+
; CHECK: # %bb.0:
113108
; CHECK-NEXT: li 3, 1
114109
; CHECK-NEXT: blr
115110
%call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr @zeroEqualityTest04.buffer2, i64 16)
@@ -121,24 +116,13 @@ define signext i32 @equalityFoldTwoConstants() {
121116
define signext i32 @equalityFoldOneConstant(ptr %X) {
122117
; CHECK-LABEL: equalityFoldOneConstant:
123118
; CHECK: # %bb.0:
124-
; CHECK-NEXT: li 5, 1
125-
; CHECK-NEXT: ld 4, 0(3)
126-
; CHECK-NEXT: rldic 5, 5, 32, 31
127-
; CHECK-NEXT: cmpld 4, 5
128-
; CHECK-NEXT: bne 0, .LBB6_2
129-
; CHECK-NEXT: # %bb.1: # %loadbb1
130-
; CHECK-NEXT: lis 5, -32768
131-
; CHECK-NEXT: ld 4, 8(3)
132-
; CHECK-NEXT: li 3, 0
133-
; CHECK-NEXT: ori 5, 5, 1
134-
; CHECK-NEXT: rldic 5, 5, 1, 30
135-
; CHECK-NEXT: cmpld 4, 5
136-
; CHECK-NEXT: beq 0, .LBB6_3
137-
; CHECK-NEXT: .LBB6_2: # %res_block
138-
; CHECK-NEXT: li 3, 1
139-
; CHECK-NEXT: .LBB6_3: # %endblock
140-
; CHECK-NEXT: cntlzw 3, 3
141-
; CHECK-NEXT: srwi 3, 3, 5
119+
; CHECK-NEXT: lxvd2x 34, 0, 3
120+
; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
121+
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
122+
; CHECK-NEXT: lxvd2x 35, 0, 3
123+
; CHECK-NEXT: vcmpequb. 2, 2, 3
124+
; CHECK-NEXT: mfocrf 3, 2
125+
; CHECK-NEXT: rlwinm 3, 3, 25, 31, 31
142126
; CHECK-NEXT: blr
143127
%call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr %X, i64 16)
144128
%not.tobool = icmp eq i32 %call, 0

llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll

Lines changed: 20 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -14,110 +14,38 @@
1414
define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
1515
; CHECK-AIX32-P8-LABEL: cmpeq16:
1616
; CHECK-AIX32-P8: # %bb.0: # %entry
17-
; CHECK-AIX32-P8-NEXT: lwz r5, 4(r3)
18-
; CHECK-AIX32-P8-NEXT: lwz r6, 0(r3)
19-
; CHECK-AIX32-P8-NEXT: lwz r7, 4(r4)
20-
; CHECK-AIX32-P8-NEXT: lwz r8, 0(r4)
21-
; CHECK-AIX32-P8-NEXT: xor r6, r6, r8
22-
; CHECK-AIX32-P8-NEXT: xor r5, r5, r7
23-
; CHECK-AIX32-P8-NEXT: or. r5, r5, r6
24-
; CHECK-AIX32-P8-NEXT: bne cr0, L..BB0_2
25-
; CHECK-AIX32-P8-NEXT: # %bb.1: # %loadbb1
26-
; CHECK-AIX32-P8-NEXT: lwz r5, 12(r3)
27-
; CHECK-AIX32-P8-NEXT: lwz r3, 8(r3)
28-
; CHECK-AIX32-P8-NEXT: lwz r6, 12(r4)
29-
; CHECK-AIX32-P8-NEXT: lwz r4, 8(r4)
30-
; CHECK-AIX32-P8-NEXT: xor r3, r3, r4
31-
; CHECK-AIX32-P8-NEXT: xor r4, r5, r6
32-
; CHECK-AIX32-P8-NEXT: or. r3, r4, r3
33-
; CHECK-AIX32-P8-NEXT: li r3, 0
34-
; CHECK-AIX32-P8-NEXT: beq cr0, L..BB0_3
35-
; CHECK-AIX32-P8-NEXT: L..BB0_2: # %res_block
36-
; CHECK-AIX32-P8-NEXT: li r3, 1
37-
; CHECK-AIX32-P8-NEXT: L..BB0_3: # %endblock
38-
; CHECK-AIX32-P8-NEXT: cntlzw r3, r3
39-
; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
17+
; CHECK-AIX32-P8-NEXT: lxvw4x vs34, 0, r4
18+
; CHECK-AIX32-P8-NEXT: lxvw4x vs35, 0, r3
19+
; CHECK-AIX32-P8-NEXT: vcmpequb. v2, v3, v2
20+
; CHECK-AIX32-P8-NEXT: mfocrf r3, 2
21+
; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 25, 31, 31
4022
; CHECK-AIX32-P8-NEXT: blr
4123
;
4224
; CHECK-AIX32-P10-LABEL: cmpeq16:
4325
; CHECK-AIX32-P10: # %bb.0: # %entry
44-
; CHECK-AIX32-P10-NEXT: lwz r5, 4(r3)
45-
; CHECK-AIX32-P10-NEXT: lwz r6, 0(r3)
46-
; CHECK-AIX32-P10-NEXT: lwz r7, 4(r4)
47-
; CHECK-AIX32-P10-NEXT: xor r5, r5, r7
48-
; CHECK-AIX32-P10-NEXT: lwz r8, 0(r4)
49-
; CHECK-AIX32-P10-NEXT: xor r6, r6, r8
50-
; CHECK-AIX32-P10-NEXT: or. r5, r5, r6
51-
; CHECK-AIX32-P10-NEXT: bne cr0, L..BB0_2
52-
; CHECK-AIX32-P10-NEXT: # %bb.1: # %loadbb1
53-
; CHECK-AIX32-P10-NEXT: lwz r5, 12(r3)
54-
; CHECK-AIX32-P10-NEXT: lwz r3, 8(r3)
55-
; CHECK-AIX32-P10-NEXT: lwz r6, 12(r4)
56-
; CHECK-AIX32-P10-NEXT: lwz r4, 8(r4)
57-
; CHECK-AIX32-P10-NEXT: xor r3, r3, r4
58-
; CHECK-AIX32-P10-NEXT: xor r4, r5, r6
59-
; CHECK-AIX32-P10-NEXT: or. r3, r4, r3
60-
; CHECK-AIX32-P10-NEXT: li r3, 0
61-
; CHECK-AIX32-P10-NEXT: beq cr0, L..BB0_3
62-
; CHECK-AIX32-P10-NEXT: L..BB0_2: # %res_block
63-
; CHECK-AIX32-P10-NEXT: li r3, 1
64-
; CHECK-AIX32-P10-NEXT: L..BB0_3: # %endblock
65-
; CHECK-AIX32-P10-NEXT: cntlzw r3, r3
66-
; CHECK-AIX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
26+
; CHECK-AIX32-P10-NEXT: lxv vs34, 0(r4)
27+
; CHECK-AIX32-P10-NEXT: lxv vs35, 0(r3)
28+
; CHECK-AIX32-P10-NEXT: vcmpequb. v2, v3, v2
29+
; CHECK-AIX32-P10-NEXT: setbc r3, 4*cr6+lt
6730
; CHECK-AIX32-P10-NEXT: blr
6831
;
6932
; CHECK-LINUX32-P8-LABEL: cmpeq16:
7033
; CHECK-LINUX32-P8: # %bb.0: # %entry
71-
; CHECK-LINUX32-P8-NEXT: lwz r5, 0(r3)
72-
; CHECK-LINUX32-P8-NEXT: lwz r6, 4(r3)
73-
; CHECK-LINUX32-P8-NEXT: lwz r7, 0(r4)
74-
; CHECK-LINUX32-P8-NEXT: lwz r8, 4(r4)
75-
; CHECK-LINUX32-P8-NEXT: xor r6, r6, r8
76-
; CHECK-LINUX32-P8-NEXT: xor r5, r5, r7
77-
; CHECK-LINUX32-P8-NEXT: or. r5, r5, r6
78-
; CHECK-LINUX32-P8-NEXT: bne cr0, .LBB0_2
79-
; CHECK-LINUX32-P8-NEXT: # %bb.1: # %loadbb1
80-
; CHECK-LINUX32-P8-NEXT: lwz r5, 8(r3)
81-
; CHECK-LINUX32-P8-NEXT: lwz r3, 12(r3)
82-
; CHECK-LINUX32-P8-NEXT: lwz r6, 8(r4)
83-
; CHECK-LINUX32-P8-NEXT: lwz r4, 12(r4)
84-
; CHECK-LINUX32-P8-NEXT: xor r3, r3, r4
85-
; CHECK-LINUX32-P8-NEXT: xor r4, r5, r6
86-
; CHECK-LINUX32-P8-NEXT: or. r3, r4, r3
87-
; CHECK-LINUX32-P8-NEXT: li r3, 0
88-
; CHECK-LINUX32-P8-NEXT: beq cr0, .LBB0_3
89-
; CHECK-LINUX32-P8-NEXT: .LBB0_2: # %res_block
90-
; CHECK-LINUX32-P8-NEXT: li r3, 1
91-
; CHECK-LINUX32-P8-NEXT: .LBB0_3: # %endblock
92-
; CHECK-LINUX32-P8-NEXT: cntlzw r3, r3
93-
; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
34+
; CHECK-LINUX32-P8-NEXT: lxvd2x vs0, 0, r4
35+
; CHECK-LINUX32-P8-NEXT: xxswapd vs34, vs0
36+
; CHECK-LINUX32-P8-NEXT: lxvd2x vs0, 0, r3
37+
; CHECK-LINUX32-P8-NEXT: xxswapd vs35, vs0
38+
; CHECK-LINUX32-P8-NEXT: vcmpequb. v2, v3, v2
39+
; CHECK-LINUX32-P8-NEXT: mfocrf r3, 2
40+
; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 25, 31, 31
9441
; CHECK-LINUX32-P8-NEXT: blr
9542
;
9643
; CHECK-LINUX32-P10-LABEL: cmpeq16:
9744
; CHECK-LINUX32-P10: # %bb.0: # %entry
98-
; CHECK-LINUX32-P10-NEXT: lwz r5, 0(r3)
99-
; CHECK-LINUX32-P10-NEXT: lwz r6, 4(r3)
100-
; CHECK-LINUX32-P10-NEXT: lwz r7, 0(r4)
101-
; CHECK-LINUX32-P10-NEXT: xor r5, r5, r7
102-
; CHECK-LINUX32-P10-NEXT: lwz r8, 4(r4)
103-
; CHECK-LINUX32-P10-NEXT: xor r6, r6, r8
104-
; CHECK-LINUX32-P10-NEXT: or. r5, r5, r6
105-
; CHECK-LINUX32-P10-NEXT: bne cr0, .LBB0_2
106-
; CHECK-LINUX32-P10-NEXT: # %bb.1: # %loadbb1
107-
; CHECK-LINUX32-P10-NEXT: lwz r5, 8(r3)
108-
; CHECK-LINUX32-P10-NEXT: lwz r3, 12(r3)
109-
; CHECK-LINUX32-P10-NEXT: lwz r6, 8(r4)
110-
; CHECK-LINUX32-P10-NEXT: lwz r4, 12(r4)
111-
; CHECK-LINUX32-P10-NEXT: xor r3, r3, r4
112-
; CHECK-LINUX32-P10-NEXT: xor r4, r5, r6
113-
; CHECK-LINUX32-P10-NEXT: or. r3, r4, r3
114-
; CHECK-LINUX32-P10-NEXT: li r3, 0
115-
; CHECK-LINUX32-P10-NEXT: beq cr0, .LBB0_3
116-
; CHECK-LINUX32-P10-NEXT: .LBB0_2: # %res_block
117-
; CHECK-LINUX32-P10-NEXT: li r3, 1
118-
; CHECK-LINUX32-P10-NEXT: .LBB0_3: # %endblock
119-
; CHECK-LINUX32-P10-NEXT: cntlzw r3, r3
120-
; CHECK-LINUX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
45+
; CHECK-LINUX32-P10-NEXT: lxv vs34, 0(r4)
46+
; CHECK-LINUX32-P10-NEXT: lxv vs35, 0(r3)
47+
; CHECK-LINUX32-P10-NEXT: vcmpequb. v2, v3, v2
48+
; CHECK-LINUX32-P10-NEXT: setbc r3, 4*cr6+lt
12149
; CHECK-LINUX32-P10-NEXT: blr
12250
entry:
12351
%bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i32 16)

0 commit comments

Comments
 (0)