@@ -873,7 +873,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
873873 ISD::FMINIMUMNUM, ISD::MUL, ISD::SHL,
874874 ISD::SREM, ISD::UREM, ISD::VSELECT,
875875 ISD::BUILD_VECTOR, ISD::ADDRSPACECAST, ISD::LOAD,
876- ISD::STORE, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND});
876+ ISD::STORE, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
877+ ISD::INTRINSIC_WO_CHAIN});
877878
878879 // setcc for f16x2 and bf16x2 needs special handling to prevent
879880 // legalizer's attempt to scalarize it due to v2i1 not being legal.
@@ -6504,6 +6505,38 @@ static SDValue sinkProxyReg(SDValue R, SDValue Chain,
65046505 }
65056506}
65066507
6508+ // Combine add.sat(a, fneg(b)) -> sub.sat(a, b)
6509+ static SDValue combineAddSatWithNeg (SDNode *N, SelectionDAG &DAG,
6510+ unsigned SubOpc) {
6511+ SDValue Op2 = N->getOperand (2 );
6512+
6513+ if (Op2.getOpcode () != ISD::FNEG)
6514+ return SDValue ();
6515+
6516+ SDLoc DL (N);
6517+ return DAG.getNode (SubOpc, DL, N->getValueType (0 ), N->getOperand (1 ),
6518+ Op2.getOperand (0 ));
6519+ }
6520+
6521+ static SDValue combineIntrinsicWOChain (SDNode *N,
6522+ TargetLowering::DAGCombinerInfo &DCI,
6523+ const NVPTXSubtarget &STI) {
6524+ unsigned IntID = N->getConstantOperandVal (0 );
6525+
6526+ switch (IntID) {
6527+ case Intrinsic::nvvm_add_rn_sat_f16:
6528+ return combineAddSatWithNeg (N, DCI.DAG , NVPTXISD::SUB_RN_SAT_F16);
6529+ case Intrinsic::nvvm_add_rn_ftz_sat_f16:
6530+ return combineAddSatWithNeg (N, DCI.DAG , NVPTXISD::SUB_RN_FTZ_SAT_F16);
6531+ case Intrinsic::nvvm_add_rn_sat_f16x2:
6532+ return combineAddSatWithNeg (N, DCI.DAG , NVPTXISD::SUB_RN_SAT_F16X2);
6533+ case Intrinsic::nvvm_add_rn_ftz_sat_f16x2:
6534+ return combineAddSatWithNeg (N, DCI.DAG , NVPTXISD::SUB_RN_FTZ_SAT_F16X2);
6535+ default :
6536+ return SDValue ();
6537+ }
6538+ }
6539+
65076540static SDValue combineProxyReg (SDNode *N,
65086541 TargetLowering::DAGCombinerInfo &DCI) {
65096542
@@ -6570,6 +6603,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
65706603 return combineSTORE (N, DCI, STI);
65716604 case ISD::VSELECT:
65726605 return PerformVSELECTCombine (N, DCI);
6606+ case ISD::INTRINSIC_WO_CHAIN:
6607+ return combineIntrinsicWOChain (N, DCI, STI);
65736608 }
65746609 return SDValue ();
65756610}
0 commit comments