@@ -389,36 +389,26 @@ def __nvvm_fma_rn_relu_bf16 : NVPTXBuiltinSMAndPTX<"__bf16(__bf16, __bf16, __bf1
// FMA builtins. The two bf16x2 entries take and return _Vector<2, __bf16> and are
// declared through NVPTXBuiltinSMAndPTX with the SM_80 / PTX70 arguments
// (presumably the minimum SM and PTX versions -- confirm against the class definition).
389389def __nvvm_fma_rn_bf16x2 : NVPTXBuiltinSMAndPTX<" _Vector<2, __bf16>(_Vector<2, __bf16>, _Vector<2, __bf16>, _Vector<2, __bf16>)" , SM_80, PTX70>;
390390def __nvvm_fma_rn_relu_bf16x2 : NVPTXBuiltinSMAndPTX<" _Vector<2, __bf16>(_Vector<2, __bf16>, _Vector<2, __bf16>, _Vector<2, __bf16>)" , SM_80, PTX70>;
// f32 FMA builtins: float(float, float, float), declared with plain NVPTXBuiltin.
// Each `+` line adds a `_sat_` counterpart with the same prototype directly after
// the existing entry it mirrors (rn/rz/rm/rp, each with and without `_ftz`).
// NOTE(review): rn/rz/rm/rp, ftz and sat presumably correspond to the PTX rounding,
// flush-to-zero and saturation modifiers -- confirm against the PTX ISA.
391391def __nvvm_fma_rn_ftz_f : NVPTXBuiltin<" float(float, float, float)" >;
392+ def __nvvm_fma_rn_ftz_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
392393def __nvvm_fma_rn_f : NVPTXBuiltin<" float(float, float, float)" >;
394+ def __nvvm_fma_rn_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
393395def __nvvm_fma_rz_ftz_f : NVPTXBuiltin<" float(float, float, float)" >;
396+ def __nvvm_fma_rz_ftz_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
394397def __nvvm_fma_rz_f : NVPTXBuiltin<" float(float, float, float)" >;
398+ def __nvvm_fma_rz_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
395399def __nvvm_fma_rm_ftz_f : NVPTXBuiltin<" float(float, float, float)" >;
400+ def __nvvm_fma_rm_ftz_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
396401def __nvvm_fma_rm_f : NVPTXBuiltin<" float(float, float, float)" >;
402+ def __nvvm_fma_rm_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
397403def __nvvm_fma_rp_ftz_f : NVPTXBuiltin<" float(float, float, float)" >;
404+ def __nvvm_fma_rp_ftz_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
398405def __nvvm_fma_rp_f : NVPTXBuiltin<" float(float, float, float)" >;
406+ def __nvvm_fma_rp_sat_f : NVPTXBuiltin<" float(float, float, float)" >;
// f64 FMA builtins: double(double, double, double). These are unchanged context
// lines; this hunk adds no `_sat_` counterpart for them.
399407def __nvvm_fma_rn_d : NVPTXBuiltin<" double(double, double, double)" >;
400408def __nvvm_fma_rz_d : NVPTXBuiltin<" double(double, double, double)" >;
401409def __nvvm_fma_rm_d : NVPTXBuiltin<" double(double, double, double)" >;
402410def __nvvm_fma_rp_d : NVPTXBuiltin<" double(double, double, double)" >;
403411
// Removed by this hunk (`-` lines): the `__nvvm_fma_mixed_*` builtins, all declared
// with NVPTXBuiltinSMAndPTX and the SM_100 / PTX86 arguments.
// First group: two __fp16 operands plus a float operand, returning float,
// in rn/rz/rm/rp forms with and without `_sat`.
404- def __nvvm_fma_mixed_rn_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
405- def __nvvm_fma_mixed_rz_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
406- def __nvvm_fma_mixed_rm_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
407- def __nvvm_fma_mixed_rp_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
408- def __nvvm_fma_mixed_rn_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
409- def __nvvm_fma_mixed_rz_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
410- def __nvvm_fma_mixed_rm_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
411- def __nvvm_fma_mixed_rp_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, __fp16, float)" , SM_100, PTX86>;
412-
// Second group: the same eight forms with __bf16 in place of __fp16.
413- def __nvvm_fma_mixed_rn_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
414- def __nvvm_fma_mixed_rz_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
415- def __nvvm_fma_mixed_rm_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
416- def __nvvm_fma_mixed_rp_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
417- def __nvvm_fma_mixed_rn_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
418- def __nvvm_fma_mixed_rz_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
419- def __nvvm_fma_mixed_rm_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
420- def __nvvm_fma_mixed_rp_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, __bf16, float)" , SM_100, PTX86>;
421-
422412// Rcp
423413
// Unchanged context line: a float(float) builtin. The hunk ends right after it,
// so the remaining entries of this section are not visible here.
424414def __nvvm_rcp_rn_ftz_f : NVPTXBuiltin<" float(float)" >;
@@ -465,64 +455,50 @@ def __nvvm_rsqrt_approx_d : NVPTXBuiltin<"double(double)">;
465455// Add
466456
// f32 add builtins: float(float, float), declared with plain NVPTXBuiltin.
// Each `+` line adds a `_sat_` counterpart with the same prototype directly after
// the existing entry it mirrors (rn/rz/rm/rp, each with and without `_ftz`).
// NOTE(review): the suffixes presumably map to PTX rounding / flush-to-zero /
// saturation modifiers -- confirm against the PTX ISA.
467457def __nvvm_add_rn_ftz_f : NVPTXBuiltin<" float(float, float)" >;
458+ def __nvvm_add_rn_ftz_sat_f : NVPTXBuiltin<" float(float, float)" >;
468459def __nvvm_add_rn_f : NVPTXBuiltin<" float(float, float)" >;
460+ def __nvvm_add_rn_sat_f : NVPTXBuiltin<" float(float, float)" >;
469461def __nvvm_add_rz_ftz_f : NVPTXBuiltin<" float(float, float)" >;
462+ def __nvvm_add_rz_ftz_sat_f : NVPTXBuiltin<" float(float, float)" >;
470463def __nvvm_add_rz_f : NVPTXBuiltin<" float(float, float)" >;
464+ def __nvvm_add_rz_sat_f : NVPTXBuiltin<" float(float, float)" >;
471465def __nvvm_add_rm_ftz_f : NVPTXBuiltin<" float(float, float)" >;
466+ def __nvvm_add_rm_ftz_sat_f : NVPTXBuiltin<" float(float, float)" >;
472467def __nvvm_add_rm_f : NVPTXBuiltin<" float(float, float)" >;
468+ def __nvvm_add_rm_sat_f : NVPTXBuiltin<" float(float, float)" >;
473469def __nvvm_add_rp_ftz_f : NVPTXBuiltin<" float(float, float)" >;
470+ def __nvvm_add_rp_ftz_sat_f : NVPTXBuiltin<" float(float, float)" >;
474471def __nvvm_add_rp_f : NVPTXBuiltin<" float(float, float)" >;
472+ def __nvvm_add_rp_sat_f : NVPTXBuiltin<" float(float, float)" >;
475473
// f64 add builtins: double(double, double). These are unchanged context lines;
// this hunk adds no `_sat_` counterpart for them.
476474def __nvvm_add_rn_d : NVPTXBuiltin<" double(double, double)" >;
477475def __nvvm_add_rz_d : NVPTXBuiltin<" double(double, double)" >;
478476def __nvvm_add_rm_d : NVPTXBuiltin<" double(double, double)" >;
479477def __nvvm_add_rp_d : NVPTXBuiltin<" double(double, double)" >;
480478
// Removed by this hunk (`-` lines): the `__nvvm_add_mixed_*` builtins, all declared
// with NVPTXBuiltinSMAndPTX and the SM_100 / PTX86 arguments.
// First group: one __fp16 operand plus a float operand, returning float. Ten forms:
// no rounding suffix, rn, rz, rm, rp, and each of those with `_sat`.
481- def __nvvm_add_mixed_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
482- def __nvvm_add_mixed_rn_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
483- def __nvvm_add_mixed_rz_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
484- def __nvvm_add_mixed_rm_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
485- def __nvvm_add_mixed_rp_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
486- def __nvvm_add_mixed_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
487- def __nvvm_add_mixed_rn_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
488- def __nvvm_add_mixed_rz_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
489- def __nvvm_add_mixed_rm_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
490- def __nvvm_add_mixed_rp_sat_f16_f32 : NVPTXBuiltinSMAndPTX<" float(__fp16, float)" , SM_100, PTX86>;
491-
// Second group: the same ten forms with __bf16 in place of __fp16.
492- def __nvvm_add_mixed_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
493- def __nvvm_add_mixed_rn_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
494- def __nvvm_add_mixed_rz_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
495- def __nvvm_add_mixed_rm_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
496- def __nvvm_add_mixed_rp_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
497- def __nvvm_add_mixed_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
498- def __nvvm_add_mixed_rn_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
499- def __nvvm_add_mixed_rz_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
500- def __nvvm_add_mixed_rm_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
501- def __nvvm_add_mixed_rp_sat_bf16_f32 : NVPTXBuiltinSMAndPTX<" float(__bf16, float)" , SM_100, PTX86>;
502-
503479// Sub
504480
// Removed by this hunk (`-` lines): the `__nvvm_sub_mixed_*` builtins, all declared
// with NVPTXBuiltinSMAndPTX and the SM_100 / PTX86 arguments.
// First group: one __fp16 operand plus a float operand, returning float. Ten forms:
// no rounding suffix, rn, rz, rm, rp, and each of those with `_sat`.
505- def __nvvm_sub_mixed_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
506- def __nvvm_sub_mixed_rn_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
507- def __nvvm_sub_mixed_rz_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
508- def __nvvm_sub_mixed_rm_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
509- def __nvvm_sub_mixed_rp_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
510- def __nvvm_sub_mixed_sat_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
511- def __nvvm_sub_mixed_rn_sat_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
512- def __nvvm_sub_mixed_rz_sat_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
513- def __nvvm_sub_mixed_rm_sat_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
514- def __nvvm_sub_mixed_rp_sat_f16_f32 : NVPTXBuiltinSMAndPTX <" float(__fp16 , float)" , SM_100, PTX86 >;
515-
// Second group: the same ten forms with __bf16 in place of __fp16.
// NOTE(review): spacing around `<`, `>` and inside the quoted prototype varies
// between lines in this group; it looks like a rendering artifact -- confirm
// against the real file before relying on the exact string contents.
516- def __nvvm_sub_mixed_bf16_f32 : NVPTXBuiltinSMAndPTX <" float(__bf16 , float)" , SM_100, PTX86 >;
517- def __nvvm_sub_mixed_rn_bf16_f32 : NVPTXBuiltinSMAndPTX <" float(__bf16 , float)" , SM_100, PTX86 >;
518- def __nvvm_sub_mixed_rz_bf16_f32 : NVPTXBuiltinSMAndPTX <" float(__bf16 , float)" , SM_100, PTX86 >;
519- def __nvvm_sub_mixed_rm_bf16_f32 : NVPTXBuiltinSMAndPTX <" float(__bf16 , float)" , SM_100, PTX86 >;
520- def __nvvm_sub_mixed_rp_bf16_f32 : NVPTXBuiltinSMAndPTX <" float(__bf16 , float)" , SM_100, PTX86 >;
521- def __nvvm_sub_mixed_sat_bf16_f32 : NVPTXBuiltinSMAndPTX< " float(__bf16, float) " , SM_100, PTX86>;
522- def __nvvm_sub_mixed_rn_sat_bf16_f32 : NVPTXBuiltinSMAndPTX< " float(__bf16, float) " , SM_100, PTX86 >;
523- def __nvvm_sub_mixed_rz_sat_bf16_f32 : NVPTXBuiltinSMAndPTX< " float(__bf16, float) " , SM_100, PTX86 >;
524- def __nvvm_sub_mixed_rm_sat_bf16_f32 : NVPTXBuiltinSMAndPTX< " float(__bf16, float) " , SM_100, PTX86 >;
525- def __nvvm_sub_mixed_rp_sat_bf16_f32 : NVPTXBuiltinSMAndPTX< " float(__bf16, float) " , SM_100, PTX86 >;
// Added by this hunk (`+` lines): f32 sub builtins, float(float, float), declared
// with plain NVPTXBuiltin. For each of rn/rz/rm/rp there are four entries:
// `_ftz_f`, `_ftz_sat_f`, `_f` and `_sat_f` -- the same name pattern as the
// `__nvvm_add_*` f32 entries listed earlier in this file.
// NOTE(review): the suffixes presumably map to PTX rounding / flush-to-zero /
// saturation modifiers -- confirm against the PTX ISA.
// NOTE(review): the `__nvvm_sub_rm_f` line is spaced differently from its
// neighbours (inside the angle brackets and the quoted prototype); this looks
// like a rendering artifact -- confirm against the real file.
481+ def __nvvm_sub_rn_ftz_f : NVPTXBuiltin <" float(float , float)" >;
482+ def __nvvm_sub_rn_ftz_sat_f : NVPTXBuiltin <" float(float , float)" >;
483+ def __nvvm_sub_rn_f : NVPTXBuiltin <" float(float , float)" >;
484+ def __nvvm_sub_rn_sat_f : NVPTXBuiltin <" float(float , float)" >;
485+ def __nvvm_sub_rz_ftz_f : NVPTXBuiltin <" float(float , float)" >;
486+ def __nvvm_sub_rz_ftz_sat_f : NVPTXBuiltin <" float(float , float)" >;
487+ def __nvvm_sub_rz_f : NVPTXBuiltin <" float(float , float)" >;
488+ def __nvvm_sub_rz_sat_f : NVPTXBuiltin <" float(float , float)" >;
489+ def __nvvm_sub_rm_ftz_f : NVPTXBuiltin <" float(float , float)" >;
490+ def __nvvm_sub_rm_ftz_sat_f : NVPTXBuiltin <" float(float , float)" >;
491+ def __nvvm_sub_rm_f : NVPTXBuiltin< " float(float, float) " >;
492+ def __nvvm_sub_rm_sat_f : NVPTXBuiltin <" float(float , float)" >;
493+ def __nvvm_sub_rp_ftz_f : NVPTXBuiltin <" float(float , float)" >;
494+ def __nvvm_sub_rp_ftz_sat_f : NVPTXBuiltin <" float(float , float)" >;
495+ def __nvvm_sub_rp_f : NVPTXBuiltin <" float(float , float)" >;
496+ def __nvvm_sub_rp_sat_f : NVPTXBuiltin <" float(float , float)" >;
497+
// Added f64 sub builtins: double(double, double), one per rn/rz/rm/rp.
// No `_ftz` or `_sat` forms are added for double.
498+ def __nvvm_sub_rn_d : NVPTXBuiltin< " double(double, double) " >;
499+ def __nvvm_sub_rz_d : NVPTXBuiltin< " double(double, double) " >;
500+ def __nvvm_sub_rm_d : NVPTXBuiltin< " double(double, double) " >;
501+ def __nvvm_sub_rp_d : NVPTXBuiltin< " double(double, double) " >;
526502
527503// Convert
528504
0 commit comments