@@ -491,8 +491,8 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
491491 __m512 ratio_b_vec = _mm512_mul_ps (_mm512_add_ps (b_vec , epsilon_vec ), m_recip_approx );
492492 __m512 log_ratio_a_vec = _simsimd_log2_f32_skylake (ratio_a_vec );
493493 __m512 log_ratio_b_vec = _simsimd_log2_f32_skylake (ratio_b_vec );
494- sum_a_vec = _mm512_maskz_fmadd_ps ( nonzero_mask , a_vec , log_ratio_a_vec , sum_a_vec );
495- sum_b_vec = _mm512_maskz_fmadd_ps ( nonzero_mask , b_vec , log_ratio_b_vec , sum_b_vec );
494+ sum_a_vec = _mm512_mask3_fmadd_ps ( a_vec , log_ratio_a_vec , sum_a_vec , nonzero_mask );
495+ sum_b_vec = _mm512_mask3_fmadd_ps ( b_vec , log_ratio_b_vec , sum_b_vec , nonzero_mask );
496496 if (n ) goto simsimd_js_f32_skylake_cycle ;
497497
498498 simsimd_f32_t log2_normalizer = 0.693147181f ;
@@ -584,8 +584,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
584584 __m512h ratio_b_vec = _mm512_mul_ph (_mm512_add_ph (b_vec , epsilon_vec ), m_recip_approx );
585585 __m512h log_ratio_a_vec = _simsimd_log2_f16_sapphire (ratio_a_vec );
586586 __m512h log_ratio_b_vec = _simsimd_log2_f16_sapphire (ratio_b_vec );
587- sum_a_vec = _mm512_maskz_fmadd_ph ( nonzero_mask , a_vec , log_ratio_a_vec , sum_a_vec );
588- sum_b_vec = _mm512_maskz_fmadd_ph ( nonzero_mask , b_vec , log_ratio_b_vec , sum_b_vec );
587+ sum_a_vec = _mm512_mask3_fmadd_ph ( a_vec , log_ratio_a_vec , sum_a_vec , nonzero_mask );
588+ sum_b_vec = _mm512_mask3_fmadd_ph ( b_vec , log_ratio_b_vec , sum_b_vec , nonzero_mask );
589589 if (n ) goto simsimd_js_f16_sapphire_cycle ;
590590
591591 simsimd_f32_t log2_normalizer = 0.693147181f ;
0 commit comments