Skip to content

Commit b9d7834

Browse files
committed
Fix: Jensen-Shannon masked accumulation
1 parent 8f3ef10 commit b9d7834

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

include/simsimd/probability.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -491,8 +491,8 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
491491
__m512 ratio_b_vec = _mm512_mul_ps(_mm512_add_ps(b_vec, epsilon_vec), m_recip_approx);
492492
__m512 log_ratio_a_vec = _simsimd_log2_f32_skylake(ratio_a_vec);
493493
__m512 log_ratio_b_vec = _simsimd_log2_f32_skylake(ratio_b_vec);
494-
sum_a_vec = _mm512_maskz_fmadd_ps(nonzero_mask, a_vec, log_ratio_a_vec, sum_a_vec);
495-
sum_b_vec = _mm512_maskz_fmadd_ps(nonzero_mask, b_vec, log_ratio_b_vec, sum_b_vec);
494+
sum_a_vec = _mm512_mask3_fmadd_ps(a_vec, log_ratio_a_vec, sum_a_vec, nonzero_mask);
495+
sum_b_vec = _mm512_mask3_fmadd_ps(b_vec, log_ratio_b_vec, sum_b_vec, nonzero_mask);
496496
if (n) goto simsimd_js_f32_skylake_cycle;
497497

498498
simsimd_f32_t log2_normalizer = 0.693147181f;
@@ -584,8 +584,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
584584
__m512h ratio_b_vec = _mm512_mul_ph(_mm512_add_ph(b_vec, epsilon_vec), m_recip_approx);
585585
__m512h log_ratio_a_vec = _simsimd_log2_f16_sapphire(ratio_a_vec);
586586
__m512h log_ratio_b_vec = _simsimd_log2_f16_sapphire(ratio_b_vec);
587-
sum_a_vec = _mm512_maskz_fmadd_ph(nonzero_mask, a_vec, log_ratio_a_vec, sum_a_vec);
588-
sum_b_vec = _mm512_maskz_fmadd_ph(nonzero_mask, b_vec, log_ratio_b_vec, sum_b_vec);
587+
sum_a_vec = _mm512_mask3_fmadd_ph(a_vec, log_ratio_a_vec, sum_a_vec, nonzero_mask);
588+
sum_b_vec = _mm512_mask3_fmadd_ph(b_vec, log_ratio_b_vec, sum_b_vec, nonzero_mask);
589589
if (n) goto simsimd_js_f16_sapphire_cycle;
590590

591591
simsimd_f32_t log2_normalizer = 0.693147181f;

0 commit comments

Comments
 (0)