Skip to content

Commit 69a1c27

Browse files
collect & dedup: add generic to skip u32::MAX-1 during collecting
1 parent fdec69c commit 69a1c27

File tree

6 files changed

+98
-26
lines changed

6 files changed

+98
-26
lines changed

bench/src/bin/paper.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ fn plot() {
147147
.collect_into(v2);
148148
v2.clear();
149149
minimizers::canonical_minimizers_seq_simd(packed_seq, &can_hasher, w, &mut cache)
150-
.collect_and_dedup_into(v2);
150+
.collect_and_dedup_into::<false>(v2);
151151
v2.clear();
152152
}
153153

src/collect.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::{
88
mem::transmute,
99
};
1010

11-
use crate::S;
11+
use crate::{S, minimizers::SKIPPED};
1212
use packed_seq::{ChunkIt, L, PaddedIt, intrinsics::transpose};
1313
use wide::u32x8;
1414

@@ -76,9 +76,9 @@ pub fn collect_and_dedup_with_index_into_scalar(
7676

7777
pub trait CollectAndDedup: Sized {
7878
/// Convenience wrapper around `collect_and_dedup_into`.
79-
fn collect_and_dedup<const SUPER: bool>(self) -> Vec<u32> {
79+
fn collect_and_dedup<const SKIP_MAX: bool>(self) -> Vec<u32> {
8080
let mut v = vec![];
81-
self.collect_and_dedup_into(&mut v);
81+
self.collect_and_dedup_into::<SKIP_MAX>(&mut v);
8282
v
8383
}
8484

@@ -95,8 +95,8 @@ pub trait CollectAndDedup: Sized {
9595
///
9696
/// The output is simply the deduplicated input values.
9797
#[inline(always)]
98-
fn collect_and_dedup_into(self, out_vec: &mut Vec<u32>) {
99-
self.collect_and_dedup_into_impl::<false>(out_vec, &mut vec![]);
98+
fn collect_and_dedup_into<const SKIP_MAX: bool>(self, out_vec: &mut Vec<u32>) {
99+
self.collect_and_dedup_into_impl::<false, SKIP_MAX>(out_vec, &mut vec![]);
100100
}
101101

102102
/// Collect a SIMD-iterator into a single vector, and deduplicate adjacent equal elements.
@@ -105,15 +105,15 @@ pub trait CollectAndDedup: Sized {
105105
/// The deduplicated input values are written in `out_vec` and the index of the stream it first appeared, i.e., the start of its super-k-mer, is written in `idx_vec`.
106106
#[inline(always)]
107107
fn collect_and_dedup_with_index_into(self, out_vec: &mut Vec<u32>, idx_vec: &mut Vec<u32>) {
108-
self.collect_and_dedup_into_impl::<true>(out_vec, idx_vec);
108+
self.collect_and_dedup_into_impl::<true, false>(out_vec, idx_vec);
109109
}
110110

111111
/// Collect a SIMD-iterator into a single vector, and deduplicate adjacent equal elements.
112112
/// Works by taking 8 elements from each stream, and then transposing the SIMD-matrix before writing out the results.
113113
///
114114
/// By default (when `SUPER` is false), the deduplicated input values are written in `out_vec`.
115115
/// When `SUPER` is true, the index of the stream in which the input value first appeared, i.e., the start of its super-k-mer, is additionally written in `idx_vec`.
116-
fn collect_and_dedup_into_impl<const SUPER: bool>(
116+
fn collect_and_dedup_into_impl<const SUPER: bool, const SKIP_MAX: bool>(
117117
self,
118118
out_vec: &mut Vec<u32>,
119119
idx_vec: &mut Vec<u32>,
@@ -126,7 +126,7 @@ thread_local! {
126126

127127
impl<I: ChunkIt<u32x8>> CollectAndDedup for PaddedIt<I> {
128128
#[inline(always)]
129-
fn collect_and_dedup_into_impl<const SUPER: bool>(
129+
fn collect_and_dedup_into_impl<const SUPER: bool, const SKIP_MAX: bool>(
130130
self,
131131
out_vec: &mut Vec<u32>,
132132
idx_vec: &mut Vec<u32>,
@@ -207,7 +207,7 @@ impl<I: ChunkIt<u32x8>> CollectAndDedup for PaddedIt<I> {
207207
&mut write_idx[j],
208208
);
209209
} else {
210-
crate::intrinsics::append_unique_vals(
210+
crate::intrinsics::append_unique_vals::<SKIP_MAX>(
211211
old[j],
212212
lane,
213213
lane,
@@ -237,7 +237,7 @@ impl<I: ChunkIt<u32x8>> CollectAndDedup for PaddedIt<I> {
237237
for j in 0..8 {
238238
let lane = t[j].as_array_ref();
239239
for (p, x) in lane.iter().take(k).enumerate() {
240-
if v[j].last() != Some(x) {
240+
if v[j].last() != Some(x) && (!SKIP_MAX || *x != SKIPPED) {
241241
v[j].push(*x);
242242
if SUPER {
243243
v2[j].push(

src/intrinsics/dedup.rs

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,26 @@
11
use crate::S;
2+
use crate::minimizers::SKIPPED;
23
use core::mem::transmute;
34
use packed_seq::L;
45

56
/// Dedup adjacent `new` values (starting with the last element of `old`).
67
/// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]`.
78
#[inline(always)]
89
#[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
9-
pub unsafe fn append_unique_vals(old: S, new: S, vals: S, v: &mut [u32], write_idx: &mut usize) {
10+
pub unsafe fn append_unique_vals<const SKIP_MAX: bool>(
11+
old: S,
12+
new: S,
13+
vals: S,
14+
v: &mut [u32],
15+
write_idx: &mut usize,
16+
) {
1017
unsafe {
1118
let old = old.to_array();
1219
let new = new.to_array();
1320
let vals = vals.to_array();
1421
let mut prec = old[7];
1522
for (i, &curr) in new.iter().enumerate() {
16-
if curr != prec {
23+
if curr != prec && (!SKIP_MAX || curr != SKIPPED) {
1724
v.as_mut_ptr().add(*write_idx).write(vals[i]);
1825
*write_idx += 1;
1926
prec = curr;
@@ -26,7 +33,7 @@ pub unsafe fn append_unique_vals(old: S, new: S, vals: S, v: &mut [u32], write_i
2633
/// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]` and `vals2` to `v2[write_idx]`.
2734
#[inline(always)]
2835
#[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
29-
pub unsafe fn append_unique_vals_2(
36+
pub unsafe fn append_unique_vals_2<const SKIP_MAX: bool>(
3037
old: S,
3138
new: S,
3239
vals: S,
@@ -60,19 +67,29 @@ pub unsafe fn append_unique_vals_2(
6067
/// <https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/edfd0e8b809d9a57527a7990c4bb44b9d1d05a69/2017/04/10/removeduplicates.cpp>
6168
#[cfg(target_feature = "avx2")]
6269
#[inline(always)]
63-
pub unsafe fn append_unique_vals(old: S, new: S, vals: S, v: &mut [u32], write_idx: &mut usize) {
70+
pub unsafe fn append_unique_vals<const SKIP_MAX: bool>(
71+
old: S,
72+
new: S,
73+
vals: S,
74+
v: &mut [u32],
75+
write_idx: &mut usize,
76+
) {
6477
unsafe {
6578
use core::arch::x86_64::*;
6679

6780
let old = transmute(old);
68-
let new = transmute(new);
6981
let vals = transmute(vals);
7082

71-
let recon = _mm256_blend_epi32(old, new, 0b01111111);
83+
let recon = _mm256_blend_epi32(old, transmute(new), 0b01111111);
7284
let movebyone_mask = _mm256_set_epi32(6, 5, 4, 3, 2, 1, 0, 7); // rotate shuffle
73-
let vec_tmp = _mm256_permutevar8x32_epi32(recon, movebyone_mask);
85+
let vec_tmp: S = transmute(_mm256_permutevar8x32_epi32(recon, movebyone_mask));
7486

75-
let m = _mm256_movemask_ps(transmute(_mm256_cmpeq_epi32(vec_tmp, new))) as usize;
87+
let mut m = vec_tmp.cmp_eq(new);
88+
if SKIP_MAX {
89+
// skip everything equal to prev, or equal to SKIPPED (u32::MAX - 1).
90+
m |= new.cmp_eq(S::splat(SKIPPED));
91+
}
92+
let m = _mm256_movemask_ps(transmute(m)) as usize;
7693
let numberofnewvalues = L - m.count_ones() as usize;
7794
let key = transmute(UNIQSHUF[m]);
7895
let val = _mm256_permutevar8x32_epi32(vals, key);
@@ -129,7 +146,13 @@ pub unsafe fn append_unique_vals_2(
129146
/// <https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/edfd0e8b809d9a57527a7990c4bb44b9d1d05a69/2017/04/10/removeduplicates.cpp>
130147
#[inline(always)]
131148
#[cfg(target_feature = "neon")]
132-
pub unsafe fn append_unique_vals(old: S, new: S, vals: S, v: &mut [u32], write_idx: &mut usize) {
149+
pub unsafe fn append_unique_vals<const SKIP_MAX: bool>(
150+
old: S,
151+
new: S,
152+
vals: S,
153+
v: &mut [u32],
154+
write_idx: &mut usize,
155+
) {
133156
unsafe {
134157
use core::arch::aarch64::{vaddvq_u32, vqtbl2q_u8, vst1_u32_x4};
135158
use wide::u32x4;
@@ -164,7 +187,11 @@ pub unsafe fn append_unique_vals(old: S, new: S, vals: S, v: &mut [u32], write_i
164187
let r2 = vqtbl2q_u8(t, i2);
165188
let prec: S = transmute((r1, r2));
166189

167-
let dup = prec.cmp_eq(new);
190+
let mut dup = prec.cmp_eq(new);
191+
if SKIP_MAX {
192+
dup |= new.cmp_eq(S::splat(SKIPPED));
193+
}
194+
// emulate movemask
168195
let (d1, d2): (u32x4, u32x4) = transmute(dup);
169196
let pow1 = u32x4::new([1, 2, 4, 8]);
170197
let pow2 = u32x4::new([16, 32, 64, 128]);
@@ -555,7 +582,7 @@ mod test {
555582
let start = Instant::now();
556583
for new in chunks {
557584
unsafe {
558-
append_unique_vals(old, new, new, &mut v2, &mut write_idx);
585+
append_unique_vals::<false>(old, new, new, &mut v2, &mut write_idx);
559586
}
560587
old = new;
561588
}

src/lib.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,11 +303,10 @@ impl<'h, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, ()> {
303303
canonical_minimizers_seq_scalar(seq, hasher, self.w, cache),
304304
min_pos,
305305
),
306-
(true, false) => {
307-
minimizers_seq_simd(seq, hasher, self.w, cache).collect_and_dedup_into(min_pos)
308-
}
306+
(true, false) => minimizers_seq_simd(seq, hasher, self.w, cache)
307+
.collect_and_dedup_into::<false>(min_pos),
309308
(true, true) => canonical_minimizers_seq_simd(seq, hasher, self.w, cache)
310-
.collect_and_dedup_into(min_pos),
309+
.collect_and_dedup_into::<false>(min_pos),
311310
});
312311
Output {
313312
k: self.k,

src/minimizers.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ use packed_seq::{Advance, ChunkIt, Delay, PaddedIt, Seq};
1515
use seq_hash::KmerHasher;
1616
use wide::u32x8;
1717

18+
pub const SKIPPED: u32 = u32::MAX - 1;
19+
1820
/// Minimizer position of a single window.
1921
pub fn one_minimizer<'s>(seq: impl Seq<'s>, hasher: &impl KmerHasher) -> usize {
2022
hasher

src/test.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,50 @@ fn collect_and_dedup_with_index_scalar() {
341341
}
342342

343343
#[test]
344+
fn collect_and_dedup_skip_max() {
345+
let x = u32::MAX - 1;
346+
let v = [0, 1, 1, x, 2, 3, x, x, 4].map(S::splat);
347+
348+
let mut out = vec![];
349+
PaddedIt {
350+
it: v.iter().copied(),
351+
padding: 0,
352+
}
353+
.collect_and_dedup_into::<false>(&mut out);
354+
assert!(
355+
out.starts_with(&[0, 1, x, 2, 3, x, 4, 0, 1]),
356+
"out: {out:?}"
357+
);
358+
359+
let mut out = vec![];
360+
PaddedIt {
361+
it: v.iter().copied(),
362+
padding: 0,
363+
}
364+
.collect_and_dedup_into::<true>(&mut out);
365+
assert!(out.starts_with(&[0, 1, 2, 3, 4, 0, 1]), "out: {out:?}");
366+
367+
let v = [1, x, x, x, x, x, x, 2, x, x, x, x].map(S::splat);
368+
369+
let mut out = vec![];
370+
PaddedIt {
371+
it: v.iter().copied(),
372+
padding: 0,
373+
}
374+
.collect_and_dedup_into::<false>(&mut out);
375+
assert!(out.starts_with(&[1, x, 2, x, 1, x]), "out: {out:?}");
376+
377+
let mut out = vec![];
378+
PaddedIt {
379+
it: v.iter().copied(),
380+
padding: 0,
381+
}
382+
.collect_and_dedup_into::<true>(&mut out);
383+
assert!(out.starts_with(&[1, 2, 1, 2]), "out: {out:?}");
384+
}
385+
386+
#[test]
387+
#[allow(unused)]
344388
fn readme_example() {
345389
use packed_seq::{PackedSeqVec, SeqVec};
346390

0 commit comments

Comments
 (0)