Skip to content

Commit 6110ae6

Browse files
committed
Add PgTsVector type and implement deserialization
This commit introduces the `PgTsVector` and `PgTsVectorEntry` types. It also adds an implementation of `FromSql<TsVector, Pg> for PgTsVector` for deserialization, along with two new tests that check deserializing tsvectors with and without lexeme positions.
1 parent 1d30bee commit 6110ae6

File tree

2 files changed

+176
-1
lines changed

2 files changed

+176
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ repository = "https://github.com/diesel-rs/diesel_full_text_search"
77
edition = "2021"
88

99
[dependencies]
10+
byteorder = "1.5.0"
1011
diesel = { version = "~2.2.0", features = ["postgres_backend"], default-features = false }
1112

1213
[features]

src/lib.rs

Lines changed: 175 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
mod types {
2-
use diesel::sql_types::*;
2+
use std::io::{BufRead, Cursor};
3+
4+
use byteorder::{NetworkEndian, ReadBytesExt};
5+
use diesel::{deserialize::FromSql, pg::Pg, sql_types::*, Queryable};
36

47
#[derive(Clone, Copy, SqlType)]
58
#[diesel(postgres_type(oid = 3615, array_oid = 3645))]
@@ -18,6 +21,70 @@ mod types {
1821
#[derive(SqlType)]
1922
#[diesel(postgres_type(name = "regconfig"))]
2023
pub struct RegConfig;
24+
25+
impl FromSql<TsVector, Pg> for PgTsVector {
26+
fn from_sql(
27+
bytes: <Pg as diesel::backend::Backend>::RawValue<'_>,
28+
) -> diesel::deserialize::Result<Self> {
29+
let mut cursor = Cursor::new(bytes.as_bytes());
30+
31+
// From Postgres `tsvector.c`:
32+
//
33+
// The binary format is as follows:
34+
//
35+
// uint32 number of lexemes
36+
//
37+
// for each lexeme:
38+
// lexeme text in client encoding, null-terminated
39+
// uint16 number of positions
40+
// for each position:
41+
// uint16 WordEntryPos
42+
43+
// Number of lexemes (uint32)
44+
let num_lexemes = cursor.read_u32::<NetworkEndian>()?;
45+
46+
let mut entries = Vec::with_capacity(num_lexemes as usize);
47+
48+
for _ in 0..num_lexemes {
49+
let mut lexeme = Vec::new();
50+
cursor.read_until(0, &mut lexeme)?;
51+
// Remove null terminator
52+
lexeme.pop();
53+
let lexeme = String::from_utf8(lexeme)?;
54+
55+
// Number of positions (uint16)
56+
let num_positions = cursor.read_u16::<NetworkEndian>()?;
57+
58+
let mut positions = Vec::with_capacity(num_positions as usize);
59+
for _ in 0..num_positions {
60+
positions.push(cursor.read_u16::<NetworkEndian>()?);
61+
}
62+
63+
entries.push(PgTsVectorEntry { lexeme, positions });
64+
}
65+
66+
Ok(PgTsVector { entries })
67+
}
68+
}
69+
70+
impl Queryable<TsVector, Pg> for PgTsVector {
71+
type Row = Self;
72+
73+
fn build(row: Self::Row) -> diesel::deserialize::Result<Self> {
74+
Ok(row)
75+
}
76+
}
77+
78+
/// An owned, decoded PostgreSQL `tsvector` value.
///
/// Holds one [`PgTsVectorEntry`] per lexeme, in the order the entries were
/// read from the database.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PgTsVector {
    /// The lexemes contained in the vector.
    pub entries: Vec<PgTsVectorEntry>,
}

/// A single lexeme of a [`PgTsVector`] together with its position list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PgTsVectorEntry {
    /// The lexeme text.
    pub lexeme: String,
    /// The raw 16-bit position values recorded for the lexeme; empty when the
    /// vector carries no position information for it.
    ///
    /// NOTE(review): values are stored as received from the server. If
    /// PostgreSQL encodes a weight in the high bits, it is still present
    /// here. Confirm before treating these as bare positions.
    pub positions: Vec<u16>,
}
2188
}
2289

2390
pub mod configuration {
@@ -219,3 +286,110 @@ mod dsl {
219286
pub use self::dsl::*;
220287
pub use self::functions::*;
221288
pub use self::types::*;
289+
290+
#[cfg(test)]
mod tests {
    use super::*;

    use diesel::dsl::sql;
    use diesel::pg::PgConnection;
    use diesel::prelude::*;

    /// Connects to the database named by the `DATABASE_URL` environment
    /// variable, panicking when it is unset or unreachable.
    fn connect() -> PgConnection {
        let database_url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set");
        PgConnection::establish(&database_url).expect("Error connecting to database")
    }

    /// Runs `SELECT <expression>` and loads the resulting `tsvector`.
    fn select_tsvector(expression: &str) -> PgTsVector {
        let mut conn = connect();
        diesel::select(sql::<TsVector>(expression))
            .get_result(&mut conn)
            .expect("Error executing query")
    }

    /// Shorthand for building an expected entry.
    fn entry(lexeme: &str, positions: &[u16]) -> PgTsVectorEntry {
        PgTsVectorEntry {
            lexeme: lexeme.to_owned(),
            positions: positions.to_vec(),
        }
    }

    // `to_tsvector` records where each lexeme occurred, so every entry is
    // expected to carry positions.
    #[test]
    fn test_tsvector_from_sql_with_positions() {
        let result = select_tsvector("to_tsvector('a fat cat sat on a mat and ate a fat rat')");

        let expected = PgTsVector {
            entries: vec![
                entry("ate", &[9]),
                entry("cat", &[3]),
                entry("fat", &[2, 11]),
                entry("mat", &[7]),
                entry("rat", &[12]),
                entry("sat", &[4]),
            ],
        };

        assert_eq!(expected, result);
    }

    // Casting a plain string literal to `tsvector` yields lexemes without
    // any position information.
    #[test]
    fn test_tsvector_from_sql_without_positions() {
        let result = select_tsvector("'a fat cat sat on a mat and ate a fat rat'::tsvector");

        let expected = PgTsVector {
            entries: vec![
                entry("a", &[]),
                entry("and", &[]),
                entry("ate", &[]),
                entry("cat", &[]),
                entry("fat", &[]),
                entry("mat", &[]),
                entry("on", &[]),
                entry("rat", &[]),
                entry("sat", &[]),
            ],
        };

        assert_eq!(expected, result);
    }
}

0 commit comments

Comments
 (0)