From 346ff4d3b2dc542109759651771302c2592380a8 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Thu, 21 Jul 2022 10:45:20 +0300 Subject: [PATCH 1/7] Queries binding --- src/query.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/query.rs b/src/query.rs index 40e43829..072f3565 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,5 +1,6 @@ use pyo3::prelude::*; use tantivy as tv; +use tv::schema::{Term, Field}; /// Tantivy's Query #[pyclass] @@ -8,14 +9,61 @@ pub(crate) struct Query { } impl Query { - pub(crate) fn get(&self) -> &dyn tv::query::Query { + pub fn get(&self) -> &dyn tv::query::Query { &self.inner } } #[pymethods] impl Query { + #[staticmethod] + fn term(field_id: u32, text: &str) -> Query { + let term = Term::from_field_text(Field::from_field_id(field_id), text); + Query { inner: Box::new( tv::query::TermQuery::new(term, tv::schema::IndexRecordOption::Basic) ) } + } + + #[staticmethod] + fn fuzzy_term(field_id: u32, distance: u8, text: &str) -> Query { + let ftq = tv::query::FuzzyTermQuery::new( + Term::from_field_text(Field::from_field_id(field_id), text), + distance, + true + ); + Query { inner: (Box::new(ftq)) } + } + + #[staticmethod] + fn regex(field_id: u32, pattern: &str) -> PyResult { + let rq = tv::query::RegexQuery::from_pattern(pattern, Field::from_field_id(field_id)); + match rq { + Ok(r) => Ok(Query { inner: Box::new( r ) }), + Err(_) => Err(pyo3::exceptions::PyValueError::new_err("RegEx syntax error")) + } + } + + #[staticmethod] + fn phrase(field_id: u32, words: Vec<&str>) -> Query { + let terms = words.iter().map(|&w| Term::from_field_text(Field::from_field_id(field_id), w)).collect::>(); + Query { inner: Box::new( tv::query::PhraseQuery::new(terms) ) } + } + + #[staticmethod] + fn boost(q : &Query, boost : f32) -> Query { + let bq = tv::query::BoostQuery::new(q.get().box_clone(), boost); + Query { inner: Box::new(bq) } + } + + #[staticmethod] + fn and(q1 : &Query, q2 : &Query) -> Query { + Query { inner: Box::new( tv::query::BooleanQuery::intersection(vec![q1.get().box_clone(), q2.get().box_clone()])) } + } + + #[staticmethod] + fn or(q1 : &Query, q2 : &Query) -> Query { + Query { inner: Box::new( tv::query::BooleanQuery::union(vec![q1.get().box_clone(), q2.get().box_clone()])) } + } + fn __repr__(&self) -> PyResult { - Ok(format!("Query({:?})", self.get())) + Ok(format!("{:?}", self.get())) } } From bd0862f7b33d09d57648806b3e2d6ac40523aca1 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Thu, 21 Jul 2022 10:59:19 +0300 Subject: [PATCH 2/7] rustfmt --- Cargo.toml | 2 +- src/query.rs | 59 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 84a1bea6..6df5a418 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.17.0" +version = "0.17.1" readme = "README.md" authors = ["Damir Jelić "] edition = "2018" diff --git a/src/query.rs b/src/query.rs index 072f3565..9f491609 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,6 +1,6 @@ use pyo3::prelude::*; use tantivy as tv; -use tv::schema::{Term, Field}; +use tv::schema::{Field, Term}; /// Tantivy's Query #[pyclass] @@ -19,7 +19,12 @@ impl Query { #[staticmethod] fn term(field_id: u32, text: &str) -> Query { let term = Term::from_field_text(Field::from_field_id(field_id), text); - Query { inner: Box::new( tv::query::TermQuery::new(term, tv::schema::IndexRecordOption::Basic) ) } + Query { + inner: Box::new(tv::query::TermQuery::new( + term, + tv::schema::IndexRecordOption::Basic, + )), + } } #[staticmethod] @@ -27,40 +32,64 @@ impl Query { let ftq = tv::query::FuzzyTermQuery::new( Term::from_field_text(Field::from_field_id(field_id), text), distance, - true + true, ); - Query { inner: (Box::new(ftq)) } + Query { + inner: (Box::new(ftq)), + } } #[staticmethod] fn regex(field_id: u32, pattern: &str) -> PyResult { - let rq = tv::query::RegexQuery::from_pattern(pattern, Field::from_field_id(field_id)); + let rq = tv::query::RegexQuery::from_pattern( + pattern, + Field::from_field_id(field_id), + ); match rq { - Ok(r) => Ok(Query { inner: Box::new( r ) }), - Err(_) => Err(pyo3::exceptions::PyValueError::new_err("RegEx syntax error")) + Ok(r) => Ok(Query { inner: Box::new(r) }), + Err(_) => Err(pyo3::exceptions::PyValueError::new_err( + "RegEx syntax error", + )), } } #[staticmethod] fn phrase(field_id: u32, words: Vec<&str>) -> Query { - let terms = words.iter().map(|&w| Term::from_field_text(Field::from_field_id(field_id), w)).collect::>(); - Query { inner: Box::new( tv::query::PhraseQuery::new(terms) ) } + let terms = words + .iter() + .map(|&w| Term::from_field_text(Field::from_field_id(field_id), w)) + .collect::>(); + Query { + inner: Box::new(tv::query::PhraseQuery::new(terms)), + } } #[staticmethod] - fn boost(q : &Query, boost : f32) -> Query { + fn boost(q: &Query, boost: f32) -> Query { let bq = tv::query::BoostQuery::new(q.get().box_clone(), boost); - Query { inner: Box::new(bq) } + Query { + inner: Box::new(bq), + } } #[staticmethod] - fn and(q1 : &Query, q2 : &Query) -> Query { - Query { inner: Box::new( tv::query::BooleanQuery::intersection(vec![q1.get().box_clone(), q2.get().box_clone()])) } + fn and(q1: &Query, q2: &Query) -> Query { + Query { + inner: Box::new(tv::query::BooleanQuery::intersection(vec![ + q1.get().box_clone(), + q2.get().box_clone(), + ])), + } } #[staticmethod] - fn or(q1 : &Query, q2 : &Query) -> Query { - Query { inner: Box::new( tv::query::BooleanQuery::union(vec![q1.get().box_clone(), q2.get().box_clone()])) } + fn or(q1: &Query, q2: &Query) -> Query { + Query { + inner: Box::new(tv::query::BooleanQuery::union(vec![ + q1.get().box_clone(), + q2.get().box_clone(), + ])), + } } fn __repr__(&self) -> PyResult { From 7d2976be8d3c2e117c8365043d583829c11c352e Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Thu, 21 Jul 2022 11:36:42 +0300 Subject: [PATCH 3/7] Add range query --- README.md | 4 ++++ src/query.rs | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/README.md b/README.md index 3fe76192..2f291277 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,10 @@ Running the tests is done using: make test +Building python wheel + pip install maturin + maturin build + # Usage The Python bindings have a similar API to Tantivy. To create a index first a schema diff --git a/src/query.rs b/src/query.rs index 9f491609..8689fe6c 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,4 +1,5 @@ use pyo3::prelude::*; +use std::ops::Bound; use tantivy as tv; use tv::schema::{Field, Term}; @@ -92,6 +93,17 @@ impl Query { } } + #[staticmethod] + fn range(field_id: u32, left: &str, right: &str) -> Query { + Query { + inner: Box::new(tv::query::RangeQuery::new_str_bounds( + Field::from_field_id(field_id), + Bound::Included(left), + Bound::Included(right), + )), + } + } + fn __repr__(&self) -> PyResult { Ok(format!("{:?}", self.get())) } From ba42af4bb182e49d2f4a5e9e4bedde92b0524fe6 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Thu, 21 Jul 2022 11:44:10 +0300 Subject: [PATCH 4/7] Use to_pyerr --- src/query.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/query.rs b/src/query.rs index 8689fe6c..aa92ef99 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,3 +1,4 @@ +use crate::to_pyerr; use pyo3::prelude::*; use std::ops::Bound; use tantivy as tv; @@ -45,13 +46,11 @@ impl Query { let rq = tv::query::RegexQuery::from_pattern( pattern, Field::from_field_id(field_id), - ); - match rq { - Ok(r) => Ok(Query { inner: Box::new(r) }), - Err(_) => Err(pyo3::exceptions::PyValueError::new_err( - "RegEx syntax error", - )), - } + ) + .map_err(to_pyerr)?; + Ok(Query { + inner: Box::new(rq), + }) } #[staticmethod] From da0561e8919a1339290d6fb4c3348af29b02ed61 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Wed, 27 Jul 2022 11:21:45 +0300 Subject: [PATCH 5/7] Export Query --- src/lib.rs | 2 ++ src/query.rs | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0593715c..5280968b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ use index::Index; use schema::Schema; use schemabuilder::SchemaBuilder; use searcher::{DocAddress, Searcher}; +use query::Query; /// Python bindings for the search engine library Tantivy. /// @@ -75,6 +76,7 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/query.rs b/src/query.rs index aa92ef99..842a37d5 100644 --- a/src/query.rs +++ b/src/query.rs @@ -11,7 +11,7 @@ pub(crate) struct Query { } impl Query { - pub fn get(&self) -> &dyn tv::query::Query { + pub(crate) fn get(&self) -> &dyn tv::query::Query { &self.inner } } @@ -73,7 +73,7 @@ impl Query { } #[staticmethod] - fn and(q1: &Query, q2: &Query) -> Query { + fn and_q(q1: &Query, q2: &Query) -> Query { Query { inner: Box::new(tv::query::BooleanQuery::intersection(vec![ q1.get().box_clone(), @@ -83,7 +83,7 @@ impl Query { } #[staticmethod] - fn or(q1: &Query, q2: &Query) -> Query { + fn or_q(q1: &Query, q2: &Query) -> Query { Query { inner: Box::new(tv::query::BooleanQuery::union(vec![ q1.get().box_clone(), @@ -93,7 +93,7 @@ impl Query { } #[staticmethod] - fn range(field_id: u32, left: &str, right: &str) -> Query { + fn range_q(field_id: u32, left: &str, right: &str) -> Query { Query { inner: Box::new(tv::query::RangeQuery::new_str_bounds( Field::from_field_id(field_id), @@ -104,6 +104,6 @@ impl Query { } fn __repr__(&self) -> PyResult { - Ok(format!("{:?}", self.get())) + Ok(format!("{:#?}", self.get())) } } From a32c63beb39cf8e03962dc0f5c6d5f0f49663592 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Wed, 27 Jul 2022 13:30:37 +0300 Subject: [PATCH 6/7] Lists for boolean queries --- src/query.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/query.rs b/src/query.rs index 842a37d5..b1864c8a 100644 --- a/src/query.rs +++ b/src/query.rs @@ -73,22 +73,20 @@ impl Query { } #[staticmethod] - fn and_q(q1: &Query, q2: &Query) -> Query { + fn and_q(qs : Vec>) -> Query { Query { - inner: Box::new(tv::query::BooleanQuery::intersection(vec![ - q1.get().box_clone(), - q2.get().box_clone(), - ])), + inner: Box::new(tv::query::BooleanQuery::intersection( + qs.iter().map(|q| q.get().box_clone()).collect::>() + )) } } #[staticmethod] - fn or_q(q1: &Query, q2: &Query) -> Query { + fn or_q(qs : Vec>) -> Query { Query { - inner: Box::new(tv::query::BooleanQuery::union(vec![ - q1.get().box_clone(), - q2.get().box_clone(), - ])), + inner: Box::new(tv::query::BooleanQuery::union( + qs.iter().map(|q| q.get().box_clone()).collect::>() + )) } } From ed329fb74fe85a5524e27707fa087b5ba65a6325 Mon Sep 17 00:00:00 2001 From: VsevolodZakharov Date: Mon, 8 Aug 2022 14:10:35 +0300 Subject: [PATCH 7/7] Fix PyO3 version to work with 'python:3.10' docker base image --- Cargo.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6df5a418..16560fd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ name = "tantivy" crate-type = ["cdylib"] [build-dependencies] -pyo3-build-config = "0.16.3" +pyo3-build-config = "0.15.0" [dependencies] chrono = "0.4.19" @@ -19,7 +19,9 @@ tantivy = "0.17" itertools = "0.10.3" futures = "0.3.21" serde_json = "1.0.64" +serde = "1.0" +serde_derive = "1.0" [dependencies.pyo3] -version = "0.16.3" +version = "0.15.0" features = ["extension-module"]