feat: add chunk_capacity CLI option

apepkuss · apepkuss · commit e91c2a1ff525 · 2024-04-10T13:19:19.000+08:00
Signed-off-by: Xin Liu <sam@secondstate.io>
diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs
@@ -713,7 +713,7 @@ pub(crate) async fn chunks_handler(mut req: Request<Body>) -> Result<Response<Bo
         ));
     }
 
-    match llama_core::rag::chunk_text(&contents, extension) {
+    match llama_core::rag::chunk_text(&contents, extension, chunks_request.chunk_capacity) {
         Ok(chunks) => {
             let chunks_response = ChunksResponse {
                 id: chunks_request.id,
@@ -745,7 +745,10 @@ pub(crate) async fn chunks_handler(mut req: Request<Body>) -> Result<Response<Bo
     }
 }
 
-pub(crate) async fn doc_to_embeddings(req: Request<Body>) -> Result<Response<Body>, hyper::Error> {
+pub(crate) async fn doc_to_embeddings(
+    req: Request<Body>,
+    chunk_capacity: usize,
+) -> Result<Response<Body>, hyper::Error> {
     // upload the target rag document
     let file_object = if req.method() == Method::POST {
         let boundary = "boundary=";
@@ -907,7 +910,7 @@ pub(crate) async fn doc_to_embeddings(req: Request<Body>) -> Result<Response<Bod
             ));
         }
 
-        match llama_core::rag::chunk_text(&contents, extension) {
+        match llama_core::rag::chunk_text(&contents, extension, chunk_capacity) {
             Ok(chunks) => chunks,
             Err(e) => return error::internal_server_error(e.to_string()),
         }
diff --git a/src/backend/mod.rs b/src/backend/mod.rs
@@ -5,6 +5,7 @@ use hyper::{Body, Request, Response};
 
 pub(crate) async fn handle_llama_request(
     req: Request<Body>,
+    chunk_capacity: usize,
 ) -> Result<Response<Body>, hyper::Error> {
     match req.uri().path() {
         "/v1/chat/completions" => match QDRANT_CONFIG.get() {
@@ -18,7 +19,7 @@ pub(crate) async fn handle_llama_request(
         },
         "/v1/files" => ggml::files_handler(req).await,
         "/v1/chunks" => ggml::chunks_handler(req).await,
-        "/v1/create/rag" => ggml::doc_to_embeddings(req).await,
+        "/v1/create/rag" => ggml::doc_to_embeddings(req, chunk_capacity).await,
         _ => error::invalid_endpoint(req.uri().path()),
     }
 }
diff --git a/src/main.rs b/src/main.rs
@@ -72,12 +72,15 @@ struct Cli {
     /// Name of Qdrant collection
     #[arg(long, default_value = "default")]
     qdrant_collection_name: String,
-    /// Max number of retrieved result
-    #[arg(long, default_value = "3", value_parser = clap::value_parser!(u64))]
+    /// Max number of retrieved result (no less than 1)
+    #[arg(long, default_value = "5", value_parser = clap::value_parser!(u64))]
     qdrant_limit: u64,
     /// Minimal score threshold for the search result
     #[arg(long, default_value = "0.4", value_parser = clap::value_parser!(f32))]
     qdrant_score_threshold: f32,
+    /// Maximum number of tokens each chunk contains
+    #[arg(long, default_value = "100", value_parser = clap::value_parser!(usize))]
+    chunk_capacity: usize,
     /// Print prompt strings to stdout
     #[arg(long)]
     log_prompts: bool,
@@ -181,6 +184,10 @@ async fn main() -> Result<(), ServerError> {
         .set(qdrant_config)
         .map_err(|_| ServerError::Operation("Failed to set `QDRANT_CONFIG`.".to_string()))?;
 
+    log(format!(
+        "[INFO] Chunk capacity (in tokens): {}",
+        &cli.chunk_capacity
+    ));
     log(format!("[INFO] Enable prompt log: {}", &cli.log_prompts));
     log(format!("[INFO] Enable plugin log: {}", &cli.log_stat));
     log(format!("[INFO] Socket address: {}", &cli.socket_addr));
@@ -230,8 +237,13 @@ async fn main() -> Result<(), ServerError> {
 
     let new_service = make_service_fn(move |_| {
         let web_ui = cli.web_ui.to_string_lossy().to_string();
+        let chunk_capacity = cli.chunk_capacity;
 
-        async move { Ok::<_, Error>(service_fn(move |req| handle_request(req, web_ui.clone()))) }
+        async move {
+            Ok::<_, Error>(service_fn(move |req| {
+                handle_request(req, chunk_capacity, web_ui.clone())
+            }))
+        }
     });
 
     // socket address
@@ -255,6 +267,7 @@ async fn main() -> Result<(), ServerError> {
 
 async fn handle_request(
     req: Request<Body>,
+    chunk_capacity: usize,
     web_ui: String,
 ) -> Result<Response<Body>, hyper::Error> {
     let path_str = req.uri().path();
@@ -266,7 +279,7 @@ async fn handle_request(
 
     match root_path.as_str() {
         "/echo" => Ok(Response::new(Body::from("echo test"))),
-        "/v1" => backend::handle_llama_request(req).await,
+        "/v1" => backend::handle_llama_request(req, chunk_capacity).await,
         _ => Ok(static_response(path_str, web_ui)),
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -713,7 +713,7 @@ pub(crate) async fn chunks_handler(mut req: Request<Body>) -> Result<Response<Bo`
`713`	`713`	`));`
`714`	`714`	`}`
`715`	`715`
`716`		`- match llama_core::rag::chunk_text(&contents, extension) {`
	`716`	`+ match llama_core::rag::chunk_text(&contents, extension, chunks_request.chunk_capacity) {`
`717`	`717`	`Ok(chunks) => {`
`718`	`718`	`let chunks_response = ChunksResponse {`
`719`	`719`	`id: chunks_request.id,`
`@@ -745,7 +745,10 @@ pub(crate) async fn chunks_handler(mut req: Request<Body>) -> Result<Response<Bo`
`745`	`745`	`}`
`746`	`746`	`}`
`747`	`747`
`748`		`-pub(crate) async fn doc_to_embeddings(req: Request<Body>) -> Result<Response<Body>, hyper::Error> {`
	`748`	`+pub(crate) async fn doc_to_embeddings(`
	`749`	`+ req: Request<Body>,`
	`750`	`+ chunk_capacity: usize,`
	`751`	`+) -> Result<Response<Body>, hyper::Error> {`
`749`	`752`	`// upload the target rag document`
`750`	`753`	`let file_object = if req.method() == Method::POST {`
`751`	`754`	`let boundary = "boundary=";`
`@@ -907,7 +910,7 @@ pub(crate) async fn doc_to_embeddings(req: Request<Body>) -> Result<Response<Bod`
`907`	`910`	`));`
`908`	`911`	`}`
`909`	`912`
`910`		`- match llama_core::rag::chunk_text(&contents, extension) {`
	`913`	`+ match llama_core::rag::chunk_text(&contents, extension, chunk_capacity) {`
`911`	`914`	`Ok(chunks) => chunks,`
`912`	`915`	`Err(e) => return error::internal_server_error(e.to_string()),`
`913`	`916`	`}`