@@ -72,12 +72,15 @@ struct Cli {
72
72
/// Name of Qdrant collection
73
73
#[ arg( long, default_value = "default" ) ]
74
74
qdrant_collection_name : String ,
75
- /// Max number of retrieved result
76
- #[ arg( long, default_value = "3 " , value_parser = clap:: value_parser!( u64 ) ) ]
75
+ /// Max number of retrieved results (no less than 1)
76
+ #[ arg( long, default_value = "5 " , value_parser = clap:: value_parser!( u64 ) ) ]
77
77
qdrant_limit : u64 ,
78
78
/// Minimal score threshold for the search result
79
79
#[ arg( long, default_value = "0.4" , value_parser = clap:: value_parser!( f32 ) ) ]
80
80
qdrant_score_threshold : f32 ,
81
+ /// Maximum number of tokens each chunk contains
82
+ #[ arg( long, default_value = "100" , value_parser = clap:: value_parser!( usize ) ) ]
83
+ chunk_capacity : usize ,
81
84
/// Print prompt strings to stdout
82
85
#[ arg( long) ]
83
86
log_prompts : bool ,
@@ -181,6 +184,10 @@ async fn main() -> Result<(), ServerError> {
181
184
. set ( qdrant_config)
182
185
. map_err ( |_| ServerError :: Operation ( "Failed to set `QDRANT_CONFIG`." . to_string ( ) ) ) ?;
183
186
187
+ log ( format ! (
188
+ "[INFO] Chunk capacity (in tokens): {}" ,
189
+ & cli. chunk_capacity
190
+ ) ) ;
184
191
log ( format ! ( "[INFO] Enable prompt log: {}" , & cli. log_prompts) ) ;
185
192
log ( format ! ( "[INFO] Enable plugin log: {}" , & cli. log_stat) ) ;
186
193
log ( format ! ( "[INFO] Socket address: {}" , & cli. socket_addr) ) ;
@@ -230,8 +237,13 @@ async fn main() -> Result<(), ServerError> {
230
237
231
238
let new_service = make_service_fn ( move |_| {
232
239
let web_ui = cli. web_ui . to_string_lossy ( ) . to_string ( ) ;
240
+ let chunk_capacity = cli. chunk_capacity ;
233
241
234
- async move { Ok :: < _ , Error > ( service_fn ( move |req| handle_request ( req, web_ui. clone ( ) ) ) ) }
242
+ async move {
243
+ Ok :: < _ , Error > ( service_fn ( move |req| {
244
+ handle_request ( req, chunk_capacity, web_ui. clone ( ) )
245
+ } ) )
246
+ }
235
247
} ) ;
236
248
237
249
// socket address
@@ -255,6 +267,7 @@ async fn main() -> Result<(), ServerError> {
255
267
256
268
async fn handle_request (
257
269
req : Request < Body > ,
270
+ chunk_capacity : usize ,
258
271
web_ui : String ,
259
272
) -> Result < Response < Body > , hyper:: Error > {
260
273
let path_str = req. uri ( ) . path ( ) ;
@@ -266,7 +279,7 @@ async fn handle_request(
266
279
267
280
match root_path. as_str ( ) {
268
281
"/echo" => Ok ( Response :: new ( Body :: from ( "echo test" ) ) ) ,
269
- "/v1" => backend:: handle_llama_request ( req) . await ,
282
+ "/v1" => backend:: handle_llama_request ( req, chunk_capacity ) . await ,
270
283
_ => Ok ( static_response ( path_str, web_ui) ) ,
271
284
}
272
285
}
0 commit comments