Commit 4bf5a8a

feat: add --include-usage CLI option
Signed-off-by: Xin Liu <[email protected]>
1 parent 3fa15d7 commit 4bf5a8a

2 files changed: 10 additions, 1 deletion

README.md (3 additions, 1 deletion)
```diff
@@ -596,7 +596,7 @@ Options:
   -c, --ctx-size <CTX_SIZE>
           Sets context sizes for chat and embedding models, respectively. The sizes are separated by comma without space, for example, '--ctx-size 4096,384'. The first value is for the chat model, and the second is for the embedding model [default: 4096,384]
   -p, --prompt-template <PROMPT_TEMPLATE>
-          Sets prompt templates for chat and embedding models, respectively. The prompt templates are separated by comma without space, for example, '--prompt-template llama-2-chat,embedding'. The first value is for the chat model, and the second is for the embedding model [possible values: llama-2-chat, llama-3-chat, llama-3-tool, mistral-instruct, mistral-tool, mistrallite, mistral-small-chat, openchat, codellama-instruct, codellama-super-instruct, human-assistant, vicuna-1.0-chat, vicuna-1.1-chat, vicuna-llava, chatml, chatml-tool, internlm-2-tool, baichuan-2, wizard-coder, zephyr, stablelm-zephyr, intel-neural, deepseek-chat, deepseek-coder, deepseek-chat-2, deepseek-chat-25, deepseek-chat-3, solar-instruct, phi-2-chat, phi-2-instruct, phi-3-chat, phi-3-instruct, phi-4-chat, gemma-instruct, octopus, glm-4-chat, groq-llama3-tool, mediatek-breeze, nemotron-chat, nemotron-tool, functionary-32, functionary-31, minicpmv, moxin-chat, falcon3, megrez, qwen2-vision, embedding, none]
+          Sets prompt templates for chat and embedding models, respectively. The prompt templates are separated by comma without space, for example, '--prompt-template llama-2-chat,embedding'. The first value is for the chat model, and the second is for the embedding model [possible values: llama-2-chat, llama-3-chat, llama-3-tool, mistral-instruct, mistral-tool, mistrallite, mistral-small-chat, mistral-small-tool, openchat, codellama-instruct, codellama-super-instruct, human-assistant, vicuna-1.0-chat, vicuna-1.1-chat, vicuna-llava, chatml, chatml-tool, internlm-2-tool, baichuan-2, wizard-coder, zephyr, stablelm-zephyr, intel-neural, deepseek-chat, deepseek-coder, deepseek-chat-2, deepseek-chat-25, deepseek-chat-3, solar-instruct, phi-2-chat, phi-2-instruct, phi-3-chat, phi-3-instruct, phi-4-chat, gemma-instruct, octopus, glm-4-chat, groq-llama3-tool, mediatek-breeze, nemotron-chat, nemotron-tool, functionary-32, functionary-31, minicpmv, moxin-chat, falcon3, megrez, qwen2-vision, embedding, none]
   -r, --reverse-prompt <REVERSE_PROMPT>
           Halt generation at PROMPT, return control
   -n, --n-predict <N_PREDICT>
@@ -637,6 +637,8 @@ Options:
           Maximum number of user messages used in the retrieval [default: 1]
       --kw-search-url <KW_SEARCH_URL>
           URL of the keyword search service
+      --include-usage
+          Whether to include usage in the stream response. Defaults to false
       --socket-addr <SOCKET_ADDR>
           Socket address of LlamaEdge-RAG API Server instance. For example, `0.0.0.0:8080`
       --port <PORT>
```
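For context, "usage" in a stream response conventionally means the token-count object that OpenAI-compatible servers can attach to the final streamed chunk. Assuming the server follows that convention (this commit does not show the response shape; the chunk below and all its values are illustrative), the last event with `--include-usage` enabled might look like:

```rust
use serde_json::json;

fn main() {
    // Hypothetical final SSE chunk when the server runs with `--include-usage`.
    // The shape mirrors the common OpenAI chat-completion streaming convention;
    // every value here is made up for illustration.
    let final_chunk = json!({
        "id": "chatcmpl-demo",
        "object": "chat.completion.chunk",
        "choices": [],
        "usage": {
            "prompt_tokens": 25,
            "completion_tokens": 48,
            "total_tokens": 73
        }
    });
    println!("data: {final_chunk}");
}
```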

src/main.rs (7 additions, 0 deletions)
```diff
@@ -131,6 +131,9 @@ struct Cli {
     /// URL of the keyword search service
     #[arg(long)]
     kw_search_url: Option<String>,
+    /// Whether to include usage in the stream response. Defaults to false.
+    #[arg(long, default_value = "false")]
+    include_usage: bool,
     /// Socket address of LlamaEdge-RAG API Server instance. For example, `0.0.0.0:8080`.
     #[arg(long, default_value = None, value_parser = clap::value_parser!(SocketAddr), group = "socket_address_group")]
     socket_addr: Option<SocketAddr>,
```
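A minimal, self-contained sketch of how this flag parses with clap's derive API; the struct is trimmed down to just the new field (everything else is omitted), and the attribute is copied verbatim from the hunk above:

```rust
use clap::Parser;

/// Trimmed-down stand-in for the server's `Cli` struct, keeping only the new flag.
#[derive(Debug, Parser)]
struct Cli {
    /// Whether to include usage in the stream response. Defaults to false.
    #[arg(long, default_value = "false")]
    include_usage: bool,
}

fn main() {
    // Without the flag, `include_usage` is false; passing `--include-usage`
    // on the command line flips it to true (clap treats a bare bool as a switch).
    let cli = Cli::parse();
    println!("include_usage: {}", cli.include_usage);
}
```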
```diff
@@ -420,6 +423,9 @@ async fn main() -> Result<(), ServerError> {
         KW_SEARCH_CONFIG.set(kw_search_config).unwrap();
     }
 
+    // log include_usage
+    info!(target: "stdout", "include_usage: {}", cli.include_usage);
+
     // create metadata for chat model
     let chat_metadata = GgmlMetadataBuilder::new(
         cli.model_name[0].clone(),
```
```diff
@@ -440,6 +446,7 @@ async fn main() -> Result<(), ServerError> {
     .with_json_schema(cli.json_schema)
     .enable_plugin_log(true)
     .enable_debug_log(plugin_debug)
+    .include_usage(cli.include_usage)
     .build();
 
     let chat_model_info = ModelConfig {
```
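The new `.include_usage(...)` call slots into the existing consuming-builder chain. A stripped-down sketch of that pattern, with illustrative stand-in types rather than the real `GgmlMetadataBuilder`:

```rust
/// Illustrative stand-in for the metadata the builder produces.
#[derive(Debug, Default)]
struct Metadata {
    include_usage: bool,
    // ... other fields elided
}

/// Consuming builder: each setter takes `self` by value and returns it,
/// which is what allows the `.a().b().build()` chaining seen in the diff.
#[derive(Default)]
struct MetadataBuilder {
    metadata: Metadata,
}

impl MetadataBuilder {
    fn include_usage(mut self, flag: bool) -> Self {
        self.metadata.include_usage = flag;
        self
    }

    fn build(self) -> Metadata {
        self.metadata
    }
}

fn main() {
    let metadata = MetadataBuilder::default()
        .include_usage(true)
        .build();
    println!("{metadata:?}");
}
```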
