Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Seed one row in `sys_params` for the `enable_visual_memory_merge` parameter.
-- The row is stored with value 'false' and value type 'boolean'; its remark text
-- translates to "whether to merge the visual analysis result into the LLM".
-- Creator/updater ids and both timestamps are fixed literals supplied by this script.
-- NOTE(review): presumably this is the flag the server reads under the same key
-- to decide whether camera results are fed back into the main LLM dialogue — confirm.
INSERT INTO sys_params (`id`, `param_code`, `param_value`, `value_type`, `param_type`, `remark`, `creator`,
`create_date`, `updater`, `update_date`)
VALUES (1944947962685796353, 'enable_visual_memory_merge', 'false', 'boolean', 1, '是否合并视觉解析结果到llm',
1929745310624604161, '2025-07-15 10:31:44', 1929745310624604161, '2025-07-15 10:31:44');
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,10 @@ databaseChangeLog:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202507081646.sql
# Change set 202507151050 (author: ganbin): runs the SQL script located at
# classpath:db/changelog/202507151050.sql, read with utf8 encoding.
# NOTE(review): the nesting indentation of this entry is not visible in this view;
# it should mirror the preceding changeSet entries in the changelog — confirm.
- changeSet:
id: 202507151050
author: ganbin
changes:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202507151050.sql
32 changes: 17 additions & 15 deletions main/xiaozhi-server/core/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ def change_system_prompt(self, prompt):
# 更新系统prompt至上下文
self.dialogue.update_system_message(self.prompt)

def chat(self, query, tool_call=False, depth=0):
def chat(self, query, tool_call=False, depth=0, enable_tts=True):
self.logger.bind(tag=TAG).info(f"大模型收到用户消息: {query}")
self.llm_finish_task = False

Expand All @@ -668,13 +668,14 @@ def chat(self, query, tool_call=False, depth=0):
# 为最顶层时新建会话ID和发送FIRST请求
if depth == 0:
self.sentence_id = str(uuid.uuid4().hex)
self.tts.tts_text_queue.put(
TTSMessageDTO(
sentence_id=self.sentence_id,
sentence_type=SentenceType.FIRST,
content_type=ContentType.ACTION,
if enable_tts:
self.tts.tts_text_queue.put(
TTSMessageDTO(
sentence_id=self.sentence_id,
sentence_type=SentenceType.FIRST,
content_type=ContentType.ACTION,
)
)
)

# Define intent functions
functions = None
Expand Down Expand Up @@ -756,14 +757,15 @@ def chat(self, query, tool_call=False, depth=0):
if content is not None and len(content) > 0:
if not tool_call_flag:
response_message.append(content)
self.tts.tts_text_queue.put(
TTSMessageDTO(
sentence_id=self.sentence_id,
sentence_type=SentenceType.MIDDLE,
content_type=ContentType.TEXT,
content_detail=content,
if enable_tts:
self.tts.tts_text_queue.put(
TTSMessageDTO(
sentence_id=self.sentence_id,
sentence_type=SentenceType.MIDDLE,
content_type=ContentType.TEXT,
content_detail=content,
)
)
)
# 处理function call
if tool_call_flag:
bHasError = False
Expand Down Expand Up @@ -817,7 +819,7 @@ def chat(self, query, tool_call=False, depth=0):
self.dialogue.put(
Message(role="assistant", content="".join(response_message))
)
if depth == 0:
if depth == 0 and enable_tts:
self.tts.tts_text_queue.put(
TTSMessageDTO(
sentence_id=self.sentence_id,
Expand Down
30 changes: 27 additions & 3 deletions main/xiaozhi-server/core/handle/intentHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from core.utils.dialogue import Message
from plugins_func.register import Action, ActionResponse
from core.providers.tts.dto.dto import TTSMessageDTO, SentenceType
from config.settings import load_config

TAG = __name__

Expand Down Expand Up @@ -128,9 +129,32 @@ def process_function_call():

if result:
if result.action == Action.RESPONSE: # 直接回复前端
text = result.response
if text is not None:
speak_txt(conn, text)
enable_visual_memory_merge = load_config().get('enable_visual_memory_merge')
if enable_visual_memory_merge and function_name == "self_camera_take_photo":
# 1. 直接 TTS 视觉模型的结果
text = result.response
if text is not None:
speak_txt(conn, text)
# 2. 结构化消息
vision_message = (
f"【系统观察】检测到用户提出 '{original_text}' 的拍照意图。视觉模型已完成处理,并返回以下环境分析报告:\n"
f"--------------------\n"
f"视觉分析结果:\n"
f"{result.response}\n"
f"--------------------\n"
f"请将上述视觉信息作为本次对话的重要记忆和后续提问的上下文依据。"
f"你不需要做任何回答,或简短恢复'ok'即可。"
)
# 3. 作为"用户消息"调用主LLM chat,仅用于记忆,不TTS
try:
conn.chat(vision_message, tool_call=False, enable_tts=False)
except Exception as e:
conn.logger.bind(tag=TAG).error(f"主LLM记忆同步失败: {e}")
return True
else:
text = result.response
if text is not None:
speak_txt(conn, text)
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
text = result.result
conn.dialogue.put(Message(role="tool", content=text))
Expand Down
11 changes: 11 additions & 0 deletions main/xiaozhi-server/core/providers/intent/intent_llm/intent_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ def get_intent_system_prompt(self, functions_list: str) -> str:
"1. 只返回JSON格式,不要包含任何其他文字\n"
'2. 如果没有找到匹配的函数,返回{"function_call": {"name": "continue_chat"}}\n'
"3. 确保返回的JSON格式正确,包含所有必要的字段\n"
"**补充:**"
"在处理用户意图时,请注意上下文的连续性,避免重复识别已完成的操作。例如:"
"以拍照为例,可能第一轮用户说:"
"user: 打开摄像头。(用户可能想让你看看摄像头拍到了什么。等等)\n"
"你正常识别为拍照意图:并做出来类似如下回复\n"
"llm: 回复照片的内容。\n"
"第二轮对话:"
"user: 照片里有几个人。\n"
"---------------------"
"此时不要再次将意图识别为“拍照”或“打开摄像头”,而是应当继续处理当前对话上下文中的“照片内容”,即:"
'返回: {"function_call": {"name": "continue_chat"}}\n'
"特殊说明:\n"
"- 当用户单次输入包含多个指令时(如'打开灯并且调高音量')\n"
"- 请返回多个function_call组成的JSON数组\n"
Expand Down