From 79e559c0a021a214b76999c9a0719d41d2760bd6 Mon Sep 17 00:00:00 2001
From: Ballen2270
Date: Fri, 11 Apr 2025 09:59:51 +0800
Subject: [PATCH 1/3] azure tts support

---
 main/xiaozhi-server/config.yaml               | 15 +++-
 .../core/providers/tts/azure.py               | 99 +++++++++++++++++++
 2 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 main/xiaozhi-server/core/providers/tts/azure.py

diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
index 9649244ba..4a81b477f 100644
--- a/main/xiaozhi-server/config.yaml
+++ b/main/xiaozhi-server/config.yaml
@@ -344,7 +344,7 @@ LLM:
     model_name: qwen2.5:3b-AWQ # 使用的小模型名称,用于意图识别
     base_url: http://localhost:9997 # Xinference服务地址
 TTS:
-  # 当前支持的type为edge、doubao,可自行适配
+  # 当前支持的type为edge、doubao、azure,可自行适配
   EdgeTTS:
     # 定义TTS API类型
     type: edge
@@ -365,6 +365,19 @@ TTS:
     appid: 你的火山引擎语音合成服务appid
     access_token: 你的火山引擎语音合成服务access_token
     cluster: volcano_tts
+  AzureTTS:
+    # 定义TTS API类型
+    type: azure
+    # Azure语音服务订阅密钥,可在Azure门户获取
+    # 创建资源地址:https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices
+    subscription_key: 你的Azure语音合成服务密钥
+    # 服务区域,如eastus、westus等
+    region: westus
+    # 语音名称,可选值参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts
+    voice_name: zh-CN-XiaochenMultilingualNeural
+    # 输出格式,可选值参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs
+    output_format: riff-16khz-16bit-mono-pcm
+    output_dir: tmp/
   CosyVoiceSiliconflow:
     type: siliconflow
     # 硅基流动TTS
diff --git a/main/xiaozhi-server/core/providers/tts/azure.py b/main/xiaozhi-server/core/providers/tts/azure.py
new file mode 100644
index 000000000..8978b0315
--- /dev/null
+++ b/main/xiaozhi-server/core/providers/tts/azure.py
@@ -0,0 +1,99 @@
+import os
+import asyncio
+import aiohttp
+import time
+from xml.sax.saxutils import escape, quoteattr
+from .base import TTSProviderBase
+
+
+class TTSProvider(TTSProviderBase):
+    """Text-to-speech provider that calls the Azure Speech REST API."""
+
+    def __init__(self, config, delete_audio_file):
+        """Read the Azure settings from ``config`` and derive the endpoint URLs."""
+        super().__init__(config, delete_audio_file)
+        self.subscription_key = config.get("subscription_key")
+        self.region = config.get("region", "eastus")
+        self.voice_name = config.get("voice_name", "zh-CN-YunxiNeural")
+        self.output_format = config.get("output_format", "audio-24khz-48kbitrate-mono-mp3")
+        self.api_url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        self.token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issueToken"
+        # Cached bearer token and the time.time() value after which it is refreshed.
+        self.access_token = None
+        self.token_expiry = 0
+
+    def generate_filename(self, extension=".wav"):
+        """Return a unique audio file path inside ``self.output_file``."""
+        # NOTE(review): self.output_file is not assigned in this class; presumably
+        # TTSProviderBase sets it from the output_dir setting - confirm.
+        return os.path.join(self.output_file, f"azure_tts_{os.urandom(4).hex()}{extension}")
+
+    async def _get_access_token(self):
+        """Return a bearer token, reusing the cached one until it expires.
+
+        Raises:
+            Exception: if the token endpoint answers with a non-200 status or
+                the HTTP request itself fails.
+        """
+        if self.access_token and time.time() < self.token_expiry:
+            return self.access_token
+
+        headers = {
+            "Ocp-Apim-Subscription-Key": self.subscription_key,
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(self.token_url, headers=headers) as response:
+                    body = await response.text()
+                    if response.status != 200:
+                        raise Exception(f"获取Azure TTS令牌失败: {response.status} - {body}")
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            # Only transport errors are wrapped; the status error above is raised as-is.
+            raise Exception(f"获取Azure TTS令牌异常: {e}") from e
+
+        self.access_token = body
+        self.token_expiry = time.time() + 540  # cache the token for 9 minutes (540 s)
+        return self.access_token
+
+    async def text_to_speak(self, text, output_file):
+        """Synthesize ``text`` with Azure TTS and write the audio to ``output_file``.
+
+        Raises:
+            Exception: if the service answers with a non-200 status or the
+                HTTP request itself fails.
+        """
+        token = await self._get_access_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": self.output_format,
+            "User-Agent": "xiaozhi-server",
+        }
+
+        # The text is XML-escaped so "&" or "<" in it cannot break the SSML document.
+        # NOTE(review): xml:lang is fixed to zh-CN to match the default voice - confirm.
+        ssml = (
+            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='zh-CN'>"
+            f"<voice name={quoteattr(self.voice_name)}>{escape(text)}</voice>"
+            "</speak>"
+        )
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.api_url,
+                    headers=headers,
+                    data=ssml.encode("utf-8"),
+                ) as response:
+                    if response.status != 200:
+                        error = await response.text()
+                        raise Exception(f"Azure TTS请求失败: {response.status} - 错误信息: {error}, 完整响应: {response}")
+                    audio = await response.read()
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            # Only transport errors are wrapped; the status error above is raised as-is.
+            raise Exception(f"Azure TTS请求异常: {e}") from e
+
+        with open(output_file, "wb") as f:
+            f.write(audio)

From c897d324145f44f1e6eb520f618289f4e2e7a4b5 Mon Sep 17 00:00:00 2001
From: Ballen2270
Date: Fri, 11 Apr 2025 10:57:01 +0800
Subject: [PATCH 2/3] add grok llm support

---
 main/xiaozhi-server/config.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
index 4a81b477f..6b3062801 100644
--- a/main/xiaozhi-server/config.yaml
+++ b/main/xiaozhi-server/config.yaml
@@ -314,6 +314,14 @@ LLM:
     bot_id: "你的bot_id"
     user_id: "你的user_id"
     personal_access_token: 你的coze个人令牌
+  XaiLLM:
+    # 定义LLM API类型
+    type: Xai
+    # Xai API,需要先在xAI平台创建API密钥并获取api_key
+    api_key: 你的Xai web key
+    model_name: "grok-3-fast-beta"
+    base_url: "https://api.x.ai/v1"
+    max_tokens: 131072
   LMStudioLLM:
     # 定义LLM API类型
     type: openai

From b42f9676f8c9cb626a110efbfb984d686de045f5 Mon Sep 17 00:00:00 2001
From: Ballen2270
Date: Mon, 14 Apr 2025 16:35:52 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=A8=A1=E7=89=88?=
 =?UTF-8?q?=E9=85=8D=E7=BD=AE=E4=B8=ADXaiLLM=E7=B1=BB=E5=9E=8B=E4=B8=8D?=
 =?UTF-8?q?=E6=AD=A3=E7=A1=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main/xiaozhi-server/config.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
index 6b3062801..c1e122a02 100644
--- a/main/xiaozhi-server/config.yaml
+++ b/main/xiaozhi-server/config.yaml
@@ -316,9 +316,8 @@ LLM:
     personal_access_token: 你的coze个人令牌
   XaiLLM:
     # 定义LLM API类型
-    type: Xai
-    # Xai API,需要先在xAI平台创建API密钥并获取api_key
-    api_key: 你的Xai web key
+    type: openai
+    api_key: 你的Xai web key # Xai API,需要先在xAI平台创建API密钥并获取api_key
     model_name: "grok-3-fast-beta"
     base_url: "https://api.x.ai/v1"
     max_tokens: 131072