Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2c39b51
Add tts UI
0xi4o Aug 13, 2025
3364539
Add tts backend
0xi4o Aug 13, 2025
47dd721
Add description to eleven labs credentials
0xi4o Aug 13, 2025
eca7d17
Fix issue with fetching eleven labs voices
0xi4o Aug 14, 2025
1902701
Fix issue with text to speech tab not showing correct saved voice
0xi4o Aug 14, 2025
ef1b0dc
Add option to autoplay tts audio after prediction completes
0xi4o Aug 14, 2025
08f0d75
Fix crash issue when first changing tts provider
0xi4o Aug 18, 2025
b30e4a9
Set up streaming response for text to speech audio
0xi4o Aug 20, 2025
2247646
Update controllers - fix issue with sse client getting removed before…
0xi4o Aug 20, 2025
2b5554a
Use existing sse streamer to stream tts audio before sse client is re…
0xi4o Aug 20, 2025
aa357c8
Add tts sse to redis publisher
0xi4o Aug 20, 2025
ad44c7b
Fix issues with TTS - openai voices, streaming audio, rate limiting, …
0xi4o Aug 22, 2025
8de200e
Refactor
0xi4o Aug 22, 2025
55b6be2
Refactor TTS - fix issues with tts loading and stop audio buttons
0xi4o Aug 24, 2025
d42c096
Abort TTS SSE when clicking the stop button
0xi4o Aug 24, 2025
45917a3
Update SSE handling for TTS
0xi4o Aug 24, 2025
4aad293
Fix merge conflicts
0xi4o Aug 25, 2025
eb07a42
Fix issue with test voice feature
0xi4o Aug 25, 2025
2e33a00
Fix issue with tts voices not loading
0xi4o Aug 26, 2025
95a63fa
Update generate tts endpoint and its usage in internal chat
0xi4o Aug 26, 2025
c1553d1
Whitelist tts generate endpoint
0xi4o Aug 26, 2025
5ea7140
Fix merge conflicts
0xi4o Sep 16, 2025
123ab3c
Refactor Text-to-Speech Provider Selection and Enhance UI Components
HenryHengZJ Sep 16, 2025
72ccf2e
- Implemented stopAllTTS function calls to halt existing TTS audio be…
HenryHengZJ Sep 16, 2025
f64900b
Updated the condition for enabling TTS providers to exclude the 'none…
HenryHengZJ Sep 16, 2025
b514a82
Remove unnecessary code
0xi4o Sep 18, 2025
4cd1c4a
Add ability to abort audio streaming in TTS and release lock on chat …
0xi4o Sep 18, 2025
f2da015
Remove logger
0xi4o Sep 19, 2025
b5718c3
Fix tts audio not playing when clicking speaker button
0xi4o Sep 19, 2025
3198e78
update
0xi4o Sep 21, 2025
27da0b6
TTS abort controller
0xi4o Sep 22, 2025
d2a177d
Fix abort not working for TTS autoplay
0xi4o Sep 24, 2025
50b5bb9
Send metadata event when aborting autoplay TTS
0xi4o Sep 26, 2025
3f89d82
Fix merge conflicts
0xi4o Sep 26, 2025
3db27e2
Fix UI issue
0xi4o Sep 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,8 @@
}
]
]
},
"dependencies": {
"@elevenlabs/elevenlabs-js": "^2.8.0"
}
}
26 changes: 26 additions & 0 deletions packages/components/credentials/ElevenLabsApi.credential.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for the Eleven Labs API.
 *
 * Declares a single password-type input, `elevenLabsApiKey`, together with the
 * label, internal name, version and help text shown for this credential.
 */
class ElevenLabsApi implements INodeCredential {
    label: string = 'Eleven Labs API'
    name: string = 'elevenLabsApi'
    version: number = 1.0
    // Help text is HTML: it links to the Eleven Labs API-key settings page.
    description: string =
        'Sign up for a Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
    inputs: INodeParams[] = [
        {
            label: 'Eleven Labs API Key',
            name: 'elevenLabsApiKey',
            type: 'password'
        }
    ]
}

// Expose the credential class under the `credClass` key of the CommonJS module exports.
module.exports = { credClass: ElevenLabsApi }
1 change: 1 addition & 0 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@dqbd/tiktoken": "^1.0.21",
"@e2b/code-interpreter": "^1.5.1",
"@elastic/elasticsearch": "^8.9.0",
"@elevenlabs/elevenlabs-js": "^2.8.0",
"@flowiseai/nodevm": "^3.9.25",
"@getzep/zep-cloud": "~1.0.7",
"@getzep/zep-js": "^0.9.0",
Expand Down
3 changes: 3 additions & 0 deletions packages/components/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
streamAbortEvent(chatId: string): void
streamEndEvent(chatId: string): void
streamUsageMetadataEvent(chatId: string, data: any): void
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
streamTTSEndEvent(chatId: string, chatMessageId: string): void
}

export enum FollowUpPromptProvider {
Expand Down
1 change: 1 addition & 0 deletions packages/components/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
export * from './textToSpeech'
export * from './storageUtils'
export * from './handler'
export * from '../evaluation/EvaluationRunner'
Expand Down
240 changes: 240 additions & 0 deletions packages/components/src/textToSpeech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
import { ICommonObject } from './Interface'
import { getCredentialData } from './utils'
import OpenAI from 'openai'
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
import { Readable } from 'node:stream'
import type { ReadableStream } from 'node:stream/web'

/**
 * Identifiers of the supported text-to-speech providers.
 *
 * The values are compared against the `name` field of the TTS configuration in
 * `convertTextToSpeechStream` and against the `provider` argument of `getVoices`.
 * `as const` keeps each value as a string-literal type and marks the table as
 * readonly at the type level; the emitted JavaScript is unchanged.
 */
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
} as const

/**
 * Generates speech audio for `text` with the configured provider and streams it
 * to the caller through callbacks.
 *
 * The provider is selected by `textToSpeechConfig.name`. For both supported
 * providers `onStart('mp3')` is invoked before the provider request is issued,
 * and the resulting audio stream is handed to `processStreamWithRateLimit`,
 * which delivers it through `onChunk` and signals completion through `onEnd`.
 *
 * @param text - Text to synthesize.
 * @param textToSpeechConfig - TTS settings; this function reads `name`, `credentialId` and `voice`.
 * @param options - Passed through to `getCredentialData`.
 * @param abortController - Aborting its signal rejects the returned promise and is forwarded to the provider request.
 * @param onStart - Called once with the audio format before the provider is contacted.
 * @param onChunk - Called for every audio chunk.
 * @param onEnd - Called after the last chunk has been delivered.
 * @returns A promise that resolves once streaming has finished.
 * @throws Rejects with `Error('TTS generation aborted')` on abort, with an error when
 * no TTS configuration is given or the provider name is not supported, and with any
 * error raised by the credential lookup, the provider SDK or the audio stream.
 */
export const convertTextToSpeechStream = async (
    text: string,
    textToSpeechConfig: ICommonObject,
    options: ICommonObject,
    abortController: AbortController,
    onStart: (format: string) => void,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void
): Promise<void> => {
    return new Promise<void>((resolve, reject) => {
        const { signal } = abortController
        let streamDestroyed = false

        // Handle abort signal early
        if (signal.aborted) {
            reject(new Error('TTS generation aborted'))
            return
        }

        const handleAbort = () => {
            if (!streamDestroyed) {
                reject(new Error('TTS generation aborted'))
            }
        }

        // Settling through these wrappers detaches the abort listener so it does not
        // stay registered on the caller's signal after this call has completed.
        const settleResolve = () => {
            signal.removeEventListener('abort', handleAbort)
            resolve()
        }
        const settleReject = (error: unknown) => {
            signal.removeEventListener('abort', handleAbort)
            reject(error)
        }
        const markStreamDestroyed = () => {
            streamDestroyed = true
        }

        const processStream = async () => {
            try {
                if (!textToSpeechConfig) {
                    settleReject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.'))
                    return
                }

                const credentialId = textToSpeechConfig.credentialId as string
                const credentialData = await getCredentialData(credentialId ?? '', options)

                switch (textToSpeechConfig.name) {
                    case TextToSpeechType.OPENAI_TTS: {
                        onStart('mp3')

                        const openai = new OpenAI({
                            apiKey: credentialData.openAIApiKey
                        })

                        const response = await openai.audio.speech.create(
                            {
                                model: 'gpt-4o-mini-tts',
                                voice: (textToSpeechConfig.voice || 'alloy') as
                                    | 'alloy'
                                    | 'ash'
                                    | 'ballad'
                                    | 'coral'
                                    | 'echo'
                                    | 'fable'
                                    | 'nova'
                                    | 'onyx'
                                    | 'sage'
                                    | 'shimmer',
                                input: text,
                                response_format: 'mp3'
                            },
                            {
                                signal
                            }
                        )

                        // NOTE(review): assumes the SDK exposes a Node.js Readable as `response.body` — confirm
                        // against the installed openai version.
                        const stream = response.body as unknown as Readable
                        if (!stream) {
                            throw new Error('Failed to get response stream')
                        }

                        await processStreamWithRateLimit(
                            stream,
                            onChunk,
                            onEnd,
                            settleResolve,
                            settleReject,
                            640,
                            20,
                            abortController,
                            markStreamDestroyed
                        )
                        break
                    }

                    case TextToSpeechType.ELEVEN_LABS_TTS: {
                        onStart('mp3')

                        const client = new ElevenLabsClient({
                            apiKey: credentialData.elevenLabsApiKey
                        })

                        const response = await client.textToSpeech.stream(
                            textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM',
                            {
                                text: text,
                                modelId: 'eleven_multilingual_v2'
                            },
                            { abortSignal: signal }
                        )

                        const stream = Readable.fromWeb(response as unknown as ReadableStream)
                        if (!stream) {
                            throw new Error('Failed to get response stream')
                        }

                        await processStreamWithRateLimit(
                            stream,
                            onChunk,
                            onEnd,
                            settleResolve,
                            settleReject,
                            640,
                            40,
                            abortController,
                            markStreamDestroyed
                        )
                        break
                    }

                    default:
                        // Without this branch an unknown provider name would leave the promise pending forever.
                        settleReject(new Error(`Unsupported TTS provider: ${textToSpeechConfig.name}`))
                }
            } catch (error) {
                settleReject(error)
            }
        }

        // Handle abort signal
        signal.addEventListener('abort', handleAbort, { once: true })

        // processStream reports every failure through settleReject, so it never rejects itself.
        void processStream()
    })
}

/**
 * Re-chunks a readable audio stream into fixed-size pieces and delivers them at a
 * limited rate.
 *
 * Incoming data is appended to an internal buffer. A polling loop emits
 * `targetChunkSize`-byte chunks through `onChunk`, pausing `rateLimitMs` after each
 * one; once the stream has ended, the remaining bytes are emitted as a final
 * (possibly shorter) chunk, then `onEnd()` and `resolve()` are called.
 *
 * The outcome is reported exactly once, through `resolve` or `reject`. Note that the
 * returned promise itself resolves as soon as the loop has been started; it does not
 * wait for the stream to finish.
 *
 * @param stream - Source stream; destroyed when the operation is aborted.
 * @param onChunk - Receives every outgoing chunk.
 * @param onEnd - Called after the last chunk, before `resolve`.
 * @param resolve - Called once when all data has been delivered.
 * @param reject - Called once with the stream error, an error thrown by a callback,
 * or `Error('TTS generation aborted')`.
 * @param targetChunkSize - Size in bytes of each emitted chunk (default 640).
 * @param rateLimitMs - Pause in milliseconds after each full chunk and between polls (default 20).
 * @param abortController - Aborting its signal stops delivery and destroys the stream.
 * @param onStreamDestroy - Optional hook invoked when delivery is aborted.
 */
const processStreamWithRateLimit = async (
    stream: Readable,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void,
    resolve: () => void,
    reject: (error: any) => void,
    targetChunkSize: number = 640,
    rateLimitMs: number = 20,
    abortController: AbortController,
    onStreamDestroy?: () => void
) => {
    const { signal } = abortController

    let buffer: Buffer = Buffer.alloc(0)
    let isEnded = false
    // Becomes true as soon as resolve/reject has been reported; it stops the polling
    // loop so that a stream error or an abort cannot leave it spinning forever.
    let settled = false

    const fail = (error: unknown) => {
        if (settled) return
        settled = true
        signal.removeEventListener('abort', handleAbort)
        reject(error)
    }

    const succeed = () => {
        if (settled) return
        settled = true
        signal.removeEventListener('abort', handleAbort)
        try {
            onEnd()
        } catch (error) {
            reject(error)
            return
        }
        resolve()
    }

    function handleAbort(): void {
        if (settled) return
        if (!stream.destroyed) {
            stream.destroy()
        }
        onStreamDestroy?.()
        fail(new Error('TTS generation aborted'))
    }

    const processChunks = async () => {
        while (!settled && (!isEnded || buffer.length > 0)) {
            // Check if aborted (also covers a signal that was aborted before we subscribed)
            if (signal.aborted) {
                handleAbort()
                return
            }

            if (buffer.length >= targetChunkSize) {
                const chunk = buffer.subarray(0, targetChunkSize)
                buffer = buffer.subarray(targetChunkSize)
                onChunk(chunk)
                await sleep(rateLimitMs)
            } else if (isEnded) {
                // Stream finished: flush the remaining partial chunk (non-empty by the loop condition).
                onChunk(buffer)
                buffer = Buffer.alloc(0)
            } else {
                // Not enough data yet; poll again after the rate-limit interval.
                await sleep(rateLimitMs)
            }
        }

        succeed()
    }

    stream.on('data', (chunk) => {
        if (!settled && !signal.aborted) {
            buffer = Buffer.concat([buffer, Buffer.from(chunk)])
        }
    })

    stream.on('end', () => {
        isEnded = true
    })

    // The error listener stays attached for the stream's lifetime so that a late
    // 'error' event (e.g. after destroy) is ignored instead of being unhandled.
    stream.on('error', (error) => {
        fail(error)
    })

    // Handle abort signal
    signal.addEventListener('abort', handleAbort, { once: true })

    processChunks().catch(fail)
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds.
 */
const sleep = (ms: number): Promise<void> => {
    return new Promise((resolve) => setTimeout(resolve, ms))
}

/**
 * Lists the voices that can be selected for a text-to-speech provider.
 *
 * The credential is looked up first, for every provider. OpenAI voices come from a
 * fixed list; Eleven Labs voices are fetched with the Eleven Labs client.
 *
 * @param provider - Provider identifier, one of the `TextToSpeechType` values.
 * @param credentialId - Credential id handed to `getCredentialData` (empty string when nullish).
 * @param options - Passed through to `getCredentialData`.
 * @returns Voice entries with `id` and `name`; Eleven Labs entries also carry `category`.
 * @throws Error when `provider` is not a supported TTS provider.
 */
export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
    const credentials = await getCredentialData(credentialId ?? '', options)

    if (provider === TextToSpeechType.OPENAI_TTS) {
        // Display name is the voice id with its first letter upper-cased.
        const openAiVoiceIds = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'nova', 'onyx', 'sage', 'shimmer']
        return openAiVoiceIds.map((id) => ({ id, name: id.charAt(0).toUpperCase() + id.slice(1) }))
    }

    if (provider === TextToSpeechType.ELEVEN_LABS_TTS) {
        const elevenLabs = new ElevenLabsClient({
            apiKey: credentials.elevenLabsApiKey
        })

        const searchResult = await elevenLabs.voices.search({
            pageSize: 100,
            voiceType: 'default',
            category: 'premade'
        })

        return searchResult.voices.map((entry) => {
            return {
                id: entry.voiceId,
                name: entry.name,
                category: entry.category
            }
        })
    }

    throw new Error(`Unsupported TTS provider: ${provider}`)
}
1 change: 1 addition & 0 deletions packages/server/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export interface IChatFlow {
apikeyid?: string
analytic?: string
speechToText?: string
textToSpeech?: string
chatbotConfig?: string
followUpPrompts?: string
apiConfig?: string
Expand Down
Loading