FlowiseAI · 0xi4o · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 14, 2025
diff --git a/package.json b/package.json
@@ -127,5 +127,8 @@
                 }
             ]
         ]
+    },
+    "dependencies": {
+        "@elevenlabs/elevenlabs-js": "^2.8.0"
     }
 }
diff --git a/packages/components/credentials/ElevenLabsApi.credential.ts b/packages/components/credentials/ElevenLabsApi.credential.ts
@@ -0,0 +1,26 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+class ElevenLabsApi implements INodeCredential {
+    label: string
+    name: string
+    version: number
+    description: string
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Eleven Labs API'
+        this.name = 'elevenLabsApi'
+        this.version = 1.0
+        this.description =
+            'Sign up for a Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
+        this.inputs = [
+            {
+                label: 'Eleven Labs API Key',
+                name: 'elevenLabsApiKey',
+                type: 'password'
+            }
+        ]
+    }
+}
+
+module.exports = { credClass: ElevenLabsApi }
diff --git a/packages/components/package.json b/packages/components/package.json
@@ -33,6 +33,7 @@
         "@dqbd/tiktoken": "^1.0.21",
         "@e2b/code-interpreter": "^1.5.1",
         "@elastic/elasticsearch": "^8.9.0",
+        "@elevenlabs/elevenlabs-js": "^2.8.0",
         "@flowiseai/nodevm": "^3.9.25",
         "@getzep/zep-cloud": "~1.0.7",
         "@getzep/zep-js": "^0.9.0",

diff --git a/packages/components/src/Interface.ts b/packages/components/src/Interface.ts
@@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
     streamAbortEvent(chatId: string): void
     streamEndEvent(chatId: string): void
     streamUsageMetadataEvent(chatId: string, data: any): void
+    streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
+    streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
+    streamTTSEndEvent(chatId: string, chatMessageId: string): void
 }
 
 export enum FollowUpPromptProvider {

diff --git a/packages/components/src/index.ts b/packages/components/src/index.ts
@@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
 export * from './Interface'
 export * from './utils'
 export * from './speechToText'
+export * from './textToSpeech'
 export * from './storageUtils'
 export * from './handler'
 export * from '../evaluation/EvaluationRunner'

diff --git a/packages/components/src/textToSpeech.ts b/packages/components/src/textToSpeech.ts
@@ -0,0 +1,240 @@
+import { ICommonObject } from './Interface'
+import { getCredentialData } from './utils'
+import OpenAI from 'openai'
+import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
+import { Readable } from 'node:stream'
+import type { ReadableStream } from 'node:stream/web'
+
+const TextToSpeechType = {
+    OPENAI_TTS: 'openai',
+    ELEVEN_LABS_TTS: 'elevenlabs'
+}
+
+export const convertTextToSpeechStream = async (
+    text: string,
+    textToSpeechConfig: ICommonObject,
+    options: ICommonObject,
+    abortController: AbortController,
+    onStart: (format: string) => void,
+    onChunk: (chunk: Buffer) => void,
+    onEnd: () => void
+): Promise<void> => {
+    return new Promise<void>((resolve, reject) => {
+        let streamDestroyed = false
+
+        // Handle abort signal early
+        if (abortController.signal.aborted) {
+            reject(new Error('TTS generation aborted'))
+            return
+        }
+        const processStream = async () => {
+            try {
+                if (textToSpeechConfig) {
+                    const credentialId = textToSpeechConfig.credentialId as string
+                    const credentialData = await getCredentialData(credentialId ?? '', options)
+
+                    switch (textToSpeechConfig.name) {
+                        case TextToSpeechType.OPENAI_TTS: {
+                            onStart('mp3')
+
+                            const openai = new OpenAI({
+                                apiKey: credentialData.openAIApiKey
+                            })
+
+                            const response = await openai.audio.speech.create(
+                                {
+                                    model: 'gpt-4o-mini-tts',
+                                    voice: (textToSpeechConfig.voice || 'alloy') as
+                                        | 'alloy'
+                                        | 'ash'
+                                        | 'ballad'
+                                        | 'coral'
+                                        | 'echo'
+                                        | 'fable'
+                                        | 'nova'
+                                        | 'onyx'
+                                        | 'sage'
+                                        | 'shimmer',
+                                    input: text,
+                                    response_format: 'mp3'
+                                },
+                                {
+                                    signal: abortController.signal
+                                }
+                            )
+
+                            const stream = response.body as unknown as Readable
+                            if (!stream) {
+                                throw new Error('Failed to get response stream')
+                            }
+
+                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 20, abortController, () => {
+                                streamDestroyed = true
+                            })
+                            break
+                        }
+
+                        case TextToSpeechType.ELEVEN_LABS_TTS: {
+                            onStart('mp3')
+
+                            const client = new ElevenLabsClient({
+                                apiKey: credentialData.elevenLabsApiKey
+                            })
+
+                            const response = await client.textToSpeech.stream(
+                                textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM',
+                                {
+                                    text: text,
+                                    modelId: 'eleven_multilingual_v2'
+                                },
+                                { abortSignal: abortController.signal }
+                            )
+
+                            const stream = Readable.fromWeb(response as unknown as ReadableStream)
+                            if (!stream) {
+                                throw new Error('Failed to get response stream')
+                            }
+
+                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 40, abortController, () => {
+                                streamDestroyed = true
+                            })
+                            break
+                        }
+                    }
+                } else {
+                    reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.'))
+                }
+            } catch (error) {
+                reject(error)
+            }
+        }
+
+        // Handle abort signal
+        abortController.signal.addEventListener('abort', () => {
+            if (!streamDestroyed) {
+                reject(new Error('TTS generation aborted'))
+            }
+        })
+
+        processStream()
+    })
+}
+
+const processStreamWithRateLimit = async (
+    stream: Readable,
+    onChunk: (chunk: Buffer) => void,
+    onEnd: () => void,
+    resolve: () => void,
+    reject: (error: any) => void,
+    targetChunkSize: number = 640,
+    rateLimitMs: number = 20,
+    abortController: AbortController,
+    onStreamDestroy?: () => void
+) => {
+    const TARGET_CHUNK_SIZE = targetChunkSize
+    const RATE_LIMIT_MS = rateLimitMs
+
+    let buffer: Buffer = Buffer.alloc(0)
+    let isEnded = false
+
+    const processChunks = async () => {
+        while (!isEnded || buffer.length > 0) {
+            // Check if aborted
+            if (abortController.signal.aborted) {
+                if (!stream.destroyed) {
+                    stream.destroy()
+                }
+                onStreamDestroy?.()
+                reject(new Error('TTS generation aborted'))
+                return
+            }
+
+            if (buffer.length >= TARGET_CHUNK_SIZE) {
+                const chunk = buffer.subarray(0, TARGET_CHUNK_SIZE)
+                buffer = buffer.subarray(TARGET_CHUNK_SIZE)
+                onChunk(chunk)
+                await sleep(RATE_LIMIT_MS)
+            } else if (isEnded && buffer.length > 0) {
+                onChunk(buffer)
+                buffer = Buffer.alloc(0)
+            } else if (!isEnded) {
+                await sleep(RATE_LIMIT_MS)
+            } else {
+                break
+            }
+        }
+
+        onEnd()
+        resolve()
+    }
+
+    stream.on('data', (chunk) => {
+        if (!abortController.signal.aborted) {
+            buffer = Buffer.concat([buffer, Buffer.from(chunk)])
+        }
+    })
+
+    stream.on('end', () => {
+        isEnded = true
+    })
+
+    stream.on('error', (error) => {
+        reject(error)
+    })
+
+    // Handle abort signal
+    abortController.signal.addEventListener('abort', () => {
+        if (!stream.destroyed) {
+            stream.destroy()
+        }
+        onStreamDestroy?.()
+        reject(new Error('TTS generation aborted'))
+    })
+
+    processChunks().catch(reject)
+}
+
+const sleep = (ms: number): Promise<void> => {
+    return new Promise((resolve) => setTimeout(resolve, ms))
+}
+
+export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
+    const credentialData = await getCredentialData(credentialId ?? '', options)
+
+    switch (provider) {
+        case TextToSpeechType.OPENAI_TTS:
+            return [
+                { id: 'alloy', name: 'Alloy' },
+                { id: 'ash', name: 'Ash' },
+                { id: 'ballad', name: 'Ballad' },
+                { id: 'coral', name: 'Coral' },
+                { id: 'echo', name: 'Echo' },
+                { id: 'fable', name: 'Fable' },
+                { id: 'nova', name: 'Nova' },
+                { id: 'onyx', name: 'Onyx' },
+                { id: 'sage', name: 'Sage' },
+                { id: 'shimmer', name: 'Shimmer' }
+            ]
+
+        case TextToSpeechType.ELEVEN_LABS_TTS: {
+            const client = new ElevenLabsClient({
+                apiKey: credentialData.elevenLabsApiKey
+            })
+
+            const voices = await client.voices.search({
+                pageSize: 100,
+                voiceType: 'default',
+                category: 'premade'
+            })
+
+            return voices.voices.map((voice) => ({
+                id: voice.voiceId,
+                name: voice.name,
+                category: voice.category
+            }))
+        }
+
+        default:
+            throw new Error(`Unsupported TTS provider: ${provider}`)
+    }
+}
diff --git a/packages/server/src/Interface.ts b/packages/server/src/Interface.ts
@@ -64,6 +64,7 @@ export interface IChatFlow {
     apikeyid?: string
     analytic?: string
     speechToText?: string
+    textToSpeech?: string
     chatbotConfig?: string
     followUpPrompts?: string
     apiConfig?: string
-Original file line number
+Diff line change
@@ Expand Up / @@ -127,5 +127,8 @@ @@
                     }
                 ]
             ]
+        },
+        "dependencies": {
+            "@elevenlabs/elevenlabs-js": "^2.8.0"
         }
     }