Skip to content

Commit b4b9214

Browse files
authored
chore(sync): prepare for 0.5.2 (#219)
2 parents 428b62e + e20d82a commit b4b9214

File tree

7 files changed

+34
-14
lines changed

7 files changed

+34
-14
lines changed

.changeset/eight-mugs-glow.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
+---
+"@livekit/agents-plugin-openai": patch
+---
+
+fix(tts): add missing crypto import to OpenAI tts

.changeset/khaki-ties-design.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
+---
+"@livekit/agents": patch
+---
+
+fix(pipeline): add transcription for AGENT_SPEECH_COMMITTED

.changeset/moody-poems-juggle.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
+---
+"@livekit/agents-plugin-openai": patch
+---
+
+groq: add support for llama 3.3 70b

agents/src/pipeline/agent_output.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export class SynthesisHandle {
1313
static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
1414

1515
#speechId: string;
16+
text?: string;
1617
ttsSource: SpeechSource;
1718
#agentPlayout: AgentPlayout;
1819
tts: TTS;
@@ -97,7 +98,7 @@ export class AgentOutput {
9798
// eslint-disable-next-line @typescript-eslint/no-unused-vars
9899
return new CancellablePromise(async (resolve, _, onCancel) => {
99100
const ttsSource = await handle.ttsSource;
100-
let task: CancellablePromise<void>;
101+
let task: CancellablePromise<string>;
101102
if (typeof ttsSource === 'string') {
102103
task = stringSynthesisTask(ttsSource, handle);
103104
} else {
@@ -113,6 +114,10 @@ export class AgentOutput {
113114
} finally {
114115
if (handle.intFut.done) {
115116
gracefullyCancel(task);
117+
} else {
118+
task.then((text) => {
119+
handle.text = text;
120+
});
116121
}
117122
}
118123

@@ -121,9 +126,9 @@ export class AgentOutput {
121126
}
122127
}
123128

124-
const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {
129+
const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {
125130
// eslint-disable-next-line @typescript-eslint/no-unused-vars
126-
return new CancellablePromise<void>(async (resolve, _, onCancel) => {
131+
return new CancellablePromise(async (resolve, _, onCancel) => {
127132
let cancelled = false;
128133
onCancel(() => {
129134
cancelled = true;
@@ -141,16 +146,17 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
141146
}
142147
handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
143148

144-
resolve();
149+
resolve(text);
145150
});
146151
};
147152

148153
const streamSynthesisTask = (
149154
stream: AsyncIterable<string>,
150155
handle: SynthesisHandle,
151-
): CancellablePromise<void> => {
156+
): CancellablePromise<string> => {
152157
// eslint-disable-next-line @typescript-eslint/no-unused-vars
153-
return new CancellablePromise<void>(async (resolve, _, onCancel) => {
158+
return new CancellablePromise(async (resolve, _, onCancel) => {
159+
let fullText = '';
154160
let cancelled = false;
155161
onCancel(() => {
156162
cancelled = true;
@@ -170,12 +176,13 @@ const streamSynthesisTask = (
170176
readGeneratedAudio();
171177

172178
for await (const text of stream) {
179+
fullText += text;
173180
if (cancelled) break;
174181
ttsStream.pushText(text);
175182
}
176183
ttsStream.flush();
177184
ttsStream.endInput();
178185

179-
resolve();
186+
resolve(fullText);
180187
});
181188
};

agents/src/pipeline/pipeline_agent.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
520520
// add it to the chat context for this new reply synthesis
521521
copiedCtx.messages.push(
522522
ChatMessage.create({
523-
// TODO(nbsp): uhhh unsure where to get the played text here
524-
// text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
523+
text: playingSpeech.synthesisHandle.text,
525524
role: ChatRole.ASSISTANT,
526525
}),
527526
);
@@ -620,8 +619,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
620619
}
621620
commitUserQuestionIfNeeded();
622621

623-
// TODO(nbsp): what goes here
624-
let collectedText = '';
622+
const collectedText = handle.synthesisHandle.text;
625623
const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;
626624
const extraToolsMessages = []; // additional messages from the functions to add to the context
627625
let interrupted = handle.interrupted;
@@ -685,8 +683,6 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
685683
const playHandle = answerSynthesis.play();
686684
await playHandle.join().await;
687685

688-
// TODO(nbsp): what text goes here
689-
collectedText = '';
690686
interrupted = answerSynthesis.interrupted;
691687
newFunctionCalls = answerLLMStream.functionCalls;
692688

plugins/openai/src/models.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export type GroqChatModels =
5353
| 'llama-3.1-405b-reasoning'
5454
| 'llama-3.1-70b-versatile'
5555
| 'llama-3.1-8b-instant'
56+
| 'llama-3.3-70b-versatile'
5657
| 'llama3-groq-70b-8192-tool-use-preview'
5758
| 'llama3-groq-8b-8192-tool-use-preview'
5859
| 'llama-guard-3-8b'

plugins/openai/src/tts.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44
import { AudioByteStream, tts } from '@livekit/agents';
5+
import { randomUUID } from 'crypto';
56
import { OpenAI } from 'openai';
67
import type { TTSModels, TTSVoices } from './models.js';
78

@@ -81,7 +82,7 @@ export class ChunkedStream extends tts.ChunkedStream {
8182

8283
async #run(stream: Promise<Response>) {
8384
const buffer = await stream.then((r) => r.arrayBuffer());
84-
const requestId = crypto.randomUUID();
85+
const requestId = randomUUID();
8586
const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
8687
const frames = audioByteStream.write(buffer);
8788

0 commit comments

Comments
 (0)