Skip to content

Commit 0984ffa

Browse files
committed
feat(platform): ElevenLabs stream for TTS
1 parent 4e61e81 commit 0984ffa

File tree

5 files changed

+135
-6
lines changed

5 files changed

+135
-6
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
14+
use Symfony\AI\Platform\Message\Content\Text;
15+
16+
require_once dirname(__DIR__).'/bootstrap.php';

// Build the ElevenLabs platform from the API key exposed in the environment.
$platform = PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
);

// The "stream" option asks the bridge to return the audio as a stream of chunks.
$model = new ElevenLabs(options: [
    'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
    'stream' => true,
]);

$result = $platform->invoke($model, new Text('The first move is what sets everything in motion.'));

// Emit every chunk exactly as received: the chunks are binary audio data, so
// nothing (not even a line break) may be inserted between them, otherwise the
// output is corrupted when redirected to a file or piped to a player.
foreach ($result->asStream() as $chunk) {
    echo $chunk->getContent();
}

src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use Symfony\AI\Platform\Model;
1616
use Symfony\AI\Platform\ModelClientInterface;
1717
use Symfony\AI\Platform\Result\RawHttpResult;
18+
use Symfony\AI\Platform\Result\RawHttpStreamResult;
1819
use Symfony\AI\Platform\Result\RawResultInterface;
1920
use Symfony\Contracts\HttpClient\HttpClientInterface;
2021

@@ -42,7 +43,7 @@ public function request(Model $model, array|string $payload, array $options = []
4243
}
4344

4445
if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
45-
return $this->doSpeechToTextRequest($model, $payload, $options);
46+
return $this->doSpeechToTextRequest($model, $payload);
4647
}
4748

4849
$capabilities = $this->retrieveCapabilities($model);
@@ -56,9 +57,8 @@ public function request(Model $model, array|string $payload, array $options = []
5657

5758
/**
5859
* @param array<string|int, mixed> $payload
59-
* @param array<string, mixed> $options
6060
*/
61-
private function doSpeechToTextRequest(Model $model, array|string $payload, array $options): RawHttpResult
61+
private function doSpeechToTextRequest(Model $model, array|string $payload): RawHttpResult
6262
{
6363
return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/speech-to-text', $this->hostUrl), [
6464
'headers' => [
@@ -75,7 +75,7 @@ private function doSpeechToTextRequest(Model $model, array|string $payload, arra
7575
* @param array<string|int, mixed> $payload
7676
* @param array<string, mixed> $options
7777
*/
78-
private function doTextToSpeechRequest(Model $model, array|string $payload, array $options): RawHttpResult
78+
private function doTextToSpeechRequest(Model $model, array|string $payload, array $options): RawHttpResult|RawHttpStreamResult
7979
{
8080
if (!\array_key_exists('voice', $model->getOptions())) {
8181
throw new InvalidArgumentException('The voice option is required.');
@@ -86,6 +86,33 @@ private function doTextToSpeechRequest(Model $model, array|string $payload, arra
8686
}
8787

8888
$voice = $options['voice'] ??= $model->getOptions()['voice'];
89+
$stream = $options['stream'] ??= $model->getOptions()['stream'] ?? false;
90+
91+
if ($stream) {
92+
$streamSource = $this->httpClient->request('POST', \sprintf('%s/text-to-speech/%s/stream', $this->hostUrl, $voice), [
93+
'headers' => [
94+
'xi-api-key' => $this->apiKey,
95+
],
96+
'json' => [
97+
'text' => $payload['text'],
98+
'model_id' => $model->getName(),
99+
],
100+
]);
101+
102+
return new RawHttpStreamResult((function () use ($streamSource) {
103+
foreach ($this->httpClient->stream($streamSource) as $chunk) {
104+
if ($chunk->isFirst()) {
105+
continue;
106+
}
107+
108+
if ('' === $chunk->getContent()) {
109+
continue;
110+
}
111+
112+
yield $chunk;
113+
}
114+
})());
115+
}
89116

90117
return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice), [
91118
'headers' => [

src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use Symfony\AI\Platform\Result\BinaryResult;
1717
use Symfony\AI\Platform\Result\RawResultInterface;
1818
use Symfony\AI\Platform\Result\ResultInterface;
19+
use Symfony\AI\Platform\Result\StreamResult;
1920
use Symfony\AI\Platform\Result\TextResult;
2021
use Symfony\AI\Platform\ResultConverterInterface;
2122
use Symfony\Contracts\HttpClient\ResponseInterface;
@@ -36,6 +37,7 @@ public function convert(RawResultInterface $result, array $options = []): Result
3637
$response = $result->getObject();
3738

3839
return match (true) {
40+
$response instanceof \Generator => new StreamResult($response),
3941
str_contains($response->getInfo('url'), 'speech-to-text') => new TextResult($result->getData()['text']),
4042
str_contains($response->getInfo('url'), 'text-to-speech') => new BinaryResult($result->getObject()->getContent(), 'audio/mpeg'),
4143
default => throw new RuntimeException('Unsupported ElevenLabs response.'),
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Result;
13+
14+
use Symfony\Contracts\HttpClient\ChunkInterface;
15+
16+
/**
 * Raw result backed by a generator of HTTP chunks instead of a fully buffered response.
 *
 * The wrapped generator is forward-only: once it has been iterated (either by
 * a consumer of getObject() or by getData()), it cannot be iterated again.
 *
 * @author Guillaume Loulier <[email protected]>
 */
final readonly class RawHttpStreamResult implements RawResultInterface
{
    /**
     * @param \Generator $generator Generator expected to yield ChunkInterface instances
     */
    public function __construct(
        private \Generator $generator,
    ) {
    }

    /**
     * Drains the generator and returns the content of every chunk, in the
     * order the chunks were yielded, as a sequentially indexed array of strings.
     *
     * Calling this method consumes the generator returned by getObject().
     */
    public function getData(): array
    {
        return array_map(
            static fn (ChunkInterface $item): string => $item->getContent(),
            // Keys yielded by the generator are discarded on purpose: preserving
            // them would silently overwrite chunks sharing a key and would fail
            // on keys that are not valid array offsets.
            iterator_to_array($this->generator, false),
        );
    }

    /**
     * Returns the underlying generator so that callers can consume the chunks lazily.
     */
    public function getObject(): object
    {
        return $this->generator;
    }
}

src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;
1313

1414
use PHPUnit\Framework\Attributes\CoversClass;
15-
use PHPUnit\Framework\Attributes\Group;
1615
use PHPUnit\Framework\Attributes\UsesClass;
1716
use PHPUnit\Framework\TestCase;
1817
use Symfony\AI\Platform\Bridge\ElevenLabs\Contract\AudioNormalizer;
@@ -21,6 +20,7 @@
2120
use Symfony\AI\Platform\Exception\InvalidArgumentException;
2221
use Symfony\AI\Platform\Message\Content\Audio;
2322
use Symfony\AI\Platform\Model;
23+
use Symfony\AI\Platform\Result\RawHttpStreamResult;
2424
use Symfony\Component\HttpClient\MockHttpClient;
2525
use Symfony\Component\HttpClient\Response\JsonMockResponse;
2626
use Symfony\Component\HttpClient\Response\MockResponse;
@@ -30,6 +30,7 @@
3030
#[UsesClass(Model::class)]
3131
#[UsesClass(Audio::class)]
3232
#[UsesClass(AudioNormalizer::class)]
33+
#[UsesClass(RawHttpStreamResult::class)]
3334
final class ElevenLabsClientTest extends TestCase
3435
{
3536
public function testSupportsModel()
@@ -133,7 +134,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
133134
]), []);
134135
}
135136

136-
#[Group('foo')]
137137
public function testClientCanPerformTextToSpeechRequest()
138138
{
139139
$payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
@@ -162,4 +162,35 @@ public function testClientCanPerformTextToSpeechRequest()
162162

163163
$this->assertSame(2, $httpClient->getRequestsCount());
164164
}
165+
166+
/**
 * A text-to-speech request issued with the "stream" option enabled must be
 * returned as a RawHttpStreamResult, after exactly two HTTP requests were sent.
 */
public function testClientCanPerformTextToSpeechRequestAsStream()
{
    $audio = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');

    // First mocked response describes a model able to do text-to-speech,
    // second one carries the audio bytes.
    $modelsResponse = new JsonMockResponse([
        [
            'model_id' => ElevenLabs::ELEVEN_MULTILINGUAL_V2,
            'can_do_text_to_speech' => true,
        ],
    ]);
    $audioResponse = new MockResponse($audio->asBinary());

    $httpClient = new MockHttpClient([$modelsResponse, $audioResponse]);

    $client = new ElevenLabsClient($httpClient, 'https://api.elevenlabs.io/v1', 'my-api-key');

    $streamingModel = new ElevenLabs(options: [
        'voice' => 'Dslrhjl3ZpzrctukrQSN',
        'stream' => true,
    ]);

    $result = $client->request($streamingModel, ['text' => 'foo']);

    $this->assertInstanceOf(RawHttpStreamResult::class, $result);
    $this->assertSame(2, $httpClient->getRequestsCount());
}
165196
}

0 commit comments

Comments
 (0)