diff --git a/docs/components/platform.rst b/docs/components/platform.rst index 400d0ce96..eb316a6c3 100644 --- a/docs/components/platform.rst +++ b/docs/components/platform.rst @@ -501,6 +501,9 @@ This allows fast and isolated testing of AI-powered features without relying on This requires `cURL` and the `ext-curl` extension to be installed. +Adding Voice +~~~~~~~~~~~~ + Code Examples ~~~~~~~~~~~~~ diff --git a/examples/voice/agent-eleven-labs-voice.php b/examples/voice/agent-eleven-labs-voice.php new file mode 100644 index 000000000..0aaf75d43 --- /dev/null +++ b/examples/voice/agent-eleven-labs-voice.php @@ -0,0 +1,45 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Symfony\AI\Agent\Agent; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; +use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory; +use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory; +use Symfony\AI\Platform\Message\Message; +use Symfony\AI\Platform\Message\MessageBag; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderListener; +use Symfony\Component\EventDispatcher\EventDispatcher; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$eventDispatcher = new EventDispatcher(); /* carries the ResultEvent that SpeechProviderListener subscribes to */ +$eventDispatcher->addSubscriber(new SpeechProviderListener([ + new ElevenLabsSpeechProvider(ElevenLabsPlatformFactory::create( + apiKey: env('ELEVEN_LABS_API_KEY'), + httpClient: http_client(), + ), new SpeechConfiguration( + 'eleven_multilingual_v2', + 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN) + 'eleven_multilingual_v2' + )), +])); + +$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); /* the agent model 'gpt-4o' and OPENAI_API_KEY belong to the OpenAI platform, not the ElevenLabs one; NOTE(review): confirm the factory's event dispatcher argument name */ + +$agent = new Agent($platform, 'gpt-4o'); +$answer = $agent->call(new MessageBag( + Message::ofUser('Hello'), +), [ + ElevenLabsSpeechProvider::ELEVEN_LABS_STT_MODEL => true, +]); + +echo $answer->getSpeech(); diff --git
a/src/agent/src/Agent.php b/src/agent/src/Agent.php index c51714bc2..7b505effb 100644 --- a/src/agent/src/Agent.php +++ b/src/agent/src/Agent.php @@ -69,7 +69,7 @@ public function getName(): string public function call(MessageBag $messages, array $options = []): ResultInterface { $input = new Input($this->getModel(), $messages, $options); - array_map(fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors); + array_map(static fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors); $model = $input->getModel(); $messages = $input->getMessageBag(); @@ -78,7 +78,7 @@ public function call(MessageBag $messages, array $options = []): ResultInterface $result = $this->platform->invoke($model, $messages, $options)->getResult(); $output = new Output($model, $result, $messages, $options); - array_map(fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors); + array_map(static fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors); return $output->getResult(); } diff --git a/src/agent/src/Output.php b/src/agent/src/Output.php index d069d47a7..a98206aa1 100644 --- a/src/agent/src/Output.php +++ b/src/agent/src/Output.php @@ -13,6 +13,7 @@ use Symfony\AI\Platform\Message\MessageBag; use Symfony\AI\Platform\Result\ResultInterface; +use Symfony\AI\Platform\Speech\Speech; /** * @author Christopher Hertel @@ -27,6 +28,7 @@ public function __construct( private ResultInterface $result, private readonly MessageBag $messageBag, private readonly array $options = [], + private ?Speech $speech = null, /* optional speech for this output; the only new property that is not readonly because setSpeech() reassigns it */ ) { } @@ -57,4 +59,14 @@ public function getOptions(): array { return $this->options; } + + public function setSpeech(?Speech $speech): void /* replaces the stored speech; passing null clears it */ + { + $this->speech = $speech; + } + + public function getSpeech(): ?Speech /* null when no speech was passed to the constructor or to setSpeech() */ + { + return $this->speech; + } } diff --git a/src/ai-bundle/config/options.php
b/src/ai-bundle/config/options.php index 9e09b5226..44e704ffd 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -960,6 +960,20 @@ ->end() ->end() ->end() + ->arrayNode('voice') + ->children() + ->arrayNode('eleven_labs') + ->useAttributeAsKey('name') + ->arrayPrototype() + ->children() + ->stringNode('tts_model')->defaultNull()->end() + ->stringNode('tts_voice')->defaultNull()->end() + ->stringNode('stt_model')->defaultNull()->end() + ->end() + ->end() + ->end() + ->end() + ->end() ->arrayNode('vectorizer') ->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays') ->useAttributeAsKey('name') diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index a15fafe21..35188e720 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -54,6 +54,7 @@ use Symfony\AI\Platform\Bridge\Cerebras\PlatformFactory as CerebrasPlatformFactory; use Symfony\AI\Platform\Bridge\DeepSeek\PlatformFactory as DeepSeekPlatformFactory; use Symfony\AI\Platform\Bridge\DockerModelRunner\PlatformFactory as DockerModelRunnerPlatformFactory; +use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider; use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory; use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory as GeminiPlatformFactory; use Symfony\AI\Platform\Bridge\HuggingFace\PlatformFactory as HuggingFacePlatformFactory; @@ -75,6 +76,8 @@ use Symfony\AI\Platform\Platform; use Symfony\AI\Platform\PlatformInterface; use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderInterface; use Symfony\AI\Store\Bridge\Azure\SearchStore as AzureSearchStore; use Symfony\AI\Store\Bridge\ChromaDb\Store as ChromaDbStore; use Symfony\AI\Store\Bridge\ClickHouse\Store as ClickHouseStore; @@ -248,6 +251,15 @@ public function loadExtension(array $config, ContainerConfigurator $container, C } } + foreach ($config['voice'] ?? [] as
$voiceProvider => $provider) { + $this->processSpeechConfig($voiceProvider, $provider, $builder); + } + + $speechProviders = array_keys($builder->findTaggedServiceIds('ai.speech_provider')); + if ([] === $speechProviders) { + $builder->removeDefinition('ai.speech_provider.listener'); + } + foreach ($config['vectorizer'] ?? [] as $vectorizerName => $vectorizer) { $this->processVectorizerConfig($vectorizerName, $vectorizer, $builder); } @@ -414,11 +426,9 @@ private function processPlatformConfig(string $type, array $platform, ContainerB } if ('eleven_labs' === $type) { - $platformId = 'ai.platform.eleven_labs'; $definition = (new Definition(Platform::class)) ->setFactory(ElevenLabsPlatformFactory::class.'::create') ->setLazy(true) - ->addTag('proxy', ['interface' => PlatformInterface::class]) ->setArguments([ $platform['api_key'], $platform['host'], @@ -427,9 +437,10 @@ private function processPlatformConfig(string $type, array $platform, ContainerB null, new Reference('event_dispatcher'), ]) + ->addTag('proxy', ['interface' => PlatformInterface::class]) ->addTag('ai.platform', ['name' => 'eleven_labs']); - $container->setDefinition($platformId, $definition); + $container->setDefinition('ai.platform.eleven_labs', $definition); return; } @@ -917,8 +928,9 @@ private function processAgentConfig(string $name, array $config, ContainerBuilde $agentDefinition ->setArgument(2, []) // placeholder until ProcessorCompilerPass process. ->setArgument(3, []) // placeholder until ProcessorCompilerPass process. - ->setArgument(4, $name) - ->setArgument(5, new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE)) + ->setArgument(4, []) // placeholder until VoiceProviderCompilerPass process.
+ ->setArgument(5, $name) + ->setArgument(6, new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE)) ; $container->setDefinition($agentId, $agentDefinition); @@ -1760,6 +1772,41 @@ private function processChatConfig(string $name, array $configuration, Container $container->registerAliasForArgument('ai.chat.'.$name, ChatInterface::class, $name); } + /** + * Registers one SpeechConfiguration and one ElevenLabsSpeechProvider service per entry configured under the "eleven_labs" voice provider; any other provider name is ignored. + * + * @param array $providers + */ + private function processSpeechConfig(string $name, array $providers, ContainerBuilder $container): void + { + if ('eleven_labs' === $name) { + foreach ($providers as $providerName => $config) { + $configurationId = 'ai.speech.eleven_labs.configuration.'.$providerName; /* one id per entry so several entries do not overwrite each other */ + $configurationDefinition = new Definition(SpeechConfiguration::class); + $configurationDefinition + ->setArguments([ + $config['tts_model'], + $config['tts_voice'], + $config['stt_model'], + ]); + + $container->setDefinition($configurationId, $configurationDefinition); + + $definition = new Definition(ElevenLabsSpeechProvider::class); + $definition + ->setLazy(true) + ->setArguments([ + new Reference('ai.platform.eleven_labs'), + new Reference($configurationId), + ]) + ->addTag('proxy', ['interface' => SpeechProviderInterface::class]) + ->addTag('ai.speech_provider'); /* no kernel.event_subscriber tag: the provider only implements SpeechProviderInterface */ + + $container->setDefinition('ai.speech.eleven_labs.'.$providerName, $definition); + } + } + } + /** * @param array $config */ diff --git a/src/ai-bundle/src/DependencyInjection/VoiceProviderCompilerPass.php b/src/ai-bundle/src/DependencyInjection/VoiceProviderCompilerPass.php new file mode 100644 index 000000000..48b87a8ae --- /dev/null +++ b/src/ai-bundle/src/DependencyInjection/VoiceProviderCompilerPass.php @@ -0,0 +1,26 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
+ */ + +namespace Symfony\AI\AiBundle\DependencyInjection; + +use Symfony\Component\DependencyInjection\Compiler\CompilerPassInterface; +use Symfony\Component\DependencyInjection\ContainerBuilder; + +/** + * @author Guillaume Loulier + */ +final class VoiceProviderCompilerPass implements CompilerPassInterface +{ + public function process(ContainerBuilder $container): void + { + $voiceProviders = $container->findTaggedServiceIds('ai.speech_provider'); /* the tag AiBundle puts on speech providers; NOTE(review): the collected ids are not used yet */ + } +} diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php new file mode 100644 index 000000000..6582fedd4 --- /dev/null +++ b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechProvider.php @@ -0,0 +1,46 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Bridge\ElevenLabs; + +use Symfony\AI\Platform\PlatformInterface; +use Symfony\AI\Platform\Result\DeferredResult; +use Symfony\AI\Platform\Speech\Speech; +use Symfony\AI\Platform\Speech\SpeechConfiguration; +use Symfony\AI\Platform\Speech\SpeechProviderInterface; + +/** Speech provider that sends a result's text to the injected platform with the configured ttsModel and attaches the outcome to that result as a Speech. + * @author Guillaume Loulier + */ +final class ElevenLabsSpeechProvider implements SpeechProviderInterface +{ + public const ELEVEN_LABS_STT_MODEL = 'eleven_labs.enable_tts'; /* option key read by support(); NOTE(review): the name says STT while the value says TTS */ + + public function __construct( + private readonly PlatformInterface $platform, /* interface type so the lazy proxy registered for the platform service is accepted */ + private readonly SpeechConfiguration $speechConfiguration, + ) { + } + + public function addSpeech(DeferredResult $result, array $options): void /* callers are expected to check support() first */ + { + unset($options[self::ELEVEN_LABS_STT_MODEL]); /* the opt-in flag is not forwarded to the platform */ + + $speechResult = $this->platform->invoke($this->speechConfiguration->ttsModel, $result->asText(), $options); + + $result->setSpeech(new Speech($result->asText(), $speechResult)); + } + + public function support(DeferredResult $result, array $options): bool /* true only when a ttsModel is configured and the call opted in through ELEVEN_LABS_STT_MODEL */ + { + return null !== $this->speechConfiguration->ttsModel && (bool) ($options[self::ELEVEN_LABS_STT_MODEL] ??
false); + } +} diff --git a/src/platform/src/Result/BaseResult.php b/src/platform/src/Result/BaseResult.php index fb447594d..69e68380d 100644 --- a/src/platform/src/Result/BaseResult.php +++ b/src/platform/src/Result/BaseResult.php @@ -12,6 +12,7 @@ namespace Symfony\AI\Platform\Result; use Symfony\AI\Platform\Metadata\MetadataAwareTrait; +use Symfony\AI\Platform\Speech\SpeechAwareTrait; /** * Base result of converted result classes. @@ -22,4 +23,5 @@ abstract class BaseResult implements ResultInterface { use MetadataAwareTrait; use RawResultAwareTrait; + use SpeechAwareTrait; } diff --git a/src/platform/src/Result/DeferredResult.php b/src/platform/src/Result/DeferredResult.php index ea9ce05cd..f19629070 100644 --- a/src/platform/src/Result/DeferredResult.php +++ b/src/platform/src/Result/DeferredResult.php @@ -15,6 +15,7 @@ use Symfony\AI\Platform\Exception\UnexpectedResultTypeException; use Symfony\AI\Platform\Metadata\MetadataAwareTrait; use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\AI\Platform\Speech\SpeechAwareTrait; use Symfony\AI\Platform\Vector\Vector; /** @@ -23,6 +24,7 @@ final class DeferredResult { use MetadataAwareTrait; + use SpeechAwareTrait; private bool $isConverted = false; private ResultInterface $convertedResult; @@ -132,6 +134,14 @@ public function asToolCalls(): array return $this->as(ToolCallResult::class)->getContent(); } + /** + * @throws ExceptionInterface + */ + public function asVoice(): string + { + return $this->as(VoiceResult::class)->getContent(); + } + /** * @param class-string $type * diff --git a/src/platform/src/Result/ResultInterface.php b/src/platform/src/Result/ResultInterface.php index 63ac7a435..546ce7af4 100644 --- a/src/platform/src/Result/ResultInterface.php +++ b/src/platform/src/Result/ResultInterface.php @@ -33,4 +33,6 @@ public function getRawResult(): ?RawResultInterface; * @throws RawResultAlreadySetException if the result is tried to be set more than once */ public function
setRawResult(RawResultInterface $rawResult): void; + + public function getSpeech(): string; } diff --git a/src/platform/src/Speech/Speech.php b/src/platform/src/Speech/Speech.php new file mode 100644 index 000000000..4ec2886d5 --- /dev/null +++ b/src/platform/src/Speech/Speech.php @@ -0,0 +1,26 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Result\DeferredResult; + +/** Pairs a payload with the DeferredResult produced for it. + * @author Guillaume Loulier + */ +final class Speech +{ + public function __construct( + public readonly string|array $payload, + public DeferredResult $result, + ) { + } +} diff --git a/src/platform/src/Speech/SpeechAwareTrait.php b/src/platform/src/Speech/SpeechAwareTrait.php new file mode 100644 index 000000000..769da4427 --- /dev/null +++ b/src/platform/src/Speech/SpeechAwareTrait.php @@ -0,0 +1,30 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +/** Stores an optional Speech and exposes its result in binary form. + * @author Guillaume Loulier + */ +trait SpeechAwareTrait +{ + private ?Speech $speech = null; /* null until setSpeech() is called, so reading it never hits an uninitialized typed property */ + + public function setSpeech(Speech $speech): void + { + $this->speech = $speech; + } + + public function getSpeech(): string /* throws \LogicException when no speech has been set */ + { + return $this->speech?->result->asBinary() ?? throw new \LogicException('No speech has been set on this result.'); + } +} diff --git a/src/platform/src/Speech/SpeechConfiguration.php b/src/platform/src/Speech/SpeechConfiguration.php new file mode 100644 index 000000000..5f8b6c502 --- /dev/null +++ b/src/platform/src/Speech/SpeechConfiguration.php @@ -0,0 +1,25 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
+ */ + +namespace Symfony\AI\Platform\Speech; + +/** Carries three optional string settings, ttsModel, ttsVoice and sttModel, each defaulting to null. + * @author Guillaume Loulier + */ +final class SpeechConfiguration +{ + public function __construct( + public readonly ?string $ttsModel = null, + public readonly ?string $ttsVoice = null, + public readonly ?string $sttModel = null, + ) { + } +} diff --git a/src/platform/src/Speech/SpeechListenerInterface.php b/src/platform/src/Speech/SpeechListenerInterface.php new file mode 100644 index 000000000..eb3d3cca7 --- /dev/null +++ b/src/platform/src/Speech/SpeechListenerInterface.php @@ -0,0 +1,22 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +/** Contract with listen(), which returns a Speech for an input and options, and supportListening(), which answers with a bool for the same arguments. + * @author Guillaume Loulier + */ +interface SpeechListenerInterface +{ + public function listen(array|string|object $input, array $options): Speech; + + public function supportListening(array|string|object $input, array $options): bool; +} diff --git a/src/platform/src/Speech/SpeechProviderInterface.php b/src/platform/src/Speech/SpeechProviderInterface.php new file mode 100644 index 000000000..24a9a4824 --- /dev/null +++ b/src/platform/src/Speech/SpeechProviderInterface.php @@ -0,0 +1,24 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
+ */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Result\DeferredResult; + +/** Contract for services that can attach speech to a DeferredResult. + * @author Guillaume Loulier + */ +interface SpeechProviderInterface +{ + public function addSpeech(DeferredResult $result, array $options): void; + + public function support(DeferredResult $result, array $options): bool; +} diff --git a/src/platform/src/Speech/SpeechProviderListener.php b/src/platform/src/Speech/SpeechProviderListener.php new file mode 100644 index 000000000..96a1f7da1 --- /dev/null +++ b/src/platform/src/Speech/SpeechProviderListener.php @@ -0,0 +1,48 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Speech; + +use Symfony\AI\Platform\Event\ResultEvent; +use Symfony\Component\EventDispatcher\EventSubscriberInterface; + +/** Forwards every ResultEvent to the speech providers that support it. + * @author Guillaume Loulier + */ +final class SpeechProviderListener implements EventSubscriberInterface +{ + /** + * @param SpeechProviderInterface[] $speechProviders + */ + public function __construct( + private readonly iterable $speechProviders, + ) { + } + + /** Subscribes processResult() to ResultEvent. */ + public static function getSubscribedEvents(): array + { + return [ResultEvent::class => 'processResult']; + } + + /** Lets each provider that supports the event's deferred result and options add speech to that result. */ + public function processResult(ResultEvent $event): void + { + $deferredResult = $event->getDeferredResult(); + $callOptions = $event->getOptions(); + + foreach ($this->speechProviders as $provider) { + if ($provider->support($deferredResult, $callOptions)) { + $provider->addSpeech($deferredResult, $callOptions); + } + } + } +} diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsVoiceProviderTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsVoiceProviderTest.php new file mode 100644 index 000000000..ac5c74dd5 --- /dev/null +++ b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsVoiceProviderTest.php @@ -0,0 +1,18 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that
was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs; + +use PHPUnit\Framework\TestCase; + +final class ElevenLabsVoiceProviderTest extends TestCase /* NOTE(review): no test methods yet, and the name says VoiceProvider while the bridge class is ElevenLabsSpeechProvider */ +{ +} diff --git a/src/platform/tests/Speech/SpeechProviderListenerTest.php b/src/platform/tests/Speech/SpeechProviderListenerTest.php new file mode 100644 index 000000000..1fa01b222 --- /dev/null +++ b/src/platform/tests/Speech/SpeechProviderListenerTest.php @@ -0,0 +1,28 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Tests\Speech; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Event\ResultEvent; +use Symfony\AI\Platform\Speech\SpeechProviderListener; + +final class SpeechProviderListenerTest extends TestCase +{ + /** The listener must map ResultEvent to its processResult() method. */ + public function testListenerIsConfigured() + { + $subscribedEvents = SpeechProviderListener::getSubscribedEvents(); + + $this->assertArrayHasKey(ResultEvent::class, $subscribedEvents); + $this->assertSame('processResult', $subscribedEvents[ResultEvent::class]); + } +}