Skip to content

Commit d0ee93c

Browse files
committed
refactor(aibundle): improvements on vector stores
1 parent 09215b4 commit d0ee93c

File tree

4 files changed: +2239 / -483 lines changed

src/ai-bundle/composer.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,13 @@
2727
"symfony/string": "^7.3|^8.0"
2828
},
2929
"require-dev": {
30+
"codewithkyrian/chromadb-php": "^0.2.1 || ^0.3 || ^0.4",
3031
"google/auth": "^1.47",
3132
"mongodb/mongodb": "^1.21 || ^2.0",
3233
"phpstan/phpstan": "^2.1",
3334
"phpstan/phpstan-strict-rules": "^2.0",
3435
"phpunit/phpunit": "^11.5",
36+
"probots-io/pinecone-php": "^1.0",
3537
"symfony/expression-language": "^7.3|^8.0",
3638
"symfony/security-core": "^7.3|^8.0",
3739
"symfony/translation": "^7.3|^8.0"

src/ai-bundle/config/options.php

Lines changed: 87 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,7 @@
442442
->treatNullLike(['enabled' => true])
443443
->beforeNormalization()
444444
->ifArray()
445-
->then(function (array $v) {
445+
->then(function (array $v): array {
446446
return [
447447
'enabled' => $v['enabled'] ?? true,
448448
'services' => $v['services'] ?? $v,
@@ -564,7 +564,7 @@
564564
->stringNode('table')->isRequired()->cannotBeEmpty()->end()
565565
->end()
566566
->validate()
567-
->ifTrue(static fn ($v) => !isset($v['dsn']) && !isset($v['http_client']))
567+
->ifTrue(static fn ($v): bool => !isset($v['dsn']) && !isset($v['http_client']))
568568
->thenInvalid('Either "dsn" or "http_client" must be configured.')
569569
->end()
570570
->end()
@@ -576,8 +576,11 @@
576576
->stringNode('account_id')->cannotBeEmpty()->end()
577577
->stringNode('api_key')->cannotBeEmpty()->end()
578578
->stringNode('index_name')->cannotBeEmpty()->end()
579-
->integerNode('dimensions')->end()
580-
->stringNode('metric')->end()
579+
->integerNode('dimensions')->isRequired()->end()
580+
->stringNode('metric')
581+
->cannotBeEmpty()
582+
->defaultValue('cosine')
583+
->end()
581584
->stringNode('endpoint_url')->end()
582585
->end()
583586
->end()
@@ -588,24 +591,40 @@
588591
->children()
589592
->stringNode('endpoint')->cannotBeEmpty()->end()
590593
->stringNode('table')->cannotBeEmpty()->end()
591-
->stringNode('field')->end()
592-
->stringNode('type')->end()
593-
->stringNode('similarity')->end()
594-
->integerNode('dimensions')->end()
594+
->stringNode('field')->cannotBeEmpty()->end()
595+
->stringNode('type')->cannotBeEmpty()->end()
596+
->stringNode('similarity')->cannotBeEmpty()->end()
597+
->integerNode('dimensions')->isRequired()->end()
595598
->stringNode('quantization')->end()
596599
->end()
597600
->end()
598601
->end()
602+
->arrayNode('mariadb')
603+
->useAttributeAsKey('name')
604+
->arrayPrototype()
605+
->children()
606+
->stringNode('connection')->cannotBeEmpty()->end()
607+
->stringNode('table_name')->cannotBeEmpty()->end()
608+
->stringNode('index_name')->cannotBeEmpty()->end()
609+
->stringNode('vector_field_name')->cannotBeEmpty()->end()
610+
->arrayNode('setup_options')
611+
->children()
612+
->integerNode('dimensions')->end()
613+
->end()
614+
->end()
615+
->end()
616+
->end()
617+
->end()
599618
->arrayNode('meilisearch')
600619
->useAttributeAsKey('name')
601620
->arrayPrototype()
602621
->children()
603622
->stringNode('endpoint')->cannotBeEmpty()->end()
604623
->stringNode('api_key')->cannotBeEmpty()->end()
605624
->stringNode('index_name')->cannotBeEmpty()->end()
606-
->stringNode('embedder')->end()
607-
->stringNode('vector_field')->end()
608-
->integerNode('dimensions')->end()
625+
->stringNode('embedder')->cannotBeEmpty()->end()
626+
->stringNode('vector_field')->cannotBeEmpty()->end()
627+
->integerNode('dimensions')->isRequired()->end()
609628
->floatNode('semantic_ratio')
610629
->info('The ratio between semantic (vector) and full-text search (0.0 to 1.0). Default: 1.0 (100% semantic)')
611630
->defaultValue(1.0)
@@ -623,22 +642,6 @@
623642
->end()
624643
->end()
625644
->end()
626-
->arrayNode('mariadb')
627-
->useAttributeAsKey('name')
628-
->arrayPrototype()
629-
->children()
630-
->stringNode('connection')->cannotBeEmpty()->end()
631-
->stringNode('table_name')->cannotBeEmpty()->end()
632-
->stringNode('index_name')->cannotBeEmpty()->end()
633-
->stringNode('vector_field_name')->cannotBeEmpty()->end()
634-
->arrayNode('setup_options')
635-
->children()
636-
->integerNode('dimensions')->end()
637-
->end()
638-
->end()
639-
->end()
640-
->end()
641-
->end()
642645
->arrayNode('milvus')
643646
->useAttributeAsKey('name')
644647
->arrayPrototype()
@@ -647,8 +650,8 @@
647650
->stringNode('api_key')->isRequired()->end()
648651
->stringNode('database')->isRequired()->end()
649652
->stringNode('collection')->isRequired()->end()
650-
->stringNode('vector_field')->end()
651-
->integerNode('dimensions')->end()
653+
->stringNode('vector_field')->isRequired()->end()
654+
->integerNode('dimensions')->isRequired()->end()
652655
->stringNode('metric_type')->end()
653656
->end()
654657
->end()
@@ -664,7 +667,7 @@
664667
->stringNode('database')->isRequired()->end()
665668
->stringNode('collection')->isRequired()->end()
666669
->stringNode('index_name')->isRequired()->end()
667-
->stringNode('vector_field')->end()
670+
->stringNode('vector_field')->isRequired()->end()
668671
->booleanNode('bulk_write')->end()
669672
->end()
670673
->end()
@@ -679,9 +682,9 @@
679682
->stringNode('database')->cannotBeEmpty()->end()
680683
->stringNode('vector_index_name')->cannotBeEmpty()->end()
681684
->stringNode('node_name')->cannotBeEmpty()->end()
682-
->stringNode('vector_field')->end()
683-
->integerNode('dimensions')->end()
684-
->stringNode('distance')->end()
685+
->stringNode('vector_field')->isRequired()->end()
686+
->integerNode('dimensions')->isRequired()->end()
687+
->stringNode('distance')->isRequired()->end()
685688
->booleanNode('quantization')->end()
686689
->end()
687690
->end()
@@ -696,21 +699,49 @@
696699
->end()
697700
->stringNode('namespace')->end()
698701
->arrayNode('filter')
699-
->scalarPrototype()->end()
702+
->scalarPrototype()
703+
->defaultValue([])
704+
->end()
700705
->end()
701706
->integerNode('top_k')->end()
702707
->end()
703708
->end()
704709
->end()
710+
->arrayNode('postgres')
711+
->useAttributeAsKey('name')
712+
->arrayPrototype()
713+
->children()
714+
->stringNode('dsn')->cannotBeEmpty()->end()
715+
->stringNode('username')->end()
716+
->stringNode('password')->end()
717+
->stringNode('table_name')->isRequired()->end()
718+
->stringNode('vector_field')->isRequired()->end()
719+
->enumNode('distance')
720+
->info('Distance metric to use for vector similarity search')
721+
->enumFqcn(PostgresDistance::class)
722+
->defaultValue(PostgresDistance::L2)
723+
->end()
724+
->stringNode('dbal_connection')->cannotBeEmpty()->end()
725+
->end()
726+
->validate()
727+
->ifTrue(static fn (array $v): bool => !isset($v['dsn']) && !isset($v['dbal_connection']))
728+
->thenInvalid('Either "dsn" or "dbal_connection" must be configured.')
729+
->end()
730+
->validate()
731+
->ifTrue(static fn (array $v): bool => isset($v['dsn'], $v['dbal_connection']))
732+
->thenInvalid('Either "dsn" or "dbal_connection" can be configured, but not both.')
733+
->end()
734+
->end()
735+
->end()
705736
->arrayNode('qdrant')
706737
->useAttributeAsKey('name')
707738
->arrayPrototype()
708739
->children()
709740
->stringNode('endpoint')->cannotBeEmpty()->end()
710741
->stringNode('api_key')->cannotBeEmpty()->end()
711742
->stringNode('collection_name')->cannotBeEmpty()->end()
712-
->integerNode('dimensions')->end()
713-
->stringNode('distance')->end()
743+
->integerNode('dimensions')->isRequired()->end()
744+
->stringNode('distance')->isRequired()->end()
714745
->booleanNode('async')->end()
715746
->end()
716747
->end()
@@ -736,32 +767,15 @@
736767
->end()
737768
->end()
738769
->validate()
739-
->ifTrue(static fn ($v) => !isset($v['connection_parameters']) && !isset($v['client']))
770+
->ifTrue(static fn (array $v): bool => !isset($v['connection_parameters']) && !isset($v['client']))
740771
->thenInvalid('Either "connection_parameters" or "client" must be configured.')
741772
->end()
742773
->validate()
743-
->ifTrue(static fn ($v) => isset($v['connection_parameters']) && isset($v['client']))
774+
->ifTrue(static fn (array $v): bool => isset($v['connection_parameters']) && isset($v['client']))
744775
->thenInvalid('Either "connection_parameters" or "client" can be configured, but not both.')
745776
->end()
746777
->end()
747778
->end()
748-
->arrayNode('surreal_db')
749-
->useAttributeAsKey('name')
750-
->arrayPrototype()
751-
->children()
752-
->stringNode('endpoint')->cannotBeEmpty()->end()
753-
->stringNode('username')->cannotBeEmpty()->end()
754-
->stringNode('password')->cannotBeEmpty()->end()
755-
->stringNode('namespace')->cannotBeEmpty()->end()
756-
->stringNode('database')->cannotBeEmpty()->end()
757-
->stringNode('table')->end()
758-
->stringNode('vector_field')->end()
759-
->stringNode('strategy')->end()
760-
->integerNode('dimensions')->end()
761-
->booleanNode('namespaced_user')->end()
762-
->end()
763-
->end()
764-
->end()
765779
->arrayNode('supabase')
766780
->useAttributeAsKey('name')
767781
->arrayPrototype()
@@ -780,51 +794,42 @@
780794
->end()
781795
->end()
782796
->end()
783-
->arrayNode('typesense')
797+
->arrayNode('surrealdb')
784798
->useAttributeAsKey('name')
785799
->arrayPrototype()
786800
->children()
787801
->stringNode('endpoint')->cannotBeEmpty()->end()
788-
->stringNode('api_key')->isRequired()->end()
789-
->stringNode('collection')->isRequired()->end()
790-
->stringNode('vector_field')->end()
791-
->integerNode('dimensions')->end()
802+
->stringNode('username')->cannotBeEmpty()->end()
803+
->stringNode('password')->cannotBeEmpty()->end()
804+
->stringNode('namespace')->cannotBeEmpty()->end()
805+
->stringNode('database')->cannotBeEmpty()->end()
806+
->stringNode('table')->isRequired()->end()
807+
->stringNode('vector_field')->isRequired()->end()
808+
->stringNode('strategy')->isRequired()->end()
809+
->integerNode('dimensions')->isRequired()->end()
810+
->booleanNode('namespaced_user')->end()
792811
->end()
793812
->end()
794813
->end()
795-
->arrayNode('weaviate')
814+
->arrayNode('typesense')
796815
->useAttributeAsKey('name')
797816
->arrayPrototype()
798817
->children()
799818
->stringNode('endpoint')->cannotBeEmpty()->end()
800819
->stringNode('api_key')->isRequired()->end()
801820
->stringNode('collection')->isRequired()->end()
821+
->stringNode('vector_field')->isRequired()->end()
822+
->integerNode('dimensions')->isRequired()->end()
802823
->end()
803824
->end()
804825
->end()
805-
->arrayNode('postgres')
826+
->arrayNode('weaviate')
806827
->useAttributeAsKey('name')
807828
->arrayPrototype()
808829
->children()
809-
->stringNode('dsn')->cannotBeEmpty()->end()
810-
->stringNode('username')->end()
811-
->stringNode('password')->end()
812-
->stringNode('table_name')->isRequired()->end()
813-
->stringNode('vector_field')->end()
814-
->enumNode('distance')
815-
->info('Distance metric to use for vector similarity search')
816-
->enumFqcn(PostgresDistance::class)
817-
->defaultValue(PostgresDistance::L2)
818-
->end()
819-
->stringNode('dbal_connection')->cannotBeEmpty()->end()
820-
->end()
821-
->validate()
822-
->ifTrue(static fn ($v) => !isset($v['dsn']) && !isset($v['dbal_connection']))
823-
->thenInvalid('Either "dsn" or "dbal_connection" must be configured.')
824-
->end()
825-
->validate()
826-
->ifTrue(static fn ($v) => isset($v['dsn'], $v['dbal_connection']))
827-
->thenInvalid('Either "dsn" or "dbal_connection" can be configured, but not both.')
830+
->stringNode('endpoint')->cannotBeEmpty()->end()
831+
->stringNode('api_key')->isRequired()->end()
832+
->stringNode('collection')->isRequired()->end()
828833
->end()
829834
->end()
830835
->end()
@@ -933,7 +938,7 @@
933938
->end()
934939
->end()
935940
->end()
936-
->arrayNode('surreal_db')
941+
->arrayNode('surrealdb')
937942
->useAttributeAsKey('name')
938943
->arrayPrototype()
939944
->children()

0 commit comments

Comments (0)