Skip to content

Commit 21211e8

Browse files
[Feat] Implement keyword search in Qdrant
This commit implements keyword search in Qdrant. Signed-off-by: Varsha Prasad Narsing <[email protected]>
1 parent ef02b9e commit 21211e8

File tree

4 files changed

+156
-23
lines changed

llama_stack/providers/remote/vector_io/qdrant/qdrant.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,43 @@ async def query_vector(self, embedding: NDArray, k: int, score_threshold: float)
128128

129129
return QueryChunksResponse(chunks=chunks, scores=scores)
130130

131-
async def query_keyword(
132-
self,
133-
query_string: str,
134-
k: int,
135-
score_threshold: float,
136-
) -> QueryChunksResponse:
137-
raise NotImplementedError("Keyword search is not supported in Qdrant")
131+
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
    """Return up to ``k`` chunks whose stored text matches ``query_string``.

    Sends a filter-only ``query_points`` request (no query vector is passed)
    that requires a ``MatchText`` hit on the ``chunk_content.content`` payload
    field, then rebuilds a ``Chunk`` from each returned point's
    ``chunk_content`` payload entry.

    Args:
        query_string: Text handed to the ``MatchText`` condition.
        k: Value passed as ``limit`` to ``query_points``.
        score_threshold: Forwarded unchanged to ``query_points``.

    Returns:
        A ``QueryChunksResponse`` holding the parsed chunks and their scores,
        in the order the points were returned. Points whose payload cannot be
        turned into a ``Chunk`` are logged and left out.

    Raises:
        Exception: Any error raised while building or issuing the request is
            logged and re-raised unchanged.
    """
    try:
        text_condition = models.FieldCondition(
            key="chunk_content.content",
            match=models.MatchText(text=query_string),
        )
        response = await self.client.query_points(
            collection_name=self.collection_name,
            query_filter=models.Filter(must=[text_condition]),
            limit=k,
            with_payload=True,
            with_vectors=False,
            score_threshold=score_threshold,
        )
        points = response.points
    except Exception as e:
        log.error(f"Error querying keyword search in Qdrant collection {self.collection_name}: {e}")
        raise

    chunks, scores = [], []
    for point in points:
        # These asserts also narrow the types for static checkers.
        assert isinstance(point, models.ScoredPoint)
        assert point.payload is not None

        try:
            parsed = Chunk(**point.payload["chunk_content"])
        except Exception:
            # One malformed payload must not fail the whole query.
            log.exception("Failed to parse chunk")
        else:
            chunks.append(parsed)
            scores.append(point.score)

    return QueryChunksResponse(chunks=chunks, scores=scores)
138168

139169
async def query_hybrid(
140170
self,

tests/integration/vector_io/test_openai_vector_stores.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
5555
],
5656
"keyword": [
5757
"inline::sqlite-vec",
58+
"inline::qdrant",
59+
"remote::qdrant",
5860
"remote::milvus",
5961
"inline::milvus",
6062
"remote::pgvector",

tests/unit/providers/vector_io/conftest.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# This source code is licensed under the terms described in the LICENSE file in
55
# the root directory of this source tree.
66

7+
import os
78
import random
9+
import tempfile
810
from unittest.mock import AsyncMock, MagicMock, patch
911

1012
import numpy as np
@@ -18,7 +20,7 @@
1820
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
1921
from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
2022
from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
21-
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig
23+
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
2224
from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
2325
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
2426
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
@@ -32,7 +34,7 @@
3234
MILVUS_ALIAS = "test_milvus"
3335

3436

35-
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma", "pgvector"])
37+
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma", "qdrant", "pgvector"])
def vector_provider(request):
    """Parametrized fixture yielding one provider name per parametrization."""
    provider_name = request.param
    return provider_name
3840

@@ -286,40 +288,53 @@ async def chroma_vec_adapter(chroma_vec_db_path, mock_inference_api, embedding_d
286288

287289

288290
@pytest.fixture
def qdrant_vec_db_path(tmp_path):
    """Return the path of a fresh, empty directory for one test's Qdrant data.

    The directory is created inside pytest's per-test ``tmp_path`` rather than
    in the system temp dir, so it is covered by pytest's own temp-directory
    housekeeping even when the consuming fixture performs no cleanup itself.
    ``mkdtemp`` already appends a random suffix, so no extra UUID is needed.

    Returns:
        The directory path as a ``str``.
    """
    return tempfile.mkdtemp(prefix="qdrant_test_", dir=str(tmp_path))
294298

295299

296300
@pytest.fixture
297301
async def qdrant_vec_adapter(qdrant_vec_db_path, mock_inference_api, embedding_dimension):
302+
import shutil
298303
import uuid
299304

300-
config = QdrantVectorIOConfig(
301-
db_path=qdrant_vec_db_path,
305+
config = InlineQdrantVectorIOConfig(
306+
path=qdrant_vec_db_path,
302307
kvstore=SqliteKVStoreConfig(),
303308
)
304309
adapter = QdrantVectorIOAdapter(
305310
config=config,
306311
inference_api=mock_inference_api,
307312
files_api=None,
308313
)
309-
collection_id = f"qdrant_test_collection_{uuid.uuid4()}"
314+
315+
original_initialize = adapter.initialize
316+
317+
async def safe_initialize():
318+
if not hasattr(adapter, "_initialized") or not adapter._initialized:
319+
await original_initialize()
320+
adapter._initialized = True
321+
322+
adapter.initialize = safe_initialize
310323
await adapter.initialize()
311-
await adapter.register_vector_db(
312-
VectorDB(
313-
identifier=collection_id,
314-
provider_id="test_provider",
315-
embedding_model="test_model",
316-
embedding_dimension=embedding_dimension,
317-
)
318-
)
324+
325+
collection_id = f"qdrant_test_collection_{uuid.uuid4()}"
319326
adapter.test_collection_id = collection_id
327+
adapter._test_db_path = qdrant_vec_db_path
320328
yield adapter
329+
321330
await adapter.shutdown()
322331

332+
try:
333+
if os.path.exists(qdrant_vec_db_path):
334+
shutil.rmtree(qdrant_vec_db_path, ignore_errors=True)
335+
except Exception:
336+
pass
337+
323338

324339
@pytest.fixture
325340
async def qdrant_vec_index(qdrant_vec_db_path, embedding_dimension):

tests/unit/providers/vector_io/test_qdrant.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,89 @@ async def test_qdrant_register_and_unregister_vector_db(
145145
await qdrant_adapter.unregister_vector_db(vector_db_id)
146146
assert not (await qdrant_adapter.client.collection_exists(vector_db_id))
147147
assert len((await qdrant_adapter.client.get_collections()).collections) == 0
148+
149+
150+
# Keyword search tests
151+
async def test_query_chunks_keyword_search(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Keyword search finds an expected phrase and returns nothing for an unknown one."""
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # The test expects "Sentence 5" to occur in the indexed sample chunks.
    hits = await qdrant_vec_index.query_keyword(query_string="Sentence 5", k=3, score_threshold=0.0)
    assert isinstance(hits, QueryChunksResponse)
    assert len(hits.chunks) > 0, f"Expected some chunks, but got {len(hits.chunks)}"

    # A token that should appear in no chunk must yield an empty response.
    misses = await qdrant_vec_index.query_keyword(query_string="blablabla", k=1, score_threshold=0.0)
    assert isinstance(misses, QueryChunksResponse)
    assert len(misses.chunks) == 0, f"Expected 0 results, but got {len(misses.chunks)}"
167+
168+
169+
async def test_query_chunks_keyword_search_k_greater_than_results(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Asking for more results than exist returns only the available matches."""
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    target_text = "Sentence 1 from document 0"  # Should match only one chunk
    result = await qdrant_vec_index.query_keyword(query_string=target_text, k=5, score_threshold=0.0)

    assert isinstance(result, QueryChunksResponse)
    found = result.chunks
    assert 0 < len(found) < 5, f"Expected results between [1, 4], got {len(found)}"
    assert any(target_text in chunk.content for chunk in found), "Expected chunk not found"
179+
180+
181+
async def test_query_chunks_keyword_search_score_threshold(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Keyword search still returns hits with a zero or a negative score threshold."""
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # 0.0 is the low threshold; -1.0 should also return results since scores are 0.0.
    for threshold in (0.0, -1.0):
        response = await qdrant_vec_index.query_keyword(
            query_string="Sentence 5", k=3, score_threshold=threshold
        )
        assert len(response.chunks) > 0
196+
197+
198+
async def test_query_chunks_keyword_search_edge_cases(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Unusual query strings still produce a well-formed response object."""
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    edge_queries = (
        "",  # empty string
        "a" * 1000,  # very long query string
        "!@#$%^&*()_+-=[]{}|;':\",./<>?",  # special characters
    )
    for text in edge_queries:
        outcome = await qdrant_vec_index.query_keyword(query_string=text, k=3, score_threshold=0.0)
        assert isinstance(outcome, QueryChunksResponse)
215+
216+
217+
async def test_query_chunks_keyword_search_metadata_preservation(
    qdrant_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata
):
    """Chunks returned by keyword search still carry their metadata fields."""
    await qdrant_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata)

    response = await qdrant_vec_index.query_keyword(query_string="Sentence 0", k=2, score_threshold=0.0)
    assert len(response.chunks) > 0

    for chunk in response.chunks:
        # Check that metadata is preserved
        assert hasattr(chunk, "metadata") or hasattr(chunk, "chunk_metadata")

        # The detailed field checks apply only when chunk_metadata is populated.
        if not hasattr(chunk, "chunk_metadata") or not chunk.chunk_metadata:
            continue
        assert chunk.chunk_metadata.document_id is not None
        assert chunk.chunk_metadata.chunk_id is not None
        assert chunk.chunk_metadata.source is not None

0 commit comments

Comments
 (0)