Skip to content

Commit db6f7fd

Browse files
committed
refactor: Separate file
1 parent 12aeebe commit db6f7fd

File tree

4 files changed

+66
-3
lines changed

4 files changed

+66
-3
lines changed

chromadb/test/ef/test_ef.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def test_get_builtins_holds() -> None:
3030
"SentenceTransformerEmbeddingFunction",
3131
"Text2VecEmbeddingFunction",
3232
"ChromaLangchainEmbeddingFunction",
33+
"FastEmbedEmbeddingFunction",
3334
}
3435

3536
assert expected_builtins == embedding_functions.get_builtins()

chromadb/test/ef/test_fastembed_ef.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import pytest
22

3-
from chromadb.utils.embedding_functions import FastEmbedEmbeddingFunction
3+
from chromadb.utils.embedding_functions.fastembed_embedding_function import (
4+
FastEmbedEmbeddingFunction,
5+
)
46

57
# Skip test if the 'fastembed' package is not installed
68
fastembed = pytest.importorskip("fastembed", reason="fastembed not installed")
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from typing import Any, Optional, cast
2+
3+
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
4+
5+
6+
class FastEmbedEmbeddingFunction(EmbeddingFunction[Documents]):
7+
"""
8+
This class is used to generate embeddings for a list of texts using FastEmbed - https://qdrant.github.io/fastembed/.
9+
Find the list of supported models at https://qdrant.github.io/fastembed/examples/Supported_Models/.
10+
"""
11+
12+
def __init__(
13+
self,
14+
model_name: str = "BAAI/bge-small-en-v1.5",
15+
cache_dir: Optional[str] = None,
16+
threads: Optional[int] = None,
17+
**kwargs: Any,
18+
) -> None:
19+
"""
20+
Initialize fastembed.TextEmbedding
21+
22+
Args:
23+
model_name (str): The name of the model to use.
24+
cache_dir (str, optional): The path to the model cache directory.
25+
Can also be set using the `FASTEMBED_CACHE_PATH` env variable.
26+
threads (int, optional): The number of threads a single onnxruntime session can use.
27+
28+
Raises:
29+
ValueError: If the 'fastembed' package is not installed, or if the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
30+
"""
31+
try:
32+
from fastembed import TextEmbedding
33+
except ImportError:
34+
raise ValueError(
35+
"The 'fastembed' package is not installed. Please install it with `pip install fastembed`"
36+
)
37+
self._model = TextEmbedding(
38+
model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
39+
)
40+
41+
def __call__(self, input: Documents) -> Embeddings:
42+
"""
43+
Get the embeddings for a list of texts.
44+
45+
Args:
46+
input (Documents): A list of texts to get embeddings for.
47+
48+
Returns:
49+
Embeddings: The embeddings for the texts.
50+
51+
Example:
52+
>>> fastembed_ef = FastEmbedEmbeddingFunction(model_name="sentence-transformers/all-MiniLM-L6-v2")
53+
>>> texts = ["Hello, world!", "How are you?"]
54+
>>> embeddings = fastembed_ef(texts)
55+
"""
56+
embeddings = self._model.embed(input)
57+
return cast(
58+
Embeddings,
59+
[embedding.tolist() for embedding in embeddings],
60+
)

docs/docs.trychroma.com/pages/integrations/fastembed.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ This embedding function requires the `fastembed` package. To install it, run
1212

1313
You can find a list of all the supported models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/).
1414

15-
## Example usage:
15+
## Example usage
1616

1717
Using the default BAAI/bge-small-en-v1.5 model.
1818

1919
```python
20-
from chromadb.utils.embedding_functions import FastEmbedEmbeddingFunction
20+
from chromadb.utils.embedding_functions.fastembed_embedding_function import FastEmbedEmbeddingFunction
2121
ef = FastEmbedEmbeddingFunction()
2222
```
2323

0 commit comments

Comments
 (0)