Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ffc5e91
1965 - Init the module
nablabits Apr 11, 2024
1c28da4
1965 - Move over `AmazonBedrockEmbeddingFunction`
nablabits Apr 13, 2024
6e4f190
1965 - Move over `create_langchain_embedding`
nablabits Apr 13, 2024
40dee43
1965 - Move over `CohereEmbeddingFunction`
nablabits Apr 13, 2024
385dcc0
1965 - Move over `google_embedding_function`
nablabits Apr 13, 2024
ed206d6
1965 - Move over `huggingface_embedding_function`
nablabits Apr 13, 2024
50076e4
1965 - Move over `InstructorEmbeddingFunction`
nablabits Apr 13, 2024
0aeb92e
1965 - Move over `JinaEmbeddingFunction`
nablabits Apr 13, 2024
18926e9
1965 - Move over `OllamaEmbeddingFunction`
nablabits Apr 13, 2024
1ec3d2a
1965 - Move over `ONNXMiniLM_L6_V2`
nablabits Apr 13, 2024
3642058
1965 - Move over `OpenCLIPEmbeddingFunction`
nablabits Apr 13, 2024
0196264
1965 - Move over `OpenAIEmbeddingFunction`
nablabits Apr 13, 2024
929a8d4
1965 - Move over `RoboflowEmbeddingFunction`
nablabits Apr 13, 2024
c2e2cc8
1965 - Move over `SentenceTransformerEmbeddingFunction`
nablabits Apr 13, 2024
2632601
1965 - Move over `Text2VecEmbeddingFunction`
nablabits Apr 13, 2024
6770d21
1965 - Move remaining functions
nablabits Apr 13, 2024
6ad7598
1965 - Lint Files
nablabits Apr 20, 2024
8f08d60
1965 - Lint onnx embedding function
nablabits Apr 20, 2024
fc6b3c8
1965 - Ensure that `get_builtins()` holds after the migration.
nablabits Apr 24, 2024
97dc885
Merge branch 'main' into feature/1965-split-up-embedding-functions
nablabits May 14, 2024
5c56387
Automate imports of EF in module
atroyn Jun 20, 2024
a548218
Automate imports of EF in module
atroyn Jun 20, 2024
cbb0b03
Additional tests
atroyn Jun 20, 2024
84aa4cf
Merge branch 'main' into feature/1965-split-up-embedding-functions
atroyn Jun 20, 2024
41a3e91
httpx everywhere
atroyn Jun 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions chromadb/api/types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional, Union, TypeVar, List, Dict, Any, Tuple, cast
from numpy.typing import NDArray
import numpy as np
from typing_extensions import Literal, TypedDict, Protocol
from typing_extensions import Literal, TypedDict, Protocol, runtime_checkable
import chromadb.errors as errors
from chromadb.types import (
Metadata,
Expand Down Expand Up @@ -56,7 +56,7 @@ def maybe_cast_one_to_many_ids(target: OneOrMany[ID]) -> IDs:


def maybe_cast_one_to_many_embedding(
target: Union[OneOrMany[Embedding], OneOrMany[np.ndarray]]
target: Union[OneOrMany[Embedding], OneOrMany[np.ndarray]] # type: ignore[type-arg]
) -> Embeddings:
if isinstance(target, List):
# One Embedding
Expand Down Expand Up @@ -101,7 +101,7 @@ def maybe_cast_one_to_many_document(target: OneOrMany[Document]) -> Documents:


# Images
ImageDType = Union[np.uint, np.int_, np.float_]
ImageDType = Union[np.uint, np.int_, np.float_] # type: ignore[name-defined]
Image = NDArray[ImageDType]
Images = List[Image]

Expand Down Expand Up @@ -184,6 +184,7 @@ class IndexMetadata(TypedDict):
time_created: float


@runtime_checkable
class EmbeddingFunction(Protocol[D]):
def __call__(self, input: D) -> Embeddings:
...
Expand All @@ -199,8 +200,10 @@ def __call__(self: EmbeddingFunction[D], input: D) -> Embeddings:

setattr(cls, "__call__", __call__)

def embed_with_retries(self, input: D, **retry_kwargs: Dict) -> Embeddings:
return retry(**retry_kwargs)(self.__call__)(input)
def embed_with_retries(
self, input: D, **retry_kwargs: Dict[str, Any]
) -> Embeddings:
return cast(Embeddings, retry(**retry_kwargs)(self.__call__)(input))


def validate_embedding_function(
Expand Down
5 changes: 4 additions & 1 deletion chromadb/test/ef/test_default_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import pytest
from hypothesis import given, settings

from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2, _verify_sha256
from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import (
ONNXMiniLM_L6_V2,
_verify_sha256,
)


def unique_by(x: Hashable) -> Hashable:
Expand Down
53 changes: 53 additions & 0 deletions chromadb/test/ef/test_ef.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from chromadb.utils import embedding_functions
from chromadb.api.types import EmbeddingFunction


def test_get_builtins_holds() -> None:
    """
    Check that `get_builtins` reports the same names after the ef migration.

    This is a temporary guard for the migration: the expected set below is
    likely to go stale as users contribute new embedding functions.

    REMOVE ME ON THE NEXT EF ADDITION
    """
    # Names are kept in alphabetical order to make additions easy to review.
    expected_names = set(
        (
            "AmazonBedrockEmbeddingFunction",
            "ChromaLangchainEmbeddingFunction",
            "CohereEmbeddingFunction",
            "GoogleGenerativeAiEmbeddingFunction",
            "GooglePalmEmbeddingFunction",
            "GoogleVertexEmbeddingFunction",
            "HuggingFaceEmbeddingFunction",
            "HuggingFaceEmbeddingServer",
            "InstructorEmbeddingFunction",
            "JinaEmbeddingFunction",
            "ONNXMiniLM_L6_V2",
            "OllamaEmbeddingFunction",
            "OpenAIEmbeddingFunction",
            "OpenCLIPEmbeddingFunction",
            "RoboflowEmbeddingFunction",
            "SentenceTransformerEmbeddingFunction",
            "Text2VecEmbeddingFunction",
        )
    )
    reported_names = embedding_functions.get_builtins()

    assert expected_names == reported_names


def test_default_ef_exists() -> None:
    """
    Check that `DefaultEmbeddingFunction` is exposed by the package and that
    calling it with no arguments yields an `EmbeddingFunction` instance.
    """
    attr_name = "DefaultEmbeddingFunction"
    assert hasattr(embedding_functions, attr_name)

    # Instantiate with no arguments, exactly as a user relying on defaults would.
    instance = getattr(embedding_functions, attr_name)()

    assert instance is not None
    assert isinstance(instance, EmbeddingFunction)


def test_ef_imports() -> None:
    """
    Check that every name from `get_builtins` is an attribute of the
    `embedding_functions` package, is a class, and passes the
    `issubclass(..., EmbeddingFunction)` check.
    """
    # Langchain embedding function is a special snowflake
    excluded = "ChromaLangchainEmbeddingFunction"

    for name in embedding_functions.get_builtins():
        if name != excluded:
            assert hasattr(embedding_functions, name)
            exported = getattr(embedding_functions, name)
            assert isinstance(exported, type)
            assert issubclass(exported, EmbeddingFunction)
Loading