Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select a commit. Hold shift + click to select a range
90ddc82
FEAT: add engine ability display
OliverBryant Oct 13, 2025
b44eacd
feat: frontend supports engine ability display
yiboyasss Oct 13, 2025
be4350e
FEAT: add engine ability display
OliverBryant Oct 14, 2025
1029e90
FEAT: add engine ability display
OliverBryant Oct 14, 2025
de09704
FEAT: add engine ability display
OliverBryant Oct 14, 2025
54be3f0
FEAT: add engine ability display
OliverBryant Oct 14, 2025
f7def93
FEAT: add engine ability display
OliverBryant Oct 14, 2025
3fa237a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
988a800
FEAT: add engine ability display
OliverBryant Oct 14, 2025
e1d790e
FEAT: add engine ability display
OliverBryant Oct 14, 2025
944677a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
4793cc8
modify accomplishment measure
OliverBryant Oct 21, 2025
cf756bf
modify accomplishment measure
OliverBryant Oct 21, 2025
941c537
modify accomplishment measure
OliverBryant Oct 21, 2025
c72e0ff
modify accomplishment measure
OliverBryant Oct 21, 2025
742d0ec
modify accomplishment measure
OliverBryant Oct 21, 2025
20bf20c
mypy test
OliverBryant Oct 21, 2025
17728c2
mypy test
OliverBryant Oct 21, 2025
a8767d9
mypy test
OliverBryant Oct 21, 2025
ab940b7
mypy test
OliverBryant Oct 21, 2025
32edf0a
mypy test
OliverBryant Oct 21, 2025
d54084c
mypy test
OliverBryant Oct 21, 2025
0366a15
mypy fix
OliverBryant Oct 21, 2025
0c5ecc9
mypy fix
OliverBryant Oct 21, 2025
dd479ad
mypy fix
OliverBryant Oct 21, 2025
b0d2997
mypy fix
OliverBryant Oct 21, 2025
af4f71e
mypy fix
OliverBryant Oct 22, 2025
8d229b9
Modify class name
OliverBryant Oct 22, 2025
bac40fc
Modify class name
OliverBryant Oct 22, 2025
6f3bf38
commit
OliverBryant Oct 22, 2025
ca76611
new engine ability display
OliverBryant Oct 29, 2025
2c3ff84
pre-commit
OliverBryant Oct 29, 2025
7d33ec3
mypy-error
OliverBryant Oct 29, 2025
e7925eb
fix mlx CI bug
OliverBryant Oct 29, 2025
d14bad2
fix CI bug
OliverBryant Oct 30, 2025
de6da90
modify embedding sentence_transformers
OliverBryant Nov 10, 2025
baf3029
modify embedding sentence_transformers
OliverBryant Nov 12, 2025
fc7d5d3
test CI error
OliverBryant Nov 13, 2025
012eae8
test CI error
OliverBryant Nov 13, 2025
478de30
test CI error
OliverBryant Nov 13, 2025
2351784
test CI error
OliverBryant Nov 13, 2025
2f556e1
mypy error
OliverBryant Nov 14, 2025
b4bb545
mypy error
OliverBryant Nov 14, 2025
a60bcfe
mypy error
OliverBryant Nov 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ jobs:
${{ env.SELF_HOST_PYTHON }} -m pip install tensorizer
${{ env.SELF_HOST_PYTHON }} -m pip install -U sentence-transformers
${{ env.SELF_HOST_PYTHON }} -m pip install -U FlagEmbedding
${{ env.SELF_HOST_PYTHON }} -m pip install -U "peft>=0.15.0"
${{ env.SELF_HOST_PYTHON }} -m pip install -U "peft<=0.17.1"
${{ env.SELF_HOST_PYTHON }} -m pip install "vllm" --index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.org/simple
${{ env.SELF_HOST_PYTHON }} -m pip install "xllamacpp>=0.2.0" --index-url https://xorbitsai.github.io/xllamacpp/whl/cu124 --extra-index-url https://pypi.org/simple
${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=3000 \
--disable-warnings \
Expand Down
12 changes: 7 additions & 5 deletions xinference/model/embedding/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def __init__(

@classmethod
@abstractmethod
def check_lib(cls) -> bool:
def check_lib(cls) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -173,7 +173,7 @@ def match_json(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -182,13 +182,15 @@ def match(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
):
) -> bool:
"""
Return if the model_spec can be matched.
"""
if not cls.check_lib():
lib_result = cls.check_lib()
if lib_result != True:
return False
return cls.match_json(model_family, model_spec, quantization)
match_result = cls.match_json(model_family, model_spec, quantization)
return match_result == True

@abstractmethod
def load(self):
Expand Down
17 changes: 13 additions & 4 deletions xinference/model/embedding/flag/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,19 +282,28 @@ def encode(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("FlagEmbedding") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("FlagEmbedding") is not None
else "FlagEmbedding library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

if (
model_spec.model_format in ["pytorch"]
and model_family.model_name in FLAG_EMBEDDER_MODEL_LIST
):
return True
return False
return f"FlagEmbedding engine only supports pytorch format and models in FLAG_EMBEDDER_MODEL_LIST, got format: {model_spec.model_format}, model: {model_family.model_name}"
37 changes: 33 additions & 4 deletions xinference/model/embedding/llama_cpp/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,16 +229,45 @@ def _handle_embedding():
return Embedding(**r) # type: ignore

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("xllamacpp") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("xllamacpp") is not None
else "xllamacpp library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["ggufv2"]:
return False
return f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}"

# Check embedding-specific requirements
if not hasattr(model_spec, "model_file_name_template"):
return "GGUF embedding model requires proper file configuration (missing model_file_name_template)"

# Check model dimensions for llama.cpp compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 4096: # llama.cpp may have limitations
return f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)"

# Check platform-specific considerations
import platform

current_platform = platform.system()

# llama.cpp works across platforms but may have performance differences
if current_platform == "Windows":
return "llama.cpp embedding may have limited performance on Windows"

return True
76 changes: 76 additions & 0 deletions xinference/model/embedding/match_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Error handling result structures for embedding model engine matching.

This module provides structured error handling for engine matching operations,
allowing engines to provide detailed failure reasons and suggestions.
"""

from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class MatchResult:
    """Outcome of an engine-matching check, with optional failure details.

    Carries a boolean verdict plus, when the match fails, a human-readable
    reason, an error category, and low-level technical details that engines
    can surface to users for diagnosis.
    """

    # Whether the engine can serve the requested model configuration.
    is_match: bool
    # Human-readable explanation, populated only on failure.
    reason: Optional[str] = None
    # Category constant for programmatic handling (see ErrorType).
    error_type: Optional[str] = None
    # Extra low-level diagnostics, if any.
    technical_details: Optional[str] = None

    @classmethod
    def success(cls) -> "MatchResult":
        """Build a result representing a successful match."""
        return cls(is_match=True)

    @classmethod
    def failure(
        cls,
        reason: str,
        error_type: Optional[str] = None,
        technical_details: Optional[str] = None,
    ) -> "MatchResult":
        """Build a failed result carrying the given diagnostics."""
        return cls(
            is_match=False,
            reason=reason,
            error_type=error_type,
            technical_details=technical_details,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses; detail keys appear only on failure."""
        payload: Dict[str, Any] = {"is_match": self.is_match}
        if self.is_match:
            return payload
        # Only truthy detail fields are included, mirroring the API contract.
        optional_fields = (
            ("reason", self.reason),
            ("error_type", self.error_type),
            ("technical_details", self.technical_details),
        )
        for key, value in optional_fields:
            if value:
                payload[key] = value
        return payload

    def to_error_string(self) -> str:
        """Legacy string form: "Available" on success, else the reason."""
        return "Available" if self.is_match else (self.reason or "Unknown error")


# Error type constants for better categorization
class ErrorType:
HARDWARE_REQUIREMENT = "hardware_requirement"
OS_REQUIREMENT = "os_requirement"
MODEL_FORMAT = "model_format"
DEPENDENCY_MISSING = "dependency_missing"
MODEL_COMPATIBILITY = "model_compatibility"
DIMENSION_MISMATCH = "dimension_mismatch"
VERSION_REQUIREMENT = "version_requirement"
CONFIGURATION_ERROR = "configuration_error"
ENGINE_UNAVAILABLE = "engine_unavailable"
51 changes: 46 additions & 5 deletions xinference/model/embedding/sentence_transformers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,15 +429,56 @@ def base64_to_image(base64_str: str) -> Image.Image:
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("sentence_transformers") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("sentence_transformers") is not None
else "sentence_transformers library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
# As default embedding engine, sentence-transformer support all models
return model_spec.model_format in ["pytorch"]
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}"

# Check model dimensions compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 8192: # Extremely large embedding models
return f"Extremely large embedding model detected ({model_dimensions} dimensions), may have performance issues"

# Check token limits
max_tokens = model_family.max_tokens
if max_tokens > 131072: # Extremely high token limits (128K)
return f"Extremely high token limit model detected (max_tokens: {max_tokens}), may cause memory issues"

# Check for special model requirements
model_name = model_family.model_name.lower()

# Check Qwen2 GTE models
if "gte" in model_name and "qwen2" in model_name:
# These models have specific requirements
if not hasattr(cls, "_check_qwen_gte_requirements"):
return "Qwen2 GTE models require special handling"

# Check Qwen3 models
if "qwen3" in model_name:
# Qwen3 has flash attention requirements - basic check
try:
pass

# This would be checked during actual loading
except Exception:
return "Qwen3 embedding model may have compatibility issues"

return True
Loading
Loading