Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select a commit. Hold shift + click to select a range
90ddc82
FEAT: add engine ability display
OliverBryant Oct 13, 2025
b44eacd
feat: frontend supports engine ability display
yiboyasss Oct 13, 2025
be4350e
FEAT: add engine ability display
OliverBryant Oct 14, 2025
1029e90
FEAT: add engine ability display
OliverBryant Oct 14, 2025
de09704
FEAT: add engine ability display
OliverBryant Oct 14, 2025
54be3f0
FEAT: add engine ability display
OliverBryant Oct 14, 2025
f7def93
FEAT: add engine ability display
OliverBryant Oct 14, 2025
3fa237a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
988a800
FEAT: add engine ability display
OliverBryant Oct 14, 2025
e1d790e
FEAT: add engine ability display
OliverBryant Oct 14, 2025
944677a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
4793cc8
modify accomplishment measure
OliverBryant Oct 21, 2025
cf756bf
modify accomplishment measure
OliverBryant Oct 21, 2025
941c537
modify accomplishment measure
OliverBryant Oct 21, 2025
c72e0ff
modify accomplishment measure
OliverBryant Oct 21, 2025
742d0ec
modify accomplishment measure
OliverBryant Oct 21, 2025
20bf20c
mypy test
OliverBryant Oct 21, 2025
17728c2
mypy test
OliverBryant Oct 21, 2025
a8767d9
mypy test
OliverBryant Oct 21, 2025
ab940b7
mypy test
OliverBryant Oct 21, 2025
32edf0a
mypy test
OliverBryant Oct 21, 2025
d54084c
mypy test
OliverBryant Oct 21, 2025
0366a15
mypy fix
OliverBryant Oct 21, 2025
0c5ecc9
mypy fix
OliverBryant Oct 21, 2025
dd479ad
mypy fix
OliverBryant Oct 21, 2025
b0d2997
mypy fix
OliverBryant Oct 21, 2025
af4f71e
mypy fix
OliverBryant Oct 22, 2025
8d229b9
Modify class name
OliverBryant Oct 22, 2025
bac40fc
Modify class name
OliverBryant Oct 22, 2025
6f3bf38
commit
OliverBryant Oct 22, 2025
ca76611
new engine ability display
OliverBryant Oct 29, 2025
2c3ff84
pre-commit
OliverBryant Oct 29, 2025
7d33ec3
mypy-error
OliverBryant Oct 29, 2025
e7925eb
fix mlx CI bug
OliverBryant Oct 29, 2025
d14bad2
fix CI bug
OliverBryant Oct 30, 2025
de6da90
modify embedding sentence_transformers
OliverBryant Nov 10, 2025
baf3029
modify embedding sentence_transformers
OliverBryant Nov 12, 2025
fc7d5d3
test CI error
OliverBryant Nov 13, 2025
012eae8
test CI error
OliverBryant Nov 13, 2025
478de30
test CI error
OliverBryant Nov 13, 2025
2351784
test CI error
OliverBryant Nov 13, 2025
2f556e1
mypy error
OliverBryant Nov 14, 2025
b4bb545
mypy error
OliverBryant Nov 14, 2025
a60bcfe
mypy error
OliverBryant Nov 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ jobs:
${{ env.SELF_HOST_PYTHON }} -m pip install tensorizer
${{ env.SELF_HOST_PYTHON }} -m pip install -U sentence-transformers
${{ env.SELF_HOST_PYTHON }} -m pip install -U FlagEmbedding
${{ env.SELF_HOST_PYTHON }} -m pip install -U "peft>=0.15.0"
${{ env.SELF_HOST_PYTHON }} -m pip install -U "peft<=0.17.1"
${{ env.SELF_HOST_PYTHON }} -m pip install "vllm" --index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.org/simple
${{ env.SELF_HOST_PYTHON }} -m pip install "xllamacpp>=0.2.0" --index-url https://xorbitsai.github.io/xllamacpp/whl/cu124 --extra-index-url https://pypi.org/simple
${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=3000 \
--disable-warnings \
Expand Down
12 changes: 7 additions & 5 deletions xinference/model/embedding/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def __init__(

@classmethod
@abstractmethod
def check_lib(cls) -> bool:
def check_lib(cls) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -173,7 +173,7 @@ def match_json(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -182,13 +182,15 @@ def match(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
):
) -> bool:
"""
Return if the model_spec can be matched.
"""
if not cls.check_lib():
lib_result = cls.check_lib()
if lib_result != True:
return False
return cls.match_json(model_family, model_spec, quantization)
match_result = cls.match_json(model_family, model_spec, quantization)
return match_result == True

@abstractmethod
def load(self):
Expand Down
17 changes: 13 additions & 4 deletions xinference/model/embedding/flag/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,19 +282,28 @@ def encode(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("FlagEmbedding") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("FlagEmbedding") is not None
else "FlagEmbedding library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

if (
model_spec.model_format in ["pytorch"]
and model_family.model_name in FLAG_EMBEDDER_MODEL_LIST
):
return True
return False
return f"FlagEmbedding engine only supports pytorch format and models in FLAG_EMBEDDER_MODEL_LIST, got format: {model_spec.model_format}, model: {model_family.model_name}"
37 changes: 33 additions & 4 deletions xinference/model/embedding/llama_cpp/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,16 +229,45 @@ def _handle_embedding():
return Embedding(**r) # type: ignore

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("xllamacpp") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("xllamacpp") is not None
else "xllamacpp library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["ggufv2"]:
return False
return f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}"

# Check embedding-specific requirements
if not hasattr(model_spec, "model_file_name_template"):
return "GGUF embedding model requires proper file configuration (missing model_file_name_template)"

# Check model dimensions for llama.cpp compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 4096: # llama.cpp may have limitations
return f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)"

# Check platform-specific considerations
import platform

current_platform = platform.system()

# llama.cpp works across platforms but may have performance differences
if current_platform == "Windows":
return "llama.cpp embedding may have limited performance on Windows"

return True
76 changes: 76 additions & 0 deletions xinference/model/embedding/match_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Error handling result structures for embedding model engine matching.

This module provides structured error handling for engine matching operations,
allowing engines to provide detailed failure reasons and suggestions.
"""

from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class MatchResult:
    """Outcome of an engine-matching check, with optional failure details.

    Carries a boolean verdict plus, when the match fails, a human-readable
    reason, an error category, and low-level technical details that engines
    can surface to users for diagnosis.
    """

    # Whether the engine can serve the requested model configuration.
    is_match: bool
    # Human-readable explanation, populated only on failure.
    reason: Optional[str] = None
    # Category constant for programmatic handling (see ErrorType).
    error_type: Optional[str] = None
    # Extra low-level diagnostics, if any.
    technical_details: Optional[str] = None

    @classmethod
    def success(cls) -> "MatchResult":
        """Build a result representing a successful match."""
        return cls(is_match=True)

    @classmethod
    def failure(
        cls,
        reason: str,
        error_type: Optional[str] = None,
        technical_details: Optional[str] = None,
    ) -> "MatchResult":
        """Build a failed result carrying the given diagnostics."""
        return cls(
            is_match=False,
            reason=reason,
            error_type=error_type,
            technical_details=technical_details,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses; detail keys appear only on failure."""
        payload: Dict[str, Any] = {"is_match": self.is_match}
        if self.is_match:
            return payload
        # Only truthy detail fields are included, mirroring the API contract.
        optional_fields = (
            ("reason", self.reason),
            ("error_type", self.error_type),
            ("technical_details", self.technical_details),
        )
        for key, value in optional_fields:
            if value:
                payload[key] = value
        return payload

    def to_error_string(self) -> str:
        """Legacy string form: "Available" on success, else the reason."""
        return "Available" if self.is_match else (self.reason or "Unknown error")


# Error type constants for better categorization
class ErrorType:
HARDWARE_REQUIREMENT = "hardware_requirement"
OS_REQUIREMENT = "os_requirement"
MODEL_FORMAT = "model_format"
DEPENDENCY_MISSING = "dependency_missing"
MODEL_COMPATIBILITY = "model_compatibility"
DIMENSION_MISMATCH = "dimension_mismatch"
VERSION_REQUIREMENT = "version_requirement"
CONFIGURATION_ERROR = "configuration_error"
ENGINE_UNAVAILABLE = "engine_unavailable"
51 changes: 46 additions & 5 deletions xinference/model/embedding/sentence_transformers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,15 +429,56 @@ def base64_to_image(base64_str: str) -> Image.Image:
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("sentence_transformers") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("sentence_transformers") is not None
else "sentence_transformers library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
# As default embedding engine, sentence-transformer support all models
return model_spec.model_format in ["pytorch"]
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}"

# Check model dimensions compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 8192: # Extremely large embedding models
return f"Extremely large embedding model detected ({model_dimensions} dimensions), may have performance issues"

# Check token limits
max_tokens = model_family.max_tokens
if max_tokens > 131072: # Extremely high token limits (128K)
return f"Extremely high token limit model detected (max_tokens: {max_tokens}), may cause memory issues"

# Check for special model requirements
model_name = model_family.model_name.lower()

# Check Qwen2 GTE models
if "gte" in model_name and "qwen2" in model_name:
# These models have specific requirements
if not hasattr(cls, "_check_qwen_gte_requirements"):
return "Qwen2 GTE models require special handling"

# Check Qwen3 models
if "qwen3" in model_name:
# Qwen3 has flash attention requirements - basic check
try:
pass

# This would be checked during actual loading
except Exception:
return "Qwen3 embedding model may have compatibility issues"

return True
Loading
Loading