Support Many clients via generic mappings #36


Draft: wants to merge 24 commits into master.

Commits (24):
1f62b65 add callbacks for json validators (SecretiveShell, Jan 23, 2025)
ec6008e dynamic httpx clients (SecretiveShell, Jan 25, 2025)
6ba2a2a fix url (SecretiveShell, Jan 25, 2025)
9cf9a6b minor fixes (SecretiveShell, Jan 25, 2025)
44982a8 add better error handling (SecretiveShell, Jan 25, 2025)
0d301ec format codebase (SecretiveShell, Jan 25, 2025)
84e32b4 make sampler use client mapper (SecretiveShell, Jan 25, 2025)
ba245a0 add error handler to chunk validation (SecretiveShell, Jan 26, 2025)
cf8face make openrouter call tools properly (SecretiveShell, Jan 27, 2025)
889f0ff fix gemini auth issue (SecretiveShell, Jan 27, 2025)
4009de0 add error handler to non streaming chat completion (SecretiveShell, Jan 29, 2025)
28f85c1 Update launch.json (James4Ever0, Jan 30, 2025)
8823a93 Update utils.py (James4Ever0, Jan 30, 2025)
a0cf387 Update utils.py (James4Ever0, Jan 30, 2025)
1fb17d7 Delete mcp_bridge/openai_clients/streamCompletion.py (James4Ever0, Jan 30, 2025)
96e91b8 Update streamChatCompletion.py (James4Ever0, Jan 30, 2025)
e4a83e0 Update chatCompletion.py (James4Ever0, Jan 30, 2025)
e21cff5 Update utils.py (James4Ever0, Jan 30, 2025)
1a322e5 Update streamChatCompletion.py (James4Ever0, Jan 30, 2025)
4d45bc0 Merge pull request #40 from James4Ever0/fix-openrouter (SecretiveShell, Jan 30, 2025)
ed2cf9e Merge branch 'master' into fix-openrouter (SecretiveShell, Feb 1, 2025)
a537308 enforce tool call ids (SecretiveShell, Feb 6, 2025)
76f5f79 add gemini response support (SecretiveShell, Feb 8, 2025)
0576c28 Merge branch 'master' into fix-openrouter (SecretiveShell, Mar 9, 2025)
Files changed:
3 changes: 2 additions & 1 deletion .vscode/launch.json
@@ -10,6 +10,7 @@
"request": "launch",
"django": true,
"module": "mcp_bridge.main",
"pythonArgs": ["-Xutf8"]
}
]
}
}
2 changes: 1 addition & 1 deletion mcp_bridge/__init__.py
@@ -1 +1 @@
__version__ = '0.5.1'
__version__ = '0.5.1'
12 changes: 10 additions & 2 deletions mcp_bridge/config/final.py
@@ -7,6 +7,9 @@


class InferenceServer(BaseModel):
type: Literal["openai", "openrouter", "gemini"] = Field(
"openai", description="Type of inference server"
) # used to apply data mappers
base_url: str = Field(
default="http://localhost:11434/v1",
description="Base URL of the inference server",
@@ -24,14 +24,19 @@ class Logging(BaseModel):
class SamplingModel(BaseModel):
model: Annotated[str, Field(description="Name of the sampling model")]

intelligence: Annotated[float, Field(description="Intelligence of the sampling model")] = 0.5
intelligence: Annotated[
float, Field(description="Intelligence of the sampling model")
] = 0.5
cost: Annotated[float, Field(description="Cost of the sampling model")] = 0.5
speed: Annotated[float, Field(description="Speed of the sampling model")] = 0.5


class Sampling(BaseModel):
timeout: Annotated[int, Field(description="Timeout for sampling requests")] = 10
models: Annotated[list[SamplingModel], Field(description="List of sampling models")] = []
models: Annotated[
list[SamplingModel], Field(description="List of sampling models")
] = []


class SSEMCPServer(BaseModel):
# TODO: expand this once I find a good definition for this
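For context, the new type field on InferenceServer is the discriminator that the HTTP client factory and the chat data mappers below switch on. A minimal sketch of the model in use; the api_key field name is assumed from its usage in http_clients below, and the OpenRouter base URL is illustrative:

from mcp_bridge.config.final import InferenceServer

server = InferenceServer(
    type="openrouter",
    base_url="https://openrouter.ai/api/v1",  # default is http://localhost:11434/v1
    api_key="sk-example",  # assumed field; referenced by get_client() below
)
print(server.type)  # "openrouter"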
47 changes: 44 additions & 3 deletions mcp_bridge/endpoints.py
@@ -2,14 +2,16 @@

from lmos_openai_types import CreateChatCompletionRequest, CreateCompletionRequest

from mcp_bridge.config.final import InferenceServer
from mcp_bridge.openai_clients import (
client,
completions,
chat_completions,
streaming_chat_completions,
)
from mcp_bridge.http_clients import get_client

from mcp_bridge.openapi_tags import Tag
from mcp_bridge.config import config

router = APIRouter(prefix="/v1", tags=[Tag.openai])

@@ -34,6 +36,45 @@ async def openai_chat_completions(request: CreateChatCompletionRequest):

@router.get("/models")
async def models():
"""List models"""
response = await client.get("/models")
"""List models.

This is a passthrough to the inference server and returns the same response JSON."""

    # ugly hack to work around an upstream bug in the Gemini models endpoint
if config.inference_server.type == "gemini":
return list_gemini_models()

response = await get_client().get("/models")
return response.json()

def list_gemini_models():
"""temp hack to fix gemini bug"""
return {
"object": "list",
"data": [
{
"id": "gemini-2.0-flash-exp",
"object": "model",
"created": 1686935002,
"owned_by": "google",
},
{
"id": "gemini-1.5-flash",
"object": "model",
"created": 1686935002,
"owned_by": "google",
},
{
"id": "gemini-1.5-flash-8b",
"object": "model",
"created": 1686935002,
"owned_by": "google",
},
{
"id": "gemini-1.5-pro",
"object": "model",
"created": 1686935002,
"owned_by": "google",
}
],
}
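As a quick smoke test, the passthrough (and the Gemini special case) can be exercised against a running bridge. The host and port below are assumptions, since the real values come from config.network:

import asyncio

import httpx

async def main() -> None:
    # assumes the bridge listens on localhost:8000; adjust to config.network
    async with httpx.AsyncClient(base_url="http://localhost:8000/v1") as client:
        resp = await client.get("/models")
        for model in resp.json()["data"]:
            print(model["id"])

asyncio.run(main())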
35 changes: 35 additions & 0 deletions mcp_bridge/http_clients/__init__.py
@@ -0,0 +1,35 @@
from httpx import AsyncClient
from mcp_bridge.config import config


# change this if you want to hard fork the repo
# its used to show ranking on openrouter and other inference providers
BRIDGE_REPO_URL = "https://github.com/SecretiveShell/MCP-Bridge"
BRIDGE_APP_TITLE = "MCP Bridge"


def get_client() -> AsyncClient:
client: AsyncClient = AsyncClient(
base_url=config.inference_server.base_url,
headers={"Content-Type": "application/json"},
timeout=10000,
)

# generic openai
if config.inference_server.type == "openai":
client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}"
return client

# openrouter
if config.inference_server.type == "openrouter":
client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}"
client.headers["HTTP-Referer"] = BRIDGE_REPO_URL
client.headers["X-Title"] = BRIDGE_APP_TITLE
return client

# gemini models
if config.inference_server.type == "gemini":
client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}"
return client

raise NotImplementedError("Inference Server Type not supported")
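Note that get_client builds a fresh AsyncClient on every call, with the provider-specific headers already applied, so call sites stay provider-agnostic. A minimal sketch of a caller, mirroring the endpoints.py change above:

from mcp_bridge.http_clients import get_client

async def fetch_models() -> dict:
    # Authorization (and, for openrouter, HTTP-Referer/X-Title) headers
    # are already set based on config.inference_server.type
    response = await get_client().get("/models")
    return response.json()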
8 changes: 8 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/gemini/request.py
@@ -0,0 +1,8 @@
from lmos_openai_types import CreateChatCompletionRequest


def chat_completion_gemini_request(data: CreateChatCompletionRequest) -> dict:

    # no Gemini-specific request shaping yet: strip defaults,
    # None values, and unset fields from the payload
    dumped_data = data.model_dump(
        exclude_defaults=True, exclude_none=True, exclude_unset=True
    )

return dumped_data
10 changes: 10 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/gemini/response.py
@@ -0,0 +1,10 @@
from lmos_openai_types import CreateChatCompletionResponse


def chat_completion_gemini_response(data: dict) -> CreateChatCompletionResponse:

if "id" not in data or data["id"] is "":
data["id"] = "default-id"

validated_data = CreateChatCompletionResponse.model_validate(data)
return validated_data
14 changes: 14 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/gemini/stream_response.py
@@ -0,0 +1,14 @@
from lmos_openai_types import CreateChatCompletionStreamResponse
from loguru import logger


def chat_completion_gemini_stream_response(
data: dict,
) -> CreateChatCompletionStreamResponse: # type: ignore

logger.debug(f"data: {data}")

if "id" not in data or data["id"] == "":
data["id"] = "default-id"

return CreateChatCompletionStreamResponse.model_validate(data)
19 changes: 19 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/generic.py
@@ -0,0 +1,19 @@
from lmos_openai_types import (
CreateChatCompletionRequest,
CreateChatCompletionResponse,
CreateChatCompletionStreamResponse,
)


def chat_completion_generic_request(data: CreateChatCompletionRequest) -> dict:
return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True)


def chat_completion_generic_response(data: dict) -> CreateChatCompletionResponse:
return CreateChatCompletionResponse.model_validate(data)


def chat_completion_generic_stream_response(
data: dict,
) -> CreateChatCompletionStreamResponse:
return CreateChatCompletionStreamResponse.model_validate(data)
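To make the mapping concrete, here is a hedged sketch of the request side using the generic mapper; the model name and message content are illustrative:

from lmos_openai_types import CreateChatCompletionRequest

from mcp_bridge.inference_engine_mappers.chat.generic import chat_completion_generic_request

request = CreateChatCompletionRequest.model_validate({
    "model": "gpt-4o-mini",  # illustrative model name
    "messages": [{"role": "user", "content": "hello"}],
})

# a plain dict with defaults, None values, and unset fields stripped,
# ready to POST to the configured inference server
payload = chat_completion_generic_request(request)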
36 changes: 36 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/openrouter/request.py
@@ -0,0 +1,36 @@
import json
import secrets
from typing import Any, cast
from lmos_openai_types import CreateChatCompletionRequest
from loguru import logger


def chat_completion_openrouter_request(data: CreateChatCompletionRequest) -> dict:

dumped_data = data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True)

# make sure we have a tool call id for each tool call
try:
for message in dumped_data["messages"]:

message = cast(dict[str, Any], message)

if message["role"] == "assistant":
if message.get("tool_calls") is None:
continue
for tool_call in message["tool_calls"]:
tool_call["tool_call_id"] = tool_call.get("id", secrets.token_hex(16))

if message["role"] == "tool":
if message.get("tool_call_id") is None:
message["tool_call_id"] = secrets.token_hex(16)
if message.get("id") is None:
message["id"] = message["tool_call_id"]

    except Exception as e:
        logger.error(f"failed to enforce tool call ids: {e}")

logger.debug(f"dumped data: {dumped_data}")
logger.debug(f"json dumped data: {json.dumps(dumped_data)}")

return dumped_data
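For illustration, the id fix-up is easiest to see on a bare message list. This standalone sketch mirrors the loop above rather than calling the mapper itself, since constructing a full CreateChatCompletionRequest is beside the point here:

import secrets

messages = [
    {"role": "tool", "content": "42"},  # arrives without a tool_call_id
]

for message in messages:
    if message["role"] == "tool" and message.get("tool_call_id") is None:
        # same fix-up as the mapper: synthesize a random hex id
        message["tool_call_id"] = secrets.token_hex(16)
        if message.get("id") is None:
            message["id"] = message["tool_call_id"]

print(messages[0]["tool_call_id"])  # 32 hex characters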
21 changes: 21 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/openrouter/response.py
@@ -0,0 +1,21 @@
import secrets
from typing import cast
from lmos_openai_types import CreateChatCompletionResponse
from loguru import logger


def chat_completion_openrouter_response(data: dict) -> CreateChatCompletionResponse:
validated_data = CreateChatCompletionResponse.model_validate(data)

# make sure tool call ids are not none
for choice in validated_data.choices:
if choice.message.tool_calls is None:
continue
for tool_call in choice.message.tool_calls:
logger.error(f"tool call: {tool_call[1]}")
for calls in tool_call[1]:
if calls.id is None:
calls.id = secrets.token_hex(16)

logger.debug(f"validated data: {validated_data.model_dump_json()}")
return validated_data
13 changes: 13 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py
@@ -0,0 +1,13 @@
from lmos_openai_types import CreateChatCompletionStreamResponse


def chat_completion_openrouter_stream_response(
data: dict,
) -> CreateChatCompletionStreamResponse: # type: ignore
try:
data["choices"][0]["finish_reason"] = data["choices"][0][
"finish_reason"
].lower() # type: ignore
except Exception:
pass
return CreateChatCompletionStreamResponse.model_validate(data)
21 changes: 21 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/requester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from mcp_bridge.inference_engine_mappers.chat.gemini.request import chat_completion_gemini_request
from .generic import chat_completion_generic_request
from .openrouter.request import chat_completion_openrouter_request
from lmos_openai_types import CreateChatCompletionRequest
from mcp_bridge.config import config


def chat_completion_requester(data: CreateChatCompletionRequest) -> dict:
client_type = config.inference_server.type

match client_type:
# apply incoming data mappers
case "openai":
return chat_completion_generic_request(data)
case "openrouter":
return chat_completion_openrouter_request(data)
case "gemini":
return chat_completion_gemini_request(data)
case _:
return chat_completion_generic_request(data)
21 changes: 21 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/responder.py
@@ -0,0 +1,21 @@
from mcp_bridge.inference_engine_mappers.chat.gemini.response import chat_completion_gemini_response
from .generic import chat_completion_generic_response
from .openrouter.response import chat_completion_openrouter_response
from lmos_openai_types import CreateChatCompletionResponse
from mcp_bridge.config import config


def chat_completion_responder(data: dict) -> CreateChatCompletionResponse:
client_type = config.inference_server.type

match client_type:
# apply incoming data mappers
case "openai":
return chat_completion_generic_response(data)
case "openrouter":
return chat_completion_openrouter_response(data)
case "gemini":
return chat_completion_gemini_response(data)
case _:
return chat_completion_generic_response(data)
21 changes: 21 additions & 0 deletions mcp_bridge/inference_engine_mappers/chat/stream_responder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from mcp_bridge.inference_engine_mappers.chat.gemini.stream_response import chat_completion_gemini_stream_response
from .generic import chat_completion_generic_stream_response
from .openrouter.stream_response import chat_completion_openrouter_stream_response
from lmos_openai_types import CreateChatCompletionStreamResponse
from mcp_bridge.config import config


def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamResponse:
client_type = config.inference_server.type

match client_type:
# apply incoming data mappers
case "openai":
return chat_completion_generic_stream_response(data)
case "openrouter":
return chat_completion_openrouter_stream_response(data)
case "gemini":
return chat_completion_gemini_stream_response(data)
case _:
return chat_completion_generic_stream_response(data)
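Taken together, requester, responder, and stream_responder give the completion endpoints a single seam in each direction, keyed on config.inference_server.type. A hedged sketch of the intended call pattern; the wrapper function name is illustrative, not taken from this PR:

from lmos_openai_types import CreateChatCompletionRequest, CreateChatCompletionResponse

from mcp_bridge.http_clients import get_client
from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester
from mcp_bridge.inference_engine_mappers.chat.responder import chat_completion_responder

async def forward_chat_completion(
    request: CreateChatCompletionRequest,
) -> CreateChatCompletionResponse:
    payload = chat_completion_requester(request)  # per-provider request mapping
    resp = await get_client().post("/chat/completions", json=payload)
    return chat_completion_responder(resp.json())  # per-provider response mapping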
8 changes: 7 additions & 1 deletion mcp_bridge/main.py
@@ -10,6 +10,7 @@
from mcp_bridge.config import config
from loguru import logger


def create_app() -> FastAPI:
"""
Create and configure the FastAPI application.
@@ -46,11 +47,16 @@ def create_app() -> FastAPI:

return app


app = create_app()


def run():
import uvicorn
from mcp_bridge.config import config

uvicorn.run(app, host=config.network.host, port=config.network.port)


if __name__ == "__main__":
run()
run()
2 changes: 1 addition & 1 deletion mcp_bridge/mcpManagement/resources.py
@@ -1,4 +1,4 @@
from fastapi import APIRouter, HTTPException
from fastapi import APIRouter
from mcp_bridge.mcp_clients.McpClientManager import ClientManager
from mcp.types import ListResourcesResult
