⚡️ Speed up function model_request_stream_sync by 41% #31

Status: Open. Wants to merge 1 commit into base branch try-refinement.
23 changes: 21 additions & 2 deletions pydantic_ai_slim/pydantic_ai/direct.py
@@ -21,6 +21,7 @@

 from . import agent, messages, models, settings
 from .models import StreamedResponse, instrumented as instrumented_models
+from functools import lru_cache
 
 __all__ = (
     'model_request',
@@ -188,11 +189,24 @@ async def main():
     Returns:
         A [stream response][pydantic_ai.models.StreamedResponse] async context manager.
     """
-    model_instance = _prepare_model(model, instrument)
+    # Use the memoized _cached_prepare_model when the arguments are hashable;
+    # fall back to the original _prepare_model when they are not.
+    try:
+        model_instance = _cached_prepare_model(model, instrument)
+    except TypeError:
+        # Fallback for non-hashable arguments (e.g. a model instance).
+        model_instance = _prepare_model(model, instrument)
+
+    # Treat only None as "not provided" when resolving the default parameters.
+    mrp = model_request_parameters if model_request_parameters is not None else models.ModelRequestParameters()
+    # Look up customize_request_parameters once and reuse the result.
+    customize_fn = model_instance.customize_request_parameters
+    customized_params = customize_fn(mrp)
+
     return model_instance.request_stream(
         messages,
         model_settings,
-        model_instance.customize_request_parameters(model_request_parameters or models.ModelRequestParameters()),
+        customized_params,
     )
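
The try/except above relies on documented functools.lru_cache behavior: the wrapper hashes its arguments to build a cache key, so a call with unhashable arguments raises TypeError before the wrapped function ever runs, which makes the exception a cheap dispatch between the cached and uncached paths. A minimal, self-contained sketch of the same memoize-with-fallback pattern (expensive_prepare and its spec argument are illustrative stand-ins, not code from this PR):

    from functools import lru_cache

    def expensive_prepare(spec):
        # Stand-in for _prepare_model: pretend this does costly setup work.
        return {"prepared": spec}

    @lru_cache(maxsize=64)
    def cached_prepare(spec):
        return expensive_prepare(spec)

    def prepare(spec):
        try:
            # Hashable specs (e.g. a model-name string) hit the cache.
            return cached_prepare(spec)
        except TypeError:
            # Unhashable specs (e.g. a dict, or a dataclass with eq=True and
            # frozen=False, whose __hash__ is None) take the uncached path.
            return expensive_prepare(spec)

One caveat with this dispatch: if the wrapped function itself raises TypeError for an unrelated reason, the except branch re-runs it rather than propagating the error immediately.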


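A smaller change in the same hunk: the old code used model_request_parameters or models.ModelRequestParameters(), the new code checks is not None. Both defer constructing the default (or short-circuits too); the difference is that or would also discard an explicitly passed object that happens to be falsy. Since ModelRequestParameters appears to be a plain dataclass and therefore always truthy, this is mostly an intent/readability fix here, but the general distinction is easy to show (FalsyParams is a made-up illustration):

    class FalsyParams:
        def __bool__(self):
            return False  # a valid-but-falsy parameters object

    p = FalsyParams()
    via_or = p or FalsyParams()                        # discards p, builds a new object
    via_none = p if p is not None else FalsyParams()   # keeps p
    assert via_none is p
    assert via_or is not p
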
@@ -264,6 +278,11 @@ def _prepare_model(

     return instrumented_models.instrument_model(model_instance, instrument)
 
+
+# Simple cache for _prepare_model to avoid repeated expensive model preparation.
+@lru_cache(maxsize=64)
+def _cached_prepare_model(model, instrument):
+    return _prepare_model(model, instrument)
+
 
 @dataclass
 class StreamedResponseSync:
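
Worth flagging in review: lru_cache keys on the exact (model, instrument) argument pair and keeps strong references to up to 64 prepared model instances for the lifetime of the process, so a cached entry will not pick up later changes to ambient configuration for the same arguments. The cache can be inspected or reset through the standard attributes functools.lru_cache attaches to the wrapper; a sketch, assuming the module imports as pydantic_ai.direct (which follows from the file path above):

    from pydantic_ai import direct

    # Standard attributes on any lru_cache-wrapped callable:
    print(direct._cached_prepare_model.cache_info())
    # e.g. CacheInfo(hits=3, misses=1, maxsize=64, currsize=1)

    direct._cached_prepare_model.cache_clear()  # e.g. reset between test cases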