4 changes: 4 additions & 0 deletions client-sdks/stainless/openapi.yml
@@ -7472,6 +7472,10 @@ components:
type: string
type: object
- type: 'null'
top_logprobs:
anyOf:
- type: integer
- type: 'null'
type: object
required:
- input
4 changes: 4 additions & 0 deletions docs/static/deprecated-llama-stack-spec.yaml
@@ -4307,6 +4307,10 @@ components:
type: string
type: object
- type: 'null'
top_logprobs:
anyOf:
- type: integer
- type: 'null'
type: object
required:
- input
4 changes: 4 additions & 0 deletions docs/static/llama-stack-spec.yaml
@@ -6114,6 +6114,10 @@ components:
type: string
type: object
- type: 'null'
top_logprobs:
anyOf:
- type: integer
- type: 'null'
type: object
required:
- input
4 changes: 4 additions & 0 deletions docs/static/stainless-llama-stack-spec.yaml
@@ -7472,6 +7472,10 @@ components:
type: string
type: object
- type: 'null'
top_logprobs:
anyOf:
- type: integer
- type: 'null'
type: object
required:
- input
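All four generated spec files gain the same addition: a nullable integer top_logprobs property on the create-response request schema. As a minimal sketch (not taken from the specs themselves), a request body carrying the new field might look like the following; the model name, input text, and include string are assumptions for illustration only.

    # Hypothetical request body for the create-response endpoint.
    # Only "top_logprobs" is the new field; the other values are illustrative.
    request_body = {
        "model": "llama3.2:3b",
        "input": "Briefly explain log probabilities.",
        "include": ["message.output_text.logprobs"],  # assumed include value for requesting logprobs
        "top_logprobs": 3,  # new field: integer or null
    }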
@@ -113,13 +113,15 @@ async def create_openai_response(
guardrails: list[ResponseGuardrail] | None = None,
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
top_logprobs: int | None = None,
) -> OpenAIResponseObject:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
result = await self.openai_responses_impl.create_openai_response(
input,
model,
prompt,
instructions,
parallel_tool_calls,
previous_response_id,
conversation,
store,
@@ -131,9 +133,9 @@ async def create_openai_response(
include,
max_infer_iters,
guardrails,
parallel_tool_calls,
max_tool_calls,
metadata,
top_logprobs,
)
return result # type: ignore[no-any-return]

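This wrapper forwards its arguments to openai_responses_impl.create_openai_response positionally, so its call order has to mirror the implementation signature exactly; that is why parallel_tool_calls moves up in the call and top_logprobs is appended at the end on both sides of the diff. A minimal sketch of the hazard this avoids, using hypothetical names:

    # Hypothetical wrapper/impl pair; the names are illustrative, not from the codebase.
    async def impl(input, model, parallel_tool_calls=True, top_logprobs=None): ...

    async def wrapper(input, model, parallel_tool_calls=True, top_logprobs=None):
        # Positional forwarding: if the ordering here drifts from impl's signature,
        # values silently bind to the wrong parameters, so both lists are kept in sync.
        return await impl(input, model, parallel_tool_calls, top_logprobs)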
@@ -328,6 +328,7 @@ async def create_openai_response(
model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None,
parallel_tool_calls: bool | None = True,
previous_response_id: str | None = None,
conversation: str | None = None,
store: bool | None = True,
@@ -339,9 +340,9 @@ async def create_openai_response(
include: list[ResponseItemInclude] | None = None,
max_infer_iters: int | None = 10,
guardrails: list[str | ResponseGuardrailSpec] | None = None,
parallel_tool_calls: bool | None = None,
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
top_logprobs: int | None = None,
):
stream = bool(stream)
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -399,6 +400,7 @@ async def create_openai_response(
max_tool_calls=max_tool_calls,
metadata=metadata,
include=include,
top_logprobs=top_logprobs,
)

if stream:
@@ -454,6 +456,7 @@ async def _create_streaming_response(
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
include: list[ResponseItemInclude] | None = None,
top_logprobs: int | None = None,
) -> AsyncIterator[OpenAIResponseObjectStream]:
# These should never be None when called from create_openai_response (which sets defaults)
# but we assert here to help mypy understand the types
@@ -505,6 +508,7 @@ async def _create_streaming_response(
max_tool_calls=max_tool_calls,
metadata=metadata,
include=include,
top_logprobs=top_logprobs,
)

# Stream the response
@@ -138,6 +138,7 @@ def __init__(
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
include: list[ResponseItemInclude] | None = None,
top_logprobs: int | None = None,
):
self.inference_api = inference_api
self.ctx = ctx
@@ -157,6 +158,7 @@ def __init__(
self.max_tool_calls = max_tool_calls
self.metadata = metadata
self.include = include
self.top_logprobs = top_logprobs
self.sequence_number = 0
# Store MCP tool mapping that gets built during tool processing
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -311,6 +313,8 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
True if self.include and ResponseItemInclude.message_output_text_logprobs in self.include else None
)

top_logprobs_param = self.top_logprobs if logprobs is True else None

params = OpenAIChatCompletionRequestWithExtraBody(
model=self.ctx.model,
messages=messages,
@@ -324,6 +328,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
"include_usage": True,
},
logprobs=logprobs,
top_logprobs=top_logprobs_param,
)
completion_result = await self.inference_api.openai_chat_completion(params)

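The orchestrator only asks the chat-completions call for log probabilities when the caller requested them through the include parameter, and top_logprobs is forwarded only in that case; otherwise both stay None. A standalone sketch of that gating, assuming "message.output_text.logprobs" is the string value behind ResponseItemInclude.message_output_text_logprobs:

    # Simplified, self-contained version of the gating above; the include string is an assumption.
    def resolve_logprobs_params(
        include: list[str] | None, top_logprobs: int | None
    ) -> tuple[bool | None, int | None]:
        logprobs = True if include and "message.output_text.logprobs" in include else None
        top_logprobs_param = top_logprobs if logprobs is True else None
        return logprobs, top_logprobs_param

    # Caller requested logprobs and up to 3 alternatives per token:
    assert resolve_logprobs_params(["message.output_text.logprobs"], 3) == (True, 3)
    # top_logprobs is dropped when logprobs were never requested:
    assert resolve_logprobs_params(None, 3) == (None, None)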
2 changes: 2 additions & 0 deletions src/llama_stack_api/agents.py
@@ -107,6 +107,7 @@ async def create_openai_response(
] = None,
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
top_logprobs: int | None = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a model response.

@@ -119,6 +120,7 @@
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
:param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
:param top_logprobs: (Optional) The number of top log probabilities to return for each token. Only used when logprobs are requested via the include parameter.
:returns: An OpenAIResponseObject.
"""
...
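As a hedged usage sketch (the client handle and the include string are assumptions; only the parameter names come from the signature above), a caller asking for per-token log probabilities with five alternatives per token might write:

    # Hypothetical caller-side example; agents_api and the include value are illustrative.
    response = await agents_api.create_openai_response(
        input="What is the capital of France?",
        model="llama3.2:3b",
        include=["message.output_text.logprobs"],  # request logprobs in the output
        top_logprobs=5,  # return the 5 most likely alternatives for each token
    )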