Commit 4175600

feat: add Prompts API to Responses API
1 parent f1748e2 commit 4175600

File tree

11 files changed: +4406 -1656 lines


docs/static/llama-stack-spec.html

Lines changed: 1285 additions & 320 deletions
Large diffs are not rendered by default.

docs/static/llama-stack-spec.yaml

Lines changed: 2962 additions & 1332 deletions
Large diffs are not rendered by default.

llama_stack/apis/agents/agents.py

Lines changed: 3 additions & 1 deletion
@@ -38,6 +38,7 @@
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePromptParam,
     OpenAIResponseText,
 )

@@ -789,6 +790,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,

@@ -800,9 +802,9 @@ async def create_openai_response(
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a new OpenAI response.
-
         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
         :param include: (Optional) Additional fields to include in the response.
         :returns: An OpenAIResponseObject.
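
For orientation, a call against the extended signature might look like the sketch below. This is a hedged example, not part of the commit: the model name, prompt ID, and variable values are placeholders, and agents_api stands for any implementation of this protocol.

from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam

async def ask(agents_api) -> None:
    # Hypothetical call; "llama3.2:3b", "pmpt_customer_support", and the
    # variable values are illustrative, not defined by this commit.
    response = await agents_api.create_openai_response(
        input="Where is my order?",
        model="llama3.2:3b",
        prompt=OpenAIResponsePromptParam(
            id="pmpt_customer_support",
            version="1",
            variables={"customer_name": "Alice"},
        ),
    )
    print(response.status)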

llama_stack/apis/agents/openai_responses.py

Lines changed: 17 additions & 0 deletions
@@ -9,6 +9,7 @@
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict

+from llama_stack.apis.prompts.prompts import Prompt
 from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
 from llama_stack.schema_utils import json_schema_type, register_schema

@@ -336,6 +337,20 @@ class OpenAIResponseTextFormat(TypedDict, total=False):
     strict: bool | None


+@json_schema_type
+class OpenAIResponsePromptParam(BaseModel):
+    """Prompt object that is used for OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    :param variables: Dictionary of variable names to values for template substitution
+    """
+
+    id: str
+    version: str | None = None
+    variables: dict[str, Any] | None = None
+
+
 @json_schema_type
 class OpenAIResponseText(BaseModel):
     """Text response configuration for OpenAI responses.

@@ -357,6 +372,7 @@ class OpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param prompt: (Optional) Prompt object with ID, version, and variables
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation

@@ -373,6 +389,7 @@ class OpenAIResponseObject(BaseModel):
     output: list[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: Prompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
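
Since OpenAIResponsePromptParam is a plain pydantic model, the request-side object is easy to build and inspect. A minimal sketch, assuming pydantic v2 (the ID and variable names are made-up values):

from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam

# "pmpt_123" and "customer_name" are illustrative, not from the commit.
param = OpenAIResponsePromptParam(id="pmpt_123", variables={"customer_name": "Alice"})

assert param.version is None  # omitted version means "use the latest"
print(param.model_dump())
# {'id': 'pmpt_123', 'version': None, 'variables': {'customer_name': 'Alice'}}

Note the asymmetry the diff introduces: requests carry an OpenAIResponsePromptParam, while the stored Prompt that was resolved from it is echoed back on OpenAIResponseObject.prompt.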

llama_stack/core/stack.py

Lines changed: 4 additions & 0 deletions
@@ -312,6 +312,10 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
     )
     impls[Api.prompts] = prompts_impl

+    # Set prompts API on agents provider if it exists
+    if Api.agents in impls and hasattr(impls[Api.agents], "set_prompts_api"):
+        impls[Api.agents].set_prompts_api(prompts_impl)
+

 class Stack:
     def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
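
The hasattr-guarded call above is late-binding dependency injection: the agents provider is constructed before the internal prompts implementation exists, so the dependency is attached after both are built. A reduced sketch of the pattern, with hypothetical stand-in classes:

class PromptsImpl:
    """Stand-in for the internal prompts implementation."""

class AgentsProvider:
    """Stand-in for an agents provider that accepts the prompts API late."""

    def __init__(self) -> None:
        self.prompts_api: PromptsImpl | None = None  # not available at construction time

    def set_prompts_api(self, prompts_api: PromptsImpl) -> None:
        self.prompts_api = prompts_api  # injected once the impl exists

impls = {"agents": AgentsProvider()}
prompts_impl = PromptsImpl()
# Mirrors stack.py: only providers that opt in via set_prompts_api get wired.
if hasattr(impls["agents"], "set_prompts_api"):
    impls["agents"].set_prompts_api(prompts_impl)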

llama_stack/providers/inline/agents/meta_reference/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
         deps[Api.safety],
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
+        None,  # prompts_api will be set later when available
         policy,
     )
     await impl.initialize()

llama_stack/providers/inline/agents/meta_reference/agents.py

Lines changed: 12 additions & 1 deletion
@@ -28,7 +28,7 @@
     Session,
     Turn,
 )
-from llama_stack.apis.agents.openai_responses import OpenAIResponseText
+from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam, OpenAIResponseText
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.inference import (
     Inference,

@@ -37,6 +37,7 @@
     ToolResponseMessage,
     UserMessage,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO

@@ -63,6 +64,7 @@ def __init__(
         safety_api: Safety,
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
+        prompts_api: Prompts | None,
         policy: list[AccessRule],
     ):
         self.config = config

@@ -71,6 +73,7 @@ def __init__(
         self.safety_api = safety_api
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
+        self.prompts_api = prompts_api

         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None

@@ -86,8 +89,14 @@ async def initialize(self) -> None:
             tool_runtime_api=self.tool_runtime_api,
             responses_store=self.responses_store,
             vector_io_api=self.vector_io_api,
+            prompts_api=self.prompts_api,
         )

+    def set_prompts_api(self, prompts_api: Prompts) -> None:
+        self.prompts_api = prompts_api
+        if hasattr(self, "openai_responses_impl") and self.openai_responses_impl:
+            self.openai_responses_impl.prompts_api = prompts_api
+
     async def create_agent(
         self,
         agent_config: AgentConfig,

@@ -320,6 +329,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,

@@ -333,6 +343,7 @@ async def create_openai_response(
         return await self.openai_responses_impl.create_openai_response(
             input,
             model,
+            prompt,
             instructions,
             previous_response_id,
             store,

llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

Lines changed: 48 additions & 0 deletions
@@ -21,13 +21,17 @@
     OpenAIResponseMessage,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePromptParam,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
 )
 from llama_stack.apis.inference import (
     Inference,
+    OpenAIMessageParam,
     OpenAISystemMessageParam,
 )
+from llama_stack.apis.prompts import Prompts
+from llama_stack.apis.prompts.prompts import Prompt
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.log import get_logger

@@ -57,12 +61,14 @@ def __init__(
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
+        prompts_api: Prompts,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.responses_store = responses_store
         self.vector_io_api = vector_io_api
+        self.prompts_api = prompts_api
         self.tool_executor = ToolExecutor(
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,

@@ -97,6 +103,41 @@ async def _prepend_instructions(self, messages, instructions):
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))

+    async def _prepend_prompt(
+        self, messages: list[OpenAIMessageParam], prompt_params: OpenAIResponsePromptParam | None
+    ) -> Prompt | None:
+        if not prompt_params or not prompt_params.id:
+            return None
+
+        try:
+            # Check if prompt exists in Llama Stack and retrieve it
+            prompt_version = int(prompt_params.version) if prompt_params.version else None
+            cur_prompt = await self.prompts_api.get_prompt(prompt_params.id, prompt_version)
+            if cur_prompt and cur_prompt.prompt:
+                cur_prompt_text = cur_prompt.prompt
+                cur_prompt_variables = cur_prompt.variables
+
+                final_prompt_text = cur_prompt_text
+                if prompt_params.variables:
+                    # check if the variables are valid
+                    for name in prompt_params.variables.keys():
+                        if name not in cur_prompt_variables:
+                            raise ValueError(f"Variable {name} not found in prompt {prompt_params.id}")
+
+                    # replace the variables in the prompt text
+                    for name, value in prompt_params.variables.items():
+                        final_prompt_text = final_prompt_text.replace(f"{{{{ {name} }}}}", str(value))
+
+                messages.insert(0, OpenAISystemMessageParam(content=final_prompt_text))
+                logger.info(f"Prompt {prompt_params.id} found and applied\nFinal prompt text: {final_prompt_text}")
+                return cur_prompt
+
+        except ValueError:
+            logger.warning(
+                f"Prompt {prompt_params.id} with version {prompt_params.version} not found, skipping prompt prepending"
+            )
+            return None
+
     async def get_openai_response(
         self,
         response_id: str,

@@ -171,6 +212,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,

@@ -187,6 +229,7 @@ async def create_openai_response(
         stream_gen = self._create_streaming_response(
             input=input,
             model=model,
+            prompt=prompt,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,

@@ -215,6 +258,7 @@ async def _create_streaming_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,

@@ -226,6 +270,9 @@ async def _create_streaming_response(
         # Input preprocessing
         input = await self._prepend_previous_response(input, previous_response_id)
         messages = await convert_response_input_to_chat_messages(input)
+
+        # Prepend reusable prompt (if provided)
+        prompt_obj = await self._prepend_prompt(messages, prompt)
         await self._prepend_instructions(messages, instructions)

         # Structured outputs

@@ -249,6 +296,7 @@ async def _create_streaming_response(
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt_obj,
             text=text,
             max_infer_iters=max_infer_iters,
             tool_executor=self.tool_executor,
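
The variable substitution in _prepend_prompt is plain string replacement against a "{{ name }}" placeholder syntax (single spaces inside the braces). A standalone sketch of just that step, with a made-up template:

# Template and variables are illustrative; the loop mirrors _prepend_prompt.
template = "You are a {{ tone }} assistant helping {{ customer_name }}."
variables = {"tone": "friendly", "customer_name": "Alice"}

final_prompt_text = template
for name, value in variables.items():
    # f"{{{{ {name} }}}}" renders as "{{ tone }}", "{{ customer_name }}", etc.
    final_prompt_text = final_prompt_text.replace(f"{{{{ {name} }}}}", str(value))

print(final_prompt_text)
# You are a friendly assistant helping Alice.

Note the ordering in _create_streaming_response: _prepend_prompt runs first, then _prepend_instructions, and both insert at index 0, so the instructions message ends up ahead of the rendered prompt text in the final message list.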

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

Lines changed: 5 additions & 0 deletions
@@ -44,6 +44,7 @@
     OpenAIChatCompletionToolCall,
     OpenAIChoice,
 )
+from llama_stack.apis.prompts.prompts import Prompt
 from llama_stack.log import get_logger

 from .types import ChatCompletionContext, ChatCompletionResult

@@ -89,6 +90,7 @@ def __init__(
         ctx: ChatCompletionContext,
         response_id: str,
         created_at: int,
+        prompt: Prompt | None,
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class

@@ -97,6 +99,7 @@ def __init__(
         self.ctx = ctx
         self.response_id = response_id
         self.created_at = created_at
+        self.prompt = prompt
         self.text = text
         self.max_infer_iters = max_infer_iters
         self.tool_executor = tool_executor

@@ -115,6 +118,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             object="response",
             status="in_progress",
             output=output_messages.copy(),
+            prompt=self.prompt,
             text=self.text,
         )

@@ -199,6 +203,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             model=self.ctx.model,
             object="response",
             status="completed",
+            prompt=self.prompt,
             text=self.text,
             output=output_messages,
         )

tests/unit/providers/agent/test_meta_reference_agent.py

Lines changed: 2 additions & 1 deletion
@@ -60,7 +60,8 @@ async def agents_impl(config, mock_apis):
         mock_apis["safety_api"],
         mock_apis["tool_runtime_api"],
         mock_apis["tool_groups_api"],
-        {},
+        None,  # prompts_api (will be set later via set_prompts_api if needed)
+        [],  # policy (empty list for tests)
     )
     await impl.initialize()
     yield impl
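
If a test did need the prompts path, one way to exercise the new setter after the fixture builds the impl would be a mock injection along these lines (a hypothetical follow-up, not part of this commit):

from unittest.mock import AsyncMock

# Hypothetical: inject a mocked prompts API into the impl yielded above.
mock_prompts_api = AsyncMock()
impl.set_prompts_api(mock_prompts_api)
assert impl.prompts_api is mock_prompts_api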
