2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "0.4.0-alpha.8"
".": "0.4.0-alpha.9"
}
8 changes: 4 additions & 4 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 96
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-602ce64aa309cc15fa09388d99c9f298795686fc37605237cbc03c39d29aabf6.yml
openapi_spec_hash: fc6995247b2555e8660bc9291eb10415
config_hash: e8a35d9d37cb4774b4b0fe1b167dc156
configured_endpoints: 103
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-2b99a80543f8bc8fa164167693c214651ac8e710f4726fb5869183b4d6c71a03.yml
openapi_spec_hash: a5632057f5e4d956a71c20a79c0d879c
config_hash: 0017f6c419cbbf7b949f9b2842917a79
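
The endpoint count rises from 96 to 103, matching the seven routes added in this release: four batch endpoints, the conversation-item delete, and the two unregister endpoints for scoring functions and benchmarks.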
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## 0.4.0-alpha.9 (2025-11-14)

Full Changelog: [v0.4.0-alpha.8...v0.4.0-alpha.9](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.8...v0.4.0-alpha.9)

### Chores

* use Pydantic to generate OpenAPI schema ([59f7867](https://github.com/llamastack/llama-stack-client-python/commit/59f7867d3d96eaa7048aae70fcb0a0fe4670a9fa))

## 0.4.0-alpha.8 (2025-11-14)

Full Changelog: [v0.4.0-alpha.7...v0.4.0-alpha.8](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.7...v0.4.0-alpha.8)
21 changes: 6 additions & 15 deletions README.md
@@ -33,10 +33,7 @@ from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

response = client.models.register(
    model_id="model_id",
)
print(response.identifier)
models = client.models.list()
```

While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `LLAMA_STACK_CLIENT_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control.
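
For instance, a minimal sketch of that setup (standard `python-dotenv` usage; it assumes the client reads `LLAMA_STACK_CLIENT_API_KEY` from the environment, as described above):

```python
# .env contains: LLAMA_STACK_CLIENT_API_KEY="My API Key"
from dotenv import load_dotenv

from llama_stack_client import LlamaStackClient

load_dotenv()  # populate os.environ from .env
client = LlamaStackClient()  # picks the key up from the environment
```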
@@ -93,10 +90,7 @@ client = AsyncLlamaStackClient(


async def main() -> None:
    response = await client.models.register(
        model_id="model_id",
    )
    print(response.identifier)
    models = await client.models.list()


asyncio.run(main())
@@ -127,10 +121,7 @@ async def main() -> None:
    async with AsyncLlamaStackClient(
        http_client=DefaultAioHttpClient(),
    ) as client:
        response = await client.models.register(
            model_id="model_id",
        )
        print(response.identifier)
        models = await client.models.list()


asyncio.run(main())
@@ -156,7 +147,7 @@ stream = client.chat.completions.create(
    stream=True,
)
for completion in stream:
    print(completion)
    print(completion.id)
```

The async client uses the exact same interface.
@@ -177,7 +168,7 @@ stream = await client.chat.completions.create(
    stream=True,
)
async for completion in stream:
    print(completion)
    print(completion.id)
```

## Using types
@@ -378,7 +369,7 @@ response = client.chat.completions.with_raw_response.create(
print(response.headers.get('X-My-Header'))

completion = response.parse() # get the object that `chat.completions.create()` would have returned
print(completion)
print(completion.id)
```

These methods return an [`APIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) object.
30 changes: 27 additions & 3 deletions api.md
@@ -50,8 +50,8 @@ from llama_stack_client.types import ToolDef, ToolInvocationResult, ToolRuntimeListToolsResponse

Methods:

- <code title="post /v1/tool-runtime/invoke">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime/tool_runtime.py">invoke_tool</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_invoke_tool_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_invocation_result.py">ToolInvocationResult</a></code>
- <code title="get /v1/tool-runtime/list-tools">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime/tool_runtime.py">list_tools</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_list_tools_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_runtime_list_tools_response.py">ToolRuntimeListToolsResponse</a></code>
- <code title="post /v1/tool-runtime/invoke">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime.py">invoke_tool</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_invoke_tool_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_invocation_result.py">ToolInvocationResult</a></code>
- <code title="get /v1/tool-runtime/list-tools">client.tool_runtime.<a href="./src/llama_stack_client/resources/tool_runtime.py">list_tools</a>(\*\*<a href="src/llama_stack_client/types/tool_runtime_list_tools_params.py">params</a>) -> <a href="./src/llama_stack_client/types/tool_runtime_list_tools_response.py">ToolRuntimeListToolsResponse</a></code>

# Responses

@@ -131,6 +131,7 @@ Types:
from llama_stack_client.types.conversations import (
ItemCreateResponse,
ItemListResponse,
ItemDeleteResponse,
ItemGetResponse,
)
```
@@ -139,6 +140,7 @@ Methods:

- <code title="post /v1/conversations/{conversation_id}/items">client.conversations.items.<a href="./src/llama_stack_client/resources/conversations/items.py">create</a>(conversation_id, \*\*<a href="src/llama_stack_client/types/conversations/item_create_params.py">params</a>) -> <a href="./src/llama_stack_client/types/conversations/item_create_response.py">ItemCreateResponse</a></code>
- <code title="get /v1/conversations/{conversation_id}/items">client.conversations.items.<a href="./src/llama_stack_client/resources/conversations/items.py">list</a>(conversation_id, \*\*<a href="src/llama_stack_client/types/conversations/item_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/conversations/item_list_response.py">SyncOpenAICursorPage[ItemListResponse]</a></code>
- <code title="delete /v1/conversations/{conversation_id}/items/{item_id}">client.conversations.items.<a href="./src/llama_stack_client/resources/conversations/items.py">delete</a>(item_id, \*, conversation_id) -> <a href="./src/llama_stack_client/types/conversations/item_delete_response.py">ItemDeleteResponse</a></code>
- <code title="get /v1/conversations/{conversation_id}/items/{item_id}">client.conversations.items.<a href="./src/llama_stack_client/resources/conversations/items.py">get</a>(item_id, \*, conversation_id) -> <a href="./src/llama_stack_client/types/conversations/item_get_response.py">ItemGetResponse</a></code>

# Inspect
@@ -190,7 +192,7 @@ Methods:

- <code title="post /v1/chat/completions">client.chat.completions.<a href="./src/llama_stack_client/resources/chat/completions.py">create</a>(\*\*<a href="src/llama_stack_client/types/chat/completion_create_params.py">params</a>) -> <a href="./src/llama_stack_client/types/chat/completion_create_response.py">CompletionCreateResponse</a></code>
- <code title="get /v1/chat/completions/{completion_id}">client.chat.completions.<a href="./src/llama_stack_client/resources/chat/completions.py">retrieve</a>(completion_id) -> <a href="./src/llama_stack_client/types/chat/completion_retrieve_response.py">CompletionRetrieveResponse</a></code>
- <code title="get /v1/chat/completions">client.chat.completions.<a href="./src/llama_stack_client/resources/chat/completions.py">list</a>(\*\*<a href="src/llama_stack_client/types/chat/completion_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/chat/completion_list_response.py">SyncOpenAICursorPage[CompletionListResponse]</a></code>
- <code title="get /v1/chat/completions">client.chat.completions.<a href="./src/llama_stack_client/resources/chat/completions.py">list</a>(\*\*<a href="src/llama_stack_client/types/chat/completion_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/chat/completion_list_response.py">CompletionListResponse</a></code>

# Completions

@@ -400,6 +402,7 @@ Methods:
- <code title="get /v1/scoring-functions/{scoring_fn_id}">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">retrieve</a>(scoring_fn_id) -> <a href="./src/llama_stack_client/types/scoring_fn.py">ScoringFn</a></code>
- <code title="get /v1/scoring-functions">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">list</a>() -> <a href="./src/llama_stack_client/types/scoring_function_list_response.py">ScoringFunctionListResponse</a></code>
- <code title="post /v1/scoring-functions">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">register</a>(\*\*<a href="src/llama_stack_client/types/scoring_function_register_params.py">params</a>) -> None</code>
- <code title="delete /v1/scoring-functions/{scoring_fn_id}">client.scoring_functions.<a href="./src/llama_stack_client/resources/scoring_functions.py">unregister</a>(scoring_fn_id) -> None</code>

# Files

@@ -417,6 +420,26 @@ Methods:
- <code title="delete /v1/files/{file_id}">client.files.<a href="./src/llama_stack_client/resources/files.py">delete</a>(file_id) -> <a href="./src/llama_stack_client/types/delete_file_response.py">DeleteFileResponse</a></code>
- <code title="get /v1/files/{file_id}/content">client.files.<a href="./src/llama_stack_client/resources/files.py">content</a>(file_id) -> object</code>

# Batches

Types:

```python
from llama_stack_client.types import (
BatchCreateResponse,
BatchRetrieveResponse,
BatchListResponse,
BatchCancelResponse,
)
```

Methods:

- <code title="post /v1/batches">client.batches.<a href="./src/llama_stack_client/resources/batches.py">create</a>(\*\*<a href="src/llama_stack_client/types/batch_create_params.py">params</a>) -> <a href="./src/llama_stack_client/types/batch_create_response.py">BatchCreateResponse</a></code>
- <code title="get /v1/batches/{batch_id}">client.batches.<a href="./src/llama_stack_client/resources/batches.py">retrieve</a>(batch_id) -> <a href="./src/llama_stack_client/types/batch_retrieve_response.py">BatchRetrieveResponse</a></code>
- <code title="get /v1/batches">client.batches.<a href="./src/llama_stack_client/resources/batches.py">list</a>(\*\*<a href="src/llama_stack_client/types/batch_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/batch_list_response.py">SyncOpenAICursorPage[BatchListResponse]</a></code>
- <code title="post /v1/batches/{batch_id}/cancel">client.batches.<a href="./src/llama_stack_client/resources/batches.py">cancel</a>(batch_id) -> <a href="./src/llama_stack_client/types/batch_cancel_response.py">BatchCancelResponse</a></code>

# Alpha

## Inference
@@ -480,6 +503,7 @@ Methods:
- <code title="get /v1alpha/eval/benchmarks/{benchmark_id}">client.alpha.benchmarks.<a href="./src/llama_stack_client/resources/alpha/benchmarks.py">retrieve</a>(benchmark_id) -> <a href="./src/llama_stack_client/types/alpha/benchmark.py">Benchmark</a></code>
- <code title="get /v1alpha/eval/benchmarks">client.alpha.benchmarks.<a href="./src/llama_stack_client/resources/alpha/benchmarks.py">list</a>() -> <a href="./src/llama_stack_client/types/alpha/benchmark_list_response.py">BenchmarkListResponse</a></code>
- <code title="post /v1alpha/eval/benchmarks">client.alpha.benchmarks.<a href="./src/llama_stack_client/resources/alpha/benchmarks.py">register</a>(\*\*<a href="src/llama_stack_client/types/alpha/benchmark_register_params.py">params</a>) -> None</code>
- <code title="delete /v1alpha/eval/benchmarks/{benchmark_id}">client.alpha.benchmarks.<a href="./src/llama_stack_client/resources/alpha/benchmarks.py">unregister</a>(benchmark_id) -> None</code>

## Eval

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
version = "0.4.0-alpha.8"
version = "0.4.0-alpha.9"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"
40 changes: 39 additions & 1 deletion src/llama_stack_client/_client.py
@@ -47,6 +47,7 @@
        models,
        routes,
        safety,
        batches,
        inspect,
        prompts,
        scoring,
@@ -67,6 +68,7 @@
from .resources.tools import ToolsResource, AsyncToolsResource
from .resources.routes import RoutesResource, AsyncRoutesResource
from .resources.safety import SafetyResource, AsyncSafetyResource
from .resources.batches import BatchesResource, AsyncBatchesResource
from .resources.inspect import InspectResource, AsyncInspectResource
from .resources.scoring import ScoringResource, AsyncScoringResource
from .resources.shields import ShieldsResource, AsyncShieldsResource
@@ -79,11 +81,11 @@
from .resources.alpha.alpha import AlphaResource, AsyncAlphaResource
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
from .resources.tool_runtime import ToolRuntimeResource, AsyncToolRuntimeResource
from .resources.models.models import ModelsResource, AsyncModelsResource
from .resources.prompts.prompts import PromptsResource, AsyncPromptsResource
from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
from .resources.tool_runtime.tool_runtime import ToolRuntimeResource, AsyncToolRuntimeResource
from .resources.conversations.conversations import ConversationsResource, AsyncConversationsResource
from .resources.vector_stores.vector_stores import VectorStoresResource, AsyncVectorStoresResource

@@ -282,6 +284,12 @@ def files(self) -> FilesResource:

        return FilesResource(self)

    @cached_property
    def batches(self) -> BatchesResource:
        from .resources.batches import BatchesResource

        return BatchesResource(self)

    @cached_property
    def alpha(self) -> AlphaResource:
        from .resources.alpha import AlphaResource
@@ -592,6 +600,12 @@ def files(self) -> AsyncFilesResource:

        return AsyncFilesResource(self)

    @cached_property
    def batches(self) -> AsyncBatchesResource:
        from .resources.batches import AsyncBatchesResource

        return AsyncBatchesResource(self)

    @cached_property
    def alpha(self) -> AsyncAlphaResource:
        from .resources.alpha import AsyncAlphaResource
@@ -851,6 +865,12 @@ def files(self) -> files.FilesResourceWithRawResponse:

        return FilesResourceWithRawResponse(self._client.files)

    @cached_property
    def batches(self) -> batches.BatchesResourceWithRawResponse:
        from .resources.batches import BatchesResourceWithRawResponse

        return BatchesResourceWithRawResponse(self._client.batches)

    @cached_property
    def alpha(self) -> alpha.AlphaResourceWithRawResponse:
        from .resources.alpha import AlphaResourceWithRawResponse
@@ -996,6 +1016,12 @@ def files(self) -> files.AsyncFilesResourceWithRawResponse:

        return AsyncFilesResourceWithRawResponse(self._client.files)

    @cached_property
    def batches(self) -> batches.AsyncBatchesResourceWithRawResponse:
        from .resources.batches import AsyncBatchesResourceWithRawResponse

        return AsyncBatchesResourceWithRawResponse(self._client.batches)

    @cached_property
    def alpha(self) -> alpha.AsyncAlphaResourceWithRawResponse:
        from .resources.alpha import AsyncAlphaResourceWithRawResponse
@@ -1141,6 +1167,12 @@ def files(self) -> files.FilesResourceWithStreamingResponse:

        return FilesResourceWithStreamingResponse(self._client.files)

    @cached_property
    def batches(self) -> batches.BatchesResourceWithStreamingResponse:
        from .resources.batches import BatchesResourceWithStreamingResponse

        return BatchesResourceWithStreamingResponse(self._client.batches)

    @cached_property
    def alpha(self) -> alpha.AlphaResourceWithStreamingResponse:
        from .resources.alpha import AlphaResourceWithStreamingResponse
@@ -1286,6 +1318,12 @@ def files(self) -> files.AsyncFilesResourceWithStreamingResponse:

        return AsyncFilesResourceWithStreamingResponse(self._client.files)

    @cached_property
    def batches(self) -> batches.AsyncBatchesResourceWithStreamingResponse:
        from .resources.batches import AsyncBatchesResourceWithStreamingResponse

        return AsyncBatchesResourceWithStreamingResponse(self._client.batches)

    @cached_property
    def alpha(self) -> alpha.AsyncAlphaResourceWithStreamingResponse:
        from .resources.alpha import AsyncAlphaResourceWithStreamingResponse
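Taken together, these `_client.py` additions expose the new resource on the plain, raw-response, and streaming-response client surfaces. A sketch, assuming the top-level `with_raw_response` / `with_streaming_response` accessors wire up the classes above as in other generated clients:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

page = client.batches.list()                   # parsed page of batch objects
raw = client.with_raw_response.batches.list()  # headers + parse() access
with client.with_streaming_response.batches.list() as response:
    print(response.headers)
```
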
14 changes: 14 additions & 0 deletions src/llama_stack_client/resources/__init__.py
@@ -70,6 +70,14 @@
    SafetyResourceWithStreamingResponse,
    AsyncSafetyResourceWithStreamingResponse,
)
from .batches import (
    BatchesResource,
    AsyncBatchesResource,
    BatchesResourceWithRawResponse,
    AsyncBatchesResourceWithRawResponse,
    BatchesResourceWithStreamingResponse,
    AsyncBatchesResourceWithStreamingResponse,
)
from .inspect import (
    InspectResource,
    AsyncInspectResource,
@@ -318,6 +326,12 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
"BatchesResource",
"AsyncBatchesResource",
"BatchesResourceWithRawResponse",
"AsyncBatchesResourceWithRawResponse",
"BatchesResourceWithStreamingResponse",
"AsyncBatchesResourceWithStreamingResponse",
"AlphaResource",
"AsyncAlphaResource",
"AlphaResourceWithRawResponse",
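
Because of these re-exports, the new classes are importable directly from the resources package:

```python
from llama_stack_client.resources import AsyncBatchesResource, BatchesResource
```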