Skip to content
Open
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# ellmer (development version)

* ellmer now supports tools that return image or PDF content types, for example using `content_image_file()` or `content_image_pdf()`. (#735)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW new style is no empty line between bullets

* The following deprecated functions/arguments/methods have now been removed:
* `Chat$extract_data()` -> `chat$chat_structured()` (0.2.0)
* `Chat$extract_data_async()` -> `chat$chat_structured_async()` (0.2.0)
Expand Down
63 changes: 62 additions & 1 deletion R/chat-tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,67 @@ tool_results_as_turn <- function(results) {
Turn("user", contents = results[is_tool_result])
}

is_extra_content <- function(x) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expected to see a check_tool_result() somewhere that would check that a tool call returns either a string, a ContentType, or a list of ContentTypes. Without that this test check feels a bit wibbly to me.

S7::S7_inherits(x, ContentImage) || S7::S7_inherits(x, ContentPDF)
}

tool_results_separate_content <- function(turn) {
if (!some(turn@contents, is_tool_result)) {
return(list(tool_results = list(), contents = turn@contents))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that most of the providers do c(data$tool_results, data$contents) I think you should put the contents first in the list. But can you tell me more about why this just doesn't return a single list?

Copy link
Collaborator Author

@gadenbuie gadenbuie Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In almost all cases, the tool results need to be first in the content list in the user turn. Most APIs will error out if the tool results aren't immediately next in the messages list after the assistant turn. So that motivated the "tool results first" in this helper.

They're two separate items in the list because some APIs want tool results in a set of separate messages. In particular, I believe this is the case with OpenAI. I thought it'd be better to return separate items and combine them as needed than to need to repeat the filtering in some places later. Secondarily, I also liked that the naming makes it clear that we're ordering content as tool results then contents when we do so, rather than hiding that detail in the helper function.

(Those are loose preferences though...)

}

tool_results <- list()
contents <- list()

for (result in turn@contents) {
if (!is_tool_result(result)) {
contents <- c(contents, list(result))
next
}

id <- result@request@id

# Check for extra content in the result value
if (is_extra_content(result@value)) {
contents <- c(
contents,
list(
ContentText(sprintf('<tool-content tool-call-id="%s">', id)),
result@value,
ContentText("</tool-content>")
)
)
result@value <- "[see below]"
}

# Check for extra content in list items
if (is_list(result@value)) {
for (j in seq_along(result@value)) {
if (is_extra_content(result@value[[j]])) {
contents <- c(
contents,
list(
ContentText(
sprintf('<tool-content tool-call-id="%s" item="%d">', id, j)
),
result@value[[j]],
ContentText("</tool-content>")
)
)
result@value[[j]] <- sprintf("[see below: item %d]", j)
}
}
}

tool_results <- c(tool_results, list(result))
}

list(
tool_results = if (length(tool_results) > 0) tool_results else NULL,
contents = contents
)
}

turn_get_tool_errors <- function(turn = NULL) {
if (is.null(turn)) {
return(NULL)
Expand Down Expand Up @@ -328,7 +389,7 @@ maybe_echo_tool <- function(x, echo = "output") {
} else {
icon <- cli::col_green(cli::symbol$record)
header <- ""
value <- tool_string(x)
value <- tool_string(x, force = TRUE)
}

value <- cli::style_italic(value)
Expand Down
21 changes: 16 additions & 5 deletions R/content.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ ContentImage <- new_class(
#' @param detail Not currently used.
ContentImageRemote <- new_class(
"ContentImageRemote",
parent = Content,
parent = ContentImage,
properties = list(
url = prop_string(),
detail = prop_string(default = "")
Expand All @@ -164,7 +164,7 @@ method(contents_markdown, ContentImageRemote) <- function(content) {
#' @param data Base64 encoded image data.
ContentImageInline <- new_class(
"ContentImageInline",
parent = Content,
parent = ContentImage,
properties = list(
type = prop_string(),
data = prop_string(allow_null = TRUE)
Expand Down Expand Up @@ -294,7 +294,7 @@ method(format, ContentToolResult) <- function(
if (tool_errored(x)) {
value <- paste0(cli::col_red("Error: "), tool_error_string(x))
} else {
value <- tool_string(x)
value <- tool_string(x, force = TRUE)
}

if (!is_string(value) || !grepl("\n", value)) {
Expand All @@ -308,7 +308,7 @@ tool_errored <- function(x) !is.null(x@error)
tool_error_string <- function(x) {
if (inherits(x@error, "condition")) conditionMessage(x@error) else x@error
}
tool_string <- function(x) {
tool_string <- function(x, force = FALSE) {
if (tool_errored(x)) {
paste0("Tool calling failed with error ", tool_error_string(x))
} else if (inherits(x@value, "AsIs")) {
Expand All @@ -318,7 +318,18 @@ tool_string <- function(x) {
} else if (is.character(x@value)) {
paste(x@value, collapse = "\n")
} else {
jsonlite::toJSON(x@value, auto_unbox = TRUE)
tryCatch(
jsonlite::toJSON(x@value, auto_unbox = TRUE, force = force),
error = function(err) {
cli::cli_abort(
c(
"Could not convert tool result from {.obj_type_friendly {x@value}} to JSON.",
"i" = "If you are the tool author, update the tool to convert the result to a string or JSON."
),
parent = err
)
}
)
}
}

Expand Down
6 changes: 5 additions & 1 deletion R/provider-anthropic.R
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,11 @@ method(as_json, list(ProviderAnthropic, Turn)) <- function(provider, x, ...) {
# (all messages must have non-empty content)
return(NULL)
}
list(role = x@role, content = as_json(provider, x@contents, ...))
data <- tool_results_separate_content(x)
list(
role = x@role,
content = as_json(provider, c(data$tool_results, data$contents), ...)
)
} else {
cli::cli_abort("Unknown role {turn@role}", .internal = TRUE)
}
Expand Down
6 changes: 5 additions & 1 deletion R/provider-aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,11 @@ method(as_json, list(ProviderAWSBedrock, Turn)) <- function(provider, x, ...) {
# bedrock passes system prompt as separate arg
NULL
} else if (x@role %in% c("user", "assistant")) {
list(role = x@role, content = as_json(provider, x@contents, ...))
data <- tool_results_separate_content(x)
list(
role = x@role,
content = as_json(provider, c(data$tool_results, data$contents), ...)
)
} else {
cli::cli_abort("Unknown role {turn@role}", .internal = TRUE)
}
Expand Down
8 changes: 5 additions & 3 deletions R/provider-databricks.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,11 @@ method(as_json, list(ProviderDatabricks, Turn)) <- function(provider, x, ...) {
list(list(role = "system", content = x@contents[[1]]@text))
} else if (x@role == "user") {
# Each tool result needs to go in its own message with role "tool".
is_tool <- map_lgl(x@contents, S7_inherits, ContentToolResult)
if (any(is_tool)) {
return(lapply(x@contents[is_tool], function(tool) {
data <- tool_results_separate_content(x)
if (length(data$tool_results) > 0) {
# If Databricks starts supporting image/pdf content, we'll need to merge
# tool results with other content instead of just returning here.
return(lapply(data$tool_results, function(tool) {
list(
role = "tool",
content = tool_string(tool),
Expand Down
8 changes: 4 additions & 4 deletions R/provider-deepseek.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,22 +79,22 @@ method(as_json, list(ProviderDeepSeek, ContentText)) <- function(

method(as_json, list(ProviderDeepSeek, Turn)) <- function(provider, x, ...) {
if (x@role == "user") {
data <- tool_results_separate_content(x)
# Text and tool results go in separate messages
texts <- keep(x@contents, S7_inherits, ContentText)
texts <- keep(data$contents, S7_inherits, ContentText)
texts_out <- lapply(texts, function(text) {
list(role = "user", content = as_json(provider, text, ...))
})

tools <- keep(x@contents, S7_inherits, ContentToolResult)
tools_out <- lapply(tools, function(tool) {
tools_out <- lapply(data$tool_results, function(tool) {
list(
role = "tool",
content = tool_string(tool),
tool_call_id = tool@request@id
)
})

c(texts_out, tools_out)
c(tools_out, texts_out)
} else if (x@role == "assistant") {
# Tool requests come out of content and go into own argument
text <- detect(x@contents, S7_inherits, ContentText)
Expand Down
6 changes: 5 additions & 1 deletion R/provider-google.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,11 @@ method(as_json, list(ProviderGoogleGemini, Turn)) <- function(
if (x@role == "system") {
# System messages go in the top-level API parameter
} else if (x@role == "user") {
list(role = x@role, parts = as_json(provider, x@contents, ...))
data <- tool_results_separate_content(x)
list(
role = x@role,
parts = as_json(provider, c(data$tool_results, data$contents), ...)
)
} else if (x@role == "assistant") {
list(role = "model", parts = as_json(provider, x@contents, ...))
} else {
Expand Down
14 changes: 8 additions & 6 deletions R/provider-openai.R
Original file line number Diff line number Diff line change
Expand Up @@ -278,24 +278,26 @@ method(as_json, list(ProviderOpenAI, Turn)) <- function(provider, x, ...) {
list(role = "system", content = x@contents[[1]]@text)
)
} else if (x@role == "user") {
# Each tool result needs to go in its own message with role "tool"
is_tool <- map_lgl(x@contents, S7_inherits, ContentToolResult)
content <- as_json(provider, x@contents[!is_tool], ...)
if (length(content) > 0) {
data <- tool_results_separate_content(x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to update chat_openai_responses() too?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think so! I merged the changes but have been in meetings all day; I'll pick this back up on Monday and will take a look at chat_openai_responses() then too.


if (length(data$contents) > 0) {
content <- as_json(provider, data$contents, ...)
user <- list(list(role = "user", content = content))
} else {
user <- list()
}

tools <- lapply(x@contents[is_tool], function(tool) {
# Each tool result needs to go in its own message with role "tool"
tools <- lapply(data$tool_results, function(tool) {
list(
role = "tool",
content = tool_string(tool),
tool_call_id = tool@request@id
)
})

c(user, tools)
# API errors if tool results do not follow previous assistant turn
c(tools, user)
} else if (x@role == "assistant") {
# Tool requests come out of content and go into own argument
is_tool <- map_lgl(x@contents, is_tool_request)
Expand Down
5 changes: 3 additions & 2 deletions R/provider-snowflake.R
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,14 @@ method(as_json, list(ProviderSnowflakeCortex, Turn)) <- function(
# Completely undocumented, but: it seems like the model is expecting the
# tool result in textual format here, too -- otherwise it gets confused,
# like it can't see the output.
content <- tool_string(x@contents[[1]])
content <- tool_string(x@contents[[1]], force = TRUE)
} else {
cli::cli_abort("Unsupported content type: {.cls {class(x@contents[[1]])}}.")
}
data <- tool_results_separate_content(x)
list(
role = x@role,
content_list = as_json(provider, x@contents, ...)
content_list = as_json(provider, c(data$tool_results, data$contents), ...)
)
}

Expand Down
85 changes: 85 additions & 0 deletions tests/testthat/test-chat-tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -786,3 +786,88 @@ test_that("match_tools() matches tools in a turn to a list of tools", {
turn_matched <- match_tools(turn, tools)
expect_equal(turn_matched, fixture_turn_with_tool_requests(with_tool = TRUE))
})

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we have a couple of end-to-end tests of a tool that returns an image and the chat understands it? Maybe just for anthropic + gemini + openai? Definitely need to use cassettes.

test_that("tool_results_separate_content() splits tool results and content", {
request <- ContentToolRequest("x1", "my_tool", list())
result <- ContentToolResult(
content_image_url("https://placecat.com/200/200"),
request = request
)
user_turn <- tool_results_as_turn(list(result))

data <- tool_results_separate_content(user_turn)
expect_length(data$tool_results, 1)
expect_s7_class(data$tool_results[[1]], ContentToolResult)
expect_true(is.character(data$tool_results[[1]]@value))

expect_length(data$contents, 3)
expect_equal(data$contents[[2]], result@value)
expect_s7_class(data$contents[[1]], ContentText)
expect_s7_class(data$contents[[3]], ContentText)
})

test_that("tool_results_separate_content() keeps content order", {
request <- ContentToolRequest("x1", "my_tool", list())
result <- ContentToolResult(
content_image_url("https://placecat.com/200/200"),
request = request
)
user_turn <- Turn(
"user",
list(
ContentText("Here is a cat image:"),
result,
ContentText("Isn't it cute?")
)
)

data <- tool_results_separate_content(user_turn)
expect_length(data$tool_results, 1)
expect_s7_class(data$tool_results[[1]], ContentToolResult)
expect_true(is.character(data$tool_results[[1]]@value))

expect_length(data$contents, 5)
expect_equal(data$contents[[1]]@text, "Here is a cat image:")
expect_s7_class(data$contents[[2]], ContentText) # Added text for tool result
expect_equal(data$contents[[3]], result@value)
expect_s7_class(data$contents[[4]], ContentText) # Closing text for tool result
expect_equal(data$contents[[5]]@text, "Isn't it cute?")
})

test_that("tool_results_separate_content() handles no tool results", {
user_turn <- Turn(
"user",
list(
ContentText("Hello!"),
ContentText("How are you?")
)
)

data <- tool_results_separate_content(user_turn)
expect_length(data$tool_results, 0)
expect_length(data$contents, 2)
expect_equal(data$contents, user_turn@contents)
})

test_that("tool_results_separate_content() handles list result values", {
request <- ContentToolRequest("x1", "my_tool", list())
result <- ContentToolResult(
list(
content_image_url("https://placecat.com/200/200"),
"not an image",
content_image_url("https://placecat.com/300/300")
),
request = request
)
user_turn <- Turn("user", list(result))
data <- tool_results_separate_content(user_turn)

expect_length(data$tool_results, 1)
expect_length(data$tool_results[[1]]@value, 3)
# Images are replaced with text placeholders in the tool result
expect_true(every(data$tool_results[[1]]@value, is.character))

expect_length(data$contents, 3 * 2)
expect_equal(data$contents[[2]], result@value[[1]])
expect_equal(data$contents[[5]], result@value[[3]])
})
Loading
Loading