-
Notifications
You must be signed in to change notification settings - Fork 103
feat: Support images and PDFs in tool results #735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
45fcc4a
1aa7c46
b318a65
b2908db
ec909f4
a73cdb5
cee44b3
baff771
4d640c7
c703617
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -250,6 +250,67 @@ tool_results_as_turn <- function(results) { | |
| Turn("user", contents = results[is_tool_result]) | ||
| } | ||
|
|
||
| is_extra_content <- function(x) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I expected to see a |
||
| S7::S7_inherits(x, ContentImage) || S7::S7_inherits(x, ContentPDF) | ||
| } | ||
|
|
||
| tool_results_separate_content <- function(turn) { | ||
| if (!some(turn@contents, is_tool_result)) { | ||
| return(list(tool_results = list(), contents = turn@contents)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that most of the providers do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In almost all cases, the tool results need to be first in the content list in the user turn. Most APIs will error out if the tool results aren't immediately next in the messages list after the assistant turn. So that motivated the "tool results first" in this helper. They're two separate items in the list because some APIs want tool results in a set of separate messages. In particular, I believe this is the case with OpenAI. I thought it'd be better to return separate items and combine them as needed than to need to repeat the filtering in some places later. Secondarily, I also liked that the naming makes it clear that we're ordering content as tool results then contents when we do so, rather than hiding that detail in the helper function. (Those are loose preferences though...) |
||
| } | ||
|
|
||
| tool_results <- list() | ||
| contents <- list() | ||
|
|
||
| for (result in turn@contents) { | ||
| if (!is_tool_result(result)) { | ||
| contents <- c(contents, list(result)) | ||
| next | ||
| } | ||
|
|
||
| id <- result@request@id | ||
|
|
||
| # Check for extra content in the result value | ||
| if (is_extra_content(result@value)) { | ||
| contents <- c( | ||
| contents, | ||
| list( | ||
| ContentText(sprintf('<tool-content tool-call-id="%s">', id)), | ||
| result@value, | ||
| ContentText("</tool-content>") | ||
| ) | ||
| ) | ||
| result@value <- "[see below]" | ||
| } | ||
|
|
||
| # Check for extra content in list items | ||
| if (is_list(result@value)) { | ||
| for (j in seq_along(result@value)) { | ||
| if (is_extra_content(result@value[[j]])) { | ||
| contents <- c( | ||
| contents, | ||
| list( | ||
| ContentText( | ||
| sprintf('<tool-content tool-call-id="%s" item="%d">', id, j) | ||
| ), | ||
| result@value[[j]], | ||
| ContentText("</tool-content>") | ||
| ) | ||
| ) | ||
| result@value[[j]] <- sprintf("[see below: item %d]", j) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| tool_results <- c(tool_results, list(result)) | ||
| } | ||
|
|
||
| list( | ||
| tool_results = if (length(tool_results) > 0) tool_results else NULL, | ||
| contents = contents | ||
| ) | ||
| } | ||
|
|
||
| turn_get_tool_errors <- function(turn = NULL) { | ||
| if (is.null(turn)) { | ||
| return(NULL) | ||
|
|
@@ -328,7 +389,7 @@ maybe_echo_tool <- function(x, echo = "output") { | |
| } else { | ||
| icon <- cli::col_green(cli::symbol$record) | ||
| header <- "" | ||
| value <- tool_string(x) | ||
| value <- tool_string(x, force = TRUE) | ||
| } | ||
|
|
||
| value <- cli::style_italic(value) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -278,24 +278,26 @@ method(as_json, list(ProviderOpenAI, Turn)) <- function(provider, x, ...) { | |
| list(role = "system", content = x@contents[[1]]@text) | ||
| ) | ||
| } else if (x@role == "user") { | ||
| # Each tool result needs to go in its own message with role "tool" | ||
| is_tool <- map_lgl(x@contents, S7_inherits, ContentToolResult) | ||
| content <- as_json(provider, x@contents[!is_tool], ...) | ||
| if (length(content) > 0) { | ||
| data <- tool_results_separate_content(x) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you need to update There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think so! I merged the changes but have been in meetings all day; I'll pick this back up on Monday and will take a look at |
||
|
|
||
| if (length(data$contents) > 0) { | ||
| content <- as_json(provider, data$contents, ...) | ||
| user <- list(list(role = "user", content = content)) | ||
| } else { | ||
| user <- list() | ||
| } | ||
|
|
||
| tools <- lapply(x@contents[is_tool], function(tool) { | ||
| # Each tool result needs to go in its own message with role "tool" | ||
| tools <- lapply(data$tool_results, function(tool) { | ||
| list( | ||
| role = "tool", | ||
| content = tool_string(tool), | ||
| tool_call_id = tool@request@id | ||
| ) | ||
| }) | ||
|
|
||
| c(user, tools) | ||
| # API errors if tool results do not follow previous assistant turn | ||
| c(tools, user) | ||
| } else if (x@role == "assistant") { | ||
| # Tool requests come out of content and go into own argument | ||
| is_tool <- map_lgl(x@contents, is_tool_request) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -786,3 +786,88 @@ test_that("match_tools() matches tools in a turn to a list of tools", { | |
| turn_matched <- match_tools(turn, tools) | ||
| expect_equal(turn_matched, fixture_turn_with_tool_requests(with_tool = TRUE)) | ||
| }) | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we have a couple of end-to-end tests of a tool that returns an image and the chat understands it? Maybe just for anthropic + gemini + openai? Definitely need to use cassettes. |
||
| test_that("tool_results_separate_content() splits tool results and content", { | ||
| request <- ContentToolRequest("x1", "my_tool", list()) | ||
| result <- ContentToolResult( | ||
| content_image_url("https://placecat.com/200/200"), | ||
| request = request | ||
| ) | ||
| user_turn <- tool_results_as_turn(list(result)) | ||
|
|
||
| data <- tool_results_separate_content(user_turn) | ||
| expect_length(data$tool_results, 1) | ||
| expect_s7_class(data$tool_results[[1]], ContentToolResult) | ||
| expect_true(is.character(data$tool_results[[1]]@value)) | ||
|
|
||
| expect_length(data$contents, 3) | ||
| expect_equal(data$contents[[2]], result@value) | ||
| expect_s7_class(data$contents[[1]], ContentText) | ||
| expect_s7_class(data$contents[[3]], ContentText) | ||
| }) | ||
|
|
||
| test_that("tool_results_separate_content() keeps content order", { | ||
| request <- ContentToolRequest("x1", "my_tool", list()) | ||
| result <- ContentToolResult( | ||
| content_image_url("https://placecat.com/200/200"), | ||
| request = request | ||
| ) | ||
| user_turn <- Turn( | ||
| "user", | ||
| list( | ||
| ContentText("Here is a cat image:"), | ||
| result, | ||
| ContentText("Isn't it cute?") | ||
| ) | ||
| ) | ||
|
|
||
| data <- tool_results_separate_content(user_turn) | ||
| expect_length(data$tool_results, 1) | ||
| expect_s7_class(data$tool_results[[1]], ContentToolResult) | ||
| expect_true(is.character(data$tool_results[[1]]@value)) | ||
|
|
||
| expect_length(data$contents, 5) | ||
| expect_equal(data$contents[[1]]@text, "Here is a cat image:") | ||
| expect_s7_class(data$contents[[2]], ContentText) # Added text for tool result | ||
| expect_equal(data$contents[[3]], result@value) | ||
| expect_s7_class(data$contents[[4]], ContentText) # Closing text for tool result | ||
| expect_equal(data$contents[[5]]@text, "Isn't it cute?") | ||
| }) | ||
|
|
||
| test_that("tool_results_separate_content() handles no tool results", { | ||
| user_turn <- Turn( | ||
| "user", | ||
| list( | ||
| ContentText("Hello!"), | ||
| ContentText("How are you?") | ||
| ) | ||
| ) | ||
|
|
||
| data <- tool_results_separate_content(user_turn) | ||
| expect_length(data$tool_results, 0) | ||
| expect_length(data$contents, 2) | ||
| expect_equal(data$contents, user_turn@contents) | ||
| }) | ||
|
|
||
| test_that("tool_results_separate_content() handles list result values", { | ||
| request <- ContentToolRequest("x1", "my_tool", list()) | ||
| result <- ContentToolResult( | ||
| list( | ||
| content_image_url("https://placecat.com/200/200"), | ||
| "not an image", | ||
| content_image_url("https://placecat.com/300/300") | ||
| ), | ||
| request = request | ||
| ) | ||
| user_turn <- Turn("user", list(result)) | ||
| data <- tool_results_separate_content(user_turn) | ||
|
|
||
| expect_length(data$tool_results, 1) | ||
| expect_length(data$tool_results[[1]]@value, 3) | ||
| # Images are replaced with text placeholders in the tool result | ||
| expect_true(every(data$tool_results[[1]]@value, is.character)) | ||
|
|
||
| expect_length(data$contents, 3 * 2) | ||
| expect_equal(data$contents[[2]], result@value[[1]]) | ||
| expect_equal(data$contents[[5]], result@value[[3]]) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW new style is no empty line between bullets