fix: add input_tokens to the usage metadata of streamed Anthropic messages (#2173)

Amnah199 · web-flow · commit 7d7c4487b2e3 · 2025-08-11T14:50:49.000+02:00
* Add input tokens

* Update test

* Fix the tests

* Small fixes

* Small fixes

* Small fixes

* Update tests

* Remove comment line

* Update test_chat_generator.py
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
@@ -407,7 +407,6 @@ def _convert_anthropic_chunk_to_streaming_chunk(
             if chunk.delta.type == "text_delta":
                 content = chunk.delta.text
             elif chunk.delta.type == "input_json_delta":
-                # we assign index=0 because one chunk can have only one ToolCallDelta
                 tool_calls.append(ToolCallDelta(index=tool_call_index, arguments=chunk.delta.partial_json))
         # end of streaming message
         elif chunk.type == "message_delta":
@@ -490,12 +489,16 @@ def _process_response(
             chunks: List[StreamingChunk] = []
             model: Optional[str] = None
             tool_call_index = -1
+            input_tokens = None
             component_info = ComponentInfo.from_component(self)
             for chunk in response:
                 if chunk.type in ["message_start", "content_block_start", "content_block_delta", "message_delta"]:
                     # Extract model from message_start chunks
                     if chunk.type == "message_start":
                         model = chunk.message.model
+                        if chunk.message.usage.input_tokens is not None:
+                            input_tokens = chunk.message.usage.input_tokens
+
                     if chunk.type == "content_block_start" and chunk.content_block.type == "tool_use":
                         tool_call_index += 1
 
@@ -510,6 +513,11 @@ def _process_response(
             completion.meta.update(
                 {"received_at": datetime.now(timezone.utc).isoformat(), "model": model},
             )
+
+            if input_tokens is not None:
+                if "usage" not in completion.meta:
+                    completion.meta["usage"] = {}
+                completion.meta["usage"]["input_tokens"] = input_tokens
             return {"replies": [completion]}
         else:
             return {
diff --git a/integrations/anthropic/tests/test_chat_generator.py b/integrations/anthropic/tests/test_chat_generator.py
@@ -337,6 +337,8 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
         component = AnthropicChatGenerator(api_key=Secret.from_token("test-api-key"))
         component_info = ComponentInfo.from_component(component)
 
+        raw_chunks = []
+
         # Test message_start chunk
         message_start_chunk = RawMessageStartEvent(
             message=Message(
@@ -358,6 +360,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             ),
             type="message_start",
         )
+        raw_chunks.append(message_start_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             message_start_chunk, component_info=component_info, tool_call_index=0
         )
@@ -373,6 +376,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
         text_block_start_chunk = RawContentBlockStartEvent(
             content_block=TextBlock(citations=None, text="", type="text"), index=0, type="content_block_start"
         )
+        raw_chunks.append(text_block_start_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             text_block_start_chunk, component_info=component_info, tool_call_index=0
         )
@@ -390,6 +394,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             index=0,
             type="content_block_delta",
         )
+        raw_chunks.append(text_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             text_delta_chunk, component_info=component_info, tool_call_index=0
         )
@@ -414,6 +419,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             index=1,
             type="content_block_start",
         )
+        raw_chunks.append(tool_block_start_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             tool_block_start_chunk, component_info=component_info, tool_call_index=0
         )
@@ -431,6 +437,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
         empty_json_delta_chunk = RawContentBlockDeltaEvent(
             delta=InputJSONDelta(partial_json="", type="input_json_delta"), index=1, type="content_block_delta"
         )
+        raw_chunks.append(empty_json_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             empty_json_delta_chunk, component_info=component_info, tool_call_index=0
         )
@@ -450,6 +457,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             index=1,
             type="content_block_delta",
         )
+        raw_chunks.append(json_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             json_delta_chunk, component_info=component_info, tool_call_index=0
         )
@@ -473,6 +481,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
                 server_tool_use=None,
             ),
         )
+        raw_chunks.append(message_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             message_delta_chunk, component_info=component_info, tool_call_index=0
         )
@@ -496,6 +505,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             index=2,
             type="content_block_start",
         )
+        raw_chunks.append(tool_block_start_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             tool_block_start_chunk, component_info=component_info, tool_call_index=1
         )
@@ -513,6 +523,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
         empty_json_delta_chunk = RawContentBlockDeltaEvent(
             delta=InputJSONDelta(partial_json="", type="input_json_delta"), index=1, type="content_block_delta"
         )
+        raw_chunks.append(empty_json_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             empty_json_delta_chunk, component_info=component_info, tool_call_index=1
         )
@@ -532,6 +543,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
             index=2,
             type="content_block_delta",
         )
+        raw_chunks.append(json_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             json_delta_chunk, component_info=component_info, tool_call_index=1
         )
@@ -555,6 +567,7 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
                 server_tool_use=None,
             ),
         )
+        raw_chunks.append(message_delta_chunk)
         streaming_chunk = component._convert_anthropic_chunk_to_streaming_chunk(
             message_delta_chunk, component_info=component_info, tool_call_index=0
         )
@@ -574,6 +587,16 @@ def test_convert_anthropic_completion_chunks_with_multiple_tool_calls_to_streami
         # message_stop_chunk = RawMessageStopEvent(type="message_stop")
         # but we don't stream it
 
+        generator = AnthropicChatGenerator(Secret.from_token("test-api-key"))
+        message = generator._process_response(raw_chunks)
+        assert message["replies"][0].meta["usage"] == {
+            "cache_creation_input_tokens": None,
+            "cache_read_input_tokens": None,
+            "input_tokens": 393,
+            "output_tokens": 77,
+            "server_tool_use": None,
+        }
+
     def test_convert_streaming_chunks_to_chat_message_with_multiple_tool_calls(self):
         """
         Test converting streaming chunks to a chat message with tool calls
@@ -703,7 +726,7 @@ def test_convert_streaming_chunks_to_chat_message_with_multiple_tool_calls(self)
                 meta={
                     "type": "message_delta",
                     "delta": {"stop_reason": "tool_calls", "stop_sequence": None},
-                    "usage": {"completion_tokens": 40},
+                    "usage": {"output_tokens": 40},
                 },
                 component_info=ComponentInfo.from_component(self),
                 finish_reason="tool_calls",
@@ -728,7 +751,7 @@ def test_convert_streaming_chunks_to_chat_message_with_multiple_tool_calls(self)
         # Verify meta information
         assert message._meta["index"] == 0
         assert message._meta["finish_reason"] == "tool_calls"
-        assert message._meta["usage"] == {"completion_tokens": 40}
+        assert message._meta["usage"] == {"output_tokens": 40}
 
     def test_convert_streaming_chunks_to_chat_message_tool_call_with_empty_arguments(self):
         """
@@ -815,7 +838,7 @@ def test_convert_streaming_chunks_to_chat_message_tool_call_with_empty_arguments
                 meta={
                     "type": "message_delta",
                     "delta": {"stop_reason": "tool_calls", "stop_sequence": None},
-                    "usage": {"completion_tokens": 40},
+                    "usage": {"output_tokens": 40},
                 },
                 component_info=ComponentInfo.from_component(self),
                 index=1,
@@ -838,7 +861,7 @@ def test_convert_streaming_chunks_to_chat_message_tool_call_with_empty_arguments
         # Verify meta information
         assert message._meta["index"] == 0
         assert message._meta["finish_reason"] == "tool_calls"
-        assert message._meta["usage"] == {"completion_tokens": 40}
+        assert message._meta["usage"] == {"output_tokens": 40}
 
     def test_serde_in_pipeline(self):
         tool = Tool(name="name", description="description", parameters={"x": {"type": "string"}}, function=print)
@@ -971,9 +994,10 @@ def __call__(self, chunk: StreamingChunk) -> None:
 
         assert "claude-sonnet-4-20250514" in message.meta["model"]
         assert message.meta["finish_reason"] == "stop"
-
         assert callback.counter > 1
         assert "Paris" in callback.responses
+        assert "input_tokens" in message.meta["usage"]
+        assert "output_tokens" in message.meta["usage"]
 
     def test_convert_message_to_anthropic_format(self):
         """
@@ -1171,6 +1195,7 @@ def test_live_run_with_tools(self, tools):
         assert tool_call.tool_name == "weather"
         assert tool_call.arguments == {"city": "Paris"}
         assert message.meta["finish_reason"] == "tool_calls"
+        assert "completion_tokens" in message.meta["usage"]
 
         new_messages = [
             *initial_messages,
@@ -1268,6 +1293,8 @@ def test_live_run_with_tools_streaming(self, tools):
         assert tool_call.tool_name == "weather"
         assert tool_call.arguments == {"city": "Paris"}
         assert message.meta["finish_reason"] == "tool_calls"
+        assert "output_tokens" in message.meta["usage"]
+        assert "input_tokens" in message.meta["usage"]
 
         new_messages = [
             *initial_messages,
@@ -1673,6 +1700,7 @@ async def test_run_async_with_params(self, chat_messages, mock_anthropic_complet
         assert "Hello! I'm Claude." in response["replies"][0].text
         assert response["replies"][0].meta["model"] == "claude-sonnet-4-20250514"
         assert response["replies"][0].meta["finish_reason"] == "stop"
+        assert "completion_tokens" in response["replies"][0].meta["usage"]
 
     @pytest.mark.asyncio
     @pytest.mark.skipif(
@@ -1691,6 +1719,7 @@ async def test_live_run_async(self):
         assert "Paris" in message.text
         assert "claude-sonnet-4-20250514" in message.meta["model"]
         assert message.meta["finish_reason"] == "stop"
+        assert "completion_tokens" in message.meta["usage"]
 
     @pytest.mark.asyncio
     @pytest.mark.skipif(
@@ -1726,6 +1755,8 @@ async def callback(chunk: StreamingChunk) -> None:
         assert "paris" in message.text.lower()
         assert "claude-sonnet-4-20250514" in message.meta["model"]
         assert message.meta["finish_reason"] == "stop"
+        assert "input_tokens" in message.meta["usage"]
+        assert "output_tokens" in message.meta["usage"]
 
         # Verify streaming behavior
         assert counter > 1  # Should have received multiple chunks
@@ -1767,3 +1798,4 @@ async def test_live_run_async_with_tools(self, tools):
         assert not final_message.tool_calls
         assert len(final_message.text) > 0
         assert "paris" in final_message.text.lower()
+        assert "completion_tokens" in final_message.meta["usage"]