
return usage from Anthropic OpenAI adapter #6238

Merged

merged 13 commits on Jul 18, 2025
26 changes: 26 additions & 0 deletions core/index.d.ts
@@ -369,10 +369,32 @@ export interface ThinkingChatMessage {
toolCalls?: ToolCallDelta[];
}

/**
 * Meant to be equivalent to the OpenAI [usage object](https://platform.openai.com/docs/api-reference/chat/object#chat/object-usage),
 * with room for additional information needed by other providers.
 */
export interface Usage {
completionTokens: number;
promptTokens: number;
promptTokensDetails?: {
cachedTokens?: number;
/** This is an Anthropic-specific property */
cacheWriteTokens?: number;
audioTokens?: number;
};
completionTokensDetails?: {
acceptedPredictionTokens?: number;
reasoningTokens?: number;
rejectedPredictionTokens?: number;
audioTokens?: number;
};
}

export interface AssistantChatMessage {
role: "assistant";
content: MessageContent;
toolCalls?: ToolCallDelta[];
usage?: Usage;
}

export interface SystemChatMessage {
@@ -491,19 +513,22 @@ export interface LLMInteractionStartChat extends LLMInteractionBase {
kind: "startChat";
messages: ChatMessage[];
options: CompletionOptions;
provider: string;
}

export interface LLMInteractionStartComplete extends LLMInteractionBase {
kind: "startComplete";
prompt: string;
options: CompletionOptions;
provider: string;
}

export interface LLMInteractionStartFim extends LLMInteractionBase {
kind: "startFim";
prefix: string;
suffix: string;
options: CompletionOptions;
provider: string;
}

export interface LLMInteractionChunk extends LLMInteractionBase {
@@ -520,6 +545,7 @@ export interface LLMInteractionEnd extends LLMInteractionBase {
promptTokens: number;
generatedTokens: number;
thinkingTokens: number;
usage: Usage | undefined;
}

export interface LLMInteractionSuccess extends LLMInteractionEnd {
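For reference, the new Usage shape is a camelCase mirror of the OpenAI usage object linked in its doc comment. A minimal sketch of the correspondence, assuming an OpenAI-style snake_case payload; the helper is illustrative only and not part of this diff:

// Illustrative mapping from an OpenAI-style `usage` payload to the Usage
// interface declared above. Field names on the input side follow the OpenAI
// chat completion API; the helper itself is a sketch, not code from this PR.
function usageFromOpenAI(u: {
  prompt_tokens: number;
  completion_tokens: number;
  prompt_tokens_details?: { cached_tokens?: number; audio_tokens?: number };
  completion_tokens_details?: {
    accepted_prediction_tokens?: number;
    reasoning_tokens?: number;
    rejected_prediction_tokens?: number;
    audio_tokens?: number;
  };
}): Usage {
  return {
    promptTokens: u.prompt_tokens,
    completionTokens: u.completion_tokens,
    promptTokensDetails: u.prompt_tokens_details && {
      cachedTokens: u.prompt_tokens_details.cached_tokens,
      audioTokens: u.prompt_tokens_details.audio_tokens,
    },
    completionTokensDetails: u.completion_tokens_details && {
      acceptedPredictionTokens:
        u.completion_tokens_details.accepted_prediction_tokens,
      reasoningTokens: u.completion_tokens_details.reasoning_tokens,
      rejectedPredictionTokens:
        u.completion_tokens_details.rejected_prediction_tokens,
      audioTokens: u.completion_tokens_details.audio_tokens,
    },
  };
}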
27 changes: 27 additions & 0 deletions core/llm/index.ts
@@ -28,6 +28,7 @@ import {
RequestOptions,
TabAutocompleteOptions,
TemplateType,
Usage,
} from "../index.js";
import mergeJson from "../util/merge.js";
import { renderChatMessage } from "../util/messageContent.js";
@@ -327,6 +328,7 @@ export abstract class BaseLLM implements ILLM {
completion: string,
thinking: string | undefined,
interaction: ILLMInteractionLog | undefined,
usage: Usage | undefined,
error?: any,
): InteractionStatus {
let promptTokens = this.countTokens(prompt);
@@ -368,6 +370,7 @@
promptTokens,
generatedTokens,
thinkingTokens,
usage,
});
return "cancelled";
} else {
@@ -379,6 +382,7 @@
promptTokens,
generatedTokens,
thinkingTokens,
usage,
});
return "error";
}
@@ -388,6 +392,7 @@
promptTokens,
generatedTokens,
thinkingTokens,
usage,
});
return "success";
}
@@ -574,6 +579,7 @@
prefix,
suffix,
options: completionOptions,
provider: this.providerName,
});
if (this.llmRequestHook) {
this.llmRequestHook(completionOptions.model, fimLog);
@@ -622,6 +628,7 @@
completion,
undefined,
interaction,
undefined,
);
} catch (e) {
status = this._logEnd(
@@ -630,6 +637,7 @@
completion,
undefined,
interaction,
undefined,
e,
);
throw e;
@@ -641,6 +649,7 @@
completion,
undefined,
interaction,
undefined,
"cancel",
);
}
@@ -681,6 +690,7 @@
kind: "startComplete",
prompt,
options: completionOptions,
provider: this.providerName,
});
if (this.llmRequestHook) {
this.llmRequestHook(completionOptions.model, prompt);
@@ -736,6 +746,7 @@
completion,
undefined,
interaction,
undefined,
);
} catch (e) {
status = this._logEnd(
@@ -744,6 +755,7 @@
completion,
undefined,
interaction,
undefined,
e,
);
throw e;
@@ -755,6 +767,7 @@
completion,
undefined,
interaction,
undefined,
"cancel",
);
}
@@ -796,6 +809,7 @@
kind: "startComplete",
prompt: prompt,
options: completionOptions,
provider: this.providerName,
});
if (this.llmRequestHook) {
this.llmRequestHook(completionOptions.model, prompt);
@@ -829,6 +843,7 @@
completion,
undefined,
interaction,
undefined,
);
} catch (e) {
status = this._logEnd(
@@ -837,6 +852,7 @@
completion,
undefined,
interaction,
undefined,
e,
);
throw e;
@@ -848,6 +864,7 @@
completion,
undefined,
interaction,
undefined,
"cancel",
);
}
@@ -944,6 +961,7 @@
kind: "startChat",
messages,
options: completionOptions,
provider: this.providerName,
});
if (this.llmRequestHook) {
this.llmRequestHook(completionOptions.model, prompt);
@@ -952,6 +970,7 @@

let thinking = "";
let completion = "";
let usage: Usage | undefined = undefined;

try {
if (this.templateMessages) {
@@ -1018,6 +1037,11 @@
kind: "message",
message: chunk,
});

if (chunk.role === "assistant" && chunk.usage) {
usage = chunk.usage;
}

yield chunk;
}
}
@@ -1028,6 +1052,7 @@
completion,
thinking,
interaction,
usage,
);
} catch (e) {
status = this._logEnd(
@@ -1036,6 +1061,7 @@
completion,
thinking,
interaction,
usage,
e,
);
throw e;
@@ -1047,6 +1073,7 @@
completion,
undefined,
interaction,
usage,
"cancel",
);
}
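Downstream of these changes, the trailing assistant chunk is what carries usage, so a consumer of the stream can collect it alongside the text. A minimal sketch, assuming the streamChat(messages, signal) signature used by this class:

// Sketch of a stream consumer. `chunk.usage` is only present on assistant
// chunks, and in practice arrives on the final, empty-content chunk the
// adapter yields after the stream ends.
async function chatWithUsage(
  llm: BaseLLM,
  messages: ChatMessage[],
): Promise<{ content: string; usage: Usage | undefined }> {
  let content = "";
  let usage: Usage | undefined;
  for await (const chunk of llm.streamChat(
    messages,
    new AbortController().signal,
  )) {
    if (chunk.role === "assistant") {
      if (typeof chunk.content === "string") {
        content += chunk.content;
      }
      usage = chunk.usage ?? usage;
    }
  }
  return { content, usage };
}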
50 changes: 47 additions & 3 deletions core/llm/llms/Anthropic.ts
@@ -1,5 +1,10 @@
import { streamSse } from "@continuedev/fetch";
import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";
import {
ChatMessage,
CompletionOptions,
LLMOptions,
Usage,
} from "../../index.js";
import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
import { renderChatMessage, stripImages } from "../../util/messageContent.js";
import { BaseLLM } from "../index.js";
@@ -234,23 +239,56 @@ class Anthropic extends BaseLLM {

if (options.stream === false) {
const data = await response.json();
yield { role: "assistant", content: data.content[0].text };
      const usage: Usage | undefined = data.usage
        ? {
            promptTokens: data.usage.input_tokens,
            completionTokens: data.usage.output_tokens,
          }
        : undefined;
      yield {
        role: "assistant",
        content: data.content[0].text,
        ...(usage ? { usage } : {}),
      };
return;
}

let lastToolUseId: string | undefined;
let lastToolUseName: string | undefined;
let usage: Usage = {
promptTokens: 0,
completionTokens: 0,
promptTokensDetails: {
cachedTokens: 0,
cacheWriteTokens: 0,
},
};

for await (const value of streamSse(response)) {
// https://docs.anthropic.com/en/api/messages-streaming#event-types
switch (value.type) {
case "message_start":
// Capture initial usage information
usage.promptTokens = value.message.usage.input_tokens;
usage.promptTokensDetails!.cachedTokens =
value.message.usage.cache_read_input_tokens;
usage.promptTokensDetails!.cacheWriteTokens =
value.message.usage.cache_creation_input_tokens;
break;
case "message_delta":
// Update usage information during streaming
if (value.usage) {
usage.completionTokens = value.usage.output_tokens;
}
break;
case "content_block_start":
if (value.content_block.type === "tool_use") {
lastToolUseId = value.content_block.id;
lastToolUseName = value.content_block.name;
}
// handle redacted thinking
if (value.content_block.type === "redacted_thinking") {
console.log("redacted thinking", value.content_block.data);
yield {
role: "thinking",
content: "",
@@ -303,6 +341,12 @@
break;
}
}

yield {
role: "assistant",
content: "",
usage,
};
}
}

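For context, here is a rough typing of the two streaming events the usage bookkeeping above reads, following the Anthropic messages-streaming docs linked in the code. This is a sketch for orientation, not the SDK's official types:

// message_start delivers the prompt-side counts once, up front;
// message_delta then reports the output token count as the stream progresses.
interface AnthropicMessageStartEvent {
  type: "message_start";
  message: {
    usage: {
      input_tokens: number;
      cache_read_input_tokens?: number; // tokens served from the prompt cache
      cache_creation_input_tokens?: number; // tokens written to the prompt cache
    };
  };
}

interface AnthropicMessageDeltaEvent {
  type: "message_delta";
  usage?: {
    output_tokens: number;
  };
}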
5 changes: 5 additions & 0 deletions core/llm/logFormatter.test.ts
@@ -44,6 +44,7 @@ describe("LLMLogFormatter", () => {
content: "You are a helpful assistant",
},
],
provider: "watsonx",
});
logger._logItem({
interactionId: "1",
@@ -79,6 +80,7 @@
promptTokens: 10,
generatedTokens: 20,
thinkingTokens: 0,
usage: undefined,
});

expect(output.getText()).toBe(
@@ -113,6 +115,7 @@
model: "granite3.2-dense:8b",
},
prompt: "A horse is a horse",
provider: "watsonx",
});
logger._logItem({
interactionId: "1",
@@ -149,6 +152,7 @@
},
prefix: "A\nB",
suffix: "D\nE",
provider: "watsonx",
});
logger._logItem({
interactionId: "1",
@@ -225,6 +229,7 @@
promptTokens: 10,
generatedTokens: 20,
thinkingTokens: 0,
usage: undefined,
});
logger._logItem({
interactionId: "3",
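For contrast with the usage: undefined entries in these tests, a populated Usage from a provider that reports cache activity might look like the following; the values are illustrative only:

// Illustrative only; not part of the test fixtures in this PR.
const exampleUsage: Usage = {
  promptTokens: 10,
  completionTokens: 20,
  promptTokensDetails: {
    cachedTokens: 4, // read from the prompt cache
    cacheWriteTokens: 2, // Anthropic-specific cache write count
  },
};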