Commit b7403d2

fix(openai): prioritize api-provided token over tiktoken calculation (#3142)
Co-authored-by: Nir Gazit <[email protected]>
1 parent ca529e4 commit b7403d2

File tree

6 files changed, +1063 -37 lines changed

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -370,6 +370,12 @@ def get_token_count_from_string(string: str, model_name: str):
                 f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
             )
             return None
+        except Exception as ex:
+            # Other exceptions in tiktoken
+            logger.warning(
+                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
+            )
+            return None
 
         tiktoken_encodings[model_name] = encoding
     else:
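For context, here is a minimal standalone sketch of the defensive pattern this hunk adds: resolve a tiktoken encoding so that an unknown model name (KeyError) or any other tiktoken failure logs a warning and yields None instead of raising. The helper name count_tokens and the logger setup are illustrative, not part of the instrumented module; it assumes the tiktoken package is installed.

import logging

import tiktoken

logger = logging.getLogger(__name__)


def count_tokens(text: str, model_name: str):
    try:
        # tiktoken.encoding_for_model maps a model name to its BPE encoding
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError as ex:
        # tiktoken has no encoding registered for this model name
        logger.warning(f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}")
        return None
    except Exception as ex:
        # any other tiktoken failure (e.g. fetching encoding data) is also non-fatal
        logger.warning(f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}")
        return None
    return len(encoding.encode(text))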

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py

Lines changed: 37 additions & 28 deletions
@@ -516,43 +516,45 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
     if not should_record_stream_token_usage():
         return
 
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1
 
-    # prompt_usage
-    if request_kwargs and request_kwargs.get("messages"):
-        prompt_content = ""
-        # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
-        # is used by most of the other model.
+    # First, try to get usage from API response
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]
+
+    # If API response doesn't have usage, fallback to tiktoken calculation
+    if prompt_usage == -1 or completion_usage == -1:
         model_name = (
             complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
         )
-        for msg in request_kwargs.get("messages"):
-            if msg.get("content"):
-                prompt_content += msg.get("content")
-        if model_name:
-            prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-    # completion_usage
-    if complete_response.get("choices"):
-        completion_content = ""
-        # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
-        # is used by most of the other model.
-        model_name = complete_response.get("model") or "gpt-4"
-
-        for choice in complete_response.get("choices"):
-            if choice.get("message") and choice.get("message").get("content"):
-                completion_content += choice["message"]["content"]
-
-        if model_name:
-            completion_usage = get_token_count_from_string(
-                completion_content, model_name
-            )
+
+        # Calculate prompt tokens if not available from API
+        if prompt_usage == -1 and request_kwargs and request_kwargs.get("messages"):
+            prompt_content = ""
+            for msg in request_kwargs.get("messages"):
+                if msg.get("content"):
+                    prompt_content += msg.get("content")
+            if model_name and should_record_stream_token_usage():
+                prompt_usage = get_token_count_from_string(prompt_content, model_name)
+
+        # Calculate completion tokens if not available from API
+        if completion_usage == -1 and complete_response.get("choices"):
+            completion_content = ""
+            for choice in complete_response.get("choices"):
+                if choice.get("message") and choice.get("message").get("content"):
+                    completion_content += choice["message"]["content"]
+            if model_name and should_record_stream_token_usage():
+                completion_usage = get_token_count_from_string(
+                    completion_content, model_name
+                )
 
     # span record
     _set_span_stream_usage(span, prompt_usage, completion_usage)
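The practical effect of this hunk: when the streamed response carries a usage object, the instrumentation reports those exact counts and only falls back to tiktoken estimation for whichever value is missing. A hedged client-side sketch of how that usage object gets into the stream, assuming the official openai Python SDK (1.x) and an endpoint that honors stream_options={"include_usage": True} (model name is illustrative):

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    # With include_usage, every chunk except the last has usage=None;
    # the final chunk has empty choices and a populated usage object.
    if chunk.usage is not None:
        print(chunk.usage.prompt_tokens, chunk.usage.completion_tokens)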
@@ -971,6 +973,13 @@ def _accumulate_stream_items(item, complete_response):
     complete_response["model"] = item.get("model")
     complete_response["id"] = item.get("id")
 
+    # capture usage information from the last stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+    elif item.get("choices") and item["choices"][0].get("usage"):
+        # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+        complete_response["usage"] = item["choices"][0].get("usage")
+
     # prompt filter results
     if item.get("prompt_filter_results"):
         complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
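To illustrate the two chunk shapes this fallback covers, here are hypothetical final-chunk payloads (all field values invented): an OpenAI-style chunk with a top-level usage object, and a provider such as Moonshot that nests usage inside choices[0]. In either case complete_response["usage"] ends up populated, which is what lets _set_streaming_token_metrics prefer provider-reported counts over a tiktoken estimate.

# Hypothetical final stream chunks (values made up for illustration only)
openai_style_chunk = {
    "id": "chatcmpl-123",
    "model": "gpt-4o-mini",
    "choices": [],
    "usage": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
}

moonshot_style_chunk = {
    "id": "chatcmpl-456",
    "model": "moonshot-v1-8k",
    "choices": [
        {
            "index": 0,
            "delta": {},
            "usage": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
        }
    ],
}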

packages/opentelemetry-instrumentation-openai/poetry.lock

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default.
