Commit 3e30544

Merge branch 'main' of https://github.com/traceloop/openllmetry into bucket
2 parents 8c93f1e + 1fe6257 commit 3e30544

24 files changed: +1589 additions, −1379 deletions

.github/workflows/ci.yml

Lines changed: 5 additions & 5 deletions

@@ -11,7 +11,7 @@ on:
 jobs:
   lint-pr:
     name: Lint PR
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     if: github.event_name == 'pull_request_target' && contains('["opened", "edited", "synchronize"]', github.event.action)
     permissions:
       pull-requests: read
@@ -23,7 +23,7 @@ jobs:

   lint:
     name: Lint
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest

     steps:
       - uses: actions/checkout@v4
@@ -49,7 +49,7 @@ jobs:

   build-packages:
     name: Build Packages
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ["3.11"]
@@ -82,7 +82,7 @@ jobs:

   test-packages:
     name: Test Packages
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     permissions:
       contents: "read"
       id-token: "write"
@@ -118,4 +118,4 @@ jobs:
       - name: Test
         env:
           HAYSTACK_TELEMETRY_ENABLED: False
-        run: npx nx affected -t test --exclude='sample-app' --exclude='opentelemetry-instrumentation-haystack' --parallel=3
+        run: npx nx affected -t test --exclude='sample-app' --exclude='opentelemetry-instrumentation-haystack' --parallel=3

.github/workflows/release.yml

Lines changed: 11 additions & 13 deletions

@@ -1,11 +1,11 @@
 name: Release - Traceloop SDK & Standalone Instrumentations

 on:
-  workflow_dispatch:
+  workflow_dispatch:

 jobs:
   bump-version:
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest

     outputs:
       new_version: ${{ steps.cz.outputs.version }}
@@ -42,9 +42,8 @@ jobs:
       - name: Print Version
         run: echo "Bumped to version ${{ steps.cz.outputs.version }}"

-
   release-instrumentations:
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     needs:
       - bump-version
     permissions:
@@ -67,23 +66,22 @@ jobs:
       - uses: actions/setup-node@v4
         with:
           node-version: 18
-
+
       - run: npm ci
-
+
       - name: Build Instrumentations
         run: npx nx run-many -t build-release --projects=tag:instrumentation
-
+
       - run: mkdir instrumentations-dist
       - run: cp packages/opentelemetry-instrumentation-*/dist/* instrumentations-dist

       - name: Publish release distributions to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           packages-dir: instrumentations-dist/
-
-
+
   release-sdk:
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     needs:
       - release-instrumentations
     permissions:
@@ -106,9 +104,9 @@ jobs:
       - uses: actions/setup-node@v4
         with:
           node-version: 18
-
+
       - run: npm ci
-
+
       - name: Build Traceloop SDK
         run: npx nx run traceloop-sdk:build-release

@@ -118,7 +116,7 @@ jobs:
           packages-dir: packages/traceloop-sdk/dist/

   test-sdk-installation:
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     needs:
       - bump-version
       - release-sdk

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py

Lines changed: 4 additions & 2 deletions

@@ -34,6 +34,7 @@
 from opentelemetry.instrumentation.langchain.span_utils import (
     SpanHolder,
     _set_span_attribute,
+    extract_model_name_from_response_metadata,
     set_chat_request,
     set_chat_response,
     set_chat_response_usage,
@@ -446,7 +447,8 @@ def on_llm_end(
             id = response.llm_output.get("id")
             if id is not None and id != "":
                 _set_span_attribute(span, GEN_AI_RESPONSE_ID, id)
-
+        if model_name is None:
+            model_name = extract_model_name_from_response_metadata(response)
         token_usage = (response.llm_output or {}).get("token_usage") or (
             response.llm_output or {}
         ).get("usage")
@@ -495,7 +497,7 @@ def on_llm_end(
                     SpanAttributes.LLM_RESPONSE_MODEL: model_name or "unknown",
                 },
            )
-            set_chat_response_usage(span, response)
+            set_chat_response_usage(span, response, self.token_histogram, token_usage is None, model_name)
         if should_emit_events():
             self._emit_llm_end_events(response)
         else:
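
A note on the new arguments (an editorial sketch, not part of the diff): `token_usage is None` tells `set_chat_response_usage` whether usage was already found in `llm_output`; only when it was not does the helper record the token histogram itself, reading the per-generation `usage_metadata` (see the loop in span_utils.py below). Assuming langchain_core >= 0.2, where `AIMessage.usage_metadata` exists, the edge case looks like:

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, LLMResult

# A response with llm_output=None: the handler finds no "token_usage"/"usage"
# dict, so record_token_usage=True is passed and set_chat_response_usage reads
# the per-generation usage_metadata instead (token counts are illustrative).
response = LLMResult(
    generations=[[
        ChatGeneration(
            message=AIMessage(
                content="Rainbow Steps",
                usage_metadata={
                    "input_tokens": 19,
                    "output_tokens": 4,
                    "total_tokens": 23,
                },
            )
        )
    ]],
    llm_output=None,
)

token_usage = (response.llm_output or {}).get("token_usage") or (
    response.llm_output or {}
).get("usage")
assert token_usage is None  # so histogram recording is delegated downstream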

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py

Lines changed: 40 additions & 3 deletions

@@ -15,6 +15,7 @@
     CallbackFilteredJSONEncoder,
     should_send_prompts,
 )
+from opentelemetry.metrics import Histogram
 from opentelemetry.semconv_ai import (
     SpanAttributes,
 )
@@ -271,13 +272,18 @@ def set_chat_response(span: Span, response: LLMResult) -> None:
             i += 1


-def set_chat_response_usage(span: Span, response: LLMResult):
+def set_chat_response_usage(
+    span: Span,
+    response: LLMResult,
+    token_histogram: Histogram,
+    record_token_usage: bool,
+    model_name: str
+) -> None:
     input_tokens = 0
     output_tokens = 0
     total_tokens = 0
     cache_read_tokens = 0

-    i = 0
     for generations in response.generations:
         for generation in generations:
             if (
@@ -302,7 +308,6 @@ def set_chat_response_usage(span: Span, response: LLMResult):
                     "input_token_details", {}
                 )
                 cache_read_tokens += input_token_details.get("cache_read", 0)
-            i += 1

     if (
         input_tokens > 0
@@ -330,6 +335,38 @@ def set_chat_response_usage(span: Span, response: LLMResult):
             SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read_tokens,
         )
+    if record_token_usage:
+        if input_tokens > 0:
+            token_histogram.record(
+                input_tokens,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Langchain",
+                    SpanAttributes.LLM_TOKEN_TYPE: "input",
+                    SpanAttributes.LLM_RESPONSE_MODEL: model_name,
+                },
+            )
+
+        if output_tokens > 0:
+            token_histogram.record(
+                output_tokens,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Langchain",
+                    SpanAttributes.LLM_TOKEN_TYPE: "output",
+                    SpanAttributes.LLM_RESPONSE_MODEL: model_name,
+                },
+            )
+
+
+def extract_model_name_from_response_metadata(response: LLMResult) -> str:
+    for generations in response.generations:
+        for generation in generations:
+            if (
+                getattr(generation, "message", None)
+                and getattr(generation.message, "response_metadata", None)
+                and (model_name := generation.message.response_metadata.get("model_name"))
+            ):
+                return model_name
+    return "unknown"


 def _set_chat_tool_calls(
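
Usage note (not part of the diff): chat models often stamp the concrete model revision on each message's `response_metadata` while leaving `llm_output` empty, which is the shape the new helper falls back on. A minimal sketch, assuming langchain_core is installed (the model string and message content are illustrative):

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, LLMResult

from opentelemetry.instrumentation.langchain.span_utils import (
    extract_model_name_from_response_metadata,
)

# llm_output is None, but the message still carries response_metadata:
# the helper walks the generations and returns the first "model_name" found.
response = LLMResult(
    generations=[[
        ChatGeneration(
            message=AIMessage(
                content="Rainbow Steps",
                response_metadata={"model_name": "gpt-3.5-turbo-0125"},
            )
        )
    ]],
    llm_output=None,
)

assert extract_model_name_from_response_metadata(response) == "gpt-3.5-turbo-0125"
# No generations at all falls through to the "unknown" default.
assert extract_model_name_from_response_metadata(LLMResult(generations=[])) == "unknown"

The walrus expression in the helper both tests for and captures the name, so the first populated generation wins; an empty result yields "unknown", matching the span attribute default at the call site.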
(new test cassette — file path not shown in this view)

Lines changed: 103 additions & 0 deletions

@@ -0,0 +1,103 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "What is a good name for a company that makes
+      colorful socks?", "role": "user"}], "model": "gpt-3.5-turbo", "n": 1, "stream":
+      false, "temperature": 0.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '178'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      traceparent:
+      - 00-d77972e6e3b0d96f4a65edea48fc5e34-92f98f134382afae-01
+      user-agent:
+      - OpenAI/Python 1.45.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.45.1
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.1
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA2xQPU/DMBTc8yssz02VDwI0WydExVAB6gBCkeO8JgbHtuyX8lH1vyOHtElVFg93
+        vnt3tw8IoaKiOaG8YchbI8Pl/c/u6mm92m1eeJpmD5s1cyq7W8VLI1s68wpdvgPHo2rOdWskoNDq
+        j+YWGIJ3jW+SRbSIk9vrnmh1BdLLaoNhOs9C7GypwyhOskHZaMHB0Zy8BoQQsu9fn1FV8EVzEs2O
+        SAvOsRpofvpECLVaeoQy54RDppDORpJrhaD62I9MqFJ/kufGAqvc9JeFbeeYT6k6KQf8cDordW2s
+        Lt3An/CtUMI1hQXmtPInHGpDe/YQEPLW1+vOElNjdWuwQP0ByhsmQzs6DjqS6cChRiYnmiN+ZlZU
+        gExIN1mHcsYbqEZlFEyaXZ78z+KvnVD1hUswOFH36RDaYitUDdZY0e/dL3kIfgEAAP//AwDTDjnS
+        bgIAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8d38f65e8c5807d6-ATL
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 16 Oct 2024 15:08:07 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=OSPGzzsIkijMjLSfTJjIkSiCxrDnWuQJoJfatCLq_os-1729091287-1.0.1.1-VsHqFTiK6ZfwZayLMdlW8YQ1RAnQm5rP76aV3S2QPBtjrbPWclwVCrqNC0SMuxcZxXGHj2IP1r8OafsbY_cwXA;
+        path=/; expires=Wed, 16-Oct-24 15:38:07 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kMO6YhIaggKVe4UZFi5l9RM0fjUpZYSwQRUPEBZNJr8-1729091287006-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - traceloop
+      openai-processing-ms:
+      - '160'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '5000'
+      x-ratelimit-limit-tokens:
+      - '4000000'
+      x-ratelimit-remaining-requests:
+      - '4999'
+      x-ratelimit-remaining-tokens:
+      - '3999968'
+      x-ratelimit-reset-requests:
+      - 12ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_1f22ead323fed2094d4f3f5c0187205d
+    status:
+      code: 200
+      message: OK
+version: 1
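
The `!!binary` response body above is base64-encoded gzip, the standard way VCR cassettes store compressed bodies. A sketch of decoding it (the truncated `blob` stands in for the full payload above; the `model`/`usage` keys assume the usual OpenAI chat-completion shape):

import base64
import gzip
import json

# Paste the full !!binary payload from the cassette, line breaks removed.
blob = "H4sIAAAAAAAAA2xQPU/DMBTc8yssz02VDwI0..."  # truncated for brevity

payload = json.loads(gzip.decompress(base64.b64decode(blob)))
print(payload.get("model"))   # the recorded model revision
print(payload.get("usage"))   # the recorded token usage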

packages/opentelemetry-instrumentation-langchain/tests/metrics/test_langchain_metrics.py

Lines changed: 64 additions & 0 deletions

@@ -1,3 +1,4 @@
+from unittest.mock import patch
 import pytest
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -114,3 +115,66 @@ def test_llm_chain_streaming_metrics(instrument_legacy, reader, llm):

     assert found_token_metric is True
     assert found_duration_metric is True
+
+
+def verify_token_metrics(data_points):
+    for data_point in data_points:
+        assert data_point.attributes[SpanAttributes.LLM_TOKEN_TYPE] in [
+            "output",
+            "input",
+        ]
+        assert data_point.sum > 0
+        assert data_point.attributes[SpanAttributes.LLM_SYSTEM] == "Langchain"
+
+
+def verify_duration_metrics(data_points):
+    assert any(data_point.count > 0 for data_point in data_points)
+    assert any(data_point.sum > 0 for data_point in data_points)
+    for data_point in data_points:
+        assert data_point.attributes[SpanAttributes.LLM_SYSTEM] == "Langchain"
+
+
+def verify_langchain_metrics(reader):
+    metrics_data = reader.get_metrics_data()
+    resource_metrics = metrics_data.resource_metrics
+    assert len(resource_metrics) > 0
+
+    found_token_metric = False
+    found_duration_metric = False
+
+    for rm in resource_metrics:
+        for sm in rm.scope_metrics:
+            for metric in sm.metrics:
+                if metric.name == Meters.LLM_TOKEN_USAGE:
+                    found_token_metric = True
+                    verify_token_metrics(metric.data.data_points)
+
+                if metric.name == Meters.LLM_OPERATION_DURATION:
+                    found_duration_metric = True
+                    verify_duration_metrics(metric.data.data_points)
+
+    return found_token_metric, found_duration_metric
+
+
+@pytest.mark.vcr
+def test_llm_chain_metrics_with_none_llm_output(instrument_legacy, reader, chain, llm):
+    """
+    This test verifies that the metrics system correctly handles edge cases where the
+    LLM response contains a None value in the llm_output field, ensuring that token
+    usage and operation duration metrics are still properly recorded.
+    """
+    original_generate = llm._generate
+
+    # Create a patched version that returns results with None llm_output
+    def patched_generate(*args, **kwargs):
+        result = original_generate(*args, **kwargs)
+        result.llm_output = None
+        return result
+
+    with patch.object(llm, '_generate', side_effect=patched_generate):
+        chain.run(product="colorful socks")
+
+    found_token_metric, found_duration_metric = verify_langchain_metrics(reader)
+
+    assert found_token_metric is True, "Token usage metrics not found"
+    assert found_duration_metric is True, "Operation duration metrics not found"
