From e293136f174b02bc4c0c396d99900eb68674bb56 Mon Sep 17 00:00:00 2001 From: minimAluminiumalism Date: Fri, 19 Sep 2025 14:24:07 +0800 Subject: [PATCH] feat(semconv): expand genai span kind --- .../instrumentation/langchain/__init__.py | 32 ++ .../langchain/callback_handler.py | 144 +++++++- .../poetry.lock | 68 ++-- .../pyproject.toml | 2 +- .../tests/test_agents.py | 36 +- .../tests/test_span_kinds.py | 307 ++++++++++++++++++ .../opentelemetry/semconv_ai/__init__.py | 4 + 7 files changed, 540 insertions(+), 53 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-langchain/tests/test_span_kinds.py diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py index 395e6a98ac..9996c143b7 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py @@ -83,6 +83,13 @@ def _instrument(self, **kwargs): wrapper=_BaseCallbackManagerInitWrapper(traceloopCallbackHandler), ) + # Wrap CallbackManager.configure to ensure our handler is included + wrap_function_wrapper( + module="langchain_core.callbacks.manager", + name="CallbackManager.configure", + wrapper=_CallbackManagerConfigureWrapper(traceloopCallbackHandler), + ) + if not self.disable_trace_context_propagation: self._wrap_openai_functions_for_tracing(traceloopCallbackHandler) @@ -168,6 +175,7 @@ def _wrap_openai_functions_for_tracing(self, traceloopCallbackHandler): def _uninstrument(self, **kwargs): unwrap("langchain_core.callbacks", "BaseCallbackManager.__init__") + unwrap("langchain_core.callbacks.manager", "CallbackManager.configure") if not self.disable_trace_context_propagation: if is_package_available("langchain_community"): unwrap("langchain_community.llms.openai", 
"BaseOpenAI._generate") @@ -208,6 +216,30 @@ def __call__( instance.add_handler(self._callback_handler, True) +class _CallbackManagerConfigureWrapper: + def __init__(self, callback_handler: "TraceloopCallbackHandler"): + self._callback_handler = callback_handler + + def __call__( + self, + wrapped, + instance, + args, + kwargs, + ): + result = wrapped(*args, **kwargs) + + if result and hasattr(result, 'add_handler'): + for handler in result.inheritable_handlers: + if isinstance(handler, type(self._callback_handler)): + break + else: + self._callback_handler._callback_manager = result + result.add_handler(self._callback_handler, True) + + return result + + # This class wraps a function call to inject tracing information (trace headers) into # OpenAI client requests. It assumes the following: # 1. The wrapped function includes a `run_manager` keyword argument that contains a `run_id`. diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py index c50a0cff6c..eb306e7908 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py @@ -359,6 +359,9 @@ def _create_llm_span( _set_span_attribute(span, SpanAttributes.LLM_SYSTEM, vendor) _set_span_attribute(span, SpanAttributes.LLM_REQUEST_TYPE, request_type.value) + span_kind = self._determine_llm_span_kind(serialized) + _set_span_attribute(span, SpanAttributes.TRACELOOP_SPAN_KIND, span_kind.value) + # we already have an LLM span by this point, # so skip any downstream instrumentation from here try: @@ -375,6 +378,72 @@ def _create_llm_span( return span + def _determine_llm_span_kind(self, serialized: Optional[dict[str, Any]]) -> TraceloopSpanKindValues: + """Determine the 
appropriate span kind for LLM operations based on model type.""" + if not serialized: + return TraceloopSpanKindValues.GENERATION + + class_name = _extract_class_name_from_serialized(serialized) + class_name_lower = class_name.lower() + + if any(keyword in class_name_lower for keyword in ['embedding', 'embed']): + return TraceloopSpanKindValues.EMBEDDING + + # Default to generation for other LLM operations + return TraceloopSpanKindValues.GENERATION + + def _determine_chain_span_kind( + self, + serialized: dict[str, Any], + name: str, + tags: Optional[list[str]] = None + ) -> TraceloopSpanKindValues: + if serialized and "id" in serialized: + class_path = serialized["id"] + if any("agent" in part.lower() for part in class_path): + return TraceloopSpanKindValues.AGENT + + if "agent" in name.lower(): + return TraceloopSpanKindValues.AGENT + + class_name = _extract_class_name_from_serialized(serialized) + name_lower = name.lower() + + # Tool detection for RunnableLambda and custom tool chains + if any(keyword in class_name.lower() for keyword in ['tool']): + return TraceloopSpanKindValues.TOOL + + # More precise tool detection: exclude operations like `parsers` + if any(keyword in name_lower for keyword in ['tool']) or ( + 'function' in name_lower and 'parser' not in name_lower + ): + return TraceloopSpanKindValues.TOOL + + if tags and any('tool' in tag.lower() for tag in tags): + return TraceloopSpanKindValues.TOOL + + # Retriever detection for RunnableLambda and custom retriever chains + if any(keyword in class_name.lower() for keyword in ['retriever', 'retrieve', 'vectorstore']): + return TraceloopSpanKindValues.RETRIEVER + + if any(keyword in name_lower for keyword in ['retriever', 'retrieve', 'search']): + return TraceloopSpanKindValues.RETRIEVER + + # Embedding detection for RunnableLambda and custom chains + if any(keyword in class_name.lower() for keyword in ['embedding', 'embed']): + return TraceloopSpanKindValues.EMBEDDING + + if any(keyword in name_lower for 
keyword in ['embedding', 'embed']): + return TraceloopSpanKindValues.EMBEDDING + + if any(keyword in class_name.lower() for keyword in ['rerank', 'reorder']): + return TraceloopSpanKindValues.RERANKER + + if any(keyword in name_lower for keyword in ['rerank', 'reorder']): + return TraceloopSpanKindValues.RERANKER + + return TraceloopSpanKindValues.TASK + @dont_throw def on_chain_start( self, @@ -395,12 +464,18 @@ def on_chain_start( entity_path = "" name = self._get_name_from_callback(serialized, **kwargs) - kind = ( + + base_kind = ( TraceloopSpanKindValues.WORKFLOW if parent_run_id is None or parent_run_id not in self.spans else TraceloopSpanKindValues.TASK ) + if base_kind == TraceloopSpanKindValues.TASK: + kind = self._determine_chain_span_kind(serialized, name, tags) + else: + kind = base_kind + if kind == TraceloopSpanKindValues.WORKFLOW: workflow_name = name else: @@ -710,6 +785,73 @@ def on_tool_end( ) self._end_span(span, run_id) + @dont_throw + def on_retriever_start( + self, + serialized: dict[str, Any], + query: str, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + tags: Optional[list[str]] = None, + metadata: Optional[dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Run when retriever starts running.""" + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return + + name = self._get_name_from_callback(serialized, kwargs=kwargs) + workflow_name = self.get_workflow_name(parent_run_id) + entity_path = self.get_entity_path(parent_run_id) + + span = self._create_task_span( + run_id, + parent_run_id, + name, + TraceloopSpanKindValues.RETRIEVER, + workflow_name, + name, + entity_path, + ) + if not should_emit_events() and should_send_prompts(): + span.set_attribute( + SpanAttributes.TRACELOOP_ENTITY_INPUT, + json.dumps( + { + "query": query, + "tags": tags, + "metadata": metadata, + "kwargs": kwargs, + }, + cls=CallbackFilteredJSONEncoder, + ), + ) + + @dont_throw + def on_retriever_end( + self, + documents: Any, + *, + run_id: 
UUID, + parent_run_id: Optional[UUID] = None, + **kwargs: Any, + ) -> None: + """Run when retriever ends running.""" + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return + + span = self._get_span(run_id) + if not should_emit_events() and should_send_prompts(): + span.set_attribute( + SpanAttributes.TRACELOOP_ENTITY_OUTPUT, + json.dumps( + {"documents": str(documents)[:1000], "kwargs": kwargs}, # Limit output size + cls=CallbackFilteredJSONEncoder, + ), + ) + self._end_span(span, run_id) + def get_parent_span(self, parent_run_id: Optional[str] = None): if parent_run_id is None: return None diff --git a/packages/opentelemetry-instrumentation-langchain/poetry.lock b/packages/opentelemetry-instrumentation-langchain/poetry.lock index 22f468d079..375bc745f0 100644 --- a/packages/opentelemetry-instrumentation-langchain/poetry.lock +++ b/packages/opentelemetry-instrumentation-langchain/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -207,7 +207,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" groups = ["test"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -285,8 +285,8 @@ files = [ jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" urllib3 = [ - {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, ] [package.extras] @@ -494,7 +494,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev", "test"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -733,7 +733,7 @@ description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" groups = ["test"] -markers = "python_version <= \"3.12\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" +markers = "(platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == 
\"win32\" or platform_machine == \"WIN32\") and python_version < \"3.13\"" files = [ {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, @@ -1824,7 +1824,7 @@ description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, @@ -1838,7 +1838,7 @@ description = "CUDA profiling tools runtime libs." 
optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, @@ -1854,7 +1854,7 @@ description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, @@ -1868,7 +1868,7 @@ description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, @@ -1884,7 +1884,7 @@ description = "cuDNN runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine 
== \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, @@ -1901,7 +1901,7 @@ description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, @@ -1920,7 +1920,7 @@ description = "cuFile GPUDirect libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, @@ -1933,7 +1933,7 @@ description = "CURAND native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = 
"nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, @@ -1949,7 +1949,7 @@ description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, @@ -1970,7 +1970,7 @@ description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, @@ -1989,7 +1989,7 @@ description = "NVIDIA cuSPARSELt" optional = false python-versions = "*" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, @@ -2003,7 +2003,7 @@ description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, @@ -2016,7 +2016,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, @@ -2030,7 +2030,7 @@ description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, @@ -2183,12 +2183,14 @@ name = "opentelemetry-semantic-conventions-ai" version = "0.4.13" description = "OpenTelemetry Semantic Conventions Extension for Large Language Models" optional = false -python-versions = "<4,>=3.9" +python-versions = ">=3.9,<4" groups = ["main", "test"] -files = [ - {file = "opentelemetry_semantic_conventions_ai-0.4.13-py3-none-any.whl", hash = "sha256:883a30a6bb5deaec0d646912b5f9f6dcbb9f6f72557b73d0f2560bf25d13e2d5"}, - {file = "opentelemetry_semantic_conventions_ai-0.4.13.tar.gz", hash = "sha256:94efa9fb4ffac18c45f54a3a338ffeb7eedb7e1bb4d147786e77202e159f0036"}, -] +files = [] +develop = true + +[package.source] +type = "directory" +url = "../opentelemetry-semantic-conventions-ai" [[package]] name = "orjson" @@ -3648,7 +3650,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev", "test"] -markers = "python_version <= \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, @@ -3817,7 +3819,7 @@ description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" groups = ["test"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" files = [ {file = "triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fad99beafc860501d7fcc1fb7045d9496cbe2c882b1674640304949165a916e7"}, {file = "triton-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:3161a2bf073d6b22c4e2f33f951f3e5e3001462b2570e6df9cd57565bdec2984"}, @@ -3842,7 +3844,7 @@ description = "Typing stubs for requests" optional = false python-versions = ">=3.7" groups = ["test"] -markers = "platform_python_implementation == \"PyPy\" or python_version < \"3.10\"" +markers = "platform_python_implementation == \"PyPy\" or python_version == \"3.9\"" files = [ {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, @@ -3858,7 +3860,7 @@ description = "Typing stubs for requests" optional = false python-versions = ">=3.8" groups = ["test"] -markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.10\"" +markers = "python_version >= \"3.10\" and platform_python_implementation != \"PyPy\"" files = [ {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, @@ -3874,7 +3876,7 @@ description = "Typing stubs for urllib3" optional = false python-versions = "*" groups = ["test"] -markers = "platform_python_implementation == \"PyPy\" or python_version < \"3.10\"" +markers = "platform_python_implementation == \"PyPy\" or python_version == \"3.9\"" files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, @@ -3927,7 +3929,7 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" groups = 
["test"] -markers = "platform_python_implementation == \"PyPy\" or python_version < \"3.10\"" +markers = "platform_python_implementation == \"PyPy\" or python_version == \"3.9\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, @@ -3945,7 +3947,7 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = ">=3.8" groups = ["test"] -markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.10\"" +markers = "python_version >= \"3.10\" and platform_python_implementation != \"PyPy\"" files = [ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, @@ -3972,8 +3974,8 @@ files = [ [package.dependencies] PyYAML = "*" urllib3 = [ - {version = "*", markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.10\""}, {version = "<2", markers = "platform_python_implementation == \"PyPy\" or python_version < \"3.10\""}, + {version = "*", markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.10\""}, ] wrapt = "*" yarl = "*" @@ -4317,4 +4319,4 @@ instruments = [] [metadata] lock-version = "2.1" python-versions = ">=3.9,<4" -content-hash = "e80cc2e8dce5dabf8955c32872f48ce503e27dfb1dedc36801bd607f68ba0cd1" +content-hash = "2ceb111a6d9db8d55e8fd1726d478feffa12bb22db9ce93fc1932af2dd433f53" diff --git a/packages/opentelemetry-instrumentation-langchain/pyproject.toml b/packages/opentelemetry-instrumentation-langchain/pyproject.toml index 56d0c963ef..c1c8bb6b72 100644 --- a/packages/opentelemetry-instrumentation-langchain/pyproject.toml +++ 
b/packages/opentelemetry-instrumentation-langchain/pyproject.toml @@ -27,7 +27,7 @@ python = ">=3.9,<4" opentelemetry-api = "^1.28.0" opentelemetry-instrumentation = ">=0.50b0" opentelemetry-semantic-conventions = ">=0.55b0" -opentelemetry-semantic-conventions-ai = "^0.4.13" +opentelemetry-semantic-conventions-ai = { path = "../opentelemetry-semantic-conventions-ai", develop = true } [tool.poetry.group.dev.dependencies] autopep8 = "^2.3.1" diff --git a/packages/opentelemetry-instrumentation-langchain/tests/test_agents.py b/packages/opentelemetry-instrumentation-langchain/tests/test_agents.py index bac44f561a..2c03acd458 100644 --- a/packages/opentelemetry-instrumentation-langchain/tests/test_agents.py +++ b/packages/opentelemetry-instrumentation-langchain/tests/test_agents.py @@ -42,19 +42,19 @@ def test_agents(instrument_legacy, span_exporter, log_exporter): assert set([span.name for span in spans]) == { "RunnableLambda.task", - "RunnableParallel.task", - "RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "tavily_search_results_json.tool", "RunnableLambda.task", - "RunnableParallel.task", - "RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "AgentExecutor.workflow", } @@ -86,19 +86,19 @@ def test_agents_with_events_with_content( assert set([span.name for span in spans]) == { "RunnableLambda.task", - "RunnableParallel.task", - "RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "tavily_search_results_json.tool", "RunnableLambda.task", - "RunnableParallel.task", - 
"RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "AgentExecutor.workflow", } @@ -180,19 +180,19 @@ def test_agents_with_events_with_no_content( assert set([span.name for span in spans]) == { "RunnableLambda.task", - "RunnableParallel.task", - "RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "tavily_search_results_json.tool", "RunnableLambda.task", - "RunnableParallel.task", - "RunnableAssign.task", + "RunnableParallel.agent", + "RunnableAssign.agent", "ChatPromptTemplate.task", "ChatOpenAI.chat", - "ToolsAgentOutputParser.task", + "ToolsAgentOutputParser.agent", "RunnableSequence.task", "AgentExecutor.workflow", } diff --git a/packages/opentelemetry-instrumentation-langchain/tests/test_span_kinds.py b/packages/opentelemetry-instrumentation-langchain/tests/test_span_kinds.py new file mode 100644 index 0000000000..a9b3cf1284 --- /dev/null +++ b/packages/opentelemetry-instrumentation-langchain/tests/test_span_kinds.py @@ -0,0 +1,307 @@ +"""Test new span kinds functionality.""" + +import pytest +from opentelemetry.semconv_ai import SpanAttributes, TraceloopSpanKindValues +from opentelemetry.instrumentation.langchain.callback_handler import TraceloopCallbackHandler +from uuid import uuid4 + + +class TestSpanKinds: + """Test span kind detection and creation.""" + + @pytest.fixture + def callback_handler(self, span_exporter, tracer_provider, meter_provider): + from opentelemetry.trace import get_tracer + from opentelemetry.metrics import get_meter + from opentelemetry.semconv_ai import Meters + + tracer = get_tracer(__name__, tracer_provider=tracer_provider) + meter = get_meter(__name__, meter_provider=meter_provider) + + duration_histogram = 
meter.create_histogram( + name=Meters.LLM_OPERATION_DURATION, + unit="s", + description="GenAI operation duration", + ) + token_histogram = meter.create_histogram( + name=Meters.LLM_TOKEN_USAGE, + unit="token", + description="Measures number of input and output tokens used", + ) + + return TraceloopCallbackHandler(tracer, duration_histogram, token_histogram) + + def test_determine_llm_span_kind_embedding(self, callback_handler): + """Test detection of embedding models.""" + serialized = { + "id": ["langchain", "embeddings", "openai", "OpenAIEmbeddings"], + "name": "OpenAIEmbeddings" + } + + kind = callback_handler._determine_llm_span_kind(serialized) + assert kind == TraceloopSpanKindValues.EMBEDDING + + def test_determine_llm_span_kind_generation(self, callback_handler): + """Test detection of generation models.""" + serialized = { + "id": ["langchain", "llms", "openai", "OpenAI"], + "name": "OpenAI" + } + + kind = callback_handler._determine_llm_span_kind(serialized) + assert kind == TraceloopSpanKindValues.GENERATION + + def test_determine_llm_span_kind_no_serialized(self, callback_handler): + """Test default behavior when no serialized data.""" + kind = callback_handler._determine_llm_span_kind(None) + assert kind == TraceloopSpanKindValues.GENERATION + + def test_retrieval_qa_chain(self, callback_handler): + """Test that RetrievalQA chain is classified as TASK""" + serialized = { + "id": ["langchain", "chains", "retrieval_qa", "base", "RetrievalQA"], + "name": "RetrievalQA" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "RetrievalQA") + assert kind == TraceloopSpanKindValues.TASK + + def test_conversational_retrieval_chain(self, callback_handler): + """Test that ConversationalRetrievalChain is classified as TASK""" + serialized = { + "id": ["langchain", "chains", "conversational_retrieval", "base", "ConversationalRetrievalChain"], + "name": "ConversationalRetrievalChain" + } + + kind = 
callback_handler._determine_chain_span_kind(serialized, "ConversationalRetrievalChain") + assert kind == TraceloopSpanKindValues.TASK + + def test_reranker_by_class(self, callback_handler): + """Test detection of reranker by class name.""" + serialized = { + "id": ["langchain", "retrievers", "document_compressors", "LLMChainExtractor"], + "name": "DocumentReranker" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "reranker") + assert kind == TraceloopSpanKindValues.RERANKER + + def test_reranker_by_name(self, callback_handler): + """Test detection of reranker by name.""" + serialized = { + "id": ["langchain", "chains", "base", "Chain"], + "name": "Chain" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "document_reranker") + assert kind == TraceloopSpanKindValues.RERANKER + + def test_tool_by_name(self, callback_handler): + """Test tool detection by name in chain callbacks.""" + serialized = { + "id": ["langchain_core", "runnables", "base", "RunnableLambda"], + "name": "RunnableLambda" + } + + # Test name-based tool detection + kind = callback_handler._determine_chain_span_kind(serialized, "add_numbers_tool") + assert kind == TraceloopSpanKindValues.TOOL + + kind = callback_handler._determine_chain_span_kind(serialized, "calculator_function") + assert kind == TraceloopSpanKindValues.TOOL + + def test_tool_by_tags(self, callback_handler): + """Test tool detection by tags.""" + serialized = { + "id": ["langchain_core", "runnables", "base", "RunnableLambda"], + "name": "RunnableLambda" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "runnable", tags=["tool", "calculator"]) + assert kind == TraceloopSpanKindValues.TOOL + + def test_embedding_by_name(self, callback_handler): + """Test embedding detection by name in chain callbacks.""" + serialized = { + "id": ["langchain_core", "runnables", "base", "RunnableLambda"], + "name": "RunnableLambda" + } + + # Test name-based detection + kind = 
callback_handler._determine_chain_span_kind(serialized, "OpenAIEmbeddings") + assert kind == TraceloopSpanKindValues.EMBEDDING + + kind = callback_handler._determine_chain_span_kind(serialized, "document_embedder") + assert kind == TraceloopSpanKindValues.EMBEDDING + + def test_agent_executor(self, callback_handler): + """Test that AgentExecutor is classified as AGENT (real LangChain component).""" + serialized = { + "id": ["langchain", "agents", "agent", "AgentExecutor"], + "name": "AgentExecutor" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "AgentExecutor") + assert kind == TraceloopSpanKindValues.AGENT + + def test_default_task(self, callback_handler): + """Test default behavior returns TASK.""" + serialized = { + "id": ["langchain", "chains", "llm", "LLMChain"], + "name": "LLMChain" + } + + kind = callback_handler._determine_chain_span_kind(serialized, "llm_chain") + assert kind == TraceloopSpanKindValues.TASK + + def test_workflow_span(self, callback_handler, span_exporter): + """Test workflow span kind detection.""" + run_id = uuid4() + serialized = { + "id": ["langchain", "chains", "sequential", "SequentialChain"], + "name": "SequentialChain" + } + + callback_handler.on_chain_start( + serialized=serialized, + inputs={"input": "test"}, + run_id=run_id, + parent_run_id=None + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 # Span not finished yet + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "SequentialChain.workflow" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "workflow" + + def test_agent_executor_in_workflow(self, callback_handler, span_exporter): + """Test that AgentExecutor creates agent spans when used as child chain.""" + parent_run_id = uuid4() + run_id = uuid4() + + callback_handler.on_chain_start( + serialized={"id": ["langchain", "workflows", "rag"], "name": "RAGWorkflow"}, + inputs={}, + run_id=parent_run_id, + parent_run_id=None 
+ ) + + # AgentExecutor as child component + serialized = { + "id": ["langchain", "agents", "agent", "AgentExecutor"], + "name": "AgentExecutor" + } + + callback_handler.on_chain_start( + serialized=serialized, + inputs={"input": "test query"}, + run_id=run_id, + parent_run_id=parent_run_id + ) + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "AgentExecutor.agent" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "agent" + + def test_embedding_model(self, callback_handler, span_exporter): + """Test embedding span kind detection in LLM.""" + run_id = uuid4() + serialized = { + "id": ["langchain", "embeddings", "openai", "OpenAIEmbeddings"], + "name": "OpenAIEmbeddings" + } + + callback_handler.on_llm_start( + serialized=serialized, + prompts=["test prompt"], + run_id=run_id, + parent_run_id=None + ) + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "OpenAIEmbeddings.completion" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "embedding" + + def test_chat_model(self, callback_handler, span_exporter): + """Test generation span kind detection in chat model.""" + run_id = uuid4() + serialized = { + "id": ["langchain", "chat_models", "openai", "ChatOpenAI"], + "name": "ChatOpenAI" + } + + from langchain_core.messages import HumanMessage + messages = [[HumanMessage(content="test message")]] + + callback_handler.on_chat_model_start( + serialized=serialized, + messages=messages, + run_id=run_id, + parent_run_id=None + ) + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "ChatOpenAI.chat" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "generation" + + def test_runnable_lambda_embedding_creates_embedding_span(self, callback_handler, span_exporter): + """Test RunnableLambda with embedding name creates EMBEDDING span.""" + parent_run_id = uuid4() + run_id = uuid4() + + 
callback_handler.on_chain_start( + serialized={"id": ["pipeline"], "name": "EmbeddingPipeline"}, + inputs={}, + run_id=parent_run_id, + parent_run_id=None + ) + + # RunnableLambda with embedding name + serialized = { + "id": ["langchain_core", "runnables", "base", "RunnableLambda"], + "name": "RunnableLambda" + } + + callback_handler.on_chain_start( + serialized=serialized, + inputs={"texts": ["doc1", "doc2"]}, + run_id=run_id, + parent_run_id=parent_run_id, + name="OpenAIEmbeddings" # Name passed via kwargs + ) + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "OpenAIEmbeddings.embedding" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "embedding" + + def test_vector_store_retriever(self, callback_handler, span_exporter): + """Test VectorStoreRetriever creates proper RETRIEVER spans (real LangChain component).""" + run_id = uuid4() + + serialized = { + "id": ["langchain_core", "vectorstores", "base", "VectorStoreRetriever"], + "name": "VectorStoreRetriever" + } + + callback_handler.on_retriever_start( + serialized=serialized, + query="test query", + run_id=run_id, + parent_run_id=None + ) + + span_holder = callback_handler.spans[run_id] + assert span_holder.span.name == "VectorStoreRetriever.retriever" + assert span_holder.span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == "retriever" + + from langchain_core.documents import Document + callback_handler.on_retriever_end( + documents=[Document(page_content="test doc")], + run_id=run_id, + parent_run_id=None + ) + + # Check span was ended + assert run_id not in callback_handler.spans diff --git a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py index a080ef2d90..8eeb28d71f 100644 --- a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py +++ 
b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py @@ -303,4 +303,8 @@ class TraceloopSpanKindValues(Enum): TASK = "task" AGENT = "agent" TOOL = "tool" + EMBEDDING = "embedding" + RETRIEVER = "retriever" + RERANKER = "reranker" + GENERATION = "generation" UNKNOWN = "unknown"