Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -350,4 +350,18 @@ private MLCommonsSettings() {}
// Feature flag for enabling telemetry static metric collection job -- MLStatsJobProcessor
public static final Setting<Boolean> ML_COMMONS_STATIC_METRIC_COLLECTION_ENABLED = Setting
.boolSetting("plugins.ml_commons.metrics_static_collection_enabled", false, Setting.Property.NodeScope, Setting.Property.Final);

// Feature flag for enabling telemetry tracer
// This setting is Final because it controls the core tracing infrastructure initialization.
// Once the tracer is initialized, changing this setting would require a node restart
// to properly reinitialize the tracing components.
public static final Setting<Boolean> ML_COMMONS_TRACING_ENABLED = Setting
.boolSetting("plugins.ml_commons.tracing_enabled", false, Setting.Property.NodeScope, Setting.Property.Final);

// Feature flag for enabling telemetry agent tracing
// This setting is Dynamic because agent tracing can be enabled/disabled at runtime
// without requiring a node restart. The MLAgentTracer singleton can be updated
// to switch between real tracer and NoopTracer based on this setting.
public static final Setting<Boolean> ML_COMMONS_AGENT_TRACING_ENABLED = Setting
.boolSetting("plugins.ml_commons.agent_tracing_enabled", false, Setting.Property.NodeScope, Setting.Property.Dynamic);
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.opensearch.ml.common.settings;

import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_AGENT_FRAMEWORK_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_AGENT_TRACING_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_CONNECTOR_PRIVATE_IP_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_CONTROLLER_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_LOCAL_MODEL_ENABLED;
Expand All @@ -19,6 +20,7 @@
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_RAG_PIPELINE_FEATURE_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_REMOTE_INFERENCE_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_STATIC_METRIC_COLLECTION_ENABLED;
import static org.opensearch.ml.common.settings.MLCommonsSettings.ML_COMMONS_TRACING_ENABLED;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -51,6 +53,9 @@ public class MLFeatureEnabledSetting {
private volatile Boolean isMetricCollectionEnabled;
private volatile Boolean isStaticMetricCollectionEnabled;

private volatile Boolean isTracingEnabled;
private volatile Boolean isAgentTracingEnabled;

private final List<SettingsChangeListener> listeners = new ArrayList<>();

public MLFeatureEnabledSetting(ClusterService clusterService, Settings settings) {
Expand All @@ -66,6 +71,8 @@ public MLFeatureEnabledSetting(ClusterService clusterService, Settings settings)
isRagSearchPipelineEnabled = ML_COMMONS_RAG_PIPELINE_FEATURE_ENABLED.get(settings);
isMetricCollectionEnabled = ML_COMMONS_METRIC_COLLECTION_ENABLED.get(settings);
isStaticMetricCollectionEnabled = ML_COMMONS_STATIC_METRIC_COLLECTION_ENABLED.get(settings);
isTracingEnabled = ML_COMMONS_TRACING_ENABLED.get(settings);
isAgentTracingEnabled = ML_COMMONS_AGENT_TRACING_ENABLED.get(settings);

clusterService
.getClusterSettings()
Expand All @@ -88,6 +95,9 @@ public MLFeatureEnabledSetting(ClusterService clusterService, Settings settings)
clusterService
.getClusterSettings()
.addSettingsUpdateConsumer(MLCommonsSettings.ML_COMMONS_RAG_PIPELINE_FEATURE_ENABLED, it -> isRagSearchPipelineEnabled = it);
clusterService
.getClusterSettings()
.addSettingsUpdateConsumer(MLCommonsSettings.ML_COMMONS_AGENT_TRACING_ENABLED, it -> isAgentTracingEnabled = it);
}

/**
Expand Down Expand Up @@ -178,6 +188,14 @@ public boolean isStaticMetricCollectionEnabled() {
return isStaticMetricCollectionEnabled;
}

public boolean isTracingEnabled() {
return isTracingEnabled;
}

public boolean isAgentTracingEnabled() {
return isAgentTracingEnabled;
}

@VisibleForTesting
public void notifyMultiTenancyListeners(boolean isEnabled) {
for (SettingsChangeListener listener : listeners) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ public void setUp() {
MLCommonsSettings.ML_COMMONS_MCP_SERVER_ENABLED,
MLCommonsSettings.ML_COMMONS_RAG_PIPELINE_FEATURE_ENABLED,
MLCommonsSettings.ML_COMMONS_METRIC_COLLECTION_ENABLED,
MLCommonsSettings.ML_COMMONS_STATIC_METRIC_COLLECTION_ENABLED
MLCommonsSettings.ML_COMMONS_STATIC_METRIC_COLLECTION_ENABLED,
MLCommonsSettings.ML_COMMONS_TRACING_ENABLED,
MLCommonsSettings.ML_COMMONS_AGENT_TRACING_ENABLED
)
);
when(mockClusterService.getClusterSettings()).thenReturn(mockClusterSettings);
Expand All @@ -65,6 +67,8 @@ public void testDefaults_allFeaturesEnabled() {
.put("plugins.ml_commons.rag_pipeline_feature_enabled", true)
.put("plugins.ml_commons.metrics_collection_enabled", true)
.put("plugins.ml_commons.metrics_static_collection_enabled", true)
.put("plugins.ml_commons.tracing_enabled", true)
.put("plugins.ml_commons.agent_tracing_enabled", true)
.build();

MLFeatureEnabledSetting setting = new MLFeatureEnabledSetting(mockClusterService, settings);
Expand All @@ -81,6 +85,8 @@ public void testDefaults_allFeaturesEnabled() {
assertTrue(setting.isRagSearchPipelineEnabled());
assertTrue(setting.isMetricCollectionEnabled());
assertTrue(setting.isStaticMetricCollectionEnabled());
assertTrue(setting.isTracingEnabled());
assertTrue(setting.isAgentTracingEnabled());
}

@Test
Expand All @@ -99,6 +105,8 @@ public void testDefaults_someFeaturesDisabled() {
.put("plugins.ml_commons.rag_pipeline_feature_enabled", false)
.put("plugins.ml_commons.metrics_collection_enabled", false)
.put("plugins.ml_commons.metrics_static_collection_enabled", false)
.put("plugins.ml_commons.tracing_enabled", false)
.put("plugins.ml_commons.agent_tracing_enabled", false)
.build();

MLFeatureEnabledSetting setting = new MLFeatureEnabledSetting(mockClusterService, settings);
Expand All @@ -115,6 +123,8 @@ public void testDefaults_someFeaturesDisabled() {
assertFalse(setting.isRagSearchPipelineEnabled());
assertFalse(setting.isMetricCollectionEnabled());
assertFalse(setting.isStaticMetricCollectionEnabled());
assertFalse(setting.isTracingEnabled());
assertFalse(setting.isAgentTracingEnabled());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.ml.engine.algorithms.agent.tracing;

import java.util.Map;

import org.opensearch.ml.common.settings.MLFeatureEnabledSetting;
import org.opensearch.telemetry.tracing.Span;
import org.opensearch.telemetry.tracing.Tracer;

/**
* Abstract base class for tracing implementations in ML Commons.
*
* This class defines the common interface and shared state for all ML tracing logic,
* such as starting and ending spans. Concrete subclasses (such as {@link MLAgentTracer})
* implement tracing for specific ML components or workflows.
*
* The intention is to allow for future extension: additional tracers can be created
* for other ML features (e.g., connector tracing) by extending this class.
*
* Each call to {@link #startSpan(String, Map, Span)} returns a {@link Span} object,
* which acts as a handle to the started span. The {@link Span} object typically contains
* a unique identifier (span ID) that can be used for logging and debugging. When ending
* a span, always pass the same {@link Span} object to {@link #endSpan(Span)}.
*/
public abstract class AbstractMLTracer {
protected final Tracer tracer;
protected final MLFeatureEnabledSetting mlFeatureEnabledSetting;

/**
* Constructs a new AbstractMLTracer with the specified tracer and feature settings.
*
* @param tracer The underlying tracer implementation to use for span operations.
* This may be a real tracer or a no-op tracer depending on configuration.
* @param mlFeatureEnabledSetting The ML feature settings that control tracing behavior.
* Used to determine if tracing is enabled and which features
* should be traced.
*/
protected AbstractMLTracer(Tracer tracer, MLFeatureEnabledSetting mlFeatureEnabledSetting) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you expecting other types of the MLTracer other than MLAgentTracer, if so, write the java doc to explain the intension of the the abstract class

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

this.tracer = tracer;
this.mlFeatureEnabledSetting = mlFeatureEnabledSetting;
}

/**
* Starts a new span for tracing ML operations.
*
* This method creates a new span with the specified name and attributes. The span
* can be either a root span (when parentSpan is null) or a child span of the
* specified parent span. The returned Span object should be passed to
* {@link #endSpan(Span)} when the operation completes.
*
* @param name The name of the span.
* @param attributes A map of key-value pairs to associate with the span. These attributes
* provide additional context about the operation being traced. May be null
* or empty if no attributes are needed.
* @param parentSpan The parent span, or null if this should be a root span. Child spans
* are nested under their parent spans in the trace hierarchy.
* @return A Span object representing the started span, or null if tracing is disabled.
* The returned span should be passed to {@link #endSpan(Span)} when the
* operation completes.
*/
public abstract Span startSpan(String name, Map<String, String> attributes, Span parentSpan);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does the the span has id? thinking if you have multiple start and end, would be helpful the start and end method would return some identifier, so that it would be helpful for future debugging. thinking if you would start multiple span, one the span failed, how do you know which span goes wrong?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it does, however it is standard practice to use the Span object as the unique identifier

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you think of creating an overloaded method for spans without parents:
startSpan(String name, Map<String, String> attributes);

This internally can call the startSpan method with null parent


/**
* Ends a previously started span.
*
* This method marks the completion of a span and finalizes its timing information.
* The span will be recorded in the trace with its start time, end time, and any
* attributes that were set during its lifetime.
*
* @param span The span to end. This should be the same Span object that was returned
* by a previous call to {@link #startSpan(String, Map, Span)}. If null,
* this method is a no-op.
*/
public abstract void endSpan(Span span);
}
Loading
Loading