Added changes for rate limited sampler (azure-exporter changes) (#41954)

rads-1996 · web-flow · commit 7a8a3aaf154d · 2025-07-24T14:51:52.000-07:00
* Added changes for rate limited sampler azure-exporter * Added CHANGELOG entry * Fixed spell check errors * Fixed spell check in tests * CHANGELOG updated * Fixed if-else block * Revert "Fixed if-else block" This reverts commit 3f8e58e. * Refactored if-else blocks to ternary operators in rate-limited sampler * Addressed feedback * Fixed djb2 arguments * Added comments to test file * Update processor file to match main * Fixed linting errors * Updated logic for when sampling percentage is 100% * Updated DJB2 arguments * Fixed linting errors * Moved parent_context check to top of the function
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md
@@ -22,7 +22,8 @@
   ([#41950](https://github.com/Azure/azure-sdk-for-python/pull/41950))
 - Customer Facing Statsbeat: Added logic for retry item count
   ([#41971](https://github.com/Azure/azure-sdk-for-python/pull/41971))
-
+- Added `RateLimitedSampler` to limit the number of sampled spans per second
+  ([#41954](https://github.com/Azure/azure-sdk-for-python/pull/41954))
 
 - Support AI Foundry by Handling GEN_AI_SYSTEM Attributes with [Spec](https://github.com/aep-health-and-standards/Telemetry-Collection-Spec/blob/main/ApplicationInsights/genai_semconv_mapping.md) ([#41705](https://github.com/Azure/azure-sdk-for-python/pull/41705))
 
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/__init__.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/__init__.py
@@ -8,10 +8,12 @@
 from azure.monitor.opentelemetry.exporter.export.metrics._exporter import AzureMonitorMetricExporter
 from azure.monitor.opentelemetry.exporter.export.trace._exporter import AzureMonitorTraceExporter
 from azure.monitor.opentelemetry.exporter.export.trace._sampling import ApplicationInsightsSampler
+from azure.monitor.opentelemetry.exporter.export.trace._rate_limited_sampling import RateLimitedSampler
 from ._version import VERSION
 
 __all__ = [
     "ApplicationInsightsSampler",
+    "RateLimitedSampler",
     "AzureMonitorMetricExporter",
     "AzureMonitorLogExporter",
     "AzureMonitorTraceExporter",
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py
@@ -9,8 +9,11 @@
     HTTP_CLIENT_REQUEST_DURATION,
     HTTP_SERVER_REQUEST_DURATION,
 )
+# pylint:disable=no-name-in-module
+from fixedint import Int32
 from azure.core import CaseInsensitiveEnumMeta
 
+
 # Environment variables
 
 _APPLICATIONINSIGHTS_STATSBEAT_DISABLED_ALL = "APPLICATIONINSIGHTS_STATSBEAT_DISABLED_ALL"
@@ -294,6 +297,9 @@ class _RP_Names(Enum):
 # sampleRate
 
 _SAMPLE_RATE_KEY = "_MS.sampleRate"
+# Initial value of the DJB2 hash accumulator used when computing sampling scores
+_SAMPLING_HASH = 5381
+# Largest and smallest values exposed by fixedint's Int32 type
+_INTEGER_MAX: int = Int32.maxval
+_INTEGER_MIN: int = Int32.minval
 
 # AAD Auth
 
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_rate_limited_sampling.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_rate_limited_sampling.py
@@ -0,0 +1,127 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import math
+import threading
+import time
+from typing import Optional, Sequence
+from opentelemetry.context import Context
+from opentelemetry.trace import Link, SpanKind, format_trace_id
+from opentelemetry.sdk.trace.sampling import (
+    Decision,
+    Sampler,
+    SamplingResult,
+    _get_parent_trace_state,
+)
+from opentelemetry.trace.span import TraceState
+from opentelemetry.util.types import Attributes
+
+from azure.monitor.opentelemetry.exporter._constants import _SAMPLE_RATE_KEY
+
+from azure.monitor.opentelemetry.exporter.export.trace._utils import (
+    _get_DJB2_sample_score,
+    _round_down_to_nearest,
+    parent_context_sampling,
+)
+
+class _State:
+    """Snapshot of the rate limiter's decayed window at one point in time."""
+
+    def __init__(self, effective_window_count: float, effective_window_nanoseconds: float, last_nano_time: int):
+        # Timestamp, in nanoseconds, at which this snapshot was taken
+        self.last_nano_time = last_nano_time
+        # Decayed number of spans observed in the window
+        self.effective_window_count = effective_window_count
+        # Decayed length of the window, in nanoseconds
+        self.effective_window_nanoseconds = effective_window_nanoseconds
+
+class RateLimitedSamplingPercentage:
+    """Computes a sampling percentage that tracks a target number of sampled spans per second.
+
+    Every call to :meth:`get` is counted as one observed span. The span count and the elapsed
+    time are kept as exponentially decayed sums, and the percentage is the share of observed
+    spans that the target rate allows over that decayed window.
+    """
+
+    def __init__(self, target_spans_per_second_limit: float, round_to_nearest: bool = True):
+        """
+        :param target_spans_per_second_limit: Desired upper bound of sampled spans per second.
+        :param round_to_nearest: When true, results are passed through ``_round_down_to_nearest``.
+        :raises ValueError: If ``target_spans_per_second_limit`` is negative.
+        """
+        if target_spans_per_second_limit < 0.0:
+            raise ValueError("Limit for sampled spans per second must be nonnegative!")
+        self._round_to_nearest = round_to_nearest
+        self._lock = threading.Lock()
+        # Convert the per-second target to per-nanosecond, the unit the window is tracked in
+        self._target_spans_per_nanosecond_limit = 1e-9 * target_spans_per_second_limit
+        # Hardcoded adaptation time of 0.1 seconds for adjusting to sudden changes in telemetry volumes
+        adaptation_time_seconds = 0.1
+        self._inverse_adaptation_time_nanoseconds = 1e-9 / adaptation_time_seconds
+        # Start from an empty window anchored at the current time
+        self._state = _State(0.0, 0.0, int(time.time_ns()))
+
+    def _update_state(self, old_state: _State, current_nano_time: int) -> _State:
+        """Return the state that results from observing one more span at ``current_nano_time``."""
+        if current_nano_time <= old_state.last_nano_time:
+            # The clock did not advance: count the span, keep the window and timestamp as they were
+            return _State(
+                old_state.effective_window_count + 1,
+                old_state.effective_window_nanoseconds,
+                old_state.last_nano_time,
+            )
+
+        elapsed_nanos = current_nano_time - old_state.last_nano_time
+        # Older observations are weighted down exponentially with the time elapsed since the last update
+        decay = math.exp(-elapsed_nanos * self._inverse_adaptation_time_nanoseconds)
+        return _State(
+            old_state.effective_window_count * decay + 1,
+            old_state.effective_window_nanoseconds * decay + elapsed_nanos,
+            current_nano_time,
+        )
+
+    def get(self) -> float:
+        """Record one span and return the sampling percentage (0-100) to apply to it."""
+        now_nanos = int(time.time_ns())
+
+        # Only the read-modify-write of the shared state happens under the lock
+        with self._lock:
+            snapshot = self._state = self._update_state(self._state, now_nanos)
+
+        if snapshot.effective_window_count == 0:
+            return 100.0
+
+        # Spans the target allows over the decayed window, relative to the spans actually observed
+        allowed_spans = snapshot.effective_window_nanoseconds * self._target_spans_per_nanosecond_limit
+        probability = allowed_spans / snapshot.effective_window_count
+        percentage = 100 * min(probability, 1.0)
+
+        return _round_down_to_nearest(percentage) if self._round_to_nearest else percentage
+
+
+class RateLimitedSampler(Sampler):
+    """Sampler that adapts its sampling percentage to a target number of sampled spans per second."""
+
+    def __init__(self, target_spans_per_second_limit: float):
+        """
+        :param target_spans_per_second_limit: Desired upper bound of sampled spans per second.
+        :raises ValueError: If the limit is negative (raised by ``RateLimitedSamplingPercentage``).
+        """
+        self._sampling_percentage_generator = RateLimitedSamplingPercentage(target_spans_per_second_limit)
+        self._description = f"RateLimitedSampler{{{target_spans_per_second_limit}}}"
+
+    def should_sample(
+        self,
+        parent_context: Optional[Context],
+        trace_id: int,
+        name: str,
+        kind: Optional[SpanKind] = None,
+        attributes: Attributes = None,
+        links: Optional[Sequence["Link"]] = None,
+        trace_state: Optional["TraceState"] = None,
+    ) -> "SamplingResult":
+        """Decide whether the span identified by ``trace_id`` is recorded and sampled.
+
+        A decision derived from the parent context takes precedence. Otherwise the trace id is
+        hashed to a score that is compared with the current rate-limited sampling percentage.
+        The returned attributes are a copy of ``attributes``; the sample rate is added to the
+        copy unless the percentage is 100.
+        """
+        if parent_context is not None:
+            parent_result = parent_context_sampling(parent_context, attributes)
+            if parent_result is not None:
+                return parent_result
+
+        sampling_percentage = self._sampling_percentage_generator.get()
+        sampling_score = _get_DJB2_sample_score(format_trace_id(trace_id).lower()) * 100.0
+
+        # The score can be exactly 100.0, so a strict comparison alone could drop a span at 100%
+        if sampling_percentage == 100.0 or sampling_score < sampling_percentage:
+            decision = Decision.RECORD_AND_SAMPLE
+        else:
+            decision = Decision.DROP
+
+        # Always carry the caller's attributes over: the result's attributes are what ends up on
+        # the span, so returning an empty mapping at 100% would discard them
+        new_attributes = {} if attributes is None else dict(attributes)
+        if sampling_percentage != 100.0:
+            new_attributes[_SAMPLE_RATE_KEY] = sampling_percentage
+
+        return SamplingResult(
+            decision,
+            new_attributes,
+            _get_parent_trace_state(parent_context),
+        )
+
+    def get_description(self) -> str:
+        """Return the sampler description, which includes the configured per-second limit."""
+        return self._description
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_utils.py
@@ -3,16 +3,35 @@
 
 from typing import no_type_check, Optional, Tuple
 from urllib.parse import urlparse
+import math
 
 from opentelemetry.semconv.attributes import (
     client_attributes,
     server_attributes,
     url_attributes,
     user_agent_attributes,
 )
+from opentelemetry.context import Context
+from opentelemetry.trace import get_current_span
+from opentelemetry.sdk.trace.sampling import (
+    Decision,
+    SamplingResult,
+    _get_parent_trace_state,
+)
 from opentelemetry.semconv.trace import DbSystemValues, SpanAttributes
 from opentelemetry.util.types import Attributes
 
+# pylint:disable=no-name-in-module
+from fixedint import Int32
+
+from azure.monitor.opentelemetry.exporter._constants import _SAMPLE_RATE_KEY
+
+from azure.monitor.opentelemetry.exporter._constants import (
+    _SAMPLING_HASH,
+    _INTEGER_MAX,
+    _INTEGER_MIN,
+)
+
 
 # pylint:disable=too-many-return-statements
 def _get_default_port_db(db_system: str) -> int:
@@ -320,3 +339,65 @@ def _get_url_for_http_request(attributes: Attributes) -> Optional[str]:
                     http_target,
                 )
     return url
+
+def _get_DJB2_sample_score(trace_id_hex: str) -> float:
+    """Hash a trace id string into a sampling score.
+
+    :param trace_id_hex: Trace id as a string; each character contributes its code point to the hash.
+    :return: The absolute hash value divided by ``_INTEGER_MAX``.
+    """
+    # This algorithm uses 32bit integers
+    # NOTE(review): presumably relies on Int32 arithmetic wrapping at 32 bits - confirm against fixedint
+    hash_value = Int32(_SAMPLING_HASH)
+    for char in trace_id_hex:
+        # (hash << 5) + hash is hash * 33, then the character's code point is added
+        hash_value = ((hash_value << 5) + hash_value) + ord(char)
+
+    # The minimum value is mapped to the maximum instead of being passed to abs()
+    if hash_value == _INTEGER_MIN:
+        hash_value = int(_INTEGER_MAX)
+    else:
+        hash_value = abs(hash_value)
+
+    # divide by _INTEGER_MAX for value between 0 and 1 for sampling score
+    return float(hash_value) / _INTEGER_MAX
+
+def _round_down_to_nearest(sampling_percentage: float) -> float:
+    """Round a sampling percentage down to the nearest value of the form ``100 / N``.
+
+    ``N`` is the smallest whole item count whose percentage ``100 / N`` does not exceed the input.
+
+    :param sampling_percentage: Percentage to round. Values above 100 round down to ``100.0``.
+    :return: ``100 / N`` as a float, or ``0.0`` when the input is zero, negative, NaN, or so small
+        that the item count is not finite or reaches ``_INTEGER_MAX``.
+    """
+    # Zero and negative percentages have no meaningful item count; always return a float here.
+    # (The previous guard compared against _INTEGER_MIN and so never caught small positive values.)
+    if sampling_percentage <= 0:
+        return 0.0
+    item_count = 100.0 / sampling_percentage
+    # Handle case where item_count is infinity or too large for math.ceil
+    if not math.isfinite(item_count) or item_count >= _INTEGER_MAX:
+        return 0.0
+    return 100.0 / math.ceil(item_count)
+
+def parent_context_sampling(
+    parent_context: Optional[Context],
+    attributes: Attributes = None
+) -> Optional["SamplingResult"]:
+    """Derive a sampling result from the parent span, when the parent dictates one.
+
+    Only a valid, non-remote parent span is considered. If that parent is not recording, the
+    child is dropped with a sample rate of ``0.0``. If it is recording and carries a sample
+    rate attribute, the child is sampled with that same rate.
+
+    :param parent_context: Context holding the parent span, if any.
+    :param attributes: Attributes of the span being created; they are copied, never mutated.
+    :return: The inherited sampling result, or ``None`` when the caller must decide itself.
+    """
+    if parent_context is None:
+        return None
+
+    parent_span = get_current_span(parent_context)
+    parent_span_context = parent_span.get_span_context()
+    if not parent_span_context.is_valid or parent_span_context.is_remote:
+        return None
+
+    if parent_span.is_recording():
+        # Honor parent's sampling rate, when it has one
+        inherited_rate = getattr(parent_span, 'attributes', {}).get(_SAMPLE_RATE_KEY)
+        if inherited_rate is None:
+            return None
+        decision = Decision.RECORD_AND_SAMPLE
+    else:
+        # Parent was dropped, drop this child too
+        inherited_rate = 0.0
+        decision = Decision.DROP
+
+    new_attributes = {} if attributes is None else dict(attributes)
+    new_attributes[_SAMPLE_RATE_KEY] = inherited_rate
+
+    return SamplingResult(
+        decision,
+        new_attributes,
+        _get_parent_trace_state(parent_context),
+    )
diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/trace/test_rate_limited_sampling.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/trace/test_rate_limited_sampling.py