Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions custom_model_runner/datarobot_drum/drum/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import os
import sys
import trafaret as t
import requests.adapters

from contextvars import ContextVar
from urllib.parse import urlparse, urlunparse

Expand Down Expand Up @@ -153,10 +155,10 @@ def filter(self, record: logging.LogRecord) -> bool:
return not record.name.startswith("opentelemetry")


def _setup_otel_logging(resource, multiprocessing=False):
def _setup_otel_logging(resource, multiprocessing=False, session=None):
logger_provider = LoggerProvider(resource=resource)
set_logger_provider(logger_provider)
exporter = OTLPLogExporter()
exporter = OTLPLogExporter(session=session)
if multiprocessing:
logger_provider.add_log_record_processor(SimpleLogRecordProcessor(exporter))
else:
Expand All @@ -168,16 +170,16 @@ def _setup_otel_logging(resource, multiprocessing=False):
return logger_provider


def _setup_otel_metrics(resource):
metric_exporter = OTLPMetricExporter()
def _setup_otel_metrics(resource, session=None):
metric_exporter = OTLPMetricExporter(session=session)
metric_reader = PeriodicExportingMetricReader(metric_exporter)
metric_provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
metrics.set_meter_provider(metric_provider)
return metric_provider


def _setup_otel_tracing(resource, multiprocessing=False):
otlp_exporter = OTLPSpanExporter()
def _setup_otel_tracing(resource, multiprocessing=False, session=None):
otlp_exporter = OTLPSpanExporter(session=session)
trace_provider = TracerProvider(resource=resource)
if multiprocessing:
trace_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter))
Expand Down Expand Up @@ -225,8 +227,18 @@ def setup_otel(runtime_parameters, options):
# (most frequent case)
multiprocessing = options.max_workers > 1

pool_maxsize = 30 # requests default is 10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about

total_concurrent_requests = 500
num_workers = self._params.get("processes")

pool_maxsize = ceil(total_concurrent_requests / num_workers)
for 8 cores it is ~60

if runtime_parameters.has("DR_OTEL_SESSION_POOL_MAXSIZE"):
pool_maxsize = int(runtime_parameters.get("DR_OTEL_SESSION_POOL_MAXSIZE"))

session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_maxsize=pool_maxsize)
Copy link
Contributor

@s-gavrenkov s-gavrenkov Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Custom adapter with default timeout
class TimeoutHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTP adapter that applies a default timeout to requests sent without one.

    Args:
        *args: Positional arguments forwarded unchanged to ``HTTPAdapter``.
        timeout: Fallback timeout used when a request arrives with no timeout.
            Defaults to 5.
        **kwargs: Keyword arguments forwarded unchanged to ``HTTPAdapter``
            (for example ``pool_maxsize``).
    """

    def __init__(self, *args, timeout=5, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        """Send ``request``, substituting the default timeout when none is set.

        Returns whatever ``HTTPAdapter.send`` returns.
        """
        # ``requests.Session`` always passes a ``timeout`` key to the adapter,
        # set to None when the caller did not specify one. A plain
        # ``kwargs.get("timeout", self.timeout)`` therefore never falls back,
        # so the check has to be against None rather than key presence.
        # NOTE: as a consequence an explicit ``timeout=None`` cannot be used
        # to disable the timeout through this adapter.
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)

adapter = TimeoutHTTPAdapter(pool_maxsize=pool_maxsize, timeout=5)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need a timeout here.

Copy link
Contributor

@s-gavrenkov s-gavrenkov Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or

class TimeoutSession(requests.Session):
    """``requests.Session`` that supplies a default ``timeout`` keyword.

    Args:
        timeout: Value passed as ``timeout`` whenever a request is issued
            without that keyword. Defaults to ``(5, 15)``.
    """

    def __init__(self, timeout=(5, 15)):
        super().__init__()
        self._timeout = timeout

    def request(self, *args, **kwargs):
        """Delegate to ``Session.request``, adding ``timeout`` only if absent."""
        # Caller-supplied keywords come last so an explicit ``timeout``
        # (including None) always wins over the session default.
        effective_kwargs = {"timeout": self._timeout, **kwargs}
        return super().request(*args, **effective_kwargs)

# Example usage:
pool_maxsize = 10  # or any desired value
session = TimeoutSession(timeout=(5, 15))
adapter = HTTPAdapter(pool_maxsize=pool_maxsize)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Default timeout is 10 sec.

session.mount("http://", adapter)
session.mount("https://", adapter)
resource = Resource.create()
trace_provider = _setup_otel_tracing(resource=resource, multiprocessing=multiprocessing)
trace_provider = _setup_otel_tracing(
resource=resource, multiprocessing=multiprocessing, session=session
)

logger_provider = None
metric_provider = None
Expand All @@ -235,8 +247,10 @@ def setup_otel(runtime_parameters, options):
if runtime_parameters.has("DR_OTEL_METRICS_LOGS_ENABLED") and runtime_parameters.get(
"DR_OTEL_METRICS_LOGS_ENABLED"
):
logger_provider = _setup_otel_logging(resource=resource, multiprocessing=multiprocessing)
metric_provider = _setup_otel_metrics(resource=resource)
logger_provider = _setup_otel_logging(
resource=resource, multiprocessing=multiprocessing, session=session
)
metric_provider = _setup_otel_metrics(resource=resource, session=session)

log.info(f"OTEL is configured with endpoint: {endpoint}")
return trace_provider, metric_provider, logger_provider
Expand Down