diff --git a/tests/otel/test_metrics.py b/tests/otel/test_metrics.py new file mode 100644 index 00000000000..632f91a3db2 --- /dev/null +++ b/tests/otel/test_metrics.py @@ -0,0 +1,271 @@ +import time + +from utils import context, weblog, interfaces, scenarios, irrelevant, features +from collections.abc import Callable + + +def validate_resource_metrics(resource_metrics_list: list) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message ResourceMetrics { + # optional opentelemetry.proto.resource.v1.Resource resource = 1; + # repeated ScopeMetrics scope_metrics = 2; + # optional string schema_url = 3; + # } + + assert all( + len(resource_metrics) == 1 for resource_metrics in resource_metrics_list + ), "Metrics payloads from one configured application should have one set of resource metrics" + assert any( + "scopeMetrics" in resource_metrics[0] and len(resource_metrics[0]["scopeMetrics"]) > 0 + for resource_metrics in resource_metrics_list + ), "Scope metrics should be present on some payloads" + assert all( + resource_metrics[0]["resource"] is not None for resource_metrics in resource_metrics_list + ), "Resource metrics from an application with configured resources should have a resource field" + # Do not assert on schema_url, as it is not always present + + +def validate_scope_metrics(scope_metrics_list: list) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message ScopeMetrics { + # optional opentelemetry.proto.common.v1.InstrumentationScope scope = 1; + # repeated Metric metrics = 2; + # optional string schema_url = 3; + # } + + assert all( + len(scope_metrics) >= 1 for scope_metrics in scope_metrics_list + ), "Metrics payloads from one configured application should have one or more set of scope metrics" + assert all( + scope_metrics[0]["scope"] is not None for scope_metrics in scope_metrics_list + ), "Scope metrics should have a scope field" + # Do not assert on schema_url, as it is not always present + for scope_metrics in scope_metrics_list: + # Assert values only for metrics we created, since we're asserting against specific fields + if "opentelemetry" not in scope_metrics[0]["scope"]["name"]: + for metric in scope_metrics[0]["metrics"]: + validate_metric(metric) + + +def validate_metric(metric: dict) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message Metric { + # optional string name = 1; + # optional string description = 2; + # optional string unit = 3; + # oneof data { + # Gauge gauge = 5; + # Sum sum = 7; + # Histogram histogram = 9; + # ExponentialHistogram exponential_histogram = 10; + # Summary summary = 11; + # } + # repeated opentelemetry.proto.common.v1.KeyValue metadata = 12; + # } + + assert metric["name"] is not None, "Metrics are expected to have a name" + # assert metric["description"] is not None, "Metrics are expected to have a description" + # assert metric["unit"] is not None, "Metrics are expected to have a unit" + # assert metric["metadata"] is not None, "Metrics are expected to have metadata" + + assert metric["name"].lower() in metric_to_validator, f"Metric {metric['name']} is not expected" + func, name, value, aggregation_temporality = metric_to_validator[metric["name"].lower()] + func(metric, name, value, aggregation_temporality) + + +def validate_counter(metric: dict, name: str, value: object, aggregation_temporality: str) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message Sum { + # repeated NumberDataPoint data_points = 1; + # optional AggregationTemporality aggregation_temporality = 2; + # optional bool is_monotonic = 3; + # } + # enum AggregationTemporality { + # AGGREGATION_TEMPORALITY_UNSPECIFIED = 0; + # AGGREGATION_TEMPORALITY_DELTA = 1; + # AGGREGATION_TEMPORALITY_CUMULATIVE = 2; + # } + + assert metric["name"].lower() == name + assert "sum" in metric + assert len(metric["sum"]["dataPoints"]) == 1 + assert metric["sum"]["aggregationTemporality"] == aggregation_temporality + assert metric["sum"]["isMonotonic"] + validate_number_data_point(metric["sum"]["dataPoints"][0], "asInt", value, "0") + + +def validate_histogram(metric: dict, name: str, value: object, aggregation_temporality: str) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message Histogram { + # repeated HistogramDataPoint data_points = 1; + # optional AggregationTemporality aggregation_temporality = 2; + # } + # message HistogramDataPoint { + # reserved 1; + # repeated opentelemetry.proto.common.v1.KeyValue attributes = 9; + # fixed64 start_time_unix_nano = 2; + # fixed64 time_unix_nano = 3; + # fixed64 count = 4; + # optional double sum = 5; + # repeated fixed64 bucket_counts = 6; + # repeated double explicit_bounds = 7; + # repeated Exemplar exemplars = 8; + # uint32 flags = 10; + # optional double min = 11; + # optional double max = 12; + # } + + assert metric["name"].lower() == name + assert "histogram" in metric # This asserts the metric type is a histogram + assert len(metric["histogram"]["dataPoints"]) == 1 + assert metric["histogram"]["aggregationTemporality"] == aggregation_temporality + + assert metric["histogram"]["dataPoints"][0]["startTimeUnixNano"].isdecimal() + assert metric["histogram"]["dataPoints"][0]["timeUnixNano"].isdecimal() + assert metric["histogram"]["dataPoints"][0]["count"] == "1" + assert metric["histogram"]["dataPoints"][0]["sum"] == value + assert metric["histogram"]["dataPoints"][0]["min"] == value + assert metric["histogram"]["dataPoints"][0]["max"] == value + + +def validate_up_down_counter(metric: dict, name: str, value: object, aggregation_temporality: str) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message Sum { + # repeated NumberDataPoint data_points = 1; + # optional AggregationTemporality aggregation_temporality = 2; + # optional bool is_monotonic = 3; + # } + # enum AggregationTemporality { + # AGGREGATION_TEMPORALITY_UNSPECIFIED = 0; + # AGGREGATION_TEMPORALITY_DELTA = 1; + # AGGREGATION_TEMPORALITY_CUMULATIVE = 2; + # } + + assert metric["name"].lower() == name + assert "sum" in metric + assert len(metric["sum"]["dataPoints"]) == 1 + assert metric["sum"]["aggregationTemporality"] == aggregation_temporality + validate_number_data_point(metric["sum"]["dataPoints"][0], "asInt", value) + + +def validate_gauge(metric: dict, name: str, value: object, _: str) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message Gauge { + # repeated NumberDataPoint data_points = 1; + # } + + assert metric["name"].lower() == name + assert "gauge" in metric + assert len(metric["gauge"]["dataPoints"]) == 1 + validate_number_data_point(metric["gauge"]["dataPoints"][0], "asDouble", value, start_time_is_required=False) + + +def validate_number_data_point( + data_point: dict, value_type: object, value: object, default_value: object = None, start_time_is_required: bool = True +) -> None: + # Assert the following protobuf structure from https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto: + # message NumberDataPoint { + # reserved 1; + # repeated opentelemetry.proto.common.v1.KeyValue attributes = 7; + # optional fixed64 start_time_unix_nano = 2; + # optional fixed64 time_unix_nano = 3; + # oneof value { + # double as_double = 4; + # sfixed64 as_int = 6; + # } + # repeated Exemplar exemplars = 5; + # uint32 flags = 8; + # } + + if start_time_is_required: + assert data_point["startTimeUnixNano"].isdecimal() + assert data_point["timeUnixNano"].isdecimal() + if default_value is not None: + assert data_point[value_type] == value or data_point[value_type] == default_value + else: + assert data_point[value_type] == value + # Do not assert attributes on every data point + # Do not assert exemplars on every data point + # Do not assert flags on every data point + + +metric_to_validator: dict[str, tuple[Callable[[dict, str, object, str], None], str, object, str]] = { + "example.counter": (validate_counter, "example.counter", "11", "AGGREGATION_TEMPORALITY_DELTA"), + "example.async.counter": (validate_counter, "example.async.counter", "22", "AGGREGATION_TEMPORALITY_DELTA"), + "example.histogram": (validate_histogram, "example.histogram", 33.0, "AGGREGATION_TEMPORALITY_DELTA"), + "example.updowncounter": ( + validate_up_down_counter, + "example.updowncounter", + "55", + "AGGREGATION_TEMPORALITY_CUMULATIVE", + ), + "example.async.updowncounter": ( + validate_up_down_counter, + "example.async.updowncounter", + "66", + "AGGREGATION_TEMPORALITY_CUMULATIVE", + ), + "example.gauge": (validate_gauge, "example.gauge", 77.0, ""), + "example.async.gauge": (validate_gauge, "example.async.gauge", 88.0, ""), +} + + +@scenarios.otel_metric_e2e +@scenarios.apm_tracing_e2e_otel +@irrelevant(context.library not in ("java_otel", "dotnet", "python")) +@features.not_reported # FPD does not support otel libs +class Test_OTelMetrics: + def setup_agent_otlp_upload(self): + self.start = int(time.time()) + self.r = weblog.get(path="/basic/metric") + self.expected_metrics = [ + "example.counter", + "example.async.counter", + "example.gauge", + "example.async.gauge", + "example.updowncounter", + "example.async.updowncounter", + "example.histogram", + ] + + def test_agent_otlp_upload(self): + seen = set() + + all_resource_metrics = [] + all_scope_metrics = [] + filtered_individual_metrics = [] + + for _, resource_metrics in interfaces.open_telemetry.get_metrics(host="agent"): + all_resource_metrics.append(resource_metrics) + if "scopeMetrics" not in resource_metrics[0]: + continue + + scope_metrics = resource_metrics[0]["scopeMetrics"] + all_scope_metrics.append(scope_metrics) + + for scope_metric in scope_metrics: + for metric in scope_metric["metrics"]: + metric_name = metric["name"].lower() + if metric_name.startswith("example"): + # Asynchronous instruments report on each metric read, so we need to deduplicate + # Also, the UpDownCounter instrument (in addition to the AsyncUpDownCounter instrument) is cumulative, so we need to deduplicate + if metric_name.startswith("example.async") or metric_name == "example.updowncounter": + if metric_name not in seen: + filtered_individual_metrics.append(metric) + seen.add(metric_name) + else: + filtered_individual_metrics.append(metric) + + # Assert resource metrics are present and valid + assert all( + len(resource_metrics) == 1 for resource_metrics in all_resource_metrics + ), "Metrics payloads from one configured application should have one set of resource metrics, but in general this may be 0+" + assert all( + resource_metrics[0]["resource"] is not None for resource_metrics in all_resource_metrics + ), "Resource metrics from an application with configured resources should have a resource field, but in general is optional" + validate_resource_metrics(all_resource_metrics) + + # Assert scope metrics are present and valid + # Specific OTLP metric types not tested are: ExponentialHistogram (and by extension ExponentialHistogramDataPoint), Summary (and by extension SummaryDataPoint), Exemplar + assert len(filtered_individual_metrics) == len(self.expected_metrics), "Agent metrics should match expected" + validate_scope_metrics(all_scope_metrics) diff --git a/utils/_context/_scenarios/__init__.py b/utils/_context/_scenarios/__init__.py index cb4d370683a..4be40d41b22 100644 --- a/utils/_context/_scenarios/__init__.py +++ b/utils/_context/_scenarios/__init__.py @@ -555,9 +555,18 @@ class _Scenarios: # APM tracing end-to-end scenarios apm_tracing_e2e_otel = EndToEndScenario( "APM_TRACING_E2E_OTEL", - weblog_env={"DD_TRACE_OTEL_ENABLED": "true"}, + weblog_env={ + "DD_TRACE_OTEL_ENABLED": "true", + "DD_TRACE_OTEL_METRICS_ENABLED": "true", + "OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf", + "OTEL_EXPORTER_OTLP_ENDPOINT": f"http://proxy:{ProxyPorts.open_telemetry_weblog}", + "OTEL_EXPORTER_OTLP_HEADERS": "dd-protocol=otlp,dd-otlp-path=agent", + "OTEL_METRIC_EXPORT_INTERVAL": "1000", + "OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE": "delta", + }, backend_interface_timeout=5, require_api_key=True, + use_proxy_for_open_telemetry=True, doc="", ) apm_tracing_e2e_single_span = EndToEndScenario( @@ -574,7 +583,7 @@ class _Scenarios: ) otel_tracing_e2e = OpenTelemetryScenario("OTEL_TRACING_E2E", require_api_key=True, doc="") - otel_metric_e2e = OpenTelemetryScenario("OTEL_METRIC_E2E", require_api_key=True, doc="") + otel_metric_e2e = OpenTelemetryScenario("OTEL_METRIC_E2E", require_api_key=True, doc="", include_collector=False, include_intake=False) otel_log_e2e = OpenTelemetryScenario("OTEL_LOG_E2E", require_api_key=True, doc="") library_conf_custom_header_tags = EndToEndScenario( diff --git a/utils/_context/_scenarios/endtoend.py b/utils/_context/_scenarios/endtoend.py index 383e37ae4ec..03816ef6b4b 100644 --- a/utils/_context/_scenarios/endtoend.py +++ b/utils/_context/_scenarios/endtoend.py @@ -253,8 +253,10 @@ def __init__( additional_trace_header_tags: tuple[str, ...] = (), library_interface_timeout: int | None = None, agent_interface_timeout: int = 5, + open_telemetry_interface_timeout: int = 5, use_proxy_for_weblog: bool = True, use_proxy_for_agent: bool = True, + use_proxy_for_open_telemetry: bool = True, rc_api_enabled: bool = False, meta_structs_disabled: bool = False, span_events: bool = True, @@ -285,7 +287,7 @@ def __init__( github_workflow=github_workflow, scenario_groups=scenario_groups, enable_ipv6=enable_ipv6, - use_proxy=use_proxy_for_agent or use_proxy_for_weblog, + use_proxy=use_proxy_for_agent or use_proxy_for_weblog or use_proxy_for_open_telemetry, rc_api_enabled=rc_api_enabled, meta_structs_disabled=meta_structs_disabled, span_events=span_events, @@ -302,6 +304,7 @@ def __init__( self._use_proxy_for_agent = use_proxy_for_agent self._use_proxy_for_weblog = use_proxy_for_weblog + self._use_proxy_for_open_telemetry = use_proxy_for_open_telemetry self._require_api_key = require_api_key @@ -387,6 +390,7 @@ def __init__( self.agent_interface_timeout = agent_interface_timeout self.backend_interface_timeout = backend_interface_timeout + self.open_telemetry_interface_timeout = open_telemetry_interface_timeout self._library_interface_timeout = library_interface_timeout def configure(self, config: pytest.Config): @@ -408,6 +412,7 @@ def configure(self, config: pytest.Config): interfaces.library_dotnet_managed.configure(self.host_log_folder, replay=self.replay) interfaces.library_stdout.configure(self.host_log_folder, replay=self.replay) interfaces.agent_stdout.configure(self.host_log_folder, replay=self.replay) + interfaces.open_telemetry.configure(self.host_log_folder, replay=self.replay) for container in self.buddies: container.interface.configure(self.host_log_folder, replay=self.replay) @@ -450,7 +455,8 @@ def _get_weblog_system_info(self): def _start_interfaces_watchdog(self): super().start_interfaces_watchdog( - [interfaces.library, interfaces.agent] + [container.interface for container in self.buddies] + [interfaces.library, interfaces.agent, interfaces.open_telemetry] + + [container.interface for container in self.buddies] ) def _set_weblog_domain(self): @@ -506,6 +512,9 @@ def _wait_for_app_readiness(self): raise ValueError("Datadog agent not ready") logger.debug("Agent ready") + # Explicitly do not wait for the open telemetry interface to be ready. + # It's possible it is only invoked during the test run, and not during the warmup. + def post_setup(self, session: pytest.Session): # if no test are run, skip interface tomeouts is_empty_test_run = session.config.option.skip_empty_scenario and len(session.items) == 0 @@ -532,6 +541,9 @@ def _wait_and_stop_containers(self, *, force_interface_timout_to_zero: bool): interfaces.agent.load_data_from_logs() interfaces.agent.check_deserialization_errors() + interfaces.open_telemetry.load_data_from_logs() + interfaces.open_telemetry.check_deserialization_errors() + interfaces.backend.load_data_from_logs() else: @@ -570,6 +582,11 @@ def _wait_and_stop_containers(self, *, force_interface_timout_to_zero: bool): self._wait_interface( interfaces.backend, 0 if force_interface_timout_to_zero else self.backend_interface_timeout ) + self._wait_interface( + interfaces.open_telemetry, + 0 if force_interface_timout_to_zero else self.open_telemetry_interface_timeout, + ) + interfaces.open_telemetry.check_deserialization_errors() def _wait_interface(self, interface: ProxyBasedInterfaceValidator, timeout: int): logger.terminal.write_sep("-", f"Wait for {interface} ({timeout}s)") diff --git a/utils/_context/_scenarios/open_telemetry.py b/utils/_context/_scenarios/open_telemetry.py index 474fd5a51be..dbf9f5666ab 100644 --- a/utils/_context/_scenarios/open_telemetry.py +++ b/utils/_context/_scenarios/open_telemetry.py @@ -91,8 +91,13 @@ def configure(self, config: pytest.Config): self.weblog_container.environment["OTEL_SYSTEST_INCLUDE_AGENT"] = "True" interfaces.agent.configure(self.host_log_folder, replay=self.replay) + # interfaces.agent.configure(self.host_log_folder, replay=self.replay) + interfaces.library.configure(self.host_log_folder, replay=self.replay) interfaces.backend.configure(self.host_log_folder, replay=self.replay) + interfaces.library_dotnet_managed.configure(self.host_log_folder, replay=self.replay) + interfaces.library_stdout.configure(self.host_log_folder, replay=self.replay) interfaces.open_telemetry.configure(self.host_log_folder, replay=self.replay) + interfaces.agent_stdout.configure(self.host_log_folder, replay=self.replay) def _start_interface_watchdog(self): class Event(FileSystemEventHandler): diff --git a/utils/build/docker/dotnet/weblog/Endpoints/OtelDropInEndpoint.cs b/utils/build/docker/dotnet/weblog/Endpoints/OtelDropInEndpoint.cs index a26b950ec01..a6f34c2eaff 100644 --- a/utils/build/docker/dotnet/weblog/Endpoints/OtelDropInEndpoint.cs +++ b/utils/build/docker/dotnet/weblog/Endpoints/OtelDropInEndpoint.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Threading; using System.Text.Json; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Http; @@ -44,6 +45,20 @@ public void Register(Microsoft.AspNetCore.Routing.IEndpointRouteBuilder routeBui await context.Response.WriteAsync(JsonSerializer.Serialize(headersDict)); }); + + routeBuilder.MapGet("/basic/metric", async context => + { + OpenTelemetryInstrumentation.LongCounter.Add(11L, + new KeyValuePair("http.method", "GET"), new KeyValuePair("rid", "1234567890")); + OpenTelemetryInstrumentation.DoubleHistogram.Record(33L, + new KeyValuePair("http.method", "GET"), new KeyValuePair("rid", "1234567890")); + OpenTelemetryInstrumentation.LongUpDownCounter.Add(55L, + new KeyValuePair("http.method", "GET"), new KeyValuePair("rid", "1234567890")); + OpenTelemetryInstrumentation.DoubleGauge.Record(77L, + new KeyValuePair("http.method", "GET"), new KeyValuePair("rid", "1234567890")); + Thread.Sleep(2000); + await context.Response.WriteAsync("Hello World!"); + }); } } } diff --git a/utils/build/docker/dotnet/weblog/OpenTelemetryInstrumentation.cs b/utils/build/docker/dotnet/weblog/OpenTelemetryInstrumentation.cs index a2797162b5a..830756d9db5 100644 --- a/utils/build/docker/dotnet/weblog/OpenTelemetryInstrumentation.cs +++ b/utils/build/docker/dotnet/weblog/OpenTelemetryInstrumentation.cs @@ -1,10 +1,23 @@ using System.Diagnostics; +using System.Diagnostics.Metrics; using OpenTelemetry.Context.Propagation; namespace weblog { public static class OpenTelemetryInstrumentation { + internal const string ServiceName = "ApmTestApi"; + internal const string MeterName = "ApmTestApi"; + + private static readonly Meter _meter = new Meter(MeterName); + public static Counter LongCounter => _meter.CreateCounter("example.counter"); + public static Histogram DoubleHistogram => _meter.CreateHistogram("example.histogram"); + public static UpDownCounter LongUpDownCounter = _meter.CreateUpDownCounter("example.upDownCounter"); + public static Gauge DoubleGauge = _meter.CreateGauge("example.gauge"); + public static ObservableCounter AsyncLongCounter = _meter.CreateObservableCounter("example.async.counter", () => 22L); + public static ObservableUpDownCounter AsyncLongUpDownCounter = _meter.CreateObservableUpDownCounter("example.async.upDownCounter", () => 66L); + public static ObservableGauge AsyncGauge = _meter.CreateObservableGauge("example.async.gauge", () => 88L); + public static TextMapPropagator Propagator { get; } = Propagators.DefaultTextMapPropagator; } } diff --git a/utils/build/docker/dotnet/weblog/Startup.cs b/utils/build/docker/dotnet/weblog/Startup.cs index 3fef703b1ad..9fa565f71ae 100644 --- a/utils/build/docker/dotnet/weblog/Startup.cs +++ b/utils/build/docker/dotnet/weblog/Startup.cs @@ -8,6 +8,8 @@ using Serilog.Formatting.Compact; using weblog.IdentityStores; using weblog.ModelBinders; +using OpenTelemetry.Resources; +using OpenTelemetry.Metrics; namespace weblog { @@ -15,6 +17,21 @@ public class Startup { public void ConfigureServices(IServiceCollection services) { + services.AddOpenTelemetry() + .ConfigureResource(r => r.AddService(OpenTelemetryInstrumentation.ServiceName)) + .WithMetrics(builder => builder + .AddMeter(OpenTelemetryInstrumentation.MeterName) + .AddConsoleExporter() + .AddOtlpExporter((exporterOptions, metricReaderOptions) => + { + exporterOptions.Protocol = OpenTelemetry.Exporter.OtlpExportProtocol.HttpProtobuf; + exporterOptions.Endpoint = new("http://proxy:8127/v1/metrics"); + exporterOptions.Headers = "dd-protocol=otlp,dd-otlp-path=agent"; + + metricReaderOptions.PeriodicExportingMetricReaderOptions.ExportIntervalMilliseconds = 1; + metricReaderOptions.TemporalityPreference = MetricReaderTemporalityPreference.Delta; + })); + services.AddSerilog((services, lc) => lc .Enrich.FromLogContext() .WriteTo.Console(new CompactJsonFormatter())); diff --git a/utils/build/docker/dotnet/weblog/app.csproj b/utils/build/docker/dotnet/weblog/app.csproj index 8c6f4d36bae..33ad7e5d747 100644 --- a/utils/build/docker/dotnet/weblog/app.csproj +++ b/utils/build/docker/dotnet/weblog/app.csproj @@ -1,4 +1,4 @@ - + net8.0 Latest @@ -48,5 +48,11 @@ + + + + + + diff --git a/utils/build/docker/java_otel/spring-boot-native.Dockerfile b/utils/build/docker/java_otel/spring-boot-native.Dockerfile index 6c7bebfe3eb..cfdedc71024 100644 --- a/utils/build/docker/java_otel/spring-boot-native.Dockerfile +++ b/utils/build/docker/java_otel/spring-boot-native.Dockerfile @@ -10,7 +10,7 @@ WORKDIR /app COPY ./utils/build/docker/java_otel/spring-boot-native/pom.xml . COPY ./utils/build/docker/java_otel/spring-boot-native/src ./src -ENV OTEL_VERSION="1.26.0" +ENV OTEL_VERSION="1.38.0" # Compile application RUN mvn clean package diff --git a/utils/build/docker/java_otel/spring-boot-native/src/main/java/com/datadoghq/springbootnative/WebController.java b/utils/build/docker/java_otel/spring-boot-native/src/main/java/com/datadoghq/springbootnative/WebController.java index cf6fb79b2cf..c35d520a121 100644 --- a/utils/build/docker/java_otel/spring-boot-native/src/main/java/com/datadoghq/springbootnative/WebController.java +++ b/utils/build/docker/java_otel/spring-boot-native/src/main/java/com/datadoghq/springbootnative/WebController.java @@ -6,10 +6,14 @@ import io.opentelemetry.api.logs.GlobalLoggerProvider; import io.opentelemetry.api.logs.Logger; import io.opentelemetry.api.logs.Severity; +import io.opentelemetry.api.metrics.DoubleGauge; import io.opentelemetry.api.metrics.DoubleHistogram; import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongUpDownCounter; import io.opentelemetry.api.metrics.Meter; -import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; +import io.opentelemetry.api.metrics.ObservableDoubleGauge; +import io.opentelemetry.api.metrics.ObservableLongCounter; +import io.opentelemetry.api.metrics.ObservableLongUpDownCounter; import io.opentelemetry.api.trace.Span; import io.opentelemetry.api.trace.SpanContext; import io.opentelemetry.api.trace.SpanKind; @@ -36,6 +40,24 @@ public class WebController { private final Meter meter = GlobalOpenTelemetry.getMeter("com.datadoghq.springbootnative"); private final LongCounter counter = meter.counterBuilder("example.counter").build(); private final DoubleHistogram histogram = meter.histogramBuilder("example.histogram").build(); + private final LongUpDownCounter upDownCounter = meter.upDownCounterBuilder("example.upDownCounter").build(); + private final DoubleGauge gauge = meter.gaugeBuilder("example.gauge").build(); + private final ObservableLongCounter asyncCounter = meter.counterBuilder("example.async.counter").buildWithCallback( + measurement -> { + measurement.record(22L); + } + ); + private final ObservableLongUpDownCounter asyncUpDownCounter = meter.upDownCounterBuilder("example.async.upDownCounter").buildWithCallback( + measurement -> { + measurement.record(66L); + } + ); + private final ObservableDoubleGauge asyncGauge = meter.gaugeBuilder("example.async.gauge").buildWithCallback( + measurement -> { + measurement.record(88L); + } + ); + private final Logger customAppenderLogger = GlobalLoggerProvider.get().get("com.datadoghq.springbootnative"); @RequestMapping("/") @@ -110,6 +132,10 @@ private String basicMetric(@RequestHeader HttpHeaders headers) throws Interrupte Attributes.of(SemanticAttributes.HTTP_METHOD, "GET", AttributeKey.stringKey("rid"), rid)); histogram.record(33L, Attributes.of(SemanticAttributes.HTTP_METHOD, "GET", AttributeKey.stringKey("rid"), rid)); + upDownCounter.add(55L, + Attributes.of(SemanticAttributes.HTTP_METHOD, "GET", AttributeKey.stringKey("rid"), rid)); + gauge.set(77L, + Attributes.of(SemanticAttributes.HTTP_METHOD, "GET", AttributeKey.stringKey("rid"), rid)); Thread.sleep(2000); return "Hello World!"; } diff --git a/utils/build/docker/python/flask-poc.base.Dockerfile b/utils/build/docker/python/flask-poc.base.Dockerfile index df2f3afb6ec..8d2db745f70 100644 --- a/utils/build/docker/python/flask-poc.base.Dockerfile +++ b/utils/build/docker/python/flask-poc.base.Dockerfile @@ -26,6 +26,9 @@ RUN pip install 'flask[async]'==2.2.4 flask-login==0.6.3 gunicorn==21.2.0 gevent RUN pip install boto3==1.34.141 kombu==5.3.7 mock==5.1.0 asyncpg==0.29.0 aiomysql==0.2.0 mysql-connector-python==9.0.0 mysqlclient==2.2.4 urllib3==1.26.19 +# Install dependencies for OpenTelemetry API support +RUN pip install opentelemetry-api==1.34.1 opentelemetry-exporter-otlp==1.34.1 + # Install Rust toolchain RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y ENV PATH="/root/.cargo/bin:$PATH" diff --git a/utils/build/docker/python/flask/app.py b/utils/build/docker/python/flask/app.py index c5a948022b8..f23c469e31f 100644 --- a/utils/build/docker/python/flask/app.py +++ b/utils/build/docker/python/flask/app.py @@ -15,6 +15,7 @@ import subprocess import sys import threading +import time import urllib.request import boto3 @@ -89,6 +90,12 @@ from ddtrace.data_streams import set_consume_checkpoint from ddtrace.data_streams import set_produce_checkpoint +from opentelemetry.metrics import ( + CallbackOptions, + Observation, + get_meter_provider, +) + from debugger_controller import debugger_blueprint from exception_replay_controller import exception_replay_blueprint @@ -144,6 +151,26 @@ MARIADB_CONFIG = dict(AIOMYSQL_CONFIG) MARIADB_CONFIG["collation"] = "utf8mb4_unicode_520_ci" +# Define OTel Metrics usage +def observable_counter_func(options: CallbackOptions): + yield Observation(22, {}) + +def observable_updown_counter_func(options: CallbackOptions): + yield Observation(66, {}) + +def observable_gauge_func(options: CallbackOptions): + yield Observation(88.0, {}) + +meter = get_meter_provider().get_meter("flask") + +counter = meter.create_counter("example.counter") +histogram = meter.create_histogram("example.histogram") +updown_counter = meter.create_up_down_counter("example.upDownCounter") +gauge = meter.create_gauge("example.gauge") + +async_counter = meter.create_observable_counter("example.async.counter", [observable_counter_func]) +async_updown_counter = meter.create_observable_up_down_counter("example.async.upDownCounter", [observable_updown_counter_func]) +async_gauge = meter.create_observable_gauge("example.async.gauge", [observable_gauge_func]) def main(): # IAST Flask patch @@ -1838,6 +1865,16 @@ def otel_drop_in_default_propagator_inject(): return jsonify(result) +@app.route("/basic/metric", methods=["GET"]) +def basic_metric(): + counter.add(11, {"http.method": "GET", "rid": "1234567890"}) + histogram.record(33.0, {"http.method": "GET", "rid": "1234567890"}) + updown_counter.add(55, {"http.method": "GET", "rid": "1234567890"}) + gauge.set(77.0, {"http.method": "GET", "rid": "1234567890"}) + # time.sleep(2) + return "Hello, World!\\n" + + @app.route("/inferred-proxy/span-creation", methods=["GET"]) def inferred_proxy_span_creation(): headers = flask_request.args.get("headers", {}) diff --git a/utils/interfaces/_open_telemetry.py b/utils/interfaces/_open_telemetry.py index 54c9381631c..78dcb6d6223 100644 --- a/utils/interfaces/_open_telemetry.py +++ b/utils/interfaces/_open_telemetry.py @@ -38,3 +38,13 @@ def get_otel_trace_id(self, request: HttpResponse): attr_val = attribute.get("value").get("stringValue") if attr_key == "http.request.headers.user-agent" and rid in attr_val: yield span.get("traceId") + + def get_metrics(self, host: str = ""): + """Attempts to fetch the metrics sent from the OTLP-generating application.""" + + for data in self.get_data(path_filters="/v1/metrics"): + if (not host) or (host == data["host"]): + if "resourceMetrics" not in data["request"]["content"]: + raise ValueError("resourceMetrics property is missing in metrics payload") + + yield data, data["request"]["content"]["resourceMetrics"]