Skip to content

Commit 47931d5

Browse files
authored
ref(replay): move summary log parsing to a seer rpc (#99547)
1 parent c1ce8db commit 47931d5

File tree

6 files changed

+539
-11
lines changed

6 files changed

+539
-11
lines changed

src/sentry/features/temporary.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
348348
manager.add("organizations:release-comparison-performance", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
349349
# Enable replay AI summaries
350350
manager.add("organizations:replay-ai-summaries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
351+
# Enable replay summary log parsing via Seer RPC
352+
manager.add("organizations:replay-ai-summaries-rpc", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE)
351353
# Enable replay list selection
352354
manager.add("organizations:replay-list-select", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
353355
# Enable version 2 of release serializer

src/sentry/replays/blueprints/api.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -581,13 +581,7 @@ A POST request is issued with no body. The URL and authorization context is used
581581

582582
- Response 204
583583

584-
## Replay Summarize [/projects/<organization_id_or_slug>/<project_id_or_slug>/replays/<replay_id>/summarize/]
585-
586-
- Parameters
587-
- start (optional, string) - ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`).
588-
- end (optional, string) - ISO 8601 format. Required if `start` is set.
589-
590-
`start` and `end` default to the last 90 days. If the replay is not found in the specified time range, this endpoint will 404.
584+
## Replay Summary [/projects/<organization_id_or_slug>/<project_id_or_slug>/replays/<replay_id>/summarize/]
591585

592586
### Fetch Replay Summary Task State [GET]
593587

src/sentry/replays/endpoints/project_replay_summary.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from sentry.api.api_publish_status import ApiPublishStatus
1313
from sentry.api.base import region_silo_endpoint
1414
from sentry.api.bases.project import ProjectEndpoint, ProjectPermission
15+
from sentry.api.utils import default_start_end_dates
1516
from sentry.models.project import Project
1617
from sentry.replays.lib.seer_api import seer_summarization_connection_pool
1718
from sentry.replays.lib.storage import storage
@@ -178,6 +179,37 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response:
178179
)
179180
num_segments = MAX_SEGMENTS_TO_SUMMARIZE
180181

182+
if features.has(
183+
"organizations:replay-ai-summaries-rpc", project.organization, actor=request.user
184+
):
185+
start, end = default_start_end_dates()
186+
snuba_response = query_replay_instance(
187+
project_id=project.id,
188+
replay_id=replay_id,
189+
start=start,
190+
end=end,
191+
organization=project.organization,
192+
request_user_id=request.user.id,
193+
)
194+
if not snuba_response:
195+
return self.respond(
196+
{"detail": "Replay not found."},
197+
status=404,
198+
)
199+
200+
return self.make_seer_request(
201+
SEER_START_TASK_ENDPOINT_PATH,
202+
{
203+
"logs": [],
204+
"use_rpc": True,
205+
"num_segments": num_segments,
206+
"replay_id": replay_id,
207+
"organization_id": project.organization.id,
208+
"project_id": project.id,
209+
"temperature": temperature,
210+
},
211+
)
212+
181213
# Fetch the replay's error and trace IDs from the replay_id.
182214
snuba_response = query_replay_instance(
183215
project_id=project.id,

src/sentry/replays/usecases/summarize.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,22 @@
77
import sentry_sdk
88

99
from sentry import nodestore
10+
from sentry.api.utils import default_start_end_dates
1011
from sentry.constants import ObjectStatus
1112
from sentry.issues.grouptype import FeedbackGroup
1213
from sentry.models.project import Project
13-
from sentry.replays.query import query_trace_connected_events
14+
from sentry.replays.post_process import process_raw_response
15+
from sentry.replays.query import query_replay_instance, query_trace_connected_events
1416
from sentry.replays.usecases.ingest.event_parser import EventType
1517
from sentry.replays.usecases.ingest.event_parser import (
1618
get_timestamp_ms as get_replay_event_timestamp_ms,
1719
)
1820
from sentry.replays.usecases.ingest.event_parser import parse_network_content_lengths, which
21+
from sentry.replays.usecases.reader import fetch_segments_metadata, iter_segment_data
1922
from sentry.search.events.types import SnubaParams
2023
from sentry.services.eventstore.models import Event
2124
from sentry.snuba.referrer import Referrer
22-
from sentry.utils import json
25+
from sentry.utils import json, metrics
2326

2427
logger = logging.getLogger(__name__)
2528

@@ -449,3 +452,78 @@ def _parse_url(s: str, trunc_length: int) -> str:
449452
if len(s) > trunc_length:
450453
return s[:trunc_length] + " [truncated]"
451454
return s
455+
456+
457+
def rpc_get_replay_summary_logs(
    project_id: int, replay_id: str, num_segments: int
) -> dict[str, Any]:
    """
    Seer RPC handler: build the summary logs for a single replay.

    Looks up the replay row, gathers the errors linked to it (directly and through
    shared traces), reads the replay's segment data, and passes everything to
    `get_summary_logs`.

    :param project_id: ID of the project that owns the replay. Loaded with
        `Project.objects.get`, so a missing project raises from that call.
    :param replay_id: ID of the replay to summarize.
    :param num_segments: forwarded as the last argument of `fetch_segments_metadata`
        (presumably the maximum number of segments to read -- confirm against that helper).
    :return: `{"logs": [...]}`. The list is empty when the replay lookup returns nothing.
    """
    project = Project.objects.get(id=project_id)

    # /summarize/ does not accept date filters, so the default window (last 90 days) is used.
    start, end = default_start_end_dates()

    # Look up the replay so we can read its error and trace IDs.
    replay_rows = process_raw_response(
        query_replay_instance(
            project_id=project.id,
            replay_id=replay_id,
            start=start,
            end=end,
            organization=project.organization,
            # Only feeds the viewed_by_me field, which summaries never read.
            request_user_id=None,
        ),
        fields=[],  # An empty list selects every field.
    )

    # The originating Sentry endpoint is responsible for returning a 404.
    # From the RPC, a missing replay simply produces an empty log list.
    if not replay_rows:
        return {"logs": []}

    replay = replay_rows[0]
    error_ids = replay.get("error_ids", [])
    trace_ids = replay.get("trace_ids", [])

    # Errors that share a trace with the replay.
    trace_connected_errors = fetch_trace_connected_errors(
        project=project,
        trace_ids=trace_ids,
        start=start,
        end=end,
        limit=100,
    )
    already_fetched_ids = {error["id"] for error in trace_connected_errors}

    # Directly linked errors, skipping any the trace query already returned.
    direct_errors = fetch_error_details(
        project_id=project.id,
        error_ids=[
            error_id for error_id in error_ids if error_id not in already_fetched_ids
        ],
    )

    error_events = direct_errors + trace_connected_errors

    # The metric names still carry the endpoint prefix for backwards compatibility.
    for metric_name, count in (
        ("replays.endpoints.project_replay_summary.direct_errors", len(direct_errors)),
        (
            "replays.endpoints.project_replay_summary.trace_connected_errors",
            len(trace_connected_errors),
        ),
        ("replays.endpoints.project_replay_summary.num_trace_ids", len(trace_ids)),
    ):
        metrics.distribution(metric_name, value=count)

    # Read the replay's segment data.
    segment_data = iter_segment_data(
        fetch_segments_metadata(project.id, replay_id, 0, num_segments)
    )

    # Merge the replay and error data into summary logs.
    return {"logs": get_summary_logs(segment_data, error_events, project.id)}

src/sentry/seer/endpoints/seer_rpc.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from sentry.integrations.types import IntegrationProviderSlug
5656
from sentry.models.organization import Organization, OrganizationStatus
5757
from sentry.models.repository import Repository
58+
from sentry.replays.usecases.summarize import rpc_get_replay_summary_logs
5859
from sentry.search.eap.resolver import SearchResolver
5960
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
6061
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
@@ -931,6 +932,9 @@ def send_seer_webhook(*, event_name: str, organization_id: int, payload: dict) -
931932
"get_trace_for_transaction": rpc_get_trace_for_transaction,
932933
"get_profiles_for_trace": rpc_get_profiles_for_trace,
933934
"get_issues_for_transaction": rpc_get_issues_for_transaction,
935+
#
936+
# Replays
937+
"get_replay_summary_logs": rpc_get_replay_summary_logs,
934938
}
935939

936940

0 commit comments

Comments
 (0)