Merged
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
@@ -346,6 +346,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:release-comparison-performance", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable replay AI summaries
manager.add("organizations:replay-ai-summaries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable replay summary log parsing via Seer RPC
manager.add("organizations:replay-ai-summaries-rpc", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE)
# Enable replay list selection
manager.add("organizations:replay-list-select", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable version 2 of release serializer
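For context, the new `organizations:replay-ai-summaries-rpc` flag is consumed with the standard `features.has` pattern. A minimal sketch, mirroring the check added in `project_replay_summary.py` later in this PR (the helper name is hypothetical):

```python
from sentry import features


def use_rpc_summary_path(project, user) -> bool:
    # Gate the Seer RPC log-parsing path on the temporary flag registered above.
    return features.has(
        "organizations:replay-ai-summaries-rpc", project.organization, actor=user
    )
```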
8 changes: 1 addition & 7 deletions src/sentry/replays/blueprints/api.md
@@ -581,13 +581,7 @@ A POST request is issued with no body. The URL and authorization context is used

- Response 204

## Replay Summarize [/projects/<organization_id_or_slug>/<project_id_or_slug>/replays/<replay_id>/summarize/]

- Parameters
- start (optional, string) - ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`).
- end (optional, string) - ISO 8601 format. Required if `start` is set.

`start` and `end` default to the last 90 days. If the replay is not found in the specified time range, this endpoint will 404.
## Replay Summary [/projects/<organization_id_or_slug>/<project_id_or_slug>/replays/<replay_id>/summarize/]

### Fetch Replay Summary Task State [GET]

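For reference, a hedged sketch of how a client might exercise this endpoint; the host, org/project slugs, replay ID, and token are placeholders, and the POST-with-empty-body plus GET-task-state behavior follows the blueprint and endpoint code in this PR:

```python
import requests

# Placeholder values for illustration only.
BASE_URL = "https://sentry.example.com/api/0"
url = f"{BASE_URL}/projects/my-org/my-project/replays/36b76ae6c9d84520b50e5d5b6e5a3c51/summarize/"
headers = {"Authorization": "Bearer <auth-token>"}

# Start a summary task: a POST with no body; the URL and auth context carry everything needed.
start_response = requests.post(url, headers=headers)

# Fetch the summary task state.
state_response = requests.get(url, headers=headers)
print(state_response.status_code)
print(state_response.json() if state_response.ok else state_response.text)
```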
36 changes: 34 additions & 2 deletions src/sentry/replays/endpoints/project_replay_summary.py
@@ -12,6 +12,7 @@
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.project import ProjectEndpoint, ProjectPermission
from sentry.api.utils import default_start_end_dates
from sentry.models.project import Project
from sentry.net.http import connection_from_url
from sentry.replays.lib.storage import storage
@@ -21,7 +22,7 @@
from sentry.replays.usecases.summarize import (
fetch_error_details,
fetch_trace_connected_errors,
get_summary_logs,
get_summary_logs_from_segments,
)
from sentry.seer.seer_setup import has_seer_access
from sentry.seer.signed_seer_api import make_signed_seer_api_request
@@ -196,6 +197,37 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response:
)
num_segments = MAX_SEGMENTS_TO_SUMMARIZE

if features.has(
"organizations:replay-ai-summaries-rpc", project.organization, actor=request.user
):
start, end = default_start_end_dates()
> **Member Author:** Deciding not to support date params; imo it's not necessary. We don't pass these in from the frontend atm.

snuba_response = query_replay_instance(

> **Member:** Isn't the `query_replay_instance` call in the other (summarize.py) file already?

> **Member:** I guess I'm confused why we have this call and also the other one.

> **Member Author:** I wanted to 404 early if the replay's missing, instead of making a Seer task + DB entry.

> **Member Author:** Colton is working on a more efficient existence check, but hope this will do until then. Can test with the FF.

> **Member:** Ohhh ok, makes sense.

project_id=project.id,
replay_id=replay_id,
start=start,
end=end,
organization=project.organization,
request_user_id=request.user.id,
)
if not snuba_response:
return self.respond(
{"detail": "Replay not found."},
status=404,
)

return self.make_seer_request(
SEER_START_TASK_ENDPOINT_PATH,
{
"logs": [],
"use_rpc": True,

> **Member Author:** Used to toggle code paths in Seer (default: false).

"num_segments": num_segments,
"replay_id": replay_id,
"organization_id": project.organization.id,
"project_id": project.id,
"temperature": temperature,
},
)

# Fetch the replay's error and trace IDs from the replay_id.
snuba_response = query_replay_instance(
project_id=project.id,
@@ -256,7 +288,7 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response:
segment_data = iter_segment_data(segment_md)

# Combine replay and error data and parse into logs.
logs = get_summary_logs(segment_data, error_events, project.id)
logs = get_summary_logs_from_segments(segment_data, error_events, project.id)

# Post to Seer to start a summary task.
# XXX: Request isn't streaming. Limitation of Seer authentication. Would be much faster if we
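To summarize the branch added above: when the RPC flag is enabled, the endpoint skips segment parsing and ships an empty `logs` list, and Seer later fetches the logs over the signed RPC. A rough sketch of the start-task payload on that path, with field names taken from the diff and illustrative values:

```python
# Payload POSTed to Seer's start-task endpoint on the RPC path (values are examples).
rpc_payload = {
    "logs": [],          # left empty; Seer fetches logs later via "get_replay_summary_logs"
    "use_rpc": True,     # tells Seer to take the RPC code path (defaults to false)
    "num_segments": 42,
    "replay_id": "36b76ae6c9d84520b50e5d5b6e5a3c51",
    "organization_id": 1,
    "project_id": 1,
    "temperature": 0.3,
}
# On the legacy path the same request instead carries the logs parsed in Sentry
# by get_summary_logs_from_segments.
```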
85 changes: 80 additions & 5 deletions src/sentry/replays/usecases/summarize.py
@@ -7,19 +7,22 @@
import sentry_sdk

from sentry import nodestore
from sentry.api.utils import default_start_end_dates
from sentry.constants import ObjectStatus
from sentry.issues.grouptype import FeedbackGroup
from sentry.models.project import Project
from sentry.replays.query import query_trace_connected_events
from sentry.replays.post_process import process_raw_response
from sentry.replays.query import query_replay_instance, query_trace_connected_events
from sentry.replays.usecases.ingest.event_parser import EventType
from sentry.replays.usecases.ingest.event_parser import (
get_timestamp_ms as get_replay_event_timestamp_ms,
)
from sentry.replays.usecases.ingest.event_parser import parse_network_content_lengths, which
from sentry.replays.usecases.reader import fetch_segments_metadata, iter_segment_data
from sentry.search.events.types import SnubaParams
from sentry.services.eventstore.models import Event
from sentry.snuba.referrer import Referrer
from sentry.utils import json
from sentry.utils import json, metrics

logger = logging.getLogger(__name__)

@@ -235,17 +238,17 @@ def generate_feedback_log_message(feedback: EventDict) -> str:


@sentry_sdk.trace
def get_summary_logs(
def get_summary_logs_from_segments(
segment_data: Iterator[tuple[int, memoryview]],
error_events: list[EventDict],
project_id: int,
) -> list[str]:
# Sort error events by timestamp. This list includes all feedback events still.
error_events.sort(key=lambda x: x["timestamp"])
return list(generate_summary_logs(segment_data, error_events, project_id))
return list(generate_summary_logs_from_segments(segment_data, error_events, project_id))


def generate_summary_logs(
def generate_summary_logs_from_segments(
segment_data: Iterator[tuple[int, memoryview]],
error_events: list[EventDict],
project_id,
@@ -446,3 +449,75 @@ def _parse_url(s: str, trunc_length: int) -> str:
if len(s) > trunc_length:
return s[:trunc_length] + " [truncated]"
return s


def get_replay_summary_logs(project_id: int, replay_id: str, num_segments: int) -> list[str]:
"""
Downloads a replay's segment data, queries associated errors, and parses this into summary logs.
"""

project = Project.objects.get(id=project_id)
# Last 90 days. We don't support date filters in /summarize/.
start, end = default_start_end_dates()

# Fetch the replay's error and trace IDs from the replay_id.
snuba_response = query_replay_instance(
project_id=project.id,
replay_id=replay_id,
start=start,
end=end,
organization=project.organization,
request_user_id=None, # This is for the viewed_by_me field which is unused for summaries.
)
processed_response = process_raw_response(
snuba_response,
fields=[], # Defaults to all fields.
)

# 404s are handled in the originating Sentry /summarize/ endpoint.
if not processed_response:
return []

error_ids = processed_response[0].get("error_ids", [])
trace_ids = processed_response[0].get("trace_ids", [])

# Fetch same-trace errors.
trace_connected_errors = fetch_trace_connected_errors(
project=project,
trace_ids=trace_ids,
start=start,
end=end,
limit=100,
)
trace_connected_error_ids = {x["id"] for x in trace_connected_errors}

# Fetch directly linked errors, if they weren't returned by the trace query.
replay_errors = fetch_error_details(
project_id=project.id,
error_ids=[x for x in error_ids if x not in trace_connected_error_ids],
)

error_events = replay_errors + trace_connected_errors

metrics.distribution(
"replays.endpoints.project_replay_summary.direct_errors",
value=len(replay_errors),
)
metrics.distribution(
"replays.endpoints.project_replay_summary.trace_connected_errors",
value=len(trace_connected_errors),
)
metrics.distribution(
"replays.endpoints.project_replay_summary.num_trace_ids",
value=len(trace_ids),
)

# Download segment data.
# XXX: For now this is capped to 100 and blocking. DD shows no replays with >25 segments, but we should still stress test and figure out how to deal with large replays.
segment_md = fetch_segments_metadata(project.id, replay_id, 0, num_segments)
segment_data = iter_segment_data(segment_md)

# Combine replay and error data and parse into logs.
logs = get_summary_logs_from_segments(segment_data, error_events, project.id)

return logs
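A minimal usage sketch of the new helper, as Seer would presumably reach it through the RPC registration in seer_rpc.py below; argument values are illustrative:

```python
from sentry.replays.usecases.summarize import get_replay_summary_logs

# Seer invokes this by name ("get_replay_summary_logs") over the signed RPC endpoint.
logs = get_replay_summary_logs(
    project_id=1,
    replay_id="36b76ae6c9d84520b50e5d5b6e5a3c51",
    num_segments=42,
)
# Returns [] when the replay isn't found; the originating /summarize/ endpoint handles the 404.
```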
4 changes: 4 additions & 0 deletions src/sentry/seer/endpoints/seer_rpc.py
@@ -55,6 +55,7 @@
from sentry.integrations.types import IntegrationProviderSlug
from sentry.models.organization import Organization, OrganizationStatus
from sentry.models.repository import Repository
from sentry.replays.usecases.summarize import get_replay_summary_logs
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
@@ -931,6 +932,9 @@ def send_seer_webhook(*, event_name: str, organization_id: int, payload: dict) -
"get_trace_for_transaction": rpc_get_trace_for_transaction,
"get_profiles_for_trace": rpc_get_profiles_for_trace,
"get_issues_for_transaction": rpc_get_issues_for_transaction,
#
# Replays
"get_replay_summary_logs": get_replay_summary_logs,
}


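The mapping above is what exposes the helper to Seer. A rough sketch of the dispatch pattern it implies; everything here other than `get_replay_summary_logs` is hypothetical and not the actual seer_rpc.py implementation:

```python
from sentry.replays.usecases.summarize import get_replay_summary_logs

# Hypothetical registry/dispatcher mirroring the mapping registered above.
SEER_RPC_METHODS = {
    "get_replay_summary_logs": get_replay_summary_logs,
}


def dispatch_seer_rpc(method_name: str, kwargs: dict):
    # Look up the named RPC method and call it with the request's keyword arguments.
    handler = SEER_RPC_METHODS.get(method_name)
    if handler is None:
        raise KeyError(f"Unknown Seer RPC method: {method_name}")
    return handler(**kwargs)


# e.g. dispatch_seer_rpc(
#     "get_replay_summary_logs",
#     {"project_id": 1, "replay_id": "36b76ae6c9d84520b50e5d5b6e5a3c51", "num_segments": 42},
# )
```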
6 changes: 3 additions & 3 deletions tests/sentry/replays/usecases/test_summarize.py
@@ -9,7 +9,7 @@
EventDict,
_parse_iso_timestamp_to_ms,
as_log_message,
get_summary_logs,
get_summary_logs_from_segments,
)
from sentry.utils import json

@@ -19,7 +19,7 @@


@patch("sentry.replays.usecases.summarize.fetch_feedback_details")
def test_get_summary_logs(mock_fetch_feedback_details: Mock) -> None:
def test_get_summary_logs_from_segments(mock_fetch_feedback_details: Mock) -> None:

def _mock_fetch_feedback(feedback_id: str | None, _project_id: int) -> EventDict | None:
if feedback_id == "12345678123456781234567812345678":
@@ -102,7 +102,7 @@ def _faker() -> Generator[tuple[int, memoryview]]:
),
]

result = get_summary_logs(_faker(), error_events=error_events, project_id=1)
result = get_summary_logs_from_segments(_faker(), error_events=error_events, project_id=1)

assert result == [
"User experienced an error: 'BadError: something else bad' at 1756400489849.0",