Skip to content

Commit e23a773

Browse files
authored
feat(bug-prediction): Allow exception type string format to be flexible (#99617)
Make the exception-type input of the issue-lookup query case-insensitive and make it ignore non-alphanumeric characters. For example, querying "ReactErrorBoundaryError" previously did not match issues whose type is stored in the DB as "React Error Boundary Error". ### Legal Boilerplate Look, I get it. The entity doing business as "Sentry" was incorporated in the State of Delaware in 2015 as Functional Software, Inc. and is gonna need some rights from me in order to utilize my contributions in this here PR. So here's the deal: I retain all rights, title and interest in and to my contributions, and by keeping this boilerplate intact I confirm that Sentry can use, modify, copy, and redistribute my contributions, under Sentry's choice of terms.
1 parent f938cd4 commit e23a773

File tree

2 files changed

+184
-2
lines changed

2 files changed

+184
-2
lines changed

src/sentry/seer/fetch_issues/by_error_type.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,23 @@ def _fetch_issues_from_repo_projects(
1414
) -> list[Group]:
1515
project_ids = [project.id for project in repo_projects.projects]
1616
date_threshold = datetime.now(tz=UTC) - timedelta(days=num_days_ago)
17+
18+
# Normalize the search term: drop every character that is not an ASCII letter or digit and uppercase the rest
19+
# This matches the SQL regex [^a-zA-Z0-9] which only keeps ASCII alphanumeric characters
20+
normalized_exception_type = "".join(
21+
c.upper() for c in exception_type if c.isascii() and c.isalnum()
22+
)
23+
1724
# Using raw SQL since data is LegacyTextJSONField which can't be filtered with Django ORM
1825
query_set = (
19-
Group.objects.annotate(metadata_type=RawSQL("(data::json -> 'metadata' ->> 'type')", []))
26+
Group.objects.annotate(
27+
metadata_type=RawSQL(
28+
"UPPER(REGEXP_REPLACE(data::json -> 'metadata' ->> 'type', '[^a-zA-Z0-9]', '', 'g'))",
29+
[],
30+
)
31+
)
2032
.filter(
21-
metadata_type=exception_type,
33+
metadata_type=normalized_exception_type,
2234
project_id__in=project_ids,
2335
last_seen__gte=date_threshold,
2436
)

tests/sentry/seer/fetch_issues/test_by_error_type.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,3 +367,173 @@ def test_fetch_issues_from_repo_projects_empty_result(self) -> None:
367367
# Verify it returns an empty list
368368
assert isinstance(results, list)
369369
assert len(results) == 0
370+
371+
def _setup_test_environment(
    self, exception_type: str, exception_value: str = "Test error"
) -> Group:
    """Create a release, repo, code mapping and one stored event; return the event's group.

    The stored event carries a single exception entry whose ``type`` and
    ``value`` are taken from the arguments.
    """
    release = self.create_release(project=self.project, version="1.0.0")
    repo = self.create_repo(
        project=self.project,
        name="getsentry/sentryA",
        provider="integrations:github",
        external_id="1",
    )
    self.create_code_mapping(project=self.project, repo=repo)

    # Start from the stock "python" sample event and override release + exception.
    base_payload = load_data("python", timestamp=before_now(minutes=1))
    exception_entry = {
        "type": exception_type,
        "value": exception_value,
        "data": {"values": []},
    }
    event_payload = {
        **base_payload,
        "release": release.version,
        "exception": {"values": [exception_entry]},
    }
    stored_event = self.store_event(data=event_payload, project_id=self.project.id)

    created_group = stored_event.group
    assert created_group is not None
    created_group.save()
    return created_group
401+
402+
def _assert_exception_type_matches(
    self, search_exception_type: str, expected_group: Group
) -> None:
    """Assert that looking up ``search_exception_type`` yields exactly ``expected_group``."""
    org_id = self.organization.id
    response = fetch_issues(
        organization_id=org_id,
        provider="integrations:github",
        external_id="1",
        exception_type=search_exception_type,
    )

    # Exactly one issue must come back, and it must be the expected group.
    assert response["issues"] == [expected_group.id]
    assert len(response["issues_full"]) == 1
414+
415+
def _test_exception_type_variants(
    self, stored_exception_type: str, search_variants: list[str]
) -> None:
    """Store one group with ``stored_exception_type`` and check that every variant finds it."""
    stored_group = self._setup_test_environment(stored_exception_type)

    # Each variant runs in its own subTest, labelled with the variant being searched.
    for variant in search_variants:
        with self.subTest(search_exception_type=variant):
            self._assert_exception_type_matches(variant, stored_group)
424+
425+
def test_case_insensitive_matching(self) -> None:
    """Searching with any letter casing finds the group stored as "TypeError"."""
    self._test_exception_type_variants(
        "TypeError",
        [
            "TypeError",
            "typeerror",
            "TYPEERROR",
            "TypeERROR",
            "tYpEeRrOr",
        ],
    )
429+
430+
def test_normalized_matching_spaces(self) -> None:
    """Spaces, underscores, hyphens and casing are ignored when matching "Runtime Error"."""
    stored_type = "Runtime Error"
    spellings = [
        "Runtime Error",
        "RuntimeError",
        "runtime error",
        "runtimeerror",
        "RUNTIME ERROR",
        "RUNTIMEERROR",
        "runtime_error",
        "runtime-error",
    ]
    self._test_exception_type_variants(stored_type, spellings)
443+
444+
def test_normalized_matching_special_characters(self) -> None:
    """Test that exception type matching normalizes various special characters.

    The group is stored as "HTTP-404-Error"; every variant below differs only
    in separators or letter casing and must therefore resolve to that group.
    """
    search_variants = [
        "HTTP-404-Error",
        "HTTP 404 Error",
        "HTTP_404_Error",
        "HTTP.404.Error",
        "HTTP404Error",
        "http404error",
        # Runs of separators must collapse too. This entry previously contained
        # single spaces only, which made it a duplicate of "HTTP 404 Error".
        "HTTP  404  Error",  # multiple spaces
        "HTTP__404__Error",  # multiple underscores
    ]
    self._test_exception_type_variants("HTTP-404-Error", search_variants)
457+
458+
def test_normalized_matching_multiple_groups(self) -> None:
    """Normalized lookups pick the right group when several exception types are stored."""
    release = self.create_release(project=self.project, version="1.0.0")
    repo = self.create_repo(
        project=self.project,
        name="getsentry/sentryA",
        provider="integrations:github",
        external_id="1",
    )
    self.create_code_mapping(project=self.project, repo=repo)

    def store_group(exc_type: str, exc_value: str, minutes_ago: int) -> Group:
        # Store one event with a single exception entry and return its group.
        payload = load_data("python", timestamp=before_now(minutes=minutes_ago))
        event = self.store_event(
            data={
                **payload,
                "release": release.version,
                "exception": {
                    "values": [{"type": exc_type, "value": exc_value, "data": {"values": []}}]
                },
            },
            project_id=self.project.id,
        )
        group = event.group
        assert group is not None
        group.save()
        return group

    value_error_group = store_group("Value Error", "Bad value", 1)
    type_error_group = store_group("Type-Error", "Bad type", 2)

    # "valueerror" must match only the first group, "type error" only the second.
    self._assert_exception_type_matches("valueerror", value_error_group)
    self._assert_exception_type_matches("type error", type_error_group)

    # "runtimeerror" matches neither stored group.
    no_match_response = fetch_issues(
        organization_id=self.organization.id,
        provider="integrations:github",
        external_id="1",
        exception_type="runtimeerror",
    )
    assert no_match_response == {"issues": [], "issues_full": []}
531+
532+
def test_unicode_normalization_consistency(self) -> None:
    """Non-ASCII characters are ignored on both the stored and the searched side."""
    self._test_exception_type_variants(
        "ValueError测试",
        [
            "ValueError测试",  # identical to the stored value
            "ValueError",  # ASCII portion only
            "ValueError测试αβ",  # extra non-ASCII characters, same ASCII portion
        ],
    )

0 commit comments

Comments
 (0)