From 8eb16271bd87e8734a0c975386f72a81dc72225d Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Thu, 6 Nov 2025 15:37:03 -0800 Subject: [PATCH 1/7] feat(autofix): Atomiticity check for starting automation runs --- src/sentry/tasks/post_process.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index d9180b45ac569a..cdeb4082bbc977 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1628,7 +1628,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: event = job["event"] group = event.group - # Only run on issues with no existing scan + # Only run on issues with no existing scan - TODO: Update condition for triage signals V0 if group.seer_fixability_score is not None: return @@ -1660,6 +1660,13 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: ): return + # Check if automation has already been queued or completed for this group + # seer_autofix_last_triggered is set when trigger_autofix is successfully started. + # Use cache with short TTL to hold lock for a short while since seer_autofix_last_triggered will be set within minutes + cache_key = f"seer_automation_queued:{group.id}" + if cache.get(cache_key) or group.seer_autofix_last_triggered is not None: + return + # Don't run if there's already a task in progress for this issue lock_key, lock_name = get_issue_summary_lock_key(group.id) lock = locks.get(lock_key, duration=1, name=lock_name) @@ -1684,6 +1691,10 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: if is_seer_scanner_rate_limited(project, group.organization): return + # Check cache again to prevent race condition since seer_autofix_last_triggered takes a few minutes to set + if not cache.add(cache_key, True, timeout=3600): # 1 hour + return + start_seer_automation.delay(group.id) From 3299ae37336a32cdd65e8e81aaa3e28dc125f692 Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Thu, 6 Nov 2025 15:42:06 -0800 Subject: [PATCH 2/7] comment update --- src/sentry/tasks/post_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index cdeb4082bbc977..65ae9a59876582 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1691,7 +1691,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: if is_seer_scanner_rate_limited(project, group.organization): return - # Check cache again to prevent race condition since seer_autofix_last_triggered takes a few minutes to set + # Check cache to prevent race condition since it takes a few minutes to set seer_autofix_last_triggered if not cache.add(cache_key, True, timeout=3600): # 1 hour return From 368e9173f26baec48056768e3779658012a097c8 Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Thu, 6 Nov 2025 15:42:22 -0800 Subject: [PATCH 3/7] comment update --- src/sentry/tasks/post_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index 65ae9a59876582..121e43573d01f3 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1662,7 +1662,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: # Check if automation has already been queued or completed for this group # seer_autofix_last_triggered is set when trigger_autofix is successfully started. - # Use cache with short TTL to hold lock for a short while since seer_autofix_last_triggered will be set within minutes + # Use cache with short TTL to hold lock for a short since it takes a few minutes to set seer_autofix_last_triggeredes cache_key = f"seer_automation_queued:{group.id}" if cache.get(cache_key) or group.seer_autofix_last_triggered is not None: return From a4560a92a2d8982ac0805877914424af50394422 Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Thu, 6 Nov 2025 15:49:43 -0800 Subject: [PATCH 4/7] comment update --- src/sentry/tasks/post_process.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index 121e43573d01f3..701fad2b25f73f 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1691,7 +1691,8 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: if is_seer_scanner_rate_limited(project, group.organization): return - # Check cache to prevent race condition since it takes a few minutes to set seer_autofix_last_triggered + # cache.add uses Redis SETNX which atomically sets the key only if it doesn't exist + # Returns False if another process already set the key, ensuring only one process proceeds if not cache.add(cache_key, True, timeout=3600): # 1 hour return From e89e626be5b63d2ff8b7af9d7212b1d93d5e7cc8 Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Fri, 7 Nov 2025 09:08:02 -0800 Subject: [PATCH 5/7] shorter time in cache and new unit tests --- src/sentry/tasks/post_process.py | 2 +- tests/sentry/tasks/test_post_process.py | 131 ++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index 701fad2b25f73f..ecd762287faadb 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1693,7 +1693,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: # cache.add uses Redis SETNX which atomically sets the key only if it doesn't exist # Returns False if another process already set the key, ensuring only one process proceeds - if not cache.add(cache_key, True, timeout=3600): # 1 hour + if not cache.add(cache_key, True, timeout=900): # 15 minutes return start_seer_automation.delay(group.id) diff --git a/tests/sentry/tasks/test_post_process.py b/tests/sentry/tasks/test_post_process.py index 57a52f56793862..6f70c83e1f19b8 100644 --- a/tests/sentry/tasks/test_post_process.py +++ b/tests/sentry/tasks/test_post_process.py @@ -3053,6 +3053,137 @@ def test_kick_off_seer_automation_with_hide_ai_features_enabled( mock_start_seer_automation.assert_not_called() + @patch( + "sentry.seer.seer_setup.get_seer_org_acknowledgement", + return_value=True, + ) + @patch("sentry.tasks.autofix.start_seer_automation.delay") + @with_feature("organizations:gen-ai-features") + def test_kick_off_seer_automation_skips_when_seer_autofix_last_triggered_set( + self, mock_start_seer_automation, mock_get_seer_org_acknowledgement + ): + """Test that automation is skipped when group.seer_autofix_last_triggered is already set""" + self.project.update_option("sentry:seer_scanner_automation", True) + event = self.create_event( + data={"message": "testing"}, + project_id=self.project.id, + ) + + # Set seer_autofix_last_triggered on the group to simulate autofix already triggered + group = event.group + group.seer_autofix_last_triggered = timezone.now() + group.save() + + self.call_post_process_group( + is_new=True, + is_regression=False, + is_new_group_environment=True, + event=event, + ) + + mock_start_seer_automation.assert_not_called() + + @patch( + "sentry.seer.seer_setup.get_seer_org_acknowledgement", + return_value=True, + ) + @patch("sentry.tasks.autofix.start_seer_automation.delay") + @with_feature("organizations:gen-ai-features") + def test_kick_off_seer_automation_skips_when_cache_key_exists( + self, mock_start_seer_automation, mock_get_seer_org_acknowledgement + ): + """Test that automation is skipped when cache key indicates it's already queued""" + self.project.update_option("sentry:seer_scanner_automation", True) + event = self.create_event( + data={"message": "testing"}, + project_id=self.project.id, + ) + + # Set cache key to simulate automation already queued + cache_key = f"seer_automation_queued:{event.group.id}" + cache.set(cache_key, True, timeout=900) + + self.call_post_process_group( + is_new=True, + is_regression=False, + is_new_group_environment=True, + event=event, + ) + + mock_start_seer_automation.assert_not_called() + + # Cleanup + cache.delete(cache_key) + + @patch( + "sentry.seer.seer_setup.get_seer_org_acknowledgement", + return_value=True, + ) + @patch("sentry.tasks.autofix.start_seer_automation.delay") + @with_feature("organizations:gen-ai-features") + def test_kick_off_seer_automation_uses_atomic_cache_add( + self, mock_start_seer_automation, mock_get_seer_org_acknowledgement + ): + """Test that cache.add atomic operation prevents race conditions""" + self.project.update_option("sentry:seer_scanner_automation", True) + event = self.create_event( + data={"message": "testing"}, + project_id=self.project.id, + ) + + cache_key = f"seer_automation_queued:{event.group.id}" + + with patch("sentry.tasks.post_process.cache") as mock_cache: + # Simulate cache.get returning None (not in cache) + # but cache.add returning False (another process set it) + mock_cache.get.return_value = None + mock_cache.add.return_value = False + + self.call_post_process_group( + is_new=True, + is_regression=False, + is_new_group_environment=True, + event=event, + ) + + # Should check cache but not call automation due to cache.add returning False + mock_cache.get.assert_called() + mock_cache.add.assert_called_once_with(cache_key, True, timeout=900) + mock_start_seer_automation.assert_not_called() + + @patch( + "sentry.seer.seer_setup.get_seer_org_acknowledgement", + return_value=True, + ) + @patch("sentry.tasks.autofix.start_seer_automation.delay") + @with_feature("organizations:gen-ai-features") + def test_kick_off_seer_automation_proceeds_when_cache_add_succeeds( + self, mock_start_seer_automation, mock_get_seer_org_acknowledgement + ): + """Test that automation proceeds when cache.add succeeds (no race condition)""" + self.project.update_option("sentry:seer_scanner_automation", True) + event = self.create_event( + data={"message": "testing"}, + project_id=self.project.id, + ) + + # Ensure seer_autofix_last_triggered is not set + assert event.group.seer_autofix_last_triggered is None + + self.call_post_process_group( + is_new=True, + is_regression=False, + is_new_group_environment=True, + event=event, + ) + + # Should successfully queue automation + mock_start_seer_automation.assert_called_once_with(event.group.id) + + # Cleanup + cache_key = f"seer_automation_queued:{event.group.id}" + cache.delete(cache_key) + class PostProcessGroupErrorTest( TestCase, From 5a5a25ba507a2eb4a6e787a406a3eaf0bb511d5a Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Fri, 7 Nov 2025 11:35:29 -0800 Subject: [PATCH 6/7] shorter time in cache 10 min --- src/sentry/tasks/post_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/tasks/post_process.py b/src/sentry/tasks/post_process.py index ecd762287faadb..cc64c249c51892 100644 --- a/src/sentry/tasks/post_process.py +++ b/src/sentry/tasks/post_process.py @@ -1693,7 +1693,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None: # cache.add uses Redis SETNX which atomically sets the key only if it doesn't exist # Returns False if another process already set the key, ensuring only one process proceeds - if not cache.add(cache_key, True, timeout=900): # 15 minutes + if not cache.add(cache_key, True, timeout=600): # 10 minute return start_seer_automation.delay(group.id) From 42b39c03f55850afd8180c3588fc894cffb5bd4b Mon Sep 17 00:00:00 2001 From: Mihir Mavalankar Date: Fri, 7 Nov 2025 12:01:15 -0800 Subject: [PATCH 7/7] fix test --- tests/sentry/tasks/test_post_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sentry/tasks/test_post_process.py b/tests/sentry/tasks/test_post_process.py index 6f70c83e1f19b8..357424102c6e68 100644 --- a/tests/sentry/tasks/test_post_process.py +++ b/tests/sentry/tasks/test_post_process.py @@ -3101,7 +3101,7 @@ def test_kick_off_seer_automation_skips_when_cache_key_exists( # Set cache key to simulate automation already queued cache_key = f"seer_automation_queued:{event.group.id}" - cache.set(cache_key, True, timeout=900) + cache.set(cache_key, True, timeout=600) self.call_post_process_group( is_new=True, @@ -3148,7 +3148,7 @@ def test_kick_off_seer_automation_uses_atomic_cache_add( # Should check cache but not call automation due to cache.add returning False mock_cache.get.assert_called() - mock_cache.add.assert_called_once_with(cache_key, True, timeout=900) + mock_cache.add.assert_called_once_with(cache_key, True, timeout=600) mock_start_seer_automation.assert_not_called() @patch(