Skip to content

Commit b1cbaa1

Browse files
feat(triage signals): New kick_off_seer_automation flow [feature flagged] (#103676)
## PR Details + I had to revert my last fixability [PR](#103485). I am going to address it separately after this PR - made a ticket for that: https://linear.app/getsentry/issue/AIML-1650/address-fixability-issue-in-the-new-automation-flow + Also mentioned in the TODO comment + This PR just creates the new automation flow for triage signals behind a feature flag. + Reference diagram: https://miro.com/app/board/uXjVJqn1-fQ=/?focusWidget=3458764648325594380 + Also makes `run_automation` a public function now, since it is called directly from a task.
1 parent e7a2b37 commit b1cbaa1

File tree

5 files changed

+425
-71
lines changed

5 files changed

+425
-71
lines changed

src/sentry/seer/autofix/issue_summary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ def _is_issue_fixable(group: Group, fixability_score: float) -> bool:
290290
return False
291291

292292

293-
def _run_automation(
293+
def run_automation(
294294
group: Group,
295295
user: User | RpcUser | AnonymousUser,
296296
event: GroupEvent,
@@ -402,7 +402,7 @@ def _generate_summary(
402402

403403
if should_run_automation:
404404
try:
405-
_run_automation(group, user, event, source)
405+
run_automation(group, user, event, source)
406406
except Exception:
407407
logger.exception(
408408
"Error auto-triggering autofix from issue summary", extra={"group_id": group.id}

src/sentry/tasks/autofix.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ def check_autofix_status(run_id: int, organization_id: int) -> None:
3131

3232

3333
@instrumented_task(
34-
name="sentry.tasks.autofix.start_seer_automation",
34+
name="sentry.tasks.autofix.generate_summary_and_run_automation",
3535
namespace=ingest_errors_tasks,
3636
processing_deadline_duration=35,
3737
retry=Retry(times=1),
3838
)
39-
def start_seer_automation(group_id: int) -> None:
39+
def generate_summary_and_run_automation(group_id: int) -> None:
4040
from sentry.seer.autofix.issue_summary import get_issue_summary
4141

4242
group = Group.objects.get(id=group_id)
@@ -60,3 +60,33 @@ def generate_issue_summary_only(group_id: int) -> None:
6060
get_issue_summary(
6161
group=group, source=SeerAutomationSource.POST_PROCESS, should_run_automation=False
6262
)
63+
# TODO: Generate fixability score here and check for it in run_automation around line 316
64+
# That will make sure that even after adding fixability here it's not re-triggered.
65+
# Currently fixability will only be generated after 10 events when run_automation is called
66+
67+
68+
@instrumented_task(
69+
name="sentry.tasks.autofix.run_automation_only_task",
70+
namespace=ingest_errors_tasks,
71+
processing_deadline_duration=35,
72+
retry=Retry(times=1),
73+
)
74+
def run_automation_only_task(group_id: int) -> None:
75+
"""
76+
Run automation directly for a group (assumes summary and fixability already exist).
77+
Used for triage signals flow when event count >= 10 and summary exists.
78+
"""
79+
from django.contrib.auth.models import AnonymousUser
80+
81+
from sentry.seer.autofix.issue_summary import run_automation
82+
83+
group = Group.objects.get(id=group_id)
84+
event = group.get_latest_event()
85+
86+
if not event:
87+
logger.warning("run_automation_only_task.no_event_found", extra={"group_id": group_id})
88+
return
89+
90+
run_automation(
91+
group=group, user=AnonymousUser(), event=event, source=SeerAutomationSource.POST_PROCESS
92+
)

src/sentry/tasks/post_process.py

Lines changed: 82 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1595,33 +1595,100 @@ def check_if_flags_sent(job: PostProcessJob) -> None:
15951595

15961596

15971597
def kick_off_seer_automation(job: PostProcessJob) -> None:
1598-
from sentry.seer.autofix.issue_summary import get_issue_summary_lock_key
1598+
from sentry.seer.autofix.constants import AutofixAutomationTuningSettings
1599+
from sentry.seer.autofix.issue_summary import (
1600+
get_issue_summary_cache_key,
1601+
get_issue_summary_lock_key,
1602+
)
15991603
from sentry.seer.autofix.utils import (
16001604
is_issue_eligible_for_seer_automation,
16011605
is_seer_scanner_rate_limited,
16021606
)
1603-
from sentry.tasks.autofix import start_seer_automation
1607+
from sentry.tasks.autofix import (
1608+
generate_issue_summary_only,
1609+
generate_summary_and_run_automation,
1610+
run_automation_only_task,
1611+
)
16041612

16051613
event = job["event"]
16061614
group = event.group
16071615

1608-
# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
1609-
if group.seer_fixability_score is not None:
1610-
return
1616+
# Default behaviour
1617+
if not features.has("projects:triage-signals-v0", group.project):
1618+
# Only run on issues with no existing scan
1619+
if group.seer_fixability_score is not None:
1620+
return
16111621

1612-
if is_issue_eligible_for_seer_automation(group) is False:
1613-
return
1622+
if not is_issue_eligible_for_seer_automation(group):
1623+
return
16141624

1615-
# Don't run if there's already a task in progress for this issue
1616-
lock_key, lock_name = get_issue_summary_lock_key(group.id)
1617-
lock = locks.get(lock_key, duration=1, name=lock_name)
1618-
if lock.locked():
1619-
return
1625+
# Don't run if there's already a task in progress for this issue
1626+
lock_key, lock_name = get_issue_summary_lock_key(group.id)
1627+
lock = locks.get(lock_key, duration=1, name=lock_name)
1628+
if lock.locked():
1629+
return
16201630

1621-
if is_seer_scanner_rate_limited(group.project, group.organization):
1622-
return
1631+
if is_seer_scanner_rate_limited(group.project, group.organization):
1632+
return
1633+
1634+
generate_summary_and_run_automation.delay(group.id)
1635+
else:
1636+
# Triage signals V0 behaviour
1637+
1638+
# If event count < 10, only generate summary (no automation)
1639+
if group.times_seen_with_pending < 10:
1640+
# Check if summary exists in cache
1641+
cache_key = get_issue_summary_cache_key(group.id)
1642+
if cache.get(cache_key) is not None:
1643+
return
1644+
1645+
# Early returns for eligibility checks (cheap checks first)
1646+
if not is_issue_eligible_for_seer_automation(group):
1647+
return
1648+
1649+
# Atomically set cache to prevent duplicate summary generation
1650+
summary_dispatch_cache_key = f"seer-summary-dispatched:{group.id}"
1651+
if not cache.add(summary_dispatch_cache_key, True, timeout=30):
1652+
return # Another process already dispatched summary generation
1653+
1654+
# Rate limit check must be last, after cache.add succeeds, to avoid wasting quota
1655+
if is_seer_scanner_rate_limited(group.project, group.organization):
1656+
return
1657+
1658+
generate_issue_summary_only.delay(group.id)
1659+
else:
1660+
# Event count >= 10: run automation
1661+
# Long-term check to avoid re-running
1662+
if (
1663+
group.seer_autofix_last_triggered is not None
1664+
or group.seer_fixability_score
1665+
is not None # TODO: Remove this once fixability is generated with generate_issue_summary_only
1666+
or group.project.get_option("sentry:autofix_automation_tuning")
1667+
== AutofixAutomationTuningSettings.OFF
1668+
):
1669+
return
1670+
1671+
# Early returns for eligibility checks (cheap checks first)
1672+
if not is_issue_eligible_for_seer_automation(group):
1673+
return
16231674

1624-
start_seer_automation.delay(group.id)
1675+
# Atomically set cache to prevent duplicate dispatches (returns False if key exists)
1676+
automation_dispatch_cache_key = f"seer-automation-dispatched:{group.id}"
1677+
if not cache.add(automation_dispatch_cache_key, True, timeout=300):
1678+
return # Another process already dispatched automation
1679+
1680+
# Check if summary exists in cache
1681+
cache_key = get_issue_summary_cache_key(group.id)
1682+
if cache.get(cache_key) is not None:
1683+
# Summary exists, run automation directly
1684+
run_automation_only_task.delay(group.id)
1685+
else:
1686+
# Rate limit check before generating summary
1687+
if is_seer_scanner_rate_limited(group.project, group.organization):
1688+
return
1689+
1690+
# No summary yet, generate summary + run automation in one go
1691+
generate_summary_and_run_automation.delay(group.id)
16251692

16261693

16271694
GROUP_CATEGORY_POST_PROCESS_PIPELINE = {

tests/sentry/seer/autofix/test_issue_summary.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
_fetch_user_preference,
1919
_get_event,
2020
_get_stopping_point_from_fixability,
21-
_run_automation,
2221
get_issue_summary,
22+
run_automation,
2323
)
2424
from sentry.seer.autofix.utils import AutofixStoppingPoint
2525
from sentry.seer.models import SummarizeIssueResponse, SummarizeIssueScores
@@ -611,7 +611,7 @@ def test_get_issue_summary_with_web_vitals_issue(
611611
mock_trigger_autofix_task.assert_called_once()
612612

613613
@patch("sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement")
614-
@patch("sentry.seer.autofix.issue_summary._run_automation")
614+
@patch("sentry.seer.autofix.issue_summary.run_automation")
615615
@patch("sentry.seer.autofix.issue_summary._get_trace_tree_for_event")
616616
@patch("sentry.seer.autofix.issue_summary._call_seer")
617617
@patch("sentry.seer.autofix.issue_summary._get_event")
@@ -623,7 +623,7 @@ def test_get_issue_summary_continues_when_automation_fails(
623623
mock_run_automation,
624624
mock_get_acknowledgement,
625625
):
626-
"""Test that issue summary is still returned when _run_automation throws an exception."""
626+
"""Test that issue summary is still returned when run_automation throws an exception."""
627627
mock_get_acknowledgement.return_value = True
628628

629629
# Set up event and seer response
@@ -641,7 +641,7 @@ def test_get_issue_summary_continues_when_automation_fails(
641641
)
642642
mock_call_seer.return_value = mock_summary
643643

644-
# Make _run_automation raise an exception
644+
# Make run_automation raise an exception
645645
mock_run_automation.side_effect = Exception("Automation failed")
646646

647647
# Call get_issue_summary and verify it still returns successfully
@@ -652,7 +652,7 @@ def test_get_issue_summary_continues_when_automation_fails(
652652
expected_response["event_id"] = event.event_id
653653
assert summary_data == convert_dict_key_case(expected_response, snake_to_camel_case)
654654

655-
# Verify _run_automation was called and failed
655+
# Verify run_automation was called and failed
656656
mock_run_automation.assert_called_once()
657657
mock_call_seer.assert_called_once()
658658

@@ -681,7 +681,7 @@ def test_get_issue_summary_handles_trace_tree_errors(
681681
possible_cause="cause",
682682
),
683683
) as mock_call_seer,
684-
patch("sentry.seer.autofix.issue_summary._run_automation"),
684+
patch("sentry.seer.autofix.issue_summary.run_automation"),
685685
patch(
686686
"sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement",
687687
return_value=True,
@@ -693,7 +693,7 @@ def test_get_issue_summary_handles_trace_tree_errors(
693693
mock_call_seer.assert_called_once_with(self.group, serialized_event, None)
694694

695695
@patch("sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement")
696-
@patch("sentry.seer.autofix.issue_summary._run_automation")
696+
@patch("sentry.seer.autofix.issue_summary.run_automation")
697697
@patch("sentry.seer.autofix.issue_summary._get_trace_tree_for_event")
698698
@patch("sentry.seer.autofix.issue_summary._call_seer")
699699
@patch("sentry.seer.autofix.issue_summary._get_event")
@@ -705,7 +705,7 @@ def test_get_issue_summary_with_should_run_automation_false(
705705
mock_run_automation,
706706
mock_get_acknowledgement,
707707
):
708-
"""Test that should_run_automation=False prevents _run_automation from being called."""
708+
"""Test that should_run_automation=False prevents run_automation from being called."""
709709
mock_get_acknowledgement.return_value = True
710710
event = Mock(
711711
event_id="test_event_id",
@@ -743,7 +743,7 @@ def test_get_issue_summary_with_should_run_automation_false(
743743
mock_call_seer.assert_called_once_with(self.group, serialized_event, {"trace": "tree"})
744744
mock_get_acknowledgement.assert_called_once_with(self.group.organization)
745745

746-
# Verify that _run_automation was NOT called
746+
# Verify that run_automation was NOT called
747747
mock_run_automation.assert_not_called()
748748

749749
# Check if the cache was set correctly
@@ -798,7 +798,7 @@ def test_high_fixability_code_changes(
798798
possible_cause="c",
799799
scores=SummarizeIssueScores(fixability_score=0.70),
800800
)
801-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
801+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
802802
mock_trigger.assert_called_once()
803803
assert mock_trigger.call_args[1]["stopping_point"] == AutofixStoppingPoint.CODE_CHANGES
804804

@@ -822,7 +822,7 @@ def test_medium_fixability_solution(
822822
possible_cause="c",
823823
scores=SummarizeIssueScores(fixability_score=0.50),
824824
)
825-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
825+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
826826
mock_trigger.assert_called_once()
827827
assert mock_trigger.call_args[1]["stopping_point"] == AutofixStoppingPoint.ROOT_CAUSE
828828

@@ -848,7 +848,7 @@ def test_without_feature_flag(self, mock_gen, mock_budget, mock_state, mock_rate
848848
with self.feature(
849849
{"organizations:gen-ai-features": True, "projects:triage-signals-v0": False}
850850
):
851-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
851+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
852852

853853
mock_trigger.assert_called_once()
854854
assert mock_trigger.call_args[1]["stopping_point"] is None
@@ -1001,7 +1001,7 @@ def test_user_preference_limits_high_fixability(
10011001
)
10021002
mock_fetch.return_value = "solution"
10031003

1004-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
1004+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
10051005

10061006
mock_trigger.assert_called_once()
10071007
# Should be limited to SOLUTION by user preference
@@ -1031,7 +1031,7 @@ def test_fixability_limits_permissive_user_preference(
10311031
)
10321032
mock_fetch.return_value = "open_pr"
10331033

1034-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
1034+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
10351035

10361036
mock_trigger.assert_called_once()
10371037
# Should use ROOT_CAUSE from fixability, not OPEN_PR from user
@@ -1061,7 +1061,7 @@ def test_no_user_preference_uses_fixability_only(
10611061
)
10621062
mock_fetch.return_value = None
10631063

1064-
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
1064+
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
10651065

10661066
mock_trigger.assert_called_once()
10671067
# Should use OPEN_PR from fixability

0 commit comments

Comments
 (0)