Skip to content

Commit 41f474c

Browse files
committed
ref(grouping): Add training mode for similarity model rollout
Introduce training_mode parameter to send dual embeddings during model upgrades. Centralize model version config and add should_send_new_model_embeddings() to track which groups need new embeddings. Rename feature flag to be version-agnostic.
1 parent 66de818 commit 41f474c

File tree

9 files changed

+407
-36
lines changed

9 files changed

+407
-36
lines changed

src/sentry/event_manager.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@
6565
run_primary_grouping,
6666
)
6767
from sentry.grouping.ingest.metrics import record_hash_calculation_metrics, record_new_group_metrics
68-
from sentry.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash
68+
from sentry.grouping.ingest.seer import (
69+
maybe_check_seer_for_matching_grouphash,
70+
maybe_send_seer_for_new_model_training,
71+
)
6972
from sentry.grouping.ingest.utils import (
7073
add_group_id_to_grouphashes,
7174
check_for_group_creation_load_shed,
@@ -1287,6 +1290,8 @@ def assign_event_to_group(
12871290
if primary.existing_grouphash:
12881291
group_info = handle_existing_grouphash(job, primary.existing_grouphash, primary.grouphashes)
12891292
result = "found_primary"
1293+
# Send new model embedding request if needed for rollout
1294+
maybe_send_seer_for_new_model_training(event, primary.existing_grouphash, primary.variants)
12901295
# If we haven't, try again using the secondary config. (If there is no secondary config, or
12911296
# we're out of the transition period, we'll get back the empty `NULL_GROUPHASH_INFO`.)
12921297
else:
@@ -1298,6 +1303,10 @@ def assign_event_to_group(
12981303
job, secondary.existing_grouphash, all_grouphashes
12991304
)
13001305
result = "found_secondary"
1306+
# Send new model embedding request if needed for rollout
1307+
maybe_send_seer_for_new_model_training(
1308+
event, secondary.existing_grouphash, primary.variants
1309+
)
13011310

13021311
# If we still haven't found a group, ask Seer for a match (if enabled for the event's platform)
13031312
else:

src/sentry/features/temporary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
626626
manager.add("projects:similarity-embeddings", ProjectFeature, FeatureHandlerStrategy.INTERNAL, default=False, api_expose=True)
627627
manager.add("projects:similarity-indexing", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
628628
manager.add("projects:similarity-view", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
629-
# Enable v2 similarity grouping model (part of v2 grouping rollout)
630-
manager.add("projects:similarity-grouping-v2-model", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
629+
# Enable new similarity grouping model upgrade (version-agnostic rollout)
630+
manager.add("projects:similarity-grouping-model-upgrade", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
631631
# Starfish: extract metrics from the spans
632632
manager.add("projects:span-metrics-extraction", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
633633
manager.add("projects:span-metrics-extraction-addons", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)

src/sentry/grouping/ingest/seer.py

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
from django.conf import settings
77
from django.utils import timezone
88

9-
from sentry import features, options
9+
from sentry import options
1010
from sentry import ratelimits as ratelimiter
11-
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
1211
from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy
1312
from sentry.grouping.ingest.grouphash_metadata import (
1413
check_grouphashes_for_positive_fingerprint_match,
@@ -17,8 +16,9 @@
1716
from sentry.grouping.variants import BaseVariant
1817
from sentry.models.grouphash import GroupHash
1918
from sentry.models.project import Project
19+
from sentry.seer.similarity.config import get_grouping_model_version
2020
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
21-
from sentry.seer.similarity.types import GroupingVersion, SimilarIssuesEmbeddingsRequest
21+
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
2222
from sentry.seer.similarity.utils import (
2323
SEER_INELIGIBLE_EVENT_PLATFORMS,
2424
ReferrerOptions,
@@ -257,10 +257,17 @@ def get_seer_similar_issues(
257257
event: Event,
258258
event_grouphash: GroupHash,
259259
variants: dict[str, BaseVariant],
260+
training_mode: bool = False,
260261
) -> tuple[float | None, GroupHash | None]:
261262
"""
262263
Ask Seer for the given event's nearest neighbor(s) and return the stacktrace distance and
263264
matching GroupHash of the closest match (if any), or `(None, None)` if no match found.
265+
266+
Args:
267+
event: The event being grouped
268+
event_grouphash: The grouphash for this event
269+
variants: Grouping variants for the event
270+
training_mode: If True, only possibly insert embedding without returning matches
264271
"""
265272
event_hash = event.get_primary_hash()
266273
exception_type = get_path(event.data, "exception", "values", -1, "type")
@@ -272,10 +279,7 @@ def get_seer_similar_issues(
272279
get_stacktrace_string(get_grouping_info_from_variants_legacy(variants)),
273280
)
274281

275-
# Get model configuration from feature flags
276-
use_v2_model = features.has("projects:similarity-grouping-v2-model", event.project)
277-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
278-
training_mode = False # PR #B will add the smart logic
282+
model_version = get_grouping_model_version(event.project)
279283

280284
request_data: SimilarIssuesEmbeddingsRequest = {
281285
"event_id": event.event_id,
@@ -528,6 +532,8 @@ def maybe_check_seer_for_matching_grouphash(
528532

529533
timestamp = timezone.now()
530534

535+
model_version = get_grouping_model_version(event.project)
536+
531537
gh_metadata.update(
532538
# Technically the time of the metadata record creation and the time of the Seer
533539
# request will be some milliseconds apart, but a) the difference isn't meaningful
@@ -541,9 +547,66 @@ def maybe_check_seer_for_matching_grouphash(
541547
date_added=gh_metadata.date_added or timestamp,
542548
seer_date_sent=gh_metadata.date_added or timestamp,
543549
seer_event_sent=event.event_id,
544-
seer_model=SEER_SIMILARITY_MODEL_VERSION,
550+
seer_model=model_version.value,
545551
seer_matched_grouphash=seer_matched_grouphash,
546552
seer_match_distance=seer_match_distance,
547553
)
548554

549555
return seer_matched_grouphash
556+
557+
558+
@sentry_sdk.tracing.trace
def maybe_send_seer_for_new_model_training(
    event: Event,
    existing_grouphash: GroupHash,
    variants: dict[str, BaseVariant],
) -> None:
    """
    Fire a `training_mode=True` Seer request for an event that already matched a grouphash.

    The request is only attempted when `should_send_new_model_embeddings` says the grouphash's
    recorded `seer_model` does not correspond to the new model for this project, and when
    `should_call_seer_for_grouping` allows a Seer call for the event. Whatever
    `get_seer_similar_issues` returns is discarded, so this function never changes which group
    the event lands in. Any exception raised while calling Seer (or while recording the success
    metric) is captured via `sentry_sdk.capture_exception` instead of propagating.

    NOTE(review): nothing in this function writes the new model version back to the grouphash
    metadata. Unless `get_seer_similar_issues` does so in training mode, the same grouphash will
    qualify again on every subsequent event - confirm this is intended.

    Args:
        event: The event being grouped
        existing_grouphash: The grouphash that was found for this event
        variants: Grouping variants for the event
    """
    from sentry.seer.similarity.config import should_send_new_model_embeddings

    metadata = existing_grouphash.metadata
    recorded_model = metadata.seer_model if metadata else None

    # Bail out early when no new-model embedding is needed for this grouphash
    if not should_send_new_model_embeddings(event.project, recorded_model):
        return

    metadata_exists = metadata is not None

    # Apply the same eligibility gate used for regular Seer grouping calls
    if not should_call_seer_for_grouping(event, variants, existing_grouphash):
        record_did_call_seer_metric(event, call_made=False, blocker="new_model_embedding_check")
        return

    record_did_call_seer_metric(event, call_made=True, blocker="none")

    try:
        # The return value is intentionally ignored - this request is not used for grouping
        get_seer_similar_issues(event, existing_grouphash, variants, training_mode=True)

        metrics.incr(
            "seer.new_model_embedding_request",
            sample_rate=options.get("seer.similarity.metrics_sample_rate"),
            tags={
                "platform": event.platform or "unknown",
                "had_metadata": metadata_exists,
            },
        )
    except Exception as e:
        # Best-effort: report the failure but never let it break ingestion of the event
        sentry_sdk.capture_exception(
            e,
            tags={
                "event": event.event_id,
                "project": event.project.id,
                "grouphash": existing_grouphash.hash,
            },
        )

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from rest_framework.request import Request
77
from rest_framework.response import Response
88

9-
from sentry import analytics, features, options
9+
from sentry import analytics, options
1010
from sentry.api.analytics import GroupSimilarIssuesEmbeddingsCountEvent
1111
from sentry.api.api_owners import ApiOwner
1212
from sentry.api.api_publish_status import ApiPublishStatus
@@ -16,12 +16,9 @@
1616
from sentry.issues.endpoints.bases.group import GroupEndpoint
1717
from sentry.models.group import Group
1818
from sentry.models.grouphash import GroupHash
19+
from sentry.seer.similarity.config import get_grouping_model_version
1920
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
20-
from sentry.seer.similarity.types import (
21-
GroupingVersion,
22-
SeerSimilarIssueData,
23-
SimilarIssuesEmbeddingsRequest,
24-
)
21+
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
2522
from sentry.seer.similarity.utils import (
2623
ReferrerOptions,
2724
event_content_has_stacktrace,
@@ -104,10 +101,7 @@ def get(self, request: Request, group: Group) -> Response:
104101
if not stacktrace_string or not latest_event:
105102
return Response([]) # No exception, stacktrace or in-app frames, or event
106103

107-
# Get model configuration from feature flags
108-
use_v2_model = features.has("projects:similarity-grouping-v2-model", group.project)
109-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
110-
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic
104+
model_version = get_grouping_model_version(group.project)
111105

112106
similar_issues_params: SimilarIssuesEmbeddingsRequest = {
113107
"event_id": latest_event.event_id,
@@ -119,7 +113,7 @@ def get(self, request: Request, group: Group) -> Response:
119113
"referrer": "similar_issues",
120114
"use_reranking": options.get("seer.similarity.similar_issues.use_reranking"),
121115
"model": model_version,
122-
"training_mode": training_mode,
116+
"training_mode": False,
123117
}
124118
# Add optional parameters
125119
if request.GET.get("k"):
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""
2+
Configuration for similarity grouping model versions.
3+
4+
This module defines which model versions are used for similarity grouping
5+
and provides helper functions for determining the appropriate version to use.
6+
"""
7+
8+
from sentry import features
9+
from sentry.models.project import Project
10+
from sentry.seer.similarity.types import GroupingVersion
11+
12+
# Stable model version - used for ALL requests for non-rolled-out projects
13+
SEER_GROUPING_STABLE_VERSION = GroupingVersion.V1
14+
15+
# New model version being rolled out
16+
# - Rolled-out projects: Use this for ALL requests (both grouping and embeddings)
17+
# - Non-rolled-out projects: Never use this (use stable version for everything)
18+
# Set to None to disable rollout entirely
19+
SEER_GROUPING_NEW_VERSION = GroupingVersion.V2
20+
21+
# Feature flag name (version-agnostic)
22+
SEER_GROUPING_NEW_MODEL_ROLLOUT_FEATURE = "projects:similarity-grouping-model-upgrade"
23+
24+
25+
def get_grouping_model_version(project: Project) -> GroupingVersion:
    """
    Pick the similarity model version to use for the given project.

    Returns the new version when one is configured and `is_new_model_rolled_out` is true for
    the project; otherwise returns `SEER_GROUPING_STABLE_VERSION`.
    """
    candidate = get_new_model_version()
    # `is_new_model_rolled_out` is only consulted when a new version is actually configured;
    # it returns False on its own in the unconfigured case, so the outcome is the same.
    if candidate is not None and is_new_model_rolled_out(project):
        return candidate
    return SEER_GROUPING_STABLE_VERSION
36+
37+
38+
def is_new_model_rolled_out(project: Project) -> bool:
    """
    Tell whether the given project should use the new model version.

    The answer is False when `SEER_GROUPING_NEW_VERSION` is None (no rollout configured).
    Otherwise it is whatever `features.has` reports for the rollout feature flag on this
    project.
    """
    rollout_configured = SEER_GROUPING_NEW_VERSION is not None
    # Short-circuit so the feature flag is not evaluated when no rollout is configured
    return rollout_configured and features.has(SEER_GROUPING_NEW_MODEL_ROLLOUT_FEATURE, project)
50+
51+
52+
def get_new_model_version() -> GroupingVersion | None:
    """
    Return the model version currently configured for rollout.

    This is the value of `SEER_GROUPING_NEW_VERSION`; None means no rollout is in progress.
    """
    configured_version = SEER_GROUPING_NEW_VERSION
    return configured_version
58+
59+
60+
def should_send_new_model_embeddings(
    project: Project,
    grouphash_seer_model: str | None,
) -> bool:
    """
    Decide whether an existing grouphash needs a `training_mode=true` request so that an
    embedding is built for the new model version.

    All of the following must hold:
        1. A new model version is configured
        2. The rollout is enabled for the project
        3. The grouphash's recorded model is missing or differs from the new version

    Args:
        project: The project
        grouphash_seer_model: The seer_model value from grouphash metadata

    Returns:
        True if we should send a training_mode=true request
    """
    target_version = get_new_model_version()

    # Either no rollout exists at all, or this project is not part of it
    if target_version is None or not is_new_model_rolled_out(project):
        return False

    # A missing model means the grouphash was never sent to Seer; any other value only
    # counts as done if it matches the new version exactly
    return grouphash_seer_model is None or grouphash_seer_model != target_version.value

src/sentry/tasks/embeddings_grouping/utils.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable
1212
from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request
1313

14-
from sentry import features, nodestore, options
14+
from sentry import nodestore, options
1515
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
1616
from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy
1717
from sentry.grouping.grouptype import ErrorGroupType
1818
from sentry.models.group import Group, GroupStatus
1919
from sentry.models.project import Project
20+
from sentry.seer.similarity.config import get_grouping_model_version
2021
from sentry.seer.similarity.grouping_records import (
2122
BulkCreateGroupingRecordsResponse,
2223
CreateGroupingRecordData,
@@ -25,7 +26,6 @@
2526
post_bulk_grouping_records,
2627
)
2728
from sentry.seer.similarity.types import (
28-
GroupingVersion,
2929
IncompleteSeerDataError,
3030
SeerSimilarIssueData,
3131
SimilarHashMissingGroupError,
@@ -492,11 +492,9 @@ def send_group_and_stacktrace_to_seer(
492492
f"{BACKFILL_NAME}.send_group_and_stacktrace_to_seer",
493493
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
494494
):
495-
# Get model configuration from feature flags
496495
project = Project.objects.get_from_cache(id=project_id)
497-
use_v2_model = features.has("projects:similarity-grouping-v2-model", project)
498-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
499-
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic
496+
model_version = get_grouping_model_version(project)
497+
training_mode = False # Backfill always uses production mode
500498

501499
return _make_seer_call(
502500
CreateGroupingRecordsRequest(
@@ -517,11 +515,9 @@ def send_group_and_stacktrace_to_seer_multithreaded(
517515
nodestore_results,
518516
project_id,
519517
):
520-
# Get model configuration from feature flags
521518
project = Project.objects.get_from_cache(id=project_id)
522-
use_v2_model = features.has("projects:similarity-grouping-v2-model", project)
523-
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
524-
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic
519+
model_version = get_grouping_model_version(project)
520+
training_mode = False # Backfill always uses production mode
525521

526522
def process_chunk(chunk_data, chunk_stacktrace):
527523
return _make_seer_call(

tests/sentry/event_manager/grouping/test_seer_grouping.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33
from typing import Any
44
from unittest.mock import MagicMock, patch
55

6-
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
76
from sentry.grouping.ingest.grouphash_metadata import create_or_update_grouphash_metadata_if_needed
87
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
98
from sentry.models.grouphash import GroupHash
10-
from sentry.seer.similarity.types import SeerSimilarIssueData
9+
from sentry.seer.similarity.types import GroupingVersion, SeerSimilarIssueData
1110
from sentry.testutils.cases import TestCase
1211
from sentry.testutils.helpers.eventprocessing import save_new_event
1312
from sentry.testutils.pytest.mocking import capture_results
@@ -205,7 +204,7 @@ def test_group_with_no_seer_match(self, _: MagicMock) -> None:
205204
event_grouphash,
206205
event_grouphash.metadata.date_added,
207206
event.event_id,
208-
SEER_SIMILARITY_MODEL_VERSION,
207+
GroupingVersion.V1.value,
209208
None,
210209
None,
211210
)
@@ -248,7 +247,7 @@ def test_group_with_seer_match(self, _: MagicMock) -> None:
248247
new_event_grouphash,
249248
new_event_grouphash.metadata.date_added,
250249
new_event.event_id,
251-
SEER_SIMILARITY_MODEL_VERSION,
250+
GroupingVersion.V1.value,
252251
existing_event_grouphash,
253252
seer_result_data.stacktrace_distance,
254253
)

0 commit comments

Comments
 (0)