ref(explorer): clean up old trace rpcs and auto-select sort field (#103491)

aliu39 · web-flow · commit e3fcb229fbd3 · 2025-11-17T22:36:15.000Z
- Cleans up the trace RPCs, which are no longer called (they were replaced by table_query and timeseries_query).
- Guards against a common Snuba gotcha where the `sort` field must be present in the selected `fields`. Fixes [SEER-6GE](https://sentry.sentry.io/issues/7001246036/events/0062a55360c94e16b7e8fb28f23e5e4d/)
- Returns only the table data, not the meta, which is unused.
- Renames some tests and adds a very basic sanity-check test for logs.
diff --git a/src/sentry/seer/endpoints/seer_rpc.py b/src/sentry/seer/endpoints/seer_rpc.py
@@ -88,8 +88,6 @@
 from sentry.seer.explorer.tools import (
     execute_table_query,
     execute_timeseries_query,
-    execute_trace_query_chart,
-    execute_trace_query_table,
     get_issue_details,
     get_replay_metadata,
     get_repository_definition,
@@ -1206,8 +1204,6 @@ def check_repository_integrations_status(*, repository_integrations: list[dict[s
     "get_trace_waterfall": rpc_get_trace_waterfall,
     "get_issue_details": get_issue_details,
     "get_profile_flamegraph": rpc_get_profile_flamegraph,
-    "execute_trace_query_chart": execute_trace_query_chart,
-    "execute_trace_query_table": execute_trace_query_table,
     "execute_table_query": execute_table_query,
     "execute_timeseries_query": execute_timeseries_query,
     "get_trace_item_attributes": get_trace_item_attributes,
diff --git a/src/sentry/seer/explorer/tools.py b/src/sentry/seer/explorer/tools.py
@@ -34,159 +34,15 @@
 logger = logging.getLogger(__name__)
 
 
-def execute_trace_query_chart(
-    *,
-    org_id: int,
-    query: str,
-    stats_period: str,
-    y_axes: list[str],
-    group_by: list[str] | None = None,
-    project_ids: list[int] | None = None,
-) -> dict[str, Any] | None:
-    """
-    Execute a trace query to get chart/timeseries data by calling the events-stats endpoint.
-    """
-    try:
-        organization = Organization.objects.get(id=org_id)
-    except Organization.DoesNotExist:
-        logger.warning("Organization not found", extra={"org_id": org_id})
-        return None
-
-    # Use provided project_ids or get all project IDs for the organization
-    if project_ids is None:
-        project_ids = list(organization.project_set.values_list("id", flat=True))
-        if not project_ids:
-            logger.warning("No projects found for organization", extra={"org_id": org_id})
-            return None
-
-    params: dict[str, Any] = {
-        "query": query,
-        "statsPeriod": stats_period,
-        "yAxis": y_axes,
-        "project": project_ids,
-        "dataset": "spans",
-        "referrer": Referrer.SEER_RPC,
-        "transformAliasToInputFormat": "1",  # Required for RPC datasets
-    }
-
-    # Add group_by if provided (for top events)
-    if group_by and len(group_by) > 0:
-        params["topEvents"] = 5
-        params["field"] = group_by
-        params["excludeOther"] = "0"  # Include "Other" series
-
-    resp = client.get(
-        auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]),
-        user=None,
-        path=f"/organizations/{organization.slug}/events-stats/",
-        params=params,
-    )
-    data = resp.data
-
-    # Always normalize to the nested {"metric": {"data": [...]}} format for consistency
-    metric_is_single = len(y_axes) == 1
-    metric_name = y_axes[0] if metric_is_single else None
-    if metric_name and metric_is_single:
-        # Handle grouped data with single metric: wrap each group's data in the metric name
-        if group_by:
-            return {
-                group_value: (
-                    {metric_name: group_data}
-                    if isinstance(group_data, dict) and "data" in group_data
-                    else group_data
-                )
-                for group_value, group_data in data.items()
-            }
-
-        # Handle non-grouped data with single metric: wrap data in the metric name
-        if isinstance(data, dict) and "data" in data:
-            return {metric_name: data}
-
-    return data
-
-
-def execute_trace_query_table(
-    *,
-    org_id: int,
-    query: str,
-    stats_period: str,
-    sort: str,
-    group_by: list[str] | None = None,
-    y_axes: list[str] | None = None,
-    per_page: int = 50,
-    mode: Literal["spans", "aggregates"] = "spans",
-    project_ids: list[int] | None = None,
-) -> dict[str, Any] | None:
-    """
-    Execute a trace query to get table data by calling the events endpoint.
-    """
-    try:
-        organization = Organization.objects.get(id=org_id)
-    except Organization.DoesNotExist:
-        logger.warning("Organization not found", extra={"org_id": org_id})
-        return None
-
-    # Use provided project_ids or get all project IDs for the organization
-    if project_ids is None:
-        project_ids = list(organization.project_set.values_list("id", flat=True))
-        if not project_ids:
-            logger.warning("No projects found for organization", extra={"org_id": org_id})
-            return None
-
-    # Determine fields based on mode
-    if mode == "aggregates":
-        # Aggregates mode: group_by fields + aggregate functions
-        fields = []
-        if group_by:
-            fields.extend(group_by)
-        if y_axes:
-            fields.extend(y_axes)
-    else:
-        # Samples mode: default span fields
-        fields = [
-            "id",
-            "span.op",
-            "span.description",
-            "span.duration",
-            "transaction",
-            "timestamp",
-            "project",
-            "trace",
-        ]
-
-    params: dict[str, Any] = {
-        "query": query,
-        "statsPeriod": stats_period,
-        "field": fields,
-        "sort": sort if sort else ("-timestamp" if not group_by else None),
-        "per_page": per_page,
-        "project": project_ids,
-        "dataset": "spans",
-        "referrer": Referrer.SEER_RPC,
-        "transformAliasToInputFormat": "1",  # Required for RPC datasets
-    }
-
-    # Remove None values
-    params = {k: v for k, v in params.items() if v is not None}
-
-    resp = client.get(
-        auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]),
-        user=None,
-        path=f"/organizations/{organization.slug}/events/",
-        params=params,
-    )
-    return resp.data
-
-
 def execute_table_query(
     *,
     org_id: int,
     dataset: str,
     fields: list[str],
-    query: str,
-    sort: str,
     per_page: int,
     stats_period: str,
+    query: str | None = None,
+    sort: str | None = None,
     project_ids: list[int] | None = None,
     project_slugs: list[str] | None = None,
     sampling_mode: SAMPLING_MODES = "NORMAL",
@@ -209,10 +65,16 @@ def execute_table_query(
         project_ids = [ALL_ACCESS_PROJECT_ID]
     # Note if both project_ids and project_slugs are provided, the API request will 400.
 
+    if sort:
+        # Auto-select sort field to avoid snuba errors.
+        sort_field = sort.lstrip("-")
+        if sort_field not in fields:
+            fields.append(sort_field)
+
     params: dict[str, Any] = {
         "dataset": dataset,
         "field": fields,
-        "query": query,
+        "query": query or None,
         "sort": sort if sort else ("-timestamp" if "timestamp" in fields else None),
         "per_page": per_page,
         "statsPeriod": stats_period,
@@ -232,7 +94,7 @@ def execute_table_query(
         path=f"/organizations/{organization.slug}/events/",
         params=params,
     )
-    return resp.data
+    return {"data": resp.data["data"]}
 
 
 def execute_timeseries_query(
diff --git a/tests/sentry/seer/explorer/test_tools.py b/tests/sentry/seer/explorer/test_tools.py