def get_trace_item_type_name(item_type: int) -> str:
    """
    Return the storage-configuration name for a TraceItemType enum value.

    The protobuf enum name (e.g. "TRACE_ITEM_TYPE_SPAN") is stripped of its
    "TRACE_ITEM_TYPE_" prefix and lowercased to match the naming convention
    used in storage configurations (e.g. "span", "occurrence").

    Args:
        item_type: The integer value of the TraceItemType enum.

    Returns:
        The lowercase name used in storage configurations.

    Raises:
        ValueError: If item_type is not a valid TraceItemType enum value, or
            if the enum name does not follow the expected prefix convention.
    """
    try:
        # Name() raises ValueError for integers outside the enum's range.
        # Cast is only for the type checker; protobuf accepts plain ints.
        full_name = TraceItemType.Name(item_type)  # type: ignore[arg-type]
    except ValueError as e:
        raise ValueError(
            f"Unknown TraceItemType value: {item_type}. "
            "Must be a valid TraceItemType enum value."
        ) from e

    # Keep the format check OUTSIDE the try block: previously this ValueError
    # was caught by the same handler and re-reported as an "unknown value"
    # error, masking the real problem.
    prefix = "TRACE_ITEM_TYPE_"
    if not full_name.startswith(prefix):
        raise ValueError(f"Unexpected TraceItemType name format: {full_name}")

    return full_name.removeprefix(prefix).lower()
class EAPItemsFormatter(Formatter):
    # Number of attribute buckets used in eap_items storage
    # TODO: find a way to wire this in from a canonical source
    NUM_ATTRIBUTE_BUCKETS = 40

    def format(self, messages: Sequence[DeleteQueryMessage]) -> Sequence[ConditionsType]:
        """
        Translate delete messages for the eap_items storage.

        Attribute conditions are resolved to concrete column names based on
        the type of their values:
          - int/bool values live in single (unbucketed) map columns, e.g.
            attributes_int['group_id'] or attributes_bool['is_error']
          - string/float values live in hash-bucketed map columns, e.g.
            attributes_string_0['group_id'] or attributes_float_23['duration']

        The plain ``conditions`` of each message are passed through unchanged;
        resolved attribute columns are merged on top of them.
        """
        output: List[ConditionsType] = []

        for msg in messages:
            merged = dict(msg["conditions"])

            attr_conditions = msg.get("attribute_conditions")
            if attr_conditions:
                for name, values in attr_conditions.items():
                    if not values:
                        # Nothing to match on; skip empty value lists.
                        continue
                    # All values in a condition are assumed to share one type,
                    # so the first value is enough to pick the column.
                    merged[self._column_for_attribute(name, values[0])] = values

            output.append(merged)

        return output

    def _column_for_attribute(self, name: str, sample_value: object) -> str:
        """
        Build the storage column expression for one attribute.

        bool is tested before int because bool is a subclass of int in Python;
        anything that is not bool/int/float falls back to string.
        """
        if isinstance(sample_value, bool):
            attr_type = "bool"
        elif isinstance(sample_value, int):
            attr_type = "int"
        elif isinstance(sample_value, float):
            attr_type = "float"
        else:
            attr_type = "string"

        if attr_type in ("int", "bool"):
            # int/bool attributes are stored in a single map column.
            return f"attributes_{attr_type}['{name}']"

        # string/float attributes are spread over hash buckets.
        bucket = fnv_1a(name.encode("utf-8")) % self.NUM_ATTRIBUTE_BUCKETS
        return f"attributes_{attr_type}_{bucket}['{name}']"
b/snuba/web/bulk_delete_query.py index 89aa730d0b3..b115a15f70b 100644 --- a/snuba/web/bulk_delete_query.py +++ b/snuba/web/bulk_delete_query.py @@ -22,7 +22,7 @@ from snuba.attribution.attribution_info import AttributionInfo from snuba.clickhouse.columns import ColumnSet from snuba.clickhouse.query import Query -from snuba.datasets.deletion_settings import DeletionSettings +from snuba.datasets.deletion_settings import DeletionSettings, get_trace_item_type_name from snuba.datasets.storage import WritableTableStorage from snuba.datasets.storages.storage_key import StorageKey from snuba.query.conditions import combine_or_conditions @@ -31,7 +31,7 @@ from snuba.query.exceptions import InvalidQueryException, NoRowsToDeleteException from snuba.query.expressions import Expression from snuba.reader import Result -from snuba.state import get_str_config +from snuba.state import get_int_config, get_str_config from snuba.utils.metrics.util import with_span from snuba.utils.metrics.wrapper import MetricsWrapper from snuba.utils.schemas import ColumnValidator, InvalidColumnType @@ -56,11 +56,13 @@ class AttributeConditions: attributes: Dict[str, List[Any]] -class DeleteQueryMessage(TypedDict): +class DeleteQueryMessage(TypedDict, total=False): rows_to_delete: int storage_name: str conditions: ConditionsType tenant_ids: Mapping[str, str | int] + attribute_conditions: Optional[Dict[str, List[Any]]] + attribute_conditions_item_type: Optional[int] PRODUCER_MAP: MutableMapping[str, Producer] = {} @@ -155,38 +157,36 @@ def _validate_attribute_conditions( InvalidQueryException: If no attributes are configured for the item_type, or if any requested attributes are not allowed """ - # Get the string name for the item_type from the configuration - # The configuration uses string names (e.g., "occurrence") as keys allowed_attrs_config = delete_settings.allowed_attributes_by_item_type if not allowed_attrs_config: raise InvalidQueryException("No attribute-based deletions configured for this 
storage") - # Check if the item_type has any allowed attributes configured - # Since the config uses string names and we're given an integer, we need to find the matching config - # For now, we'll check all configured item types and validate against any that match - - # Try to find a matching configuration by item_type name - matching_allowed_attrs = None - for configured_item_type, allowed_attrs in allowed_attrs_config.items(): - # For this initial implementation, we'll use the string key directly - # In the future, we might need a mapping from item_type int to string name - matching_allowed_attrs = allowed_attrs - break # For now, assume the first/only configured type + # Map the integer item_type to its string name used in configuration + try: + item_type_name = get_trace_item_type_name(attribute_conditions.item_type) + except ValueError as e: + raise InvalidQueryException(str(e)) - if matching_allowed_attrs is None: + # Check if this specific item_type has any allowed attributes configured + if item_type_name not in allowed_attrs_config: raise InvalidQueryException( - f"No attribute-based deletions configured for item_type {attribute_conditions.item_type}" + f"No attribute-based deletions configured for item_type {item_type_name} " + f"(value: {attribute_conditions.item_type}). Configured item types: " + f"{sorted(allowed_attrs_config.keys())}" ) + # Get the allowed attributes for this specific item_type + allowed_attrs = allowed_attrs_config[item_type_name] + # Validate that all requested attributes are allowed requested_attrs = set(attribute_conditions.attributes.keys()) - allowed_attrs_set = set(matching_allowed_attrs) + allowed_attrs_set = set(allowed_attrs) invalid_attrs = requested_attrs - allowed_attrs_set if invalid_attrs: raise InvalidQueryException( - f"Invalid attributes for deletion: {invalid_attrs}. " + f"Invalid attributes for deletion on item_type '{item_type_name}': {invalid_attrs}. 
" f"Allowed attributes: {allowed_attrs_set}" ) @@ -234,15 +234,18 @@ def delete_from_storage( # validate attribute conditions if provided if attribute_conditions: _validate_attribute_conditions(attribute_conditions, delete_settings) - logger.error( - "valid attribute_conditions passed to delete_from_storage, but delete will be ignored " - "as functionality is not yet implemented" - ) - # deleting by just conditions and ignoring attribute_conditions would be dangerous - return {} + + if not get_int_config("permit_delete_by_attribute", default=0): + logger.error( + "valid attribute_conditions passed to delete_from_storage, but delete will be ignored " + "as functionality is not yet launched (permit_delete_by_attribute=0)" + ) + return {} attr_info = _get_attribution_info(attribution_info) - return delete_from_tables(storage, delete_settings.tables, conditions, attr_info) + return delete_from_tables( + storage, delete_settings.tables, conditions, attr_info, attribute_conditions + ) def construct_query(storage: WritableTableStorage, table: str, condition: Expression) -> Query: @@ -266,8 +269,8 @@ def delete_from_tables( tables: Sequence[str], conditions: Dict[str, Any], attribution_info: AttributionInfo, + attribute_conditions: Optional[AttributeConditions] = None, ) -> dict[str, Result]: - highest_rows_to_delete = 0 result: dict[str, Result] = {} for table in tables: @@ -293,6 +296,12 @@ def delete_from_tables( "conditions": conditions, "tenant_ids": attribution_info.tenant_ids, } + + # Add attribute_conditions to the message if present + if attribute_conditions: + delete_query["attribute_conditions"] = attribute_conditions.attributes + delete_query["attribute_conditions_item_type"] = attribute_conditions.item_type + produce_delete_query(delete_query) return result diff --git a/snuba/web/delete_query.py b/snuba/web/delete_query.py index 4679043930d..96ed988c755 100644 --- a/snuba/web/delete_query.py +++ b/snuba/web/delete_query.py @@ -1,3 +1,4 @@ +import re import 
def _construct_condition(columns: ConditionsType) -> Expression:
    """
    Build an AND-ed filter expression from a mapping of column -> values.

    A single value produces an equality comparison; multiple values produce
    an IN condition over a literal tuple. Column names may use map-access
    notation (e.g. "attributes_string_0['group_id']"), which is resolved by
    _parse_column_expression.
    """

    def _compare(col_name: str, values: Sequence[Any]) -> Expression:
        target = _parse_column_expression(col_name)
        if len(values) == 1:
            return equals(target, literal(values[0]))
        return in_cond(
            target,
            literals_tuple(alias=None, literals=[literal(v) for v in values]),
        )

    return combine_and_conditions(
        [_compare(col, values) for col, values in columns.items()]
    )
def _parse_column_expression(col_name: str) -> Expression:
    """
    Turn a column name, possibly using map-access notation, into an AST node.

    Examples:
        "project_id" -> Column("project_id")
        "attributes_string_0['group_id']" ->
            SubscriptableReference(Column("attributes_string_0"), Literal("group_id"))

    Both single- and double-quoted keys are accepted; the closing quote must
    match the opening one (backreference \\2). Anything that does not match
    the map-access shape is treated as a plain column name.
    """
    # "column['key']" or 'column["key"]': group 1 = base column,
    # group 2 = quote character, group 3 = key.
    map_access = re.match(r"^([a-zA-Z_][a-zA-Z0-9_]*)\[(['\"])(.+?)\2\]$", col_name)
    if map_access is None:
        # Regular column reference.
        return column(col_name)

    return SubscriptableReference(
        alias=None,
        column=Column(alias=None, table_name=None, column_name=map_access.group(1)),
        key=Literal(alias=None, value=map_access.group(3)),
    )
def test_get_trace_item_type_name_by_integer() -> None:
    # Raw integer values map the same way as the enum constants do.
    assert get_trace_item_type_name(0) == "unspecified"
    assert get_trace_item_type_name(1) == "span"
    assert get_trace_item_type_name(7) == "occurrence"


def test_get_trace_item_type_name_invalid() -> None:
    # Out-of-range values (positive or negative) raise a descriptive error.
    for bad_value in (999, -1):
        with pytest.raises(ValueError, match=f"Unknown TraceItemType value: {bad_value}"):
            get_trace_item_type_name(bad_value)
basically nothing", ), ], ) -def test_identity_formatter( +def test_eap_items_formatter_identity_conditions( messages: Sequence[DeleteQueryMessage], expected_formatted: Sequence[ConditionsType], formatter: Type[Formatter], ) -> None: formatted = formatter().format(messages) assert formatted == expected_formatted + + +def test_eap_items_formatter_with_attribute_conditions() -> None: + # Create a message with attribute_conditions (integer values) + messages = [ + create_delete_query_message( + conditions={"project_id": [1], "item_type": [1]}, + attribute_conditions={"group_id": [12345, 67890]}, + attribute_conditions_item_type=1, + ) + ] + + formatter = EAPItemsFormatter() + formatted = formatter.format(messages) + + # Since values are integers, should use attributes_int (no bucketing) + expected_column = "attributes_int['group_id']" + + assert len(formatted) == 1 + assert formatted[0]["project_id"] == [1] + assert formatted[0]["item_type"] == [1] + assert formatted[0][expected_column] == [12345, 67890] + + +def test_eap_items_formatter_multiple_attributes() -> None: + messages = [ + create_delete_query_message( + conditions={"project_id": [1], "item_type": [1]}, + attribute_conditions={ + "group_id": [12345], # int (no bucketing) + "transaction": ["test_transaction"], # string (bucketing) + }, + attribute_conditions_item_type=1, + ) + ] + + formatter = EAPItemsFormatter() + formatted = formatter.format(messages) + + # Calculate expected bucket for transaction (string uses bucketing) + transaction_bucket = fnv_1a("transaction".encode("utf-8")) % 40 + + # group_id is int (no bucketing), transaction is string (with bucketing) + expected_group_id_column = "attributes_int['group_id']" + expected_transaction_column = f"attributes_string_{transaction_bucket}['transaction']" + + assert len(formatted) == 1 + assert formatted[0][expected_group_id_column] == [12345] + assert formatted[0][expected_transaction_column] == ["test_transaction"] + + +def 
test_eap_items_formatter_with_float_attributes() -> None: + messages = [ + create_delete_query_message( + conditions={"project_id": [1], "item_type": [1]}, + attribute_conditions={"duration": [123.45, 678.90]}, + attribute_conditions_item_type=1, + ) + ] + + formatter = EAPItemsFormatter() + formatted = formatter.format(messages) + + # Calculate the expected bucket for "duration" + expected_bucket = fnv_1a("duration".encode("utf-8")) % 40 + expected_column = f"attributes_float_{expected_bucket}['duration']" + + assert len(formatted) == 1 + assert formatted[0][expected_column] == [123.45, 678.90] + + +def test_eap_items_formatter_with_bool_attributes() -> None: + messages = [ + create_delete_query_message( + conditions={"project_id": [1], "item_type": [1]}, + attribute_conditions={"is_error": [True, False]}, + attribute_conditions_item_type=1, + ) + ] + + formatter = EAPItemsFormatter() + formatted = formatter.format(messages) + + # Bool attributes don't use bucketing + expected_column = "attributes_bool['is_error']" + + assert len(formatted) == 1 + assert formatted[0][expected_column] == [True, False] diff --git a/tests/web/test_bulk_delete_query.py b/tests/web/test_bulk_delete_query.py index 029ef97ceef..1bbbf0e61f2 100644 --- a/tests/web/test_bulk_delete_query.py +++ b/tests/web/test_bulk_delete_query.py @@ -20,6 +20,10 @@ from snuba.web.bulk_delete_query import AttributeConditions, delete_from_storage from snuba.web.delete_query import DeletesNotEnabledError +# TraceItemType values from sentry_protos +TRACE_ITEM_TYPE_SPAN = 1 +TRACE_ITEM_TYPE_OCCURRENCE = 7 + CONSUMER_CONFIG = { "bootstrap.servers": settings.BROKER_CONFIG["bootstrap.servers"], "group.id": "lwd-search-issues", @@ -137,26 +141,55 @@ def test_delete_invalid_column_name() -> None: @pytest.mark.redis_db -def test_attribute_conditions_valid() -> None: - """Test that valid attribute_conditions are accepted for eap_items storage""" +def test_attribute_conditions_invalid_item_type() -> None: + """Test 
that attribute_conditions with wrong item_type (span instead of occurrence) are rejected""" + storage = get_writable_storage(StorageKey("eap_items")) + conditions = {"project_id": [1], "item_type": [TRACE_ITEM_TYPE_SPAN]} + # Using span (1) but config only allows occurrence (7) + attribute_conditions = AttributeConditions( + item_type=TRACE_ITEM_TYPE_SPAN, attributes={"group_id": [12345]} + ) + attr_info = get_attribution_info() + + with pytest.raises( + InvalidQueryException, + match="No attribute-based deletions configured for item_type span", + ): + delete_from_storage(storage, conditions, attr_info, attribute_conditions) + + +@pytest.mark.redis_db +def test_attribute_conditions_valid_occurrence() -> None: + """Test that valid attribute_conditions are accepted for occurrence item_type""" storage = get_writable_storage(StorageKey("eap_items")) - conditions = {"project_id": [1], "item_type": [1]} - attribute_conditions = AttributeConditions(item_type=1, attributes={"group_id": [12345]}) + conditions = {"project_id": [1], "item_type": [TRACE_ITEM_TYPE_OCCURRENCE]} + attribute_conditions = AttributeConditions( + item_type=TRACE_ITEM_TYPE_OCCURRENCE, attributes={"group_id": [12345]} + ) attr_info = get_attribution_info() # Mock out _enforce_max_rows to avoid needing actual data with patch("snuba.web.bulk_delete_query._enforce_max_rows", return_value=10): - with patch("snuba.web.bulk_delete_query.produce_delete_query"): - # Should not raise an exception - delete_from_storage(storage, conditions, attr_info, attribute_conditions) + with patch("snuba.web.bulk_delete_query.produce_delete_query") as mock_produce: + # Should not raise an exception, but should return empty dict since + # functionality is not yet launched (permit_delete_by_attribute=0 by default) + result = delete_from_storage(storage, conditions, attr_info, attribute_conditions) + + # Should return empty because the feature flag is off + assert result == {} + # Should not have produced a message since we 
return early + assert mock_produce.call_count == 0 @pytest.mark.redis_db def test_attribute_conditions_invalid_attribute() -> None: """Test that invalid attribute names in attribute_conditions are rejected""" storage = get_writable_storage(StorageKey("eap_items")) - conditions = {"project_id": [1], "item_type": [1]} - attribute_conditions = AttributeConditions(item_type=1, attributes={"invalid_attr": [12345]}) + conditions = {"project_id": [1], "item_type": [TRACE_ITEM_TYPE_OCCURRENCE]} + # Using valid item_type (occurrence/7) but invalid attribute + attribute_conditions = AttributeConditions( + item_type=TRACE_ITEM_TYPE_OCCURRENCE, attributes={"invalid_attr": [12345]} + ) attr_info = get_attribution_info() with pytest.raises(InvalidQueryException, match="Invalid attributes for deletion"): @@ -168,7 +201,9 @@ def test_attribute_conditions_missing_item_type() -> None: """Test that attribute_conditions requires item_type in conditions""" storage = get_writable_storage(StorageKey("eap_items")) conditions = {"project_id": [1]} - attribute_conditions = AttributeConditions(item_type=1, attributes={"group_id": [12345]}) + attribute_conditions = AttributeConditions( + item_type=TRACE_ITEM_TYPE_OCCURRENCE, attributes={"group_id": [12345]} + ) attr_info = get_attribution_info() # Since item_type is now in AttributeConditions, we need to test a different scenario @@ -191,3 +226,38 @@ def test_attribute_conditions_storage_not_configured() -> None: InvalidQueryException, match="No attribute-based deletions configured for this storage" ): delete_from_storage(storage, conditions, attr_info, attribute_conditions) + + +@pytest.mark.redis_db +def test_attribute_conditions_feature_flag_enabled() -> None: + """Test that attribute_conditions are processed when feature flag is enabled""" + storage = get_writable_storage(StorageKey("eap_items")) + conditions = {"project_id": [1], "item_type": [TRACE_ITEM_TYPE_OCCURRENCE]} + attribute_conditions = AttributeConditions( + 
item_type=TRACE_ITEM_TYPE_OCCURRENCE, attributes={"group_id": [12345]} + ) + attr_info = get_attribution_info() + + # Enable the feature flag + set_config("permit_delete_by_attribute", 1) + + try: + # Mock out _enforce_max_rows to avoid needing actual data + with patch("snuba.web.bulk_delete_query._enforce_max_rows", return_value=10): + with patch("snuba.web.bulk_delete_query.produce_delete_query") as mock_produce: + # Should process normally and produce a message + result = delete_from_storage(storage, conditions, attr_info, attribute_conditions) + + # Should have produced a message + assert mock_produce.call_count == 1 + # Should return success results + assert result != {} + + # Verify the message includes attribute_conditions + call_args = mock_produce.call_args[0][0] + assert "attribute_conditions" in call_args + assert call_args["attribute_conditions"] == {"group_id": [12345]} + assert call_args["attribute_conditions_item_type"] == TRACE_ITEM_TYPE_OCCURRENCE + finally: + # Clean up: disable the feature flag + set_config("permit_delete_by_attribute", 0) diff --git a/tests/web/test_delete_query.py b/tests/web/test_delete_query.py index ba6d10a6a9c..39de67b4327 100644 --- a/tests/web/test_delete_query.py +++ b/tests/web/test_delete_query.py @@ -7,6 +7,7 @@ from snuba.attribution import get_app_id from snuba.attribution.attribution_info import AttributionInfo from snuba.clickhouse.columns import ColumnSet +from snuba.clickhouse.formatter.query import format_query from snuba.clickhouse.query import Query from snuba.configs.configuration import Configuration, ResourceIdentifier from snuba.datasets.storages.factory import get_writable_storage @@ -22,9 +23,14 @@ ) from snuba.query.data_source.simple import Table from snuba.query.dsl import and_cond, column, equals, literal +from snuba.query.expressions import Column, SubscriptableReference from snuba.query.query_settings import HTTPQuerySettings from snuba.web import QueryException -from snuba.web.delete_query import 
_execute_query +from snuba.web.delete_query import ( + _construct_condition, + _execute_query, + _parse_column_expression, +) @pytest.mark.clickhouse_db @@ -155,3 +161,47 @@ def _update_quota_balance( assert ( update_called ), "update_quota_balance should have been called even though the query was rejected but was not" + + +def test_parse_column_expression_regular_column() -> None: + expr = _parse_column_expression("project_id") + assert isinstance(expr, Column) + assert expr.column_name == "project_id" + + +def test_parse_column_expression_map_access() -> None: + expr = _parse_column_expression("attributes_string_36['group_id']") + assert isinstance(expr, SubscriptableReference) + assert expr.column.column_name == "attributes_string_36" + assert expr.key.value == "group_id" + + # double quotes are also acceptable + expr = _parse_column_expression('attributes_string_0["event_id"]') + assert isinstance(expr, SubscriptableReference) + assert expr.column.column_name == "attributes_string_0" + assert expr.key.value == "event_id" + + +def test_parse_column_expression_formats_correctly() -> None: + conditions = { + "project_id": [1], + "attributes_string_36['group_id']": [12345], + } + + condition_expr = _construct_condition(conditions) + + query = Query( + from_clause=Table( + "eap_items_1_local", + ColumnSet([]), + storage_key=StorageKey.EAP_ITEMS, + ), + condition=condition_expr, + is_delete=True, + ) + + formatted = format_query(query) + sql = formatted.get_sql() + + assert "equals(project_id, 1)" in sql + assert "equals(attributes_string_36['group_id'], 12345)" in sql