Skip to content
Draft

WIP #102168

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)


register(
"cleanup.abort_execution",
default=False,
Expand Down Expand Up @@ -2468,6 +2467,13 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# SDK Transport Error Sample Rate
register(
"sdk_transport_error.sample-rate",
default=0.0,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# SDK Crash Detection
#
# The project ID belongs to the sentry organization: https://sentry.sentry.io/projects/cocoa-sdk-crashes/?project=4505469596663808.
Expand Down
29 changes: 28 additions & 1 deletion src/sentry/utils/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,34 @@ def patch_transport_for_instrumentation(transport, transport_name):

def patched_send_request(*args, **kwargs):
metrics.incr(f"internal.sent_requests.{transport_name}.events")
return _send_request(*args, **kwargs)

try:
return _send_request(*args, **kwargs)
except Exception as e:
# Track transport failures as Datadog metrics (won't fail if SDK is down)
# This is critical for detecting SDK health issues like HTTP/2 connection
# exhaustion (e.g., httpcore.LocalProtocolError in cleanup operations)
error_type = type(e).__name__
metrics.incr(
"internal.sdk_transport_error",
tags={
"transport": transport_name,
"error_type": error_type,
},
sample_rate=options.get("sdk_transport_error.sample-rate"),
)
# Log for visibility in GCP logs (where transport errors surface)
logger.warning(
"SDK transport error - event delivery failed",
extra={
"transport": transport_name,
"error_type": error_type,
"error_message": str(e)[:200], # Truncate to avoid log spam
},
exc_info=False, # Don't include full traceback to avoid recursion
)
# Re-raise to let SDK handle it normally (SDK will log internally)
raise

transport._send_request = patched_send_request
return transport
Expand Down
117 changes: 117 additions & 0 deletions tests/sentry/utils/test_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import contextlib
from unittest.mock import MagicMock, patch

import pytest
import sentry_sdk.scope
from django.conf import settings
from django.db import OperationalError
Expand All @@ -20,6 +21,7 @@
check_current_scope_transaction,
check_tag_for_scope_bleed,
merge_context_into_scope,
patch_transport_for_instrumentation,
)


Expand Down Expand Up @@ -525,3 +527,118 @@ def test_before_send_error_level() -> None:
event_with_before_send = sdk.before_send(event, hint) # type: ignore[arg-type]
assert event_with_before_send
assert event_with_before_send["level"] == "warning"


class PatchTransportForInstrumentationTest(TestCase):
"""Tests for SDK transport patching with error tracking."""

@patch("sentry.utils.sdk.metrics")
@patch("sentry.utils.sdk.logger")
def test_successful_request(self, mock_logger: MagicMock, mock_metrics: MagicMock) -> None:
"""Test that successful requests are instrumented with metrics."""
mock_transport = MagicMock()
mock_transport._send_request = MagicMock(return_value="success")

patched_transport = patch_transport_for_instrumentation(mock_transport, "test_transport")
result = patched_transport._send_request("arg1", kwarg1="value1")

# Should increment success metric
mock_metrics.incr.assert_called_once_with("internal.sent_requests.test_transport.events")
# Should not log errors
assert mock_logger.warning.call_count == 0
# Should return the result
assert result == "success"

@patch("sentry.utils.sdk.metrics")
@patch("sentry.utils.sdk.logger")
def test_transport_error_tracking(
self, mock_logger: MagicMock, mock_metrics: MagicMock
) -> None:
"""Test that transport errors are tracked with metrics and logged."""
# Simulate an HTTP/2 connection error like in the original issue
error = ConnectionError("Invalid input ConnectionInputs.RECV_PING in state CLOSED")
mock_transport = MagicMock()
mock_transport._send_request = MagicMock(side_effect=error)

patched_transport = patch_transport_for_instrumentation(mock_transport, "upstream")

# Should re-raise the exception after tracking
with pytest.raises(ConnectionError):
patched_transport._send_request("arg1")

# Should increment both metrics
assert mock_metrics.incr.call_count == 2
mock_metrics.incr.assert_any_call("internal.sent_requests.upstream.events")
mock_metrics.incr.assert_any_call(
"internal.sdk_transport_error",
tags={"transport": "upstream", "error_type": "ConnectionError"},
sample_rate=1.0,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Mismatched Sample Rate Causes Test Failure

The test_transport_error_tracking test expects metrics.incr to be called with sample_rate=1.0 for the internal.sdk_transport_error metric. The patch_transport_for_instrumentation function, however, retrieves this sample rate from options.get("sdk_transport_error.sample-rate"), which defaults to 0.0. This mismatch causes the test to fail.

Fix in Cursor Fix in Web


# Should log the error
mock_logger.warning.assert_called_once()
call_args = mock_logger.warning.call_args
assert "SDK transport error" in call_args[0][0]
assert call_args[1]["extra"]["transport"] == "upstream"
assert call_args[1]["extra"]["error_type"] == "ConnectionError"
assert call_args[1]["exc_info"] is False

@patch("sentry.utils.sdk.metrics")
@patch("sentry.utils.sdk.logger")
def test_httpcore_protocol_error(self, mock_logger: MagicMock, mock_metrics: MagicMock) -> None:
"""Test specific tracking of httpcore.LocalProtocolError like in the real issue."""

# Create a mock exception that mimics httpcore.LocalProtocolError
class LocalProtocolError(Exception):
pass

error = LocalProtocolError(
"Invalid input ConnectionInputs.RECV_PING in state ConnectionState.CLOSED"
)
mock_transport = MagicMock()
mock_transport._send_request = MagicMock(side_effect=error)

patched_transport = patch_transport_for_instrumentation(mock_transport, "relay")

with pytest.raises(LocalProtocolError):
patched_transport._send_request()

# Verify error type is tracked correctly
error_metric_call = [
call for call in mock_metrics.incr.call_args_list if "sdk_transport_error" in str(call)
][0]
assert error_metric_call[1]["tags"]["error_type"] == "LocalProtocolError"

@patch("sentry.utils.sdk.metrics")
def test_transport_with_no_send_request(self, mock_metrics: MagicMock) -> None:
"""Test that patching handles transports without _send_request gracefully."""
mock_transport = MagicMock()
mock_transport._send_request = None

patched_transport = patch_transport_for_instrumentation(mock_transport, "test_transport")

# Should return transport unchanged
assert patched_transport == mock_transport
# Should not call metrics
assert mock_metrics.incr.call_count == 0

@patch("sentry.utils.sdk.metrics")
@patch("sentry.utils.sdk.logger")
def test_error_message_truncation(
self, mock_logger: MagicMock, mock_metrics: MagicMock
) -> None:
"""Test that long error messages are truncated to avoid log spam."""
long_message = "x" * 300 # 300 character error message
error = ValueError(long_message)
mock_transport = MagicMock()
mock_transport._send_request = MagicMock(side_effect=error)

patched_transport = patch_transport_for_instrumentation(mock_transport, "test")

with pytest.raises(ValueError):
patched_transport._send_request()

# Verify error message is truncated in logs
log_call = mock_logger.warning.call_args
logged_message = log_call[1]["extra"]["error_message"]
assert len(logged_message) <= 200
Loading