diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 337baa9bd48067..b9f528bd07ce86 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -343,7 +343,6 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) - register( "cleanup.abort_execution", default=False, @@ -2468,6 +2467,13 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) +# SDK Transport Error Sample Rate +register( + "sdk_transport_error.sample-rate", + default=0.0, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) + # SDK Crash Detection # # The project ID belongs to the sentry organization: https://sentry.sentry.io/projects/cocoa-sdk-crashes/?project=4505469596663808. diff --git a/src/sentry/utils/sdk.py b/src/sentry/utils/sdk.py index 3069f33411380b..e3cbf832fca07c 100644 --- a/src/sentry/utils/sdk.py +++ b/src/sentry/utils/sdk.py @@ -283,7 +283,34 @@ def patch_transport_for_instrumentation(transport, transport_name): def patched_send_request(*args, **kwargs): metrics.incr(f"internal.sent_requests.{transport_name}.events") - return _send_request(*args, **kwargs) + + try: + return _send_request(*args, **kwargs) + except Exception as e: + # Track transport failures as Datadog metrics (won't fail if SDK is down) + # This is critical for detecting SDK health issues like HTTP/2 connection + # exhaustion (e.g., httpcore.LocalProtocolError in cleanup operations) + error_type = type(e).__name__ + metrics.incr( + "internal.sdk_transport_error", + tags={ + "transport": transport_name, + "error_type": error_type, + }, + sample_rate=options.get("sdk_transport_error.sample-rate"), + ) + # Log for visibility in GCP logs (where transport errors surface) + logger.warning( + "SDK transport error - event delivery failed", + extra={ + "transport": transport_name, + "error_type": error_type, + "error_message": str(e)[:200], # Truncate to avoid log spam + }, + exc_info=False, # Don't include full traceback to avoid recursion + ) + # Re-raise to let SDK handle it normally (SDK will log internally) + raise transport._send_request = patched_send_request return transport diff --git a/tests/sentry/utils/test_sdk.py b/tests/sentry/utils/test_sdk.py index 43f00bad016a6a..2edcc93b44a56b 100644 --- a/tests/sentry/utils/test_sdk.py +++ b/tests/sentry/utils/test_sdk.py @@ -3,6 +3,7 @@ import contextlib from unittest.mock import MagicMock, patch +import pytest import sentry_sdk.scope from django.conf import settings from django.db import OperationalError @@ -20,6 +21,7 @@ check_current_scope_transaction, check_tag_for_scope_bleed, merge_context_into_scope, + patch_transport_for_instrumentation, ) @@ -525,3 +527,118 @@ def test_before_send_error_level() -> None: event_with_before_send = sdk.before_send(event, hint) # type: ignore[arg-type] assert event_with_before_send assert event_with_before_send["level"] == "warning" + + +class PatchTransportForInstrumentationTest(TestCase): + """Tests for SDK transport patching with error tracking.""" + + @patch("sentry.utils.sdk.metrics") + @patch("sentry.utils.sdk.logger") + def test_successful_request(self, mock_logger: MagicMock, mock_metrics: MagicMock) -> None: + """Test that successful requests are instrumented with metrics.""" + mock_transport = MagicMock() + mock_transport._send_request = MagicMock(return_value="success") + + patched_transport = patch_transport_for_instrumentation(mock_transport, "test_transport") + result = patched_transport._send_request("arg1", kwarg1="value1") + + # Should increment success metric + mock_metrics.incr.assert_called_once_with("internal.sent_requests.test_transport.events") + # Should not log errors + assert mock_logger.warning.call_count == 0 + # Should return the result + assert result == "success" + + @patch("sentry.utils.sdk.metrics") + @patch("sentry.utils.sdk.logger") + def test_transport_error_tracking( + self, mock_logger: MagicMock, mock_metrics: MagicMock + ) -> None: + """Test that transport errors are tracked with metrics and logged.""" + # Simulate an HTTP/2 connection error like in the original issue + error = ConnectionError("Invalid input ConnectionInputs.RECV_PING in state CLOSED") + mock_transport = MagicMock() + mock_transport._send_request = MagicMock(side_effect=error) + + patched_transport = patch_transport_for_instrumentation(mock_transport, "upstream") + + # Should re-raise the exception after tracking + with pytest.raises(ConnectionError): + patched_transport._send_request("arg1") + + # Should increment both metrics + assert mock_metrics.incr.call_count == 2 + mock_metrics.incr.assert_any_call("internal.sent_requests.upstream.events") + mock_metrics.incr.assert_any_call( + "internal.sdk_transport_error", + tags={"transport": "upstream", "error_type": "ConnectionError"}, + sample_rate=1.0, + ) + + # Should log the error + mock_logger.warning.assert_called_once() + call_args = mock_logger.warning.call_args + assert "SDK transport error" in call_args[0][0] + assert call_args[1]["extra"]["transport"] == "upstream" + assert call_args[1]["extra"]["error_type"] == "ConnectionError" + assert call_args[1]["exc_info"] is False + + @patch("sentry.utils.sdk.metrics") + @patch("sentry.utils.sdk.logger") + def test_httpcore_protocol_error(self, mock_logger: MagicMock, mock_metrics: MagicMock) -> None: + """Test specific tracking of httpcore.LocalProtocolError like in the real issue.""" + + # Create a mock exception that mimics httpcore.LocalProtocolError + class LocalProtocolError(Exception): + pass + + error = LocalProtocolError( + "Invalid input ConnectionInputs.RECV_PING in state ConnectionState.CLOSED" + ) + mock_transport = MagicMock() + mock_transport._send_request = MagicMock(side_effect=error) + + patched_transport = patch_transport_for_instrumentation(mock_transport, "relay") + + with pytest.raises(LocalProtocolError): + patched_transport._send_request() + + # Verify error type is tracked correctly + error_metric_call = [ + call for call in mock_metrics.incr.call_args_list if "sdk_transport_error" in str(call) + ][0] + assert error_metric_call[1]["tags"]["error_type"] == "LocalProtocolError" + + @patch("sentry.utils.sdk.metrics") + def test_transport_with_no_send_request(self, mock_metrics: MagicMock) -> None: + """Test that patching handles transports without _send_request gracefully.""" + mock_transport = MagicMock() + mock_transport._send_request = None + + patched_transport = patch_transport_for_instrumentation(mock_transport, "test_transport") + + # Should return transport unchanged + assert patched_transport == mock_transport + # Should not call metrics + assert mock_metrics.incr.call_count == 0 + + @patch("sentry.utils.sdk.metrics") + @patch("sentry.utils.sdk.logger") + def test_error_message_truncation( + self, mock_logger: MagicMock, mock_metrics: MagicMock + ) -> None: + """Test that long error messages are truncated to avoid log spam.""" + long_message = "x" * 300 # 300 character error message + error = ValueError(long_message) + mock_transport = MagicMock() + mock_transport._send_request = MagicMock(side_effect=error) + + patched_transport = patch_transport_for_instrumentation(mock_transport, "test") + + with pytest.raises(ValueError): + patched_transport._send_request() + + # Verify error message is truncated in logs + log_call = mock_logger.warning.call_args + logged_message = log_call[1]["extra"]["error_message"] + assert len(logged_message) <= 200