fix(integrations): enhance input handling for embeddings in LiteLLM integration (#5127)

constantinius · web-flow · commit 8596f894907c · 2025-11-21T11:51:35.000+01:00
#### Issues

Closes https://linear.app/getsentry/issue/TET-1461/fix-embedding-support-for-litellm
diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
@@ -77,15 +77,40 @@ def _input_callback(kwargs):
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
     set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
 
-    # Record messages if allowed
-    messages = kwargs.get("messages", [])
-    if messages and should_send_default_pii() and integration.include_prompts:
-        scope = sentry_sdk.get_current_scope()
-        messages_data = truncate_and_annotate_messages(messages, span, scope)
-        if messages_data is not None:
-            set_data_normalized(
-                span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
-            )
+    # Record input/messages if allowed
+    if should_send_default_pii() and integration.include_prompts:
+        if operation == "embeddings":
+            # For embeddings, look for the 'input' parameter
+            embedding_input = kwargs.get("input")
+            if embedding_input:
+                scope = sentry_sdk.get_current_scope()
+                # Normalize to list format
+                input_list = (
+                    embedding_input
+                    if isinstance(embedding_input, list)
+                    else [embedding_input]
+                )
+                messages_data = truncate_and_annotate_messages(input_list, span, scope)
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
+                        messages_data,
+                        unpack=False,
+                    )
+        else:
+            # For chat, look for the 'messages' parameter
+            messages = kwargs.get("messages", [])
+            if messages:
+                scope = sentry_sdk.get_current_scope()
+                messages_data = truncate_and_annotate_messages(messages, span, scope)
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                        messages_data,
+                        unpack=False,
+                    )
 
     # Record other parameters
     params = {
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
@@ -1,5 +1,6 @@
 import json
 import pytest
+import time
 from unittest import mock
 from datetime import datetime
 
@@ -17,6 +18,7 @@ async def __call__(self, *args, **kwargs):
 except ImportError:
     pytest.skip("litellm not installed", allow_module_level=True)
 
+import sentry_sdk
 from sentry_sdk import start_transaction
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations.litellm import (
@@ -31,6 +33,36 @@ async def __call__(self, *args, **kwargs):
 LITELLM_VERSION = package_version("litellm")
 
 
+@pytest.fixture
+def clear_litellm_cache():
+    """
+    Stop active mock patches and flush litellm's client cache to ensure test isolation.
+
+    The LiteLLM integration uses setup_once() which only runs once per Python process.
+    This fixture does not re-run it; it only clears mocks and cached clients around the test.
+    """
+
+    # Stop all existing mocks
+    mock.patch.stopall()
+
+    # Clear client cache
+    if (
+        hasattr(litellm, "in_memory_llm_clients_cache")
+        and litellm.in_memory_llm_clients_cache
+    ):
+        litellm.in_memory_llm_clients_cache.flush_cache()
+
+    yield
+
+    # Clean up after test as well
+    mock.patch.stopall()
+    if (
+        hasattr(litellm, "in_memory_llm_clients_cache")
+        and litellm.in_memory_llm_clients_cache
+    ):
+        litellm.in_memory_llm_clients_cache.flush_cache()
+
+
 # Mock response objects
 class MockMessage:
     def __init__(self, role="assistant", content="Test response"):
@@ -87,6 +119,21 @@ def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
         )
         self.object = "list"
 
+    def model_dump(self):
+        return {
+            "model": self.model,
+            "data": [
+                {"embedding": d.embedding, "index": d.index, "object": d.object}
+                for d in self.data
+            ],
+            "usage": {
+                "prompt_tokens": self.usage.prompt_tokens,
+                "completion_tokens": self.usage.completion_tokens,
+                "total_tokens": self.usage.total_tokens,
+            },
+            "object": self.object,
+        }
+
 
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
@@ -201,44 +248,145 @@ def test_streaming_chat_completion(
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
 
 
-def test_embeddings_create(sentry_init, capture_events):
+def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
+    """
+    Test that litellm.embedding() calls are properly instrumented.
+
+    This test calls the actual litellm.embedding() function (not just callbacks)
+    to ensure proper integration testing.
+    """
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
         send_default_pii=True,
     )
     events = capture_events()
 
-    messages = [{"role": "user", "content": "Some text to test embeddings"}]
     mock_response = MockEmbeddingResponse()
 
-    with start_transaction(name="litellm test"):
-        kwargs = {
-            "model": "text-embedding-ada-002",
-            "input": "Hello!",
-            "messages": messages,
-            "call_type": "embedding",
-        }
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        assert len(event["spans"]) == 1
+        (span,) = event["spans"]
+
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        assert span["description"] == "embeddings text-embedding-ada-002"
+        assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+        assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+        assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002"
+        # Check that embeddings input is captured (it's JSON serialized)
+        embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+        assert json.loads(embeddings_input) == ["Hello, world!"]
+
+
+def test_embeddings_create_with_list_input(
+    sentry_init, capture_events, clear_litellm_cache
+):
+    """Test embedding with list input."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
 
-        _input_callback(kwargs)
-        _success_callback(
-            kwargs,
-            mock_response,
-            datetime.now(),
-            datetime.now(),
-        )
+    mock_response = MockEmbeddingResponse()
 
-    assert len(events) == 1
-    (event,) = events
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input=["First text", "Second text", "Third text"],
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        assert len(event["spans"]) == 1
+        (span,) = event["spans"]
+
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+        # Check that list of embeddings input is captured (it's JSON serialized)
+        embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+        assert json.loads(embeddings_input) == [
+            "First text",
+            "Second text",
+            "Third text",
+        ]
+
+
+def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
+    """Test that PII is not captured when disabled."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,  # PII disabled
+    )
+    events = capture_events()
 
-    assert event["type"] == "transaction"
-    assert len(event["spans"]) == 1
-    (span,) = event["spans"]
+    mock_response = MockEmbeddingResponse()
+
+    # Mock within the test to ensure proper ordering with cache clearing
+    with mock.patch(
+        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
+    ) as mock_http:
+        # The function returns (headers, response)
+        mock_http.return_value = ({}, mock_response)
+
+        with start_transaction(name="litellm test"):
+            response = litellm.embedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+            )
+            # Allow time for callbacks to complete (they may run in separate threads)
+            time.sleep(0.1)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        assert len(event["spans"]) == 1
+        (span,) = event["spans"]
 
-    assert span["op"] == OP.GEN_AI_EMBEDDINGS
-    assert span["description"] == "embeddings text-embedding-ada-002"
-    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        # Check that embeddings input is NOT captured when PII is disabled
+        assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
 def test_exception_handling(sentry_init, capture_events):