From 36fcaf9df158819d9589b86ef73781286badbeae Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 17:24:05 +0100 Subject: [PATCH 1/3] fix(litellm): fix `gen_ai.request.messages` to be as expected --- sentry_sdk/integrations/litellm.py | 65 +++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 08cb217962..1e3f18ddf6 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -14,7 +14,7 @@ from sentry_sdk.utils import event_from_exception if TYPE_CHECKING: - from typing import Any, Dict + from typing import Any, Dict, List from datetime import datetime try: @@ -35,6 +35,68 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]": return metadata +def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]": + """ + Convert the message parts from OpenAI format to the `gen_ai.request.messages` format. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,...", + "detail": "high" + } + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + """ + + def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": + if item.get("type") == "image_url": + image_url = item.get("image_url") or {} + if image_url.get("url", "").startswith("data:"): + return { + "type": "blob", + "modality": "image", + "mime_type": item["image_url"]["url"].split(";base64,")[0], + "content": item["image_url"]["url"].split(";base64,")[1], + } + else: + return { + "type": "uri", + "uri": item["image_url"]["url"], + } + return item + + for message in messages: + content = message.get("content") + if isinstance(content, list): + message["content"] = [_map_item(item) for item in content] + return messages + + def _input_callback(kwargs: "Dict[str, Any]") -> None: """Handle the start of a request.""" integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration) @@ -101,6 +163,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: messages = kwargs.get("messages", []) if messages: scope = sentry_sdk.get_current_scope() + messages = _convert_message_parts(messages) messages_data = truncate_and_annotate_messages(messages, span, scope) if messages_data is not None: set_data_normalized( From d9d12648f635f00abbcda2b516c1d2acda1fc625 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 17:30:17 +0100 Subject: [PATCH 2/3] tests: add tests for litellm message conversion --- tests/integrations/litellm/test_litellm.py | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 1b925fb61f..4f81c54b63 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import base64 import json import pytest import time @@ -23,6 +24,7 @@ async def __call__(self, *args, **kwargs): from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.litellm import ( LiteLLMIntegration, + _convert_message_parts, _input_callback, _success_callback, _failure_callback, @@ -753,3 +755,162 @@ def test_litellm_message_truncation(sentry_init, capture_events): assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +IMAGE_DATA = b"fake_image_data_12345" +IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8") +IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}" + + +def test_binary_content_encoding_image_url(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Look at this image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI, "detail": "high"}, + }, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + blob_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "blob" + ), + None, + ) + assert blob_item is not None + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "data:image/png" + assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str( + blob_item["content"] + ) + + +def test_binary_content_encoding_mixed_content(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Here is an image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + {"type": "text", "text": "What do you see?"}, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + content_items = [ + item for msg in messages_data if "content" in msg for item in msg["content"] + ] + assert any(item.get("type") == "text" for item in content_items) + assert any(item.get("type") == "blob" for item in content_items) + + +def test_binary_content_encoding_uri_type(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + uri_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "uri" + ), + None, + ) + assert uri_item is not None + assert uri_item["uri"] == "https://example.com/image.jpg" + + +def test_convert_message_parts_direct(): + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + blob_item = next( + item for item in converted[0]["content"] if item.get("type") == "blob" + ) + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "data:image/png" + assert IMAGE_B64 in blob_item["content"] From 4a178068544669cbc09bf5cccd6522311604a6c3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 15:03:17 +0100 Subject: [PATCH 3/3] fix(integrations): ensure _convert_message_parts does not mutate original messages and handle data URLs correctly --- sentry_sdk/integrations/litellm.py | 17 +++-- tests/integrations/litellm/test_litellm.py | 80 +++++++++++++++++++++- 2 files changed, 90 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 1e3f18ddf6..51a0882ea2 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -1,3 +1,4 @@ +import copy from typing import TYPE_CHECKING import sentry_sdk @@ -72,21 +73,27 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, ] } """ + # Deep copy to avoid mutating original messages from kwargs + messages = copy.deepcopy(messages) def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": if item.get("type") == "image_url": image_url = item.get("image_url") or {} - if image_url.get("url", "").startswith("data:"): + url = image_url.get("url", "") + if url.startswith("data:") and ";base64," in url: + parts = url.split(";base64,", 1) + # Remove "data:" prefix (5 chars) to get proper MIME type + mime_type = parts[0][5:] return { "type": "blob", "modality": "image", - "mime_type": item["image_url"]["url"].split(";base64,")[0], - "content": item["image_url"]["url"].split(";base64,")[1], + "mime_type": mime_type, + "content": parts[1], } - else: + elif url: return { "type": "uri", - "uri": item["image_url"]["url"], + "uri": url, } return item diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 4f81c54b63..bac90d02ec 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -805,7 +805,7 @@ def test_binary_content_encoding_image_url(sentry_init, capture_events): ) assert blob_item is not None assert blob_item["modality"] == "image" - assert blob_item["mime_type"] == "data:image/png" + assert blob_item["mime_type"] == "image/png" assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str( blob_item["content"] ) @@ -912,5 +912,81 @@ def test_convert_message_parts_direct(): item for item in converted[0]["content"] if item.get("type") == "blob" ) assert blob_item["modality"] == "image" - assert blob_item["mime_type"] == "data:image/png" + assert blob_item["mime_type"] == "image/png" assert IMAGE_B64 in blob_item["content"] + + +def test_convert_message_parts_does_not_mutate_original(): + """Ensure _convert_message_parts does not mutate the original messages.""" + original_url = IMAGE_DATA_URI + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": original_url}, + }, + ], + } + ] + _convert_message_parts(messages) + # Original should be unchanged + assert messages[0]["content"][0]["type"] == "image_url" + assert messages[0]["content"][0]["image_url"]["url"] == original_url + + +def test_convert_message_parts_data_url_without_base64(): + """Data URLs without ;base64, marker should be treated as regular URIs.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "data:image/png,rawdata"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + uri_item = converted[0]["content"][0] + # Should be converted to uri type, not blob (since no base64 encoding) + assert uri_item["type"] == "uri" + assert uri_item["uri"] == "data:image/png,rawdata" + + +def test_convert_message_parts_image_url_none(): + """image_url being None should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": None, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url" + + +def test_convert_message_parts_image_url_missing_url(): + """image_url missing the url key should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"detail": "high"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url"