diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 78a64ab737..6fdf763ad6 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -40,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES: GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role +def parse_data_uri(url): + # type: (str) -> Tuple[str, str] + """ + Parse a data URI and return (mime_type, content). + + Data URI format (RFC 2397): data:[][;base64], + + Examples: + data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...") + data:text/plain,Hello → ("text/plain", "Hello") + data:;base64,SGVsbG8= → ("", "SGVsbG8=") + + Raises: + ValueError: If the URL is not a valid data URI (missing comma separator) + """ + if "," not in url: + raise ValueError("Invalid data URI: missing comma separator") + + header, content = url.split(",", 1) + + # Extract mime type from header + # Format: "data:[;param1][;param2]..." e.g. "data:image/jpeg;base64" + # Remove "data:" prefix, then take everything before the first semicolon + if header.startswith("data:"): + mime_part = header[5:] # Remove "data:" prefix + else: + mime_part = header + + mime_type = mime_part.split(";")[0] + + return mime_type, content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 53d464c3c4..fc41d79bf8 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -6,6 +6,7 @@ from sentry_sdk.ai.utils import ( set_data_normalized, normalize_message_roles, + parse_data_uri, truncate_and_annotate_messages, ) from sentry_sdk.consts import SPANDATA @@ -18,7 +19,7 @@ safe_serialize, ) -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator @@ -177,6 +178,80 @@ def _calculate_token_usage( ) +def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]": + """ + Convert the message parts from OpenAI format to the `gen_ai.request.messages` format. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,...", + "detail": "high" + } + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." 
+ } + ] + } + """ + + def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": + if item.get("type") == "image_url": + image_url = item.get("image_url") or {} + url = image_url.get("url", "") + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": "image", + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "uri": url, + } + else: + return { + "type": "uri", + "uri": url, + } + return item + + for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") + if isinstance(content, list): + message["content"] = [_map_item(item) for item in content] + return messages + + def _set_input_data( span: "Span", kwargs: "dict[str, Any]", @@ -198,6 +273,8 @@ def _set_input_data( and integration.include_prompts ): normalized_messages = normalize_message_roles(messages) + normalized_messages = _convert_message_parts(normalized_messages) + scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index c3a3a04dc9..1e2d7e758c 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -3,6 +3,7 @@ get_start_span_function, set_data_normalized, normalize_message_roles, + normalize_message_role, truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA @@ -10,7 +11,11 @@ from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN -from ..utils import _set_agent_data, _set_usage_data +from ..utils import ( + _set_agent_data, + _set_usage_data, + _transform_openai_agents_message_content, +) from typing import TYPE_CHECKING @@ -49,17 +54,40 @@ def invoke_agent_span( original_input = kwargs.get("original_input") if original_input is not None: - message = ( - original_input - if isinstance(original_input, str) - else safe_serialize(original_input) - ) - messages.append( - { - "content": [{"text": message, "type": "text"}], - "role": "user", - } - ) + if isinstance(original_input, str): + # String input: wrap in text block + messages.append( + { + "content": [{"text": original_input, "type": "text"}], + "role": "user", + } + ) + elif isinstance(original_input, list) and len(original_input) > 0: + # Check if list contains message objects (with type="message") + # or content parts (input_text, input_image, etc.) 
+ first_item = original_input[0] + if isinstance(first_item, dict) and first_item.get("type") == "message": + # List of message objects - process each individually + for msg in original_input: + if isinstance(msg, dict) and msg.get("type") == "message": + role = normalize_message_role(msg.get("role", "user")) + content = msg.get("content") + transformed = _transform_openai_agents_message_content( + content + ) + if isinstance(transformed, str): + transformed = [{"text": transformed, "type": "text"}] + elif not isinstance(transformed, list): + transformed = [ + {"text": str(transformed), "type": "text"} + ] + messages.append({"content": transformed, "role": role}) + else: + # List of content parts - transform and wrap as user message + content = _transform_openai_agents_message_content(original_input) + if not isinstance(content, list): + content = [{"text": str(content), "type": "text"}] + messages.append({"content": content, "role": "user"}) if len(messages) > 0: normalized_messages = normalize_message_roles(messages) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index a24d0e909d..a95fac422a 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -2,6 +2,7 @@ from sentry_sdk.ai.utils import ( GEN_AI_ALLOWED_MESSAGE_ROLES, normalize_message_roles, + parse_data_uri, set_data_normalized, normalize_message_role, truncate_and_annotate_messages, @@ -27,6 +28,124 @@ raise DidNotEnable("OpenAI Agents not installed") +def _transform_openai_agents_content_part( + content_part: "dict[str, Any]", +) -> "dict[str, Any]": + """ + Transform an OpenAI Agents content part to Sentry-compatible format. + + Handles multimodal content (images, audio, files) by converting them + to the standardized format: + - base64 encoded data -> type: "blob" + - URL references -> type: "uri" + - file_id references -> type: "file" + """ + if not isinstance(content_part, dict): + return content_part + + part_type = content_part.get("type") + + # Handle input_text (OpenAI Agents SDK text format) -> normalize to standard text format + if part_type == "input_text": + return { + "type": "text", + "text": content_part.get("text", ""), + } + + # Handle image_url (OpenAI vision format) and input_image (OpenAI Agents SDK format) + if part_type in ("image_url", "input_image"): + # Get URL from either format + if part_type == "image_url": + image_url = content_part.get("image_url", {}) + url = ( + image_url.get("url", "") + if isinstance(image_url, dict) + else str(image_url) + ) + else: + # input_image format has image_url directly + url = content_part.get("image_url", "") + + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": "image", + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + # Handle input_audio (OpenAI audio input format) + if part_type == "input_audio": + input_audio = content_part.get("input_audio", {}) + audio_format = input_audio.get("format", "") + mime_type = f"audio/{audio_format}" if audio_format else "" + return { + "type": "blob", + "modality": "audio", + "mime_type": mime_type, + "content": input_audio.get("data", ""), + } + + # Handle image_file (Assistants API file-based images) 
+ if part_type == "image_file": + image_file = content_part.get("image_file", {}) + return { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": image_file.get("file_id", ""), + } + + # Handle file (document attachments) + if part_type == "file": + file_data = content_part.get("file", {}) + return { + "type": "file", + "modality": "document", + "mime_type": "", + "file_id": file_data.get("file_id", ""), + } + + return content_part + + +def _transform_openai_agents_message_content(content: "Any") -> "Any": + """ + Transform OpenAI Agents message content, handling both string content and + list of content parts. + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + transformed.append(_transform_openai_agents_content_part(item)) + else: + transformed.append(item) + return transformed + + return content + + def _capture_exception(exc: "Any") -> None: set_span_errored() @@ -128,13 +247,15 @@ def _set_input_data( if "role" in message: normalized_role = normalize_message_role(message.get("role")) content = message.get("content") + # Transform content to handle multimodal data (images, audio, files) + transformed_content = _transform_openai_agents_message_content(content) request_messages.append( { "role": normalized_role, "content": ( - [{"type": "text", "text": content}] - if isinstance(content, str) - else content + [{"type": "text", "text": transformed_content}] + if isinstance(transformed_content, str) + else transformed_content ), } ) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..3f971afaee 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -43,6 +43,7 @@ from sentry_sdk.integrations.openai import ( OpenAIIntegration, _calculate_token_usage, + _convert_message_parts, ) from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import AnnotatedValue @@ -1509,6 +1510,105 @@ def test_openai_message_role_mapping(sentry_init, capture_events): assert "ai" not in roles +def test_convert_message_parts_image_url_to_blob(): + """Test that OpenAI image_url message parts are correctly converted to blob format""" + messages = [ + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + "detail": "high", + }, + }, + ], + } + ] + + converted = _convert_message_parts(messages) + + assert len(converted) == 1 + assert converted[0]["role"] == "user" + assert isinstance(converted[0]["content"], list) + assert len(converted[0]["content"]) == 2 + + # First item (text) should remain unchanged + assert converted[0]["content"][0] == { + "text": "How many ponies do you see in the image?", + "type": "text", + } + + # Second item (image_url) should be converted to blob format + blob_item = converted[0]["content"][1] + assert blob_item["type"] == "blob" + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/jpeg" + assert blob_item["content"] == "/9j/4AAQSkZJRg==" + # Verify the original image_url structure is replaced + assert "image_url" not in blob_item + + +def test_convert_message_parts_image_url_to_uri(): + """Test that OpenAI image_url with non-data URLs are converted to uri format""" + messages = [ + { + "role": "user", + "content": [ + { + "type": 
"image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + }, + ], + } + ] + + converted = _convert_message_parts(messages) + + assert len(converted) == 1 + uri_item = converted[0]["content"][0] + assert uri_item["type"] == "uri" + assert uri_item["uri"] == "https://example.com/image.jpg" + # Verify the original image_url structure is replaced + assert "image_url" not in uri_item + + +def test_convert_message_parts_malformed_data_uri(): + """Test that malformed data URIs are handled gracefully without crashing""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + # Malformed: missing ;base64, and comma separator + "url": "data:image/jpeg", + }, + }, + ], + } + ] + + # Should not raise an exception + converted = _convert_message_parts(messages) + + assert len(converted) == 1 + # Malformed data URI should fall back to uri type + item = converted[0]["content"][0] + assert item["type"] == "uri" + assert item["uri"] == "data:image/jpeg" + assert item["modality"] == "image" + + def test_openai_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index c5cb25dfee..9eede6c54b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -9,7 +9,12 @@ from sentry_sdk import start_span from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration -from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize +from sentry_sdk.integrations.openai_agents.utils import ( + _set_input_data, + safe_serialize, + _transform_openai_agents_content_part, + _transform_openai_agents_message_content, +) from sentry_sdk.utils import parse_version import agents @@ -1998,3 +2003,90 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 2 assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) + + +def test_transform_image_url_to_blob(): + """Test that OpenAI image_url with data URI is converted to blob format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD", + "detail": "high", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRgABAQAAAQABAAD", + } + + +def test_transform_image_url_to_uri(): + """Test that OpenAI image_url with HTTP URL is converted to uri format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + +def test_transform_message_content_with_image(): + """Test that message content with image is properly transformed.""" + content = [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + }, + }, + ] + result = _transform_openai_agents_message_content(content) + assert len(result) == 2 + 
assert result[0] == {"type": "text", "text": "What is in this image?"} + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_image_to_blob(): + """Test that OpenAI Agents SDK input_image format is converted to blob format.""" + # OpenAI Agents SDK uses input_image type with image_url as a direct string + content_part = { + "type": "input_image", + "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_text_to_text(): + """Test that OpenAI Agents SDK input_text format is normalized to text format.""" + content_part = { + "type": "input_text", + "text": "Hello, world!", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "text", + "text": "Hello, world!", + } diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index e9f3712cd3..9e86aac5d4 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -13,6 +13,7 @@ truncate_and_annotate_messages, truncate_messages_by_size, _find_truncation_index, + parse_data_uri, redact_blob_message_parts, ) from sentry_sdk.serializer import serialize @@ -646,3 +647,69 @@ def test_redacts_blobs_in_multiple_messages(self): assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE assert messages[1]["content"] == "I see the image." # Unchanged assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + + +class TestParseDataUri: + """Tests for the parse_data_uri utility function.""" + + def test_standard_base64_image(self): + """Test parsing a standard base64 encoded image data URI.""" + url = "data:image/jpeg;base64,/9j/4AAQSkZJRg==" + mime_type, content = parse_data_uri(url) + assert mime_type == "image/jpeg" + assert content == "/9j/4AAQSkZJRg==" + + def test_png_image(self): + """Test parsing a PNG image data URI.""" + url = "data:image/png;base64,iVBORw0KGgo=" + mime_type, content = parse_data_uri(url) + assert mime_type == "image/png" + assert content == "iVBORw0KGgo=" + + def test_plain_text_without_base64(self): + """Test parsing a plain text data URI without base64 encoding.""" + url = "data:text/plain,Hello%20World" + mime_type, content = parse_data_uri(url) + assert mime_type == "text/plain" + assert content == "Hello%20World" + + def test_no_mime_type_with_base64(self): + """Test parsing a data URI with no mime type but base64 encoding.""" + url = "data:;base64,SGVsbG8=" + mime_type, content = parse_data_uri(url) + assert mime_type == "" + assert content == "SGVsbG8=" + + def test_no_mime_type_no_base64(self): + """Test parsing a minimal data URI.""" + url = "data:,Hello" + mime_type, content = parse_data_uri(url) + assert mime_type == "" + assert content == "Hello" + + def test_content_with_commas(self): + """Test that content with commas is handled correctly.""" + url = "data:text/csv,a,b,c,d" + mime_type, content = parse_data_uri(url) + assert mime_type == "text/csv" + assert content == "a,b,c,d" + + def test_missing_comma_raises_value_error(self): + """Test that a data URI without a comma raises ValueError.""" + url = "data:image/jpeg" + with pytest.raises(ValueError, match="missing comma separator"): + parse_data_uri(url) + + def test_empty_content(self): + """Test parsing a data URI 
with empty content."""
+        url = "data:text/plain,"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/plain"
+        assert content == ""
+
+    def test_mime_type_with_charset(self):
+        """Test parsing a data URI with charset parameter."""
+        url = "data:text/html;charset=utf-8,<p>Hello</p>"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/html"
+        assert content == "<p>Hello</p>"