diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index a4ebe96d99..51a75b1706 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -15,7 +15,6 @@ import sentry_sdk from sentry_sdk.utils import logger -MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB # Maximum characters when only a single message is left after bytes truncation MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000 @@ -550,22 +549,6 @@ def _truncate_single_message_content_if_present( return message -def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int: - """ - Find the index of the first message that would exceed the max bytes limit. - Compute the individual message sizes, and return the index of the first message from the back - of the list that would exceed the max bytes limit. - """ - running_sum = 0 - for idx in range(len(messages) - 1, -1, -1): - size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8")) - running_sum += size - if running_sum > max_bytes: - return idx + 1 - - return 0 - - def redact_blob_message_parts( messages: "List[Dict[str, Any]]", ) -> "List[Dict[str, Any]]": @@ -645,55 +628,21 @@ def redact_blob_message_parts( return messages_copy -def truncate_messages_by_size( - messages: "List[Dict[str, Any]]", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, - max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, -) -> "Tuple[List[Dict[str, Any]], int]": - """ - Returns a truncated messages list, consisting of - - the last message, with its content truncated to `max_single_message_chars` characters, - if the last message's size exceeds `max_bytes` bytes; otherwise, - - the maximum number of messages, starting from the end of the `messages` list, whose total - serialized size does not exceed `max_bytes` bytes. - - In the single message case, the serialized message size may exceed `max_bytes`, because - truncation is based only on character count in that case. - """ - serialized_json = json.dumps(messages, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size <= max_bytes: - return messages, 0 - - truncation_index = _find_truncation_index(messages, max_bytes) - if truncation_index < len(messages): - truncated_messages = messages[truncation_index:] - else: - truncation_index = len(messages) - 1 - truncated_messages = messages[-1:] - - if len(truncated_messages) == 1: - truncated_messages[0] = _truncate_single_message_content_if_present( - deepcopy(truncated_messages[0]), max_chars=max_single_message_chars - ) - - return truncated_messages, truncation_index - - def truncate_and_annotate_messages( messages: "Optional[List[Dict[str, Any]]]", span: "Any", scope: "Any", - max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, + max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": if not messages: return None messages = redact_blob_message_parts(messages) - truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) - if removed_count > 0: + truncated_message = _truncate_single_message_content_if_present( + deepcopy(messages[-1]), max_chars=max_single_message_chars + ) + if len(messages) > 1: scope._gen_ai_original_message_count[span.span_id] = len(messages) - return truncated_messages + return [truncated_message] diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e8bc4648b6..e197475d5b 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -899,7 +899,25 @@ def test_set_output_data_with_input_json_delta(sentry_init): assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30 -def test_anthropic_message_role_mapping(sentry_init, capture_events): +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_anthropic_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], @@ -924,13 +942,7 @@ def mock_messages_create(*args, **kwargs): client.messages._post = mock.Mock(return_value=mock_messages_create()) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" - test_messages = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] + test_messages = [test_message] with start_transaction(name="anthropic tx"): client.messages.create( @@ -948,22 +960,7 @@ def mock_messages_create(*args, **kwargs): # Parse the stored messages stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" - - # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" - - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages] - assert "ai" not in roles + assert stored_messages[0]["role"] == expected_role def test_anthropic_message_truncation(sentry_init, capture_events): @@ -1010,9 +1007,8 @@ def test_anthropic_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1076,13 +1072,9 @@ def test_nonstreaming_create_message_with_system_prompt( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -1155,13 +1147,9 @@ async def test_nonstreaming_create_message_with_system_prompt_async( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -1266,13 +1254,9 @@ def test_streaming_create_message_with_system_prompt( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: @@ -1381,13 +1365,9 @@ async def test_streaming_create_message_with_system_prompt_async( if send_default_pii and include_prompts: assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: @@ -1400,60 +1380,6 @@ async def test_streaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): - """Test that complex system prompt structures (list of text blocks) are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - # System prompt as list of text blocks - system_prompt = [ - {"type": "text", "text": "You are a helpful assistant."}, - {"type": "text", "text": "Be concise and clear."}, - ] - - messages = [ - { - "role": "user", - "content": "Hello", - } - ] - - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model", system=system_prompt - ) - - assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - - assert len(event["spans"]) == 1 - (span,) = event["spans"] - - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - # Should have system message first, then user message - assert len(stored_messages) == 2 - assert stored_messages[0]["role"] == "system" - # System content should be a list of text blocks - assert isinstance(stored_messages[0]["content"], list) - assert len(stored_messages[0]["content"]) == 2 - assert stored_messages[0]["content"][0]["type"] == "text" - assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." - assert stored_messages[0]["content"][1]["type"] == "text" - assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello" - - # Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ad89b878ea..ba296871d5 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -234,9 +234,8 @@ def test_generate_content_with_system_instruction( messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] # Parse the JSON string to verify content messages = json.loads(messages_str) - assert len(messages) == 2 - assert messages[0] == {"role": "system", "content": "You are a helpful assistant"} - assert messages[1] == {"role": "user", "content": "What is 2+2?"} + assert len(messages) == 1 + assert messages[0] == {"role": "user", "content": "What is 2+2?"} def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): @@ -1459,60 +1458,6 @@ def test_generate_content_with_content_object( ] -def test_generate_content_with_conversation_history( - sentry_init, capture_events, mock_genai_client -): - """Test generate_content with list of Content objects (conversation history).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Create conversation history - contents = [ - genai_types.Content( - role="user", parts=[genai_types.Part(text="What is the capital of France?")] - ), - genai_types.Content( - role="model", - parts=[genai_types.Part(text="The capital of France is Paris.")], - ), - genai_types.Content( - role="user", parts=[genai_types.Part(text="What about Germany?")] - ), - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "What is the capital of France?", "type": "text"} - ] - assert ( - messages[1]["role"] == "assistant" - ) # "model" should be normalized to "assistant" - assert messages[1]["content"] == [ - {"text": "The capital of France is Paris.", "type": "text"} - ] - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "What about Germany?", "type": "text"}] - - def test_generate_content_with_dict_format( sentry_init, capture_events, mock_genai_client ): @@ -1682,17 +1627,12 @@ def test_generate_content_with_function_response( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 2 + assert len(messages) == 1 # First message is user message - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "What's the weather in Paris?", "type": "text"} - ] - # Second message is tool message - assert messages[1]["role"] == "tool" - assert messages[1]["content"]["toolCallId"] == "call_123" - assert messages[1]["content"]["toolName"] == "get_weather" - assert messages[1]["content"]["output"] == '"Sunny, 72F"' + assert messages[0]["role"] == "tool" + assert messages[0]["content"]["toolCallId"] == "call_123" + assert messages[0]["content"]["toolName"] == "get_weather" + assert messages[0]["content"]["output"] == '"Sunny, 72F"' def test_generate_content_with_mixed_string_and_content( @@ -1733,18 +1673,10 @@ def test_generate_content_with_mixed_string_and_content( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 - # String becomes user message - assert messages[0]["role"] == "user" - assert messages[0]["content"] == "Hello, this is a string message" - # Model role normalized to assistant - assert messages[1]["role"] == "assistant" - assert messages[1]["content"] == [ - {"text": "Hi! How can I help you?", "type": "text"} - ] + assert len(messages) == 1 # User message - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "Tell me a joke", "type": "text"}] + assert messages[0]["role"] == "user" + assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}] def test_generate_content_with_part_object_directly( @@ -1812,13 +1744,9 @@ def test_generate_content_with_list_of_dicts( invoke_span = event["spans"][0] messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 3 + assert len(messages) == 1 assert messages[0]["role"] == "user" - assert messages[0]["content"] == [{"text": "First user message", "type": "text"}] - assert messages[1]["role"] == "assistant" - assert messages[1]["content"] == [{"text": "First model response", "type": "text"}] - assert messages[2]["role"] == "user" - assert messages[2]["content"] == [{"text": "Second user message", "type": "text"}] + assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 6f5f9f14a1..e03c4db940 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -218,15 +218,14 @@ def test_langchain_agent( if send_default_pii and include_prompts: assert ( - "You are very powerful" + "How many letters in the word" in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) assert ( - "You are very powerful" - in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + "get_word_length" in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1035,9 +1034,8 @@ def test_langchain_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 99ab216957..2a385d8a78 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -270,9 +270,8 @@ def original_invoke(self, *args, **kwargs): import json request_messages = json.loads(request_messages) - assert len(request_messages) == 2 - assert request_messages[0]["content"] == "Hello, can you help me?" - assert request_messages[1]["content"] == "Of course! How can I assist you?" + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response @@ -1383,7 +1382,6 @@ def original_invoke(self, *args, **kwargs): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8849ab0372..0c2b349cff 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -344,11 +344,7 @@ def test_embeddings_create_with_list_input( assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert json.loads(embeddings_input) == ["Third text"] def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): @@ -752,9 +748,8 @@ def test_litellm_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..f6ffb24f80 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -44,7 +44,6 @@ OpenAIIntegration, _calculate_token_usage, ) -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize @@ -1458,7 +1457,25 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): assert "gen_ai.request.available_tools" not in span["data"] -def test_openai_message_role_mapping(sentry_init, capture_events): +# Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_openai_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -1470,13 +1487,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" - test_messages = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] + + test_messages = [test_message] with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) @@ -1491,22 +1503,8 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" - - # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" - - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages] - assert "ai" not in roles + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == expected_role def test_openai_message_truncation(sentry_init, capture_events): @@ -1548,14 +1546,7 @@ def test_openai_message_truncation(sentry_init, capture_events): assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) - if "_meta" in event and len(parsed_messages) < len(large_messages): - meta_path = event["_meta"] - if ( - "spans" in meta_path - and "0" in meta_path["spans"] - and "data" in meta_path["spans"]["0"] - ): - span_meta = meta_path["spans"]["0"]["data"] - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: - messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "len" in messages_meta.get("", {}) + meta_path = event["_meta"] + span_meta = meta_path["spans"]["0"]["data"] + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9d463f8de5..da4af4ebdd 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -189,12 +189,6 @@ async def test_agent_invocation_span( assert invoke_agent_span["description"] == "invoke_agent test_agent" assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "content": [ - {"text": "You are a helpful test assistant.", "type": "text"} - ], - "role": "system", - }, {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, ] ) @@ -617,12 +611,6 @@ def simple_test_tool(message: str) -> str: assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, { "role": "user", "content": [ @@ -688,30 +676,6 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - { - "role": "assistant", - "content": [ - { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - } - ], - }, { "role": "tool", "content": [ @@ -982,12 +946,6 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent assert "gen_ai.request.messages" in ai_client_span["data"] request_messages = safe_serialize( [ - { - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful test assistant."} - ], - }, {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) @@ -1366,7 +1324,25 @@ async def run(): assert txn3["transaction"] == "test_agent workflow" -def test_openai_agents_message_role_mapping(sentry_init, capture_events): +# Test input messages with mixed roles including "ai" +@pytest.mark.parametrize( + "test_message,expected_role", + [ + ({"role": "system", "content": "You are helpful."}, "system"), + ({"role": "user", "content": "Hello"}, "user"), + ( + {"role": "ai", "content": "Hi there!"}, + "assistant", + ), # Should be mapped to "assistant" + ( + {"role": "assistant", "content": "How can I help?"}, + "assistant", + ), # Should stay "assistant" + ], +) +def test_openai_agents_message_role_mapping( + sentry_init, capture_events, test_message, expected_role +): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[OpenAIAgentsIntegration()], @@ -1374,15 +1350,7 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): send_default_pii=True, ) - # Test input messages with mixed roles including "ai" - test_input = [ - {"role": "system", "content": "You are helpful."}, - {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" - ] - - get_response_kwargs = {"input": test_input} + get_response_kwargs = {"input": [test_message]} from sentry_sdk.integrations.openai_agents.utils import _set_input_data from sentry_sdk import start_span @@ -1393,23 +1361,10 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): # Verify that messages were processed and roles were mapped from sentry_sdk.consts import SPANDATA - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data: - import json - - stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Verify roles were properly mapped - found_assistant_roles = 0 - for message in stored_messages: - if message["role"] == "assistant": - found_assistant_roles += 1 - - # Should have 2 assistant roles (1 from original "assistant", 1 from mapped "ai") - assert found_assistant_roles == 2 - - # Verify no "ai" roles remain in any message - for message in stored_messages: - assert message["role"] != "ai" + # Verify roles were properly mapped + assert stored_messages[0]["role"] == expected_role @pytest.mark.asyncio @@ -2108,11 +2063,10 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): with start_span(op="gen_ai.chat") as span: scope = sentry_sdk.get_current_scope() _set_input_data(span, get_response_kwargs) - if hasattr(scope, "_gen_ai_original_message_count"): - truncated_count = scope._gen_ai_original_message_count.get(span.span_id) - assert truncated_count == 5, ( - f"Expected 5 original messages, got {truncated_count}" - ) + truncated_count = scope._gen_ai_original_message_count.get(span.span_id) + assert truncated_count == 5, ( + f"Expected 5 original messages, got {truncated_count}" + ) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data messages_data = span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -2120,6 +2074,5 @@ def test_openai_agents_message_truncation(sentry_init, capture_events): parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 2 - assert "small message 4" in str(parsed_messages[0]) - assert "small message 5" in str(parsed_messages[1]) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 575eae35cc..86e524dc7b 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -513,43 +513,6 @@ async def test_model_settings(sentry_init, capture_events, test_agent_with_setti assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 -@pytest.mark.asyncio -async def test_system_prompt_in_messages(sentry_init, capture_events): - """ - Test that system prompts are included as the first message. - """ - agent = Agent( - "test", - name="test_system", - system_prompt="You are a helpful assistant specialized in testing.", - ) - - sentry_init( - integrations=[PydanticAIIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - events = capture_events() - - await agent.run("Hello") - - (transaction,) = events - spans = transaction["spans"] - - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 - - chat_span = chat_spans[0] - messages_str = chat_span["data"]["gen_ai.request.messages"] - - # Messages is serialized as a string - # Should contain system role and helpful assistant text - assert "system" in messages_str - assert "helpful assistant" in messages_str - - @pytest.mark.asyncio async def test_error_handling(sentry_init, capture_events): """ @@ -1183,44 +1146,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): assert "Second part" in messages_str -@pytest.mark.asyncio -async def test_invoke_agent_with_instructions(sentry_init, capture_events): - """ - Test that invoke_agent span handles instructions correctly. - """ - from pydantic_ai import Agent - - # Create agent with instructions (can be string or list) - agent = Agent( - "test", - name="test_instructions", - ) - - # Add instructions via _instructions attribute (internal API) - agent._instructions = ["Instruction 1", "Instruction 2"] - agent._system_prompts = ["System prompt"] - - sentry_init( - integrations=[PydanticAIIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - events = capture_events() - - await agent.run("Test input") - - (transaction,) = events - - # Check that the invoke_agent transaction has messages data - if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: - messages_str = transaction["contexts"]["trace"]["data"][ - "gen_ai.request.messages" - ] - # Should contain both instructions and system prompts - assert "Instruction" in messages_str or "System prompt" in messages_str - - @pytest.mark.asyncio async def test_model_name_extraction_with_callable(sentry_init, capture_events): """ diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index f6852d54bb..767d79b747 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -11,12 +11,9 @@ ) from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( - MAX_GEN_AI_MESSAGE_BYTES, MAX_SINGLE_MESSAGE_CONTENT_CHARS, set_data_normalized, truncate_and_annotate_messages, - truncate_messages_by_size, - _find_truncation_index, parse_data_uri, redact_blob_message_parts, get_modality_from_mime_type, @@ -222,127 +219,7 @@ def large_messages(): ] -class TestTruncateMessagesBySize: - def test_no_truncation_needed(self, sample_messages): - """Test that messages under the limit are not truncated""" - result, truncation_index = truncate_messages_by_size( - sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES - ) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert truncation_index == 0 - - def test_truncation_removes_oldest_first(self, large_messages): - """Test that oldest messages are removed first during truncation""" - small_limit = 3000 - result, truncation_index = truncate_messages_by_size( - large_messages, max_bytes=small_limit - ) - assert len(result) < len(large_messages) - - assert result[-1] == large_messages[-1] - assert truncation_index == len(large_messages) - len(result) - - def test_empty_messages_list(self): - """Test handling of empty messages list""" - result, truncation_index = truncate_messages_by_size( - [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 - ) - assert result == [] - assert truncation_index == 0 - - def test_find_truncation_index( - self, - ): - """Test that the truncation index is found correctly""" - # when represented in JSON, these are each 7 bytes long - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 20) - assert truncation_index == 3 - assert messages[truncation_index:] == ["D" * 5, "E" * 5] - - messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] - truncation_index = _find_truncation_index(messages, 40) - assert truncation_index == 0 - assert messages[truncation_index:] == [ - "A" * 5, - "B" * 5, - "C" * 5, - "D" * 5, - "E" * 5, - ] - - def test_progressive_truncation(self, large_messages): - """Test that truncation works progressively with different limits""" - limits = [ - MAX_GEN_AI_MESSAGE_BYTES // 5, - MAX_GEN_AI_MESSAGE_BYTES // 10, - MAX_GEN_AI_MESSAGE_BYTES // 25, - MAX_GEN_AI_MESSAGE_BYTES // 100, - MAX_GEN_AI_MESSAGE_BYTES // 500, - ] - prev_count = len(large_messages) - - for limit in limits: - result = truncate_messages_by_size(large_messages, max_bytes=limit) - current_count = len(result) - - assert current_count <= prev_count - assert current_count >= 1 - prev_count = current_count - - def test_single_message_truncation(self): - large_content = "This is a very long message. " * 10_000 - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": large_content}, - ] - - result, truncation_index = truncate_messages_by_size( - messages, max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - assert len(result) == 1 - assert ( - len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS - ) - - # If the last message is too large, the system message is not present - system_msgs = [m for m in result if m.get("role") == "system"] - assert len(system_msgs) == 0 - - # Confirm the user message is truncated with '...' - user_msgs = [m for m in result if m.get("role") == "user"] - assert len(user_msgs) == 1 - assert user_msgs[0]["content"].endswith("...") - assert len(user_msgs[0]["content"]) < len(large_content) - - class TestTruncateAndAnnotateMessages: - def test_no_truncation_returns_list(self, sample_messages): - class MockSpan: - def __init__(self): - self.span_id = "test_span_id" - self.data = {} - - def set_data(self, key, value): - self.data[key] = value - - class MockScope: - def __init__(self): - self._gen_ai_original_message_count = {} - - span = MockSpan() - scope = MockScope() - result = truncate_and_annotate_messages(sample_messages, span, scope) - - assert isinstance(result, list) - assert not isinstance(result, AnnotatedValue) - assert len(result) == len(sample_messages) - assert result == sample_messages - assert span.span_id not in scope._gen_ai_original_message_count - def test_truncation_sets_metadata_on_scope(self, large_messages): class MockSpan: def __init__(self): @@ -361,7 +238,7 @@ def __init__(self): scope = MockScope() original_count = len(large_messages) result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -388,7 +265,7 @@ def __init__(self): scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert scope._gen_ai_original_message_count[span.span_id] == original_count @@ -415,6 +292,47 @@ def __init__(self): result = truncate_and_annotate_messages(None, span, scope) assert result is None + def test_single_message_truncation(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_original_message_count = {} + + large_content = "This is a very long message. " * 10_000 + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + ] + + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages( + messages, + span, + scope, + max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS, + ) + assert result is not None + + assert len(result) == 1 + assert ( + len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + # Confirm the user message is truncated with '...' + user_msgs = [m for m in result if m.get("role") == "user"] + assert len(user_msgs) == 1 + assert user_msgs[0]["content"].endswith("...") + assert len(user_msgs[0]["content"]) < len(large_content) + def test_truncated_messages_newest_first(self, large_messages): class MockSpan: def __init__(self): @@ -432,7 +350,7 @@ def __init__(self): span = MockSpan() scope = MockScope() result = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit + large_messages, span, scope, max_single_message_chars=small_limit ) assert isinstance(result, list) @@ -500,15 +418,12 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - small_limit = 3000 span = MockSpan() scope = MockScope() original_count = len(large_messages) # Simulate what integrations do - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit - ) + truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) # Verify metadata was set on scope @@ -557,14 +472,11 @@ class MockScope: def __init__(self): self._gen_ai_original_message_count = {} - small_limit = 3000 span = MockSpan() scope = MockScope() original_message_count = len(large_messages) - truncated_messages = truncate_and_annotate_messages( - large_messages, span, scope, max_bytes=small_limit - ) + truncated_messages = truncate_and_annotate_messages(large_messages, span, scope) assert len(truncated_messages) < original_message_count