Skip to content
Draft
63 changes: 6 additions & 57 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import sentry_sdk
from sentry_sdk.utils import logger

MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB
# Maximum characters when only a single message is left after bytes truncation
MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000

Expand Down Expand Up @@ -550,22 +549,6 @@ def _truncate_single_message_content_if_present(
return message


def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int:
"""
Find the index of the first message that would exceed the max bytes limit.
Compute the individual message sizes, and return the index of the first message from the back
of the list that would exceed the max bytes limit.
"""
running_sum = 0
for idx in range(len(messages) - 1, -1, -1):
size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8"))
running_sum += size
if running_sum > max_bytes:
return idx + 1

return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
Expand Down Expand Up @@ -645,55 +628,21 @@ def redact_blob_message_parts(
return messages_copy


def truncate_messages_by_size(
    messages: "List[Dict[str, Any]]",
    max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
    max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
) -> "Tuple[List[Dict[str, Any]], int]":
    """Truncate ``messages`` so their serialized size stays near ``max_bytes``.

    Keeps as many messages as possible, counting from the end of the list.
    When even the final message alone exceeds ``max_bytes``, that single
    message is kept with its content capped at ``max_single_message_chars``
    characters — in that case the serialized result may still exceed
    ``max_bytes``, since the cap is character-based.

    Returns a tuple of (possibly truncated message list, number of messages
    removed from the front).
    """
    compact = json.dumps(messages, separators=(",", ":"))
    if len(compact.encode("utf-8")) <= max_bytes:
        # Everything fits; nothing was removed.
        return messages, 0

    cut_at = _find_truncation_index(messages, max_bytes)
    if cut_at >= len(messages):
        # Not even the last message fits on its own — keep just that one.
        cut_at = len(messages) - 1
    kept = messages[cut_at:]

    if len(kept) == 1:
        # Single survivor: cap its content length (deepcopy so the caller's
        # original message object is never mutated).
        kept[0] = _truncate_single_message_content_if_present(
            deepcopy(kept[0]), max_chars=max_single_message_chars
        )

    return kept, cut_at


def truncate_and_annotate_messages(
    messages: "Optional[List[Dict[str, Any]]]",
    span: "Any",
    scope: "Any",
    max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
    max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
) -> "Optional[List[Dict[str, Any]]]":
    """Redact, size-truncate, and annotate a gen-AI message list for a span.

    Blob message parts are redacted first, then the list is truncated by
    serialized size so the newest messages are kept. When truncation removed
    messages, the original message count is recorded on ``scope`` keyed by
    the span id so the original length can be annotated later.

    Returns the processed message list, or None when ``messages`` is empty.
    """
    if not messages:
        return None

    messages = redact_blob_message_parts(messages)

    # Keep as many of the newest messages as fit within max_bytes instead of
    # unconditionally returning only the last entry: inputs with several items
    # (e.g. embeddings of multiple texts) would otherwise silently lose all
    # but the final one.
    truncated_messages, removed_count = truncate_messages_by_size(
        messages, max_bytes, max_single_message_chars
    )
    if removed_count > 0:
        # Remember how many messages there were originally so the annotation
        # ("len") can be emitted for this span.
        scope._gen_ai_original_message_count[span.span_id] = len(messages)

    return truncated_messages
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Embeddings input loses all but the last text

Medium Severity

The truncate_and_annotate_messages function is now used for embeddings input in LiteLLM and OpenAI integrations. With the new behavior that always returns only the last item, embeddings with multiple texts (e.g., ["First text", "Second text", "Third text"]) will only capture the last text (["Third text"]). This is data loss — when embedding multiple texts, users expect to see all texts that were embedded, not just the last one. The PR title mentions "message", but embeddings are fundamentally different from chat messages.

Fix in Cursor Fix in Web

Copy link
Contributor Author

@alexander-alderman-webb alexander-alderman-webb Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will need to address this

144 changes: 35 additions & 109 deletions tests/integrations/anthropic/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,25 @@ def test_set_output_data_with_input_json_delta(sentry_init):
assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30


def test_anthropic_message_role_mapping(sentry_init, capture_events):
# Test messages with mixed roles including "ai" that should be mapped to "assistant"
@pytest.mark.parametrize(
"test_message,expected_role",
[
({"role": "system", "content": "You are helpful."}, "system"),
({"role": "user", "content": "Hello"}, "user"),
(
{"role": "ai", "content": "Hi there!"},
"assistant",
), # Should be mapped to "assistant"
(
{"role": "assistant", "content": "How can I help?"},
"assistant",
), # Should stay "assistant"
],
)
def test_anthropic_message_role_mapping(
sentry_init, capture_events, test_message, expected_role
):
"""Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'"""
sentry_init(
integrations=[AnthropicIntegration(include_prompts=True)],
Expand All @@ -924,13 +942,7 @@ def mock_messages_create(*args, **kwargs):

client.messages._post = mock.Mock(return_value=mock_messages_create())

# Test messages with mixed roles including "ai" that should be mapped to "assistant"
test_messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello"},
{"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant"
{"role": "assistant", "content": "How can I help?"}, # Should stay "assistant"
]
test_messages = [test_message]

with start_transaction(name="anthropic tx"):
client.messages.create(
Expand All @@ -948,22 +960,7 @@ def mock_messages_create(*args, **kwargs):
# Parse the stored messages
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

# Verify that "ai" role was mapped to "assistant"
assert len(stored_messages) == 4
assert stored_messages[0]["role"] == "system"
assert stored_messages[1]["role"] == "user"
assert (
stored_messages[2]["role"] == "assistant"
) # "ai" should be mapped to "assistant"
assert stored_messages[3]["role"] == "assistant" # should stay "assistant"

# Verify content is preserved
assert stored_messages[2]["content"] == "Hi there!"
assert stored_messages[3]["content"] == "How can I help?"

# Verify no "ai" roles remain
roles = [msg["role"] for msg in stored_messages]
assert "ai" not in roles
assert stored_messages[0]["role"] == expected_role


def test_anthropic_message_truncation(sentry_init, capture_events):
Expand Down Expand Up @@ -1010,9 +1007,8 @@ def test_anthropic_message_truncation(sentry_init, capture_events):

parsed_messages = json.loads(messages_data)
assert isinstance(parsed_messages, list)
assert len(parsed_messages) == 2
assert "small message 4" in str(parsed_messages[0])
assert "small message 5" in str(parsed_messages[1])
assert len(parsed_messages) == 1
assert "small message 5" in str(parsed_messages[0])
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5


Expand Down Expand Up @@ -1076,13 +1072,9 @@ def test_nonstreaming_create_message_with_system_prompt(
if send_default_pii and include_prompts:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
assert len(stored_messages) == 2
# System message should be first
assert stored_messages[0]["role"] == "system"
assert stored_messages[0]["content"] == "You are a helpful assistant."
# User message should be second
assert stored_messages[1]["role"] == "user"
assert stored_messages[1]["content"] == "Hello, Claude"
assert len(stored_messages) == 1
assert stored_messages[0]["role"] == "user"
assert stored_messages[0]["content"] == "Hello, Claude"
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude."
else:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
Expand Down Expand Up @@ -1155,13 +1147,9 @@ async def test_nonstreaming_create_message_with_system_prompt_async(
if send_default_pii and include_prompts:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
assert len(stored_messages) == 2
# System message should be first
assert stored_messages[0]["role"] == "system"
assert stored_messages[0]["content"] == "You are a helpful assistant."
# User message should be second
assert stored_messages[1]["role"] == "user"
assert stored_messages[1]["content"] == "Hello, Claude"
assert len(stored_messages) == 1
assert stored_messages[0]["role"] == "user"
assert stored_messages[0]["content"] == "Hello, Claude"
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude."
else:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
Expand Down Expand Up @@ -1266,13 +1254,9 @@ def test_streaming_create_message_with_system_prompt(
if send_default_pii and include_prompts:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
assert len(stored_messages) == 2
# System message should be first
assert stored_messages[0]["role"] == "system"
assert stored_messages[0]["content"] == "You are a helpful assistant."
# User message should be second
assert stored_messages[1]["role"] == "user"
assert stored_messages[1]["content"] == "Hello, Claude"
assert len(stored_messages) == 1
assert stored_messages[0]["role"] == "user"
assert stored_messages[0]["content"] == "Hello, Claude"
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!"

else:
Expand Down Expand Up @@ -1381,13 +1365,9 @@ async def test_streaming_create_message_with_system_prompt_async(
if send_default_pii and include_prompts:
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
assert len(stored_messages) == 2
# System message should be first
assert stored_messages[0]["role"] == "system"
assert stored_messages[0]["content"] == "You are a helpful assistant."
# User message should be second
assert stored_messages[1]["role"] == "user"
assert stored_messages[1]["content"] == "Hello, Claude"
assert len(stored_messages) == 1
assert stored_messages[0]["role"] == "user"
assert stored_messages[0]["content"] == "Hello, Claude"
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!"

else:
Expand All @@ -1400,60 +1380,6 @@ async def test_streaming_create_message_with_system_prompt_async(
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True


def test_system_prompt_with_complex_structure(sentry_init, capture_events):
    """Test that complex system prompt structures (list of text blocks) are properly captured."""
    sentry_init(
        integrations=[AnthropicIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()
    client = Anthropic(api_key="z")
    client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE)

    # System prompt supplied as a list of text blocks rather than a plain string.
    system_blocks = [
        {"type": "text", "text": "You are a helpful assistant."},
        {"type": "text", "text": "Be concise and clear."},
    ]
    request_messages = [
        {
            "role": "user",
            "content": "Hello",
        }
    ]

    with start_transaction(name="anthropic"):
        result = client.messages.create(
            max_tokens=1024,
            messages=request_messages,
            model="model",
            system=system_blocks,
        )

    assert result == EXAMPLE_MESSAGE
    assert len(events) == 1
    (event,) = events

    assert len(event["spans"]) == 1
    (span,) = event["spans"]

    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
    stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

    # The system prompt is stored first, followed by the user message; the
    # system content must survive as the original list of text blocks.
    assert len(stored_messages) == 2
    first, second = stored_messages
    assert first["role"] == "system"
    assert isinstance(first["content"], list)
    assert len(first["content"]) == 2
    assert first["content"][0]["type"] == "text"
    assert first["content"][0]["text"] == "You are a helpful assistant."
    assert first["content"][1]["type"] == "text"
    assert first["content"][1]["text"] == "Be concise and clear."
    assert second["role"] == "user"
    assert second["content"] == "Hello"

# Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions


Expand Down
Loading
Loading