diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 7043bbc2ee..ecb8abcd10 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -359,3 +359,7 @@ class SDKInfo(TypedDict): ) HttpStatusCodeRange = Union[int, Container[int]] + + class TextPart(TypedDict): + type: Literal["text"] + content: str diff --git a/sentry_sdk/ai/_openai_completions_api.py b/sentry_sdk/ai/_openai_completions_api.py new file mode 100644 index 0000000000..c77fdb82dc --- /dev/null +++ b/sentry_sdk/ai/_openai_completions_api.py @@ -0,0 +1,50 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openai.types.chat import ( + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ) + from typing import Iterable + + from sentry_sdk._types import TextPart + + +def _is_system_instruction(message: "ChatCompletionMessageParam") -> bool: + return isinstance(message, dict) and message.get("role") == "system" + + +def _get_system_instructions( + messages: "Iterable[ChatCompletionMessageParam]", +) -> "list[ChatCompletionSystemMessageParam]": + system_instructions = [] + + for message in messages: + if _is_system_instruction(message): + system_instructions.append(message) + + return system_instructions + + +def _transform_system_instructions( + system_instructions: "list[ChatCompletionSystemMessageParam]", +) -> "list[TextPart]": + instruction_text_parts: "list[TextPart]" = [] + + for instruction in system_instructions: + if not isinstance(instruction, dict): + continue + + content = instruction.get("content") + + if isinstance(content, str): + instruction_text_parts.append({"type": "text", "content": content}) + + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text = part.get("text", "") + if text: + instruction_text_parts.append({"type": "text", "content": text}) + + return instruction_text_parts diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 93fca6ba3e..4b61a317fb 100644 --- 
a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -542,6 +542,12 @@ class SPANDATA: Example: 2048 """ + GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + """ + The system instructions passed to the model. + Example: [{"type": "text", "content": "You are a helpful assistant."},{"type": "text", "content": "Be concise and clear."}] + """ + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ The messages passed to the model. The "content" can be a string or an array of objects. diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index a7af385d12..676c8c8612 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -9,6 +9,11 @@ normalize_message_roles, truncate_and_annotate_messages, ) +from sentry_sdk.ai._openai_completions_api import ( + _get_system_instructions as _get_system_instructions_completions, + _is_system_instruction as _is_system_instruction_completions, + _transform_system_instructions, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -34,8 +39,10 @@ Union, ) from sentry_sdk.tracing import Span + from sentry_sdk._types import TextPart - from openai.types.responses import ResponseInputParam + from openai.types.responses import ResponseInputParam, ResponseInputItemParam + from openai import Omit try: try: @@ -193,6 +200,29 @@ def _calculate_token_usage( ) + +def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: + return ( + isinstance(message, dict) + and message.get("type") == "message" + and message.get("role") == "system" + ) + + +def _get_system_instructions_responses( + messages: "Union[str, ResponseInputParam]", +) -> "list[ResponseInputItemParam]": + if isinstance(messages, str): + return [] + + system_instructions = [] + + for message in messages: + if _is_system_instruction_responses(message): + system_instructions.append(message) + + return 
system_instructions + + def _get_input_messages( kwargs: "dict[str, Any]", ) -> "Optional[Union[Iterable[Any], list[str]]]": @@ -243,17 +273,48 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: - messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages( - kwargs - ) + messages: "Optional[Union[str, ResponseInputParam]]" = kwargs.get("input") + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") + system_instructions = _get_system_instructions_responses(messages) if ( - messages is not None - and len(messages) > 0 + ( + (explicit_instructions is not None and _is_given(explicit_instructions)) + or len(system_instructions) > 0 + ) and should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) # type: ignore + instructions_text_parts: "list[TextPart]" = [] + if explicit_instructions is not None and _is_given(explicit_instructions): + instructions_text_parts.append( + { + "type": "text", + "content": explicit_instructions, + } + ) + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. 
+ instructions_text_parts += _transform_system_instructions(system_instructions) + + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + instructions_text_parts, + unpack=False, + ) + + if ( + isinstance(messages, str) + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: @@ -261,6 +322,23 @@ def _set_responses_api_input_data( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) + elif should_send_default_pii() and integration.include_prompts: + non_system_messages = [ + message + for message in messages + if not _is_system_instruction_responses(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") _commmon_set_input_data(span, kwargs) @@ -270,23 +348,56 @@ def _set_completions_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: - messages: "Optional[Union[Iterable[ChatCompletionMessageParam], list[str]]]" = ( - _get_input_messages(kwargs) + messages: "Optional[Union[str, Iterable[ChatCompletionMessageParam]]]" = kwargs.get( + "messages" ) + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + _commmon_set_input_data(span, kwargs) + return + + system_instructions = _get_system_instructions_completions(messages) if ( - messages is not None - and len(messages) > 0 # type: ignore + len(system_instructions) > 0 and 
should_send_default_pii() and integration.include_prompts ): - normalized_messages = normalize_message_roles(messages) # type: ignore + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + _transform_system_instructions(system_instructions), + unpack=False, + ) + + if ( + isinstance(messages, str) + and should_send_default_pii() + and integration.include_prompts + ): + normalized_messages = normalize_message_roles([messages]) # type: ignore scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False ) + elif should_send_default_pii() and integration.include_prompts: + non_system_messages = [ + message + for message in messages + if not _is_system_instruction_completions(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope + ) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") _commmon_set_input_data(span, kwargs) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..35a0b04f96 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -9,8 +9,10 @@ NOT_GIVEN = None try: from openai import omit + from openai import Omit except ImportError: omit = None + Omit = None from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding @@ -47,6 +49,7 @@ from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk._types import 
AnnotatedValue from sentry_sdk.serializer import serialize +from sentry_sdk.utils import safe_serialize from unittest import mock # python 3.3 and above @@ -147,7 +150,11 @@ def test_nonstreaming_chat_completion( with start_transaction(name="openai tx"): response = ( client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) .choices[0] .message.content @@ -160,9 +167,17 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.chat" if send_default_pii and include_prompts: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -191,7 +206,11 @@ async def test_nonstreaming_chat_completion_async( with start_transaction(name="openai tx"): response = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], ) response = response.choices[0].message.content @@ -202,9 +221,17 @@ async def test_nonstreaming_chat_completion_async( assert span["op"] == "gen_ai.chat" if send_default_pii and include_prompts: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "the model response" in 
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -227,8 +254,36 @@ def tiktoken_encoding_if_installed(): "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, input, request ): sentry_init( integrations=[ @@ -283,7 +338,8 @@ def test_streaming_chat_completion( client.chat.completions._post = mock.Mock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=input, ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -294,19 +350,45 @@ def test_streaming_chat_completion( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" + param_id = request.node.callspec.id if send_default_pii and include_prompts: + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful 
assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 1 - assert span["data"]["gen_ai.usage.total_tokens"] == 3 + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 1 + assert span["data"]["gen_ai.usage.total_tokens"] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -317,8 +399,36 @@ def test_streaming_chat_completion( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) async def test_streaming_chat_completion_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, input, request ): sentry_init( integrations=[ @@ -377,7 +487,8 
@@ async def test_streaming_chat_completion_async( client.chat.completions._post = AsyncMock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=input, ) response_string = "" @@ -390,19 +501,46 @@ async def test_streaming_chat_completion_async( span = tx["spans"][0] assert span["op"] == "gen_ai.chat" + param_id = request.node.callspec.id if send_default_pii and include_prompts: + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 1 - assert span["data"]["gen_ai.usage.total_tokens"] == 3 + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 1 + assert span["data"]["gen_ai.usage.total_tokens"] == 3 + except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will 
be calculated properly @@ -1034,12 +1172,55 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "thread.name": mock.ANY, } + assert "gen_ai.system_instructions" not in spans[0]["data"] assert "gen_ai.request.messages" not in spans[0]["data"] assert "gen_ai.response.text" not in spans[0]["data"] +@pytest.mark.parametrize( + "instructions", + ( + omit, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api(sentry_init, capture_events): +def test_ai_client_span_responses_api( + sentry_init, capture_events, instructions, input, request +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1053,8 +1234,8 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): with start_transaction(name="openai tx"): client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, ) (transaction,) = events @@ -1063,10 +1244,10 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == 
"auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", + "gen_ai.response.streaming": True, "gen_ai.system": "openai", "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, @@ -1074,11 +1255,100 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } + param_id = request.node.callspec.id + if "string" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + 
[{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_error_in_responses_api(sentry_init, capture_events): @@ -1149,6 +1419,7 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", "gen_ai.system": "openai", + "gen_ai.system_instructions": '[{"type": "text", "content": "You are a coding assistant that talks like a pirate."}]', "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1161,9 +1432,49 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): @pytest.mark.asyncio +@pytest.mark.parametrize( + "instructions", + ( + omit, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful 
assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( - sentry_init, capture_events + sentry_init, capture_events, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -1178,8 +1489,8 @@ async def test_ai_client_span_streaming_responses_async_api( with start_transaction(name="openai tx"): await client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, stream=True, ) @@ -1189,23 +1500,111 @@ async def test_ai_client_span_streaming_responses_async_api( assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", "gen_ai.response.streaming": True, "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } + param_id = 
request.node.callspec.id + if "string" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks" in param_id and isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise 
and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -1472,7 +1871,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ - {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello"}, {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" @@ -1492,17 +1890,15 @@ def test_openai_message_role_mapping(sentry_init, capture_events): stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) # Verify that "ai" role was mapped to "assistant" - assert len(stored_messages) == 4 - assert stored_messages[0]["role"] == "system" - assert stored_messages[1]["role"] == "user" + assert len(stored_messages) == 3 assert ( - stored_messages[2]["role"] == "assistant" + stored_messages[1]["role"] == "assistant" ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + assert stored_messages[2]["role"] == "assistant" # should stay "assistant" # Verify content is preserved - assert stored_messages[2]["content"] == "Hi there!" - assert stored_messages[3]["content"] == "How can I help?" + assert stored_messages[1]["content"] == "Hi there!" + assert stored_messages[2]["content"] == "How can I help?" # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages]