豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit a7b4851

Browse files
authored
fix: #2797 accept raw image_url content parts on chat completions input (#2799)
1 parent 9ed6dad commit a7b4851

File tree

4 files changed

+168
-1
lines changed

4 files changed

+168
-1
lines changed

src/agents/models/chatcmpl_converter.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,41 @@ def extract_text_content(
329329
raise UserError(f"Only text content is supported here, got: {c}")
330330
return out
331331

332+
@classmethod
def _normalize_input_content_part_alias(
    cls,
    content_part: ResponseInputContentWithAudioParam,
) -> ResponseInputContentWithAudioParam:
    """Translate a raw Chat Completions content part into the SDK canonical shape.

    A ``{"type": "text", ...}`` dict becomes an ``input_text`` part and a
    ``{"type": "image_url", ...}`` dict becomes an ``input_image`` part. Values
    that are not dicts, and dicts with any other ``type``, are returned as-is.

    Args:
        content_part: The content part to inspect.

    Returns:
        The normalized part, or ``content_part`` itself when no alias applies.

    Raises:
        UserError: If a ``text`` part carries a non-string ``text`` value, or an
            ``image_url`` part does not carry a dict payload holding a non-empty
            string ``url``.
    """
    if not isinstance(content_part, dict):
        return content_part

    kind = content_part.get("type")
    if kind not in ("text", "image_url"):
        # Not one of the raw aliases handled here; leave it untouched.
        return content_part

    if kind == "text":
        raw_text = content_part.get("text")
        if isinstance(raw_text, str):
            # Cast because this TypedDict alias is assembled by hand.
            return cast(ResponseInputTextParam, {"type": "input_text", "text": raw_text})
        raise UserError(f"Only text content is supported here, got: {content_part}")

    # Remaining case: kind == "image_url".
    payload = content_part.get("image_url")
    url = payload.get("url") if isinstance(payload, dict) else None
    if not isinstance(payload, dict) or not isinstance(url, str) or not url:
        raise UserError(f"Only image URLs are supported for image_url {content_part}")

    # "detail" is copied over only when the raw payload provides it.
    detail_level = payload.get("detail")
    extras: dict[str, Any] = {} if detail_level is None else {"detail": detail_level}
    # Cast because this TypedDict alias is assembled by hand.
    return cast(
        ResponseInputImageParam,
        {"type": "input_image", "image_url": url, **extras},
    )
366+
332367
@classmethod
333368
def extract_all_content(
334369
cls, content: str | Iterable[ResponseInputContentWithAudioParam]
@@ -338,6 +373,7 @@ def extract_all_content(
338373
out: list[ChatCompletionContentPartParam] = []
339374

340375
for c in content:
376+
c = cls._normalize_input_content_part_alias(c)
341377
if isinstance(c, dict) and c.get("type") == "input_text":
342378
casted_text_param = cast(ResponseInputTextParam, c)
343379
out.append(

src/agents/models/openai_chatcompletions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,11 @@ def _validate_official_openai_input_content_types(
8787
if not isinstance(part, dict):
8888
continue
8989

90-
content_type = part.get("type")
90+
normalized_part = Converter._normalize_input_content_part_alias(part)
91+
if not isinstance(normalized_part, dict):
92+
continue
93+
94+
content_type = normalized_part.get("type")
9195
if content_type in self._OFFICIAL_OPENAI_SUPPORTED_INPUT_CONTENT_TYPES:
9296
continue
9397

tests/test_openai_chatcompletions.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,90 @@ def __init__(self, completions: DummyCompletions) -> None:
384384
assert kwargs["stream_options"] is omit
385385

386386

387+
@pytest.mark.allow_call_model_methods
@pytest.mark.asyncio
async def test_get_response_accepts_raw_chat_completions_image_content() -> None:
    """
    A user message written with raw Chat Completions parts (``text`` and
    ``image_url``) is accepted by ``get_response`` and the ``messages`` passed to
    ``chat.completions.create`` carry those same parts.
    """

    def raw_user_message() -> dict[str, Any]:
        # Build a fresh dict on every call so the input handed to the model and the
        # expected value in the assertion never share mutable state.
        return {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "data:image/png;base64,AAAA",
                        "detail": "high",
                    },
                },
            ],
        }

    canned_completion = ChatCompletion(
        id="resp-id",
        created=0,
        model="fake",
        object="chat.completion",
        choices=[
            Choice(
                index=0,
                finish_reason="stop",
                message=ChatCompletionMessage(role="assistant", content="ok"),
            )
        ],
        usage=None,
    )

    class DummyCompletions:
        def __init__(self) -> None:
            self.kwargs: dict[str, Any] = {}

        async def create(self, **kwargs: Any) -> Any:
            # Record what the model sent so the test can inspect it afterwards.
            self.kwargs = kwargs
            return canned_completion

    class DummyClient:
        def __init__(self, completions: DummyCompletions) -> None:
            self.chat = type("_Chat", (), {"completions": completions})()
            self.base_url = httpx.URL("https://api.openai.com/v1/")

    recorder = DummyCompletions()
    model = OpenAIChatCompletionsModel(
        model="gpt-4",
        openai_client=DummyClient(recorder),  # type: ignore[arg-type]
    )

    await model.get_response(
        system_instructions=None,
        # Cast the fixture because the raw chat-style alias is intentionally outside the
        # canonical TypedDict shape that mypy expects for ordinary SDK inputs.
        input=[cast(Any, raw_user_message())],
        model_settings=ModelSettings(),
        tools=[],
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
        previous_response_id=None,
        conversation_id=None,
        prompt=None,
    )

    assert recorder.kwargs["messages"] == [raw_user_message()]
469+
470+
387471
@pytest.mark.asyncio
388472
async def test_fetch_response_stream(monkeypatch) -> None:
389473
"""

tests/test_openai_chatcompletions_converter.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,49 @@ def test_items_to_messages_with_easy_input_message():
140140
assert out["content"] == "How are you?"
141141

142142

143+
def test_items_to_messages_accepts_raw_chat_completions_user_content_parts():
    """
    ``Converter.items_to_messages`` treats raw Chat Completions ``text`` and
    ``image_url`` parts as aliases of the SDK's canonical input content shapes and
    emits a single user message carrying those parts.
    """

    def raw_parts():
        # Return a fresh list on every call so the converter input and the expected
        # value never share mutable state.
        return [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://example.com/image.png",
                    "detail": "high",
                },
            },
        ]

    # Cast the fixture because mypy cannot infer this raw chat-style dict as a specific
    # member of the TResponseInputItem TypedDict union on its own.
    user_item = cast(TResponseInputItem, {"role": "user", "content": raw_parts()})
    items: list[TResponseInputItem] = [user_item]

    messages = Converter.items_to_messages(items)

    assert len(messages) == 1
    only_message = messages[0]
    assert only_message["role"] == "user"
    assert only_message["content"] == raw_parts()
184+
185+
143186
def test_items_to_messages_with_output_message_and_function_call():
144187
"""
145188
Given a sequence of one ResponseOutputMessageParam followed by a

0 commit comments

Comments
 (0)