Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions ccproxy/llms/formatters/anthropic_to_openai/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,10 @@ def convert__anthropic_message_to_openai_responses__response(
text_parts.append(getattr(block, "text", ""))
elif block_type == "thinking":
thinking = getattr(block, "thinking", None) or ""
signature = getattr(block, "signature", None)
sig_attr = (
f' signature="{signature}"'
if isinstance(signature, str) and signature
else ""
)
text_parts.append(f"<thinking{sig_attr}>{thinking}</thinking>")
text_parts.append(f"<think>{thinking}</think>")
elif block_type == "redacted_thinking":
# Skip redacted thinking blocks
continue
elif block_type == "tool_use":
tool_contents.append(
{
Expand Down Expand Up @@ -113,14 +110,11 @@ def convert__anthropic_message_to_openai_chat__response(
parts.append(text)
elif btype == "thinking":
thinking = getattr(block, "thinking", None)
signature = getattr(block, "signature", None)
if isinstance(thinking, str):
sig_attr = (
f' signature="{signature}"'
if isinstance(signature, str) and signature
else ""
)
parts.append(f"<thinking{sig_attr}>{thinking}</thinking>")
parts.append(f"<think>{thinking}</think>")
# Skip redacted_thinking blocks
elif btype == "redacted_thinking":
continue
elif btype == "tool_use":
tool_calls.append(
build_openai_tool_call(
Expand Down
61 changes: 54 additions & 7 deletions ccproxy/llms/formatters/anthropic_to_openai/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,11 @@ def _anthropic_delta_to_text(
block_type = block_meta.get("type")

if block_type == "thinking":
# Return just the thinking text - tags handled by block start/stop events
thinking_text = delta.get("thinking")
if not isinstance(thinking_text, str) or not thinking_text:
return None
signature = block_meta.get("signature")
if isinstance(signature, str) and signature:
return f'<thinking signature="{signature}">{thinking_text}</thinking>'
return f"<thinking>{thinking_text}</thinking>"
if isinstance(thinking_text, str) and thinking_text:
return thinking_text
return None

text_val = delta.get("text")
if isinstance(text_val, str) and text_val:
Expand Down Expand Up @@ -1378,6 +1376,34 @@ async def generator() -> AsyncGenerator[
if not message_started:
continue

if event_type == "content_block_start":
content_block = (
event_payload.get("content_block", {})
if isinstance(event_payload, dict)
else {}
)
if (
isinstance(content_block, dict)
and content_block.get("type") == "thinking"
):
# Emit opening <think> tag
yield openai_models.ChatCompletionChunk(
id="chatcmpl-stream",
object="chat.completion.chunk",
created=0,
model=model_id,
choices=[
openai_models.StreamingChoice(
index=0,
delta=openai_models.DeltaMessage(
role="assistant", content="<think>"
),
finish_reason=None,
)
Comment on lines +1396 to +1402
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For OpenAI-compatible streaming, delta.role is typically only sent once at the start of the assistant message (many clients assume subsequent chunks omit it). For these synthetic <think> wrapper chunks, consider omitting role (i.e., only set content) or only including role if it hasn’t been emitted yet for the message.

Copilot uses AI. Check for mistakes.
],
)
Comment on lines +1390 to +1404
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ChatCompletionChunk construction for emitting wrapper tags is duplicated (opening and closing) with many identical fields. Consider extracting a small helper/factory (e.g., emit_text_chunk(content: str, *, role: str | None = None)) to reduce repetition and the risk of future inconsistencies across these synthetic chunks.

Copilot uses AI. Check for mistakes.
continue

if event_type == "content_block_delta":
block_index = int(event_payload.get("index", 0))
text_delta = _anthropic_delta_to_text(
Expand Down Expand Up @@ -1409,7 +1435,28 @@ async def generator() -> AsyncGenerator[
if not block_info:
continue
_, block_meta = block_info
if block_meta.get("type") != "tool_use":
block_type = block_meta.get("type")

if block_type == "thinking":
# Emit closing </think> tag
yield openai_models.ChatCompletionChunk(
id="chatcmpl-stream",
object="chat.completion.chunk",
created=0,
model=model_id,
choices=[
openai_models.StreamingChoice(
index=0,
delta=openai_models.DeltaMessage(
role="assistant", content="</think>"
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the opening-tag emission: emitting delta.role="assistant" on this closing-tag chunk may be inconsistent with common streaming expectations. Prefer omitting role (or gate it behind a 'role already emitted' flag) for these wrapper-only chunks.

Suggested change
role="assistant", content="</think>"
content="</think>"

Copilot uses AI. Check for mistakes.
),
finish_reason=None,
)
],
)
continue

if block_type != "tool_use":
continue
if block_index in emitted_tool_indices:
continue
Expand Down
8 changes: 7 additions & 1 deletion ccproxy/llms/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,13 @@ class RedactedThinkingBlock(ContentBlockBase):


RequestContentBlock = Annotated[
TextBlock | ImageBlock | ToolUseBlock | ToolResultBlock, Field(discriminator="type")
TextBlock
| ImageBlock
| ToolUseBlock
| ToolResultBlock
| ThinkingBlock
| RedactedThinkingBlock,
Field(discriminator="type"),
]

ResponseContentBlock = Annotated[
Expand Down
Loading