Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions src/browser/components/Settings/sections/ProvidersSection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,64 @@ export function ProvidersSection() {
);
})}

{/* Anthropic: prompt cache TTL */}
{provider === "anthropic" && (
  <div className="border-border-light border-t pt-3">
    <div className="mb-1 flex items-center gap-1">
      <label className="text-muted block text-xs">Prompt cache TTL</label>
      <TooltipProvider>
        <Tooltip>
          <TooltipTrigger asChild>
            <HelpIndicator aria-label="Anthropic prompt cache TTL help">
              ?
            </HelpIndicator>
          </TooltipTrigger>
          <TooltipContent>
            <div className="max-w-[280px]">
              <div className="font-semibold">Prompt cache TTL</div>
              <div className="mt-1">
                Default is <span className="font-semibold">5m</span>. Use{" "}
                <span className="font-semibold">1h</span> for longer workflows at a
                higher cache-write cost.
              </div>
            </div>
          </TooltipContent>
        </Tooltip>
      </TooltipProvider>
    </div>

    <Select
      value={config?.anthropic?.cacheTtl ?? "default"}
      onValueChange={(next) => {
        // No backend connection — nothing to persist to.
        if (!api) {
          return;
        }
        // Defensive narrowing: the Select can only emit these three values,
        // but `next` arrives as a plain string.
        if (next !== "default" && next !== "5m" && next !== "1h") {
          return;
        }

        // "default" maps to `undefined` in local state (no explicit TTL).
        const cacheTtl = next === "default" ? undefined : next;
        // NOTE(review): optimistic update is not rolled back if the RPC below
        // fails — presumably a later config refresh reconciles; confirm.
        updateOptimistically("anthropic", { cacheTtl });
        void api.providers.setProviderConfig({
          provider: "anthropic",
          keyPath: ["cacheTtl"],
          // Empty string clears providers.jsonc key; backend defaults to 5m when unset.
          value: next === "default" ? "" : next,
        });
      }}
    >
      <SelectTrigger className="w-40">
        <SelectValue />
      </SelectTrigger>
      <SelectContent>
        <SelectItem value="default">Default (5m)</SelectItem>
        <SelectItem value="5m">5 minutes</SelectItem>
        <SelectItem value="1h">1 hour</SelectItem>
      </SelectContent>
    </Select>
  </div>
)}

{/* OpenAI: ChatGPT OAuth + service tier */}
{provider === "openai" && (
<div className="border-border-light space-y-3 border-t pt-3">
Expand Down
14 changes: 14 additions & 0 deletions src/browser/utils/messages/sendOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,18 @@ describe("getSendOptionsFromStorage", () => {
const withThinking = getSendOptionsFromStorage(workspaceId);
expect(withThinking.system1ThinkingLevel).toBe("high");
});

  // A cacheTtl persisted under the "provider_options_anthropic" localStorage
  // key must surface in the per-send providerOptions for Anthropic.
  test("includes Anthropic prompt cache TTL from persisted provider options", () => {
    const workspaceId = "ws-3";

    window.localStorage.setItem(
      "provider_options_anthropic",
      JSON.stringify({
        cacheTtl: "1h",
      })
    );

    const options = getSendOptionsFromStorage(workspaceId);
    expect(options.providerOptions?.anthropic?.cacheTtl).toBe("1h");
  });
});
2 changes: 2 additions & 0 deletions src/common/orpc/schemas/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ describe("ProviderConfigInfoSchema conformance", () => {
baseUrl: "https://custom.endpoint.com",
models: ["claude-3-opus", "claude-3-sonnet"],
serviceTier: "flex",
cacheTtl: "1h",
codexOauthSet: true,
codexOauthDefaultAuth: "apiKey",
aws: {
Expand All @@ -126,6 +127,7 @@ describe("ProviderConfigInfoSchema conformance", () => {
expect(parsed.baseUrl).toBe(full.baseUrl);
expect(parsed.models).toEqual(full.models);
expect(parsed.serviceTier).toBe(full.serviceTier);
expect(parsed.cacheTtl).toBe(full.cacheTtl);
expect(parsed.codexOauthSet).toBe(full.codexOauthSet);
expect(parsed.codexOauthDefaultAuth).toBe(full.codexOauthDefaultAuth);
expect(parsed.aws).toEqual(full.aws);
Expand Down
2 changes: 2 additions & 0 deletions src/common/orpc/schemas/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ export const ProviderConfigInfoSchema = z.object({
models: z.array(z.string()).optional(),
/** OpenAI-specific fields */
serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional(),
/** Anthropic-specific fields */
cacheTtl: z.enum(["5m", "1h"]).optional(),
/** OpenAI-only: whether Codex OAuth tokens are present in providers.jsonc */
codexOauthSet: z.boolean().optional(),
/**
Expand Down
8 changes: 8 additions & 0 deletions src/common/orpc/schemas/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ export const MuxProviderOptionsSchema = z.object({
description:
"Model IDs with 1M context enabled (e.g. ['anthropic:claude-sonnet-4-20250514'])",
}),
// Anthropic prompt cache TTL. "5m" is the default (free refresh on hit).
// "1h" costs 2× base input for cache writes but keeps the cache alive longer —
// useful for agentic workflows where turns take >5 minutes.
// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
cacheTtl: z.enum(["5m", "1h"]).nullish().meta({
description:
'Anthropic prompt cache TTL: "5m" (default, free refresh) or "1h" (2× write cost, longer cache)',
}),
})
.optional(),
openai: z
Expand Down
55 changes: 55 additions & 0 deletions src/common/utils/ai/cacheStrategy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,23 @@ describe("cacheStrategy", () => {
anthropic: { cacheControl: { type: "ephemeral" } },
}); // Last part has cache control
});

    // The optional TTL argument must be forwarded into the cacheControl
    // breakpoint that applyCacheControl attaches to the cached message.
    it("should include cache TTL when provided", () => {
      const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
      const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet", "1h");

      expect(result[0]).toEqual({
        ...messages[0],
        providerOptions: {
          anthropic: {
            cacheControl: {
              type: "ephemeral",
              ttl: "1h",
            },
          },
        },
      });
    });
});

describe("createCachedSystemMessage", () => {
Expand Down Expand Up @@ -194,6 +211,24 @@ describe("cacheStrategy", () => {
},
});
});

    it("should include cache TTL in cached system message when provided", () => {
      const systemContent = "You are a helpful assistant";
      // String system content gets message-level providerOptions, so the TTL
      // should appear directly on the returned system message.
      const result = createCachedSystemMessage(systemContent, "anthropic:claude-3-5-sonnet", "1h");

      expect(result).toEqual({
        role: "system",
        content: systemContent,
        providerOptions: {
          anthropic: {
            cacheControl: {
              type: "ephemeral",
              ttl: "1h",
            },
          },
        },
      });
    });
});

describe("applyCacheControlToTools", () => {
Expand Down Expand Up @@ -269,6 +304,26 @@ describe("cacheStrategy", () => {
expect(Object.keys(result)).toEqual(Object.keys(mockTools));
});

    it("should include cache TTL on the cached tool when provided", () => {
      const result = applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet", "1h");
      const keys = Object.keys(mockTools);
      const lastKey = keys[keys.length - 1];
      // The SDK Tool type doesn't expose providerOptions, so cast through
      // unknown to a minimal structural shape just for inspection.
      const cachedLastTool = result[lastKey] as unknown as {
        providerOptions?: {
          anthropic?: {
            cacheControl?: {
              type?: string;
              ttl?: string;
            };
          };
        };
      };

      // Only the last tool carries the breakpoint; Anthropic caches all
      // tools up to that breakpoint implicitly.
      expect(cachedLastTool.providerOptions?.anthropic?.cacheControl).toEqual({
        type: "ephemeral",
        ttl: "1h",
      });
    });
it("should not modify original tools object", () => {
const originalTools = { ...mockTools };
applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
Expand Down
67 changes: 44 additions & 23 deletions src/common/utils/ai/cacheStrategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ import assert from "@/common/utils/assert";
import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors";
import { normalizeGatewayModel } from "./models";

/**
 * Anthropic prompt cache TTL value.
 * "5m" = 5-minute cache (default, free refresh on hit).
 * "1h" = 1-hour cache (2× base input write cost, longer lived).
 * Flows from the user's provider settings ("cacheTtl" in providers.jsonc)
 * down to each cache breakpoint this module emits.
 * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
 */
export type AnthropicCacheTtl = "5m" | "1h";

/**
* Check if a model supports Anthropic cache control.
* Matches:
Expand All @@ -24,12 +32,19 @@ export function supportsAnthropicCache(modelString: string): boolean {
return false;
}

/** Cache control providerOptions for Anthropic */
const ANTHROPIC_CACHE_CONTROL = {
anthropic: {
cacheControl: { type: "ephemeral" as const },
},
};
/**
 * Build the Anthropic providerOptions payload that marks a cache breakpoint.
 *
 * @param cacheTtl Optional explicit TTL. When omitted/null, the `ttl` field is
 *   left out entirely and Anthropic applies its own default (5 minutes).
 */
function anthropicCacheControl(cacheTtl?: AnthropicCacheTtl | null) {
  const cacheControl: { type: "ephemeral"; ttl?: AnthropicCacheTtl } = {
    type: "ephemeral",
  };
  if (cacheTtl) {
    cacheControl.ttl = cacheTtl;
  }
  return { anthropic: { cacheControl } };
}

/** Shared payload for the common no-explicit-TTL case (Anthropic defaults to 5m). */
const ANTHROPIC_CACHE_CONTROL = anthropicCacheControl();

type ProviderNativeTool = Extract<Tool, { type: "provider" }>;

Expand All @@ -45,15 +60,19 @@ function isProviderNativeTool(tool: Tool): tool is ProviderNativeTool {
* (which the SDK handles correctly). For user/assistant messages with array
* content, we add providerOptions to the last content part.
*/
function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
function addCacheControlToLastContentPart(
msg: ModelMessage,
cacheTtl?: AnthropicCacheTtl | null
): ModelMessage {
const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
const content = msg.content;

// String content (typically system messages): use message-level providerOptions
// The SDK correctly translates this for system messages
if (typeof content === "string") {
return {
...msg,
providerOptions: ANTHROPIC_CACHE_CONTROL,
providerOptions: cacheOpts,
};
}

Expand All @@ -62,7 +81,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
if (Array.isArray(content) && content.length > 0) {
const lastIndex = content.length - 1;
const newContent = content.map((part, i) =>
i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
i === lastIndex ? { ...part, providerOptions: cacheOpts } : part
);
// Type assertion needed: ModelMessage types are strict unions but providerOptions
// on content parts is valid per SDK docs
Expand All @@ -81,7 +100,11 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
* NOTE: The SDK requires providerOptions on content parts, not on the message.
* We add cache_control to the last content part of the last message.
*/
export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
export function applyCacheControl(
messages: ModelMessage[],
modelString: string,
cacheTtl?: AnthropicCacheTtl | null
): ModelMessage[] {
// Only apply cache control for Anthropic models
if (!supportsAnthropicCache(modelString)) {
return messages;
Expand All @@ -97,7 +120,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)

return messages.map((msg, index) => {
if (index === cacheIndex) {
return addCacheControlToLastContentPart(msg);
return addCacheControlToLastContentPart(msg, cacheTtl);
}
return msg;
});
Expand All @@ -109,7 +132,8 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
*/
export function createCachedSystemMessage(
systemContent: string,
modelString: string
modelString: string,
cacheTtl?: AnthropicCacheTtl | null
): ModelMessage | null {
if (!systemContent || !supportsAnthropicCache(modelString)) {
return null;
Expand All @@ -118,13 +142,7 @@ export function createCachedSystemMessage(
return {
role: "system" as const,
content: systemContent,
providerOptions: {
anthropic: {
cacheControl: {
type: "ephemeral" as const,
},
},
},
providerOptions: cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL,
};
}

Expand All @@ -145,7 +163,8 @@ export function createCachedSystemMessage(
*/
export function applyCacheControlToTools<T extends Record<string, Tool>>(
tools: T,
modelString: string
modelString: string,
cacheTtl?: AnthropicCacheTtl | null
): T {
// Only apply cache control for Anthropic models
if (!supportsAnthropicCache(modelString) || !tools || Object.keys(tools).length === 0) {
Expand All @@ -156,6 +175,8 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
const toolKeys = Object.keys(tools);
const lastToolKey = toolKeys[toolKeys.length - 1];

const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;

// Clone tools and add cache control ONLY to the last tool
// Anthropic caches everything up to the cache breakpoint, so marking
// only the last tool will cache all tools
Expand All @@ -168,13 +189,13 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
const cachedProviderTool = cloneToolPreservingDescriptors(
existingTool
) as ProviderNativeTool;
cachedProviderTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
cachedProviderTool.providerOptions = cacheOpts;
cachedTools[key as keyof T] = cachedProviderTool as unknown as T[keyof T];
} else if (existingTool.execute == null) {
// Some MCP/dynamic tools are valid without execute handlers (provider-/client-executed).
// Keep their runtime shape and attach cache control without forcing recreation.
const cachedDynamicTool = cloneToolPreservingDescriptors(existingTool);
cachedDynamicTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
cachedDynamicTool.providerOptions = cacheOpts;
cachedTools[key as keyof T] = cachedDynamicTool as unknown as T[keyof T];
} else {
assert(
Expand All @@ -187,7 +208,7 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
description: existingTool.description,
inputSchema: existingTool.inputSchema,
execute: existingTool.execute,
providerOptions: ANTHROPIC_CACHE_CONTROL,
providerOptions: cacheOpts,
});
cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
}
Expand Down
Loading
Loading