diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index f6a3ac7eec..b457c17f6f 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -1468,6 +1468,64 @@ export function ProvidersSection() {
);
})}
+        {/* Anthropic: prompt cache TTL */}
+        {provider === "anthropic" && (
+          <div>
+            <div>
+              <span>Prompt cache TTL</span>
+              <Tooltip>
+                <TooltipTrigger>?</TooltipTrigger>
+                <TooltipContent>
+                  Default is <code>5m</code>. Use{" "}
+                  <code>1h</code> for longer workflows at a
+                  higher cache-write cost.
+                </TooltipContent>
+              </Tooltip>
+            </div>
+            <select
+              value={cacheTtl ?? "5m"}
+              onChange={(e) => setCacheTtl(e.target.value as "5m" | "1h")}
+            >
+              <option value="5m">5m</option>
+              <option value="1h">1h</option>
+            </select>
+          </div>
+        )}
+
{/* OpenAI: ChatGPT OAuth + service tier */}
{provider === "openai" && (
diff --git a/src/browser/utils/messages/sendOptions.test.ts b/src/browser/utils/messages/sendOptions.test.ts
index b0712d1ed6..624b0ba947 100644
--- a/src/browser/utils/messages/sendOptions.test.ts
+++ b/src/browser/utils/messages/sendOptions.test.ts
@@ -61,4 +61,18 @@ describe("getSendOptionsFromStorage", () => {
const withThinking = getSendOptionsFromStorage(workspaceId);
expect(withThinking.system1ThinkingLevel).toBe("high");
});
+
+ test("includes Anthropic prompt cache TTL from persisted provider options", () => {
+ const workspaceId = "ws-3";
+
+ window.localStorage.setItem(
+ "provider_options_anthropic",
+ JSON.stringify({
+ cacheTtl: "1h",
+ })
+ );
+
+ const options = getSendOptionsFromStorage(workspaceId);
+ expect(options.providerOptions?.anthropic?.cacheTtl).toBe("1h");
+ });
});
diff --git a/src/common/orpc/schemas/api.test.ts b/src/common/orpc/schemas/api.test.ts
index 00064d18ca..7cec9dd1f5 100644
--- a/src/common/orpc/schemas/api.test.ts
+++ b/src/common/orpc/schemas/api.test.ts
@@ -103,6 +103,7 @@ describe("ProviderConfigInfoSchema conformance", () => {
baseUrl: "https://custom.endpoint.com",
models: ["claude-3-opus", "claude-3-sonnet"],
serviceTier: "flex",
+ cacheTtl: "1h",
codexOauthSet: true,
codexOauthDefaultAuth: "apiKey",
aws: {
@@ -126,6 +127,7 @@ describe("ProviderConfigInfoSchema conformance", () => {
expect(parsed.baseUrl).toBe(full.baseUrl);
expect(parsed.models).toEqual(full.models);
expect(parsed.serviceTier).toBe(full.serviceTier);
+ expect(parsed.cacheTtl).toBe(full.cacheTtl);
expect(parsed.codexOauthSet).toBe(full.codexOauthSet);
expect(parsed.codexOauthDefaultAuth).toBe(full.codexOauthDefaultAuth);
expect(parsed.aws).toEqual(full.aws);
diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts
index 5132928871..aa597fc1ad 100644
--- a/src/common/orpc/schemas/api.ts
+++ b/src/common/orpc/schemas/api.ts
@@ -131,6 +131,8 @@ export const ProviderConfigInfoSchema = z.object({
models: z.array(z.string()).optional(),
/** OpenAI-specific fields */
serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional(),
+ /** Anthropic-specific fields */
+ cacheTtl: z.enum(["5m", "1h"]).optional(),
/** OpenAI-only: whether Codex OAuth tokens are present in providers.jsonc */
codexOauthSet: z.boolean().optional(),
/**
diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts
index 8b5a54386a..a4d1678b1a 100644
--- a/src/common/orpc/schemas/providerOptions.ts
+++ b/src/common/orpc/schemas/providerOptions.ts
@@ -12,6 +12,14 @@ export const MuxProviderOptionsSchema = z.object({
description:
"Model IDs with 1M context enabled (e.g. ['anthropic:claude-sonnet-4-20250514'])",
}),
+ // Anthropic prompt cache TTL. "5m" is the default (free refresh on hit).
+ // "1h" costs 2× base input for cache writes but keeps the cache alive longer —
+ // useful for agentic workflows where turns take >5 minutes.
+ // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+ cacheTtl: z.enum(["5m", "1h"]).nullish().meta({
+ description:
+ 'Anthropic prompt cache TTL: "5m" (default, free refresh) or "1h" (2× write cost, longer cache)',
+ }),
})
.optional(),
openai: z
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 10eeeb5631..1bb76b24a4 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -146,6 +146,23 @@ describe("cacheStrategy", () => {
anthropic: { cacheControl: { type: "ephemeral" } },
}); // Last part has cache control
});
+
+ it("should include cache TTL when provided", () => {
+ const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
+ const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet", "1h");
+
+ expect(result[0]).toEqual({
+ ...messages[0],
+ providerOptions: {
+ anthropic: {
+ cacheControl: {
+ type: "ephemeral",
+ ttl: "1h",
+ },
+ },
+ },
+ });
+ });
});
describe("createCachedSystemMessage", () => {
@@ -194,6 +211,24 @@ describe("cacheStrategy", () => {
},
});
});
+
+ it("should include cache TTL in cached system message when provided", () => {
+ const systemContent = "You are a helpful assistant";
+ const result = createCachedSystemMessage(systemContent, "anthropic:claude-3-5-sonnet", "1h");
+
+ expect(result).toEqual({
+ role: "system",
+ content: systemContent,
+ providerOptions: {
+ anthropic: {
+ cacheControl: {
+ type: "ephemeral",
+ ttl: "1h",
+ },
+ },
+ },
+ });
+ });
});
describe("applyCacheControlToTools", () => {
@@ -269,6 +304,26 @@ describe("cacheStrategy", () => {
expect(Object.keys(result)).toEqual(Object.keys(mockTools));
});
+ it("should include cache TTL on the cached tool when provided", () => {
+ const result = applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet", "1h");
+ const keys = Object.keys(mockTools);
+ const lastKey = keys[keys.length - 1];
+ const cachedLastTool = result[lastKey] as unknown as {
+ providerOptions?: {
+ anthropic?: {
+ cacheControl?: {
+ type?: string;
+ ttl?: string;
+ };
+ };
+ };
+ };
+
+ expect(cachedLastTool.providerOptions?.anthropic?.cacheControl).toEqual({
+ type: "ephemeral",
+ ttl: "1h",
+ });
+ });
it("should not modify original tools object", () => {
const originalTools = { ...mockTools };
applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index e730f04207..b47b53b599 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -3,6 +3,14 @@ import assert from "@/common/utils/assert";
import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors";
import { normalizeGatewayModel } from "./models";
+/**
+ * Anthropic prompt cache TTL value.
+ * "5m" = 5-minute cache (default, free refresh on hit).
+ * "1h" = 1-hour cache (2× base input write cost, longer lived).
+ * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+ */
+export type AnthropicCacheTtl = "5m" | "1h";
+
/**
* Check if a model supports Anthropic cache control.
* Matches:
@@ -24,12 +32,19 @@ export function supportsAnthropicCache(modelString: string): boolean {
return false;
}
-/** Cache control providerOptions for Anthropic */
-const ANTHROPIC_CACHE_CONTROL = {
- anthropic: {
- cacheControl: { type: "ephemeral" as const },
- },
-};
+/** Build cache control providerOptions for Anthropic with optional TTL. */
+function anthropicCacheControl(cacheTtl?: AnthropicCacheTtl | null) {
+ return {
+ anthropic: {
+ cacheControl: cacheTtl
+ ? { type: "ephemeral" as const, ttl: cacheTtl }
+ : { type: "ephemeral" as const },
+ },
+ };
+}
+
+/** Default cache control (no explicit TTL — Anthropic defaults to 5m). */
+const ANTHROPIC_CACHE_CONTROL = anthropicCacheControl();
type ProviderNativeTool = Extract<Tool, { type: "provider-defined" }>;
@@ -45,7 +60,11 @@ function isProviderNativeTool(tool: Tool): tool is ProviderNativeTool {
* (which the SDK handles correctly). For user/assistant messages with array
* content, we add providerOptions to the last content part.
*/
-function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+function addCacheControlToLastContentPart(
+ msg: ModelMessage,
+ cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage {
+ const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
const content = msg.content;
// String content (typically system messages): use message-level providerOptions
@@ -53,7 +72,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
if (typeof content === "string") {
return {
...msg,
- providerOptions: ANTHROPIC_CACHE_CONTROL,
+ providerOptions: cacheOpts,
};
}
@@ -62,7 +81,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
if (Array.isArray(content) && content.length > 0) {
const lastIndex = content.length - 1;
const newContent = content.map((part, i) =>
- i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+ i === lastIndex ? { ...part, providerOptions: cacheOpts } : part
);
// Type assertion needed: ModelMessage types are strict unions but providerOptions
// on content parts is valid per SDK docs
@@ -81,7 +100,11 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
* NOTE: The SDK requires providerOptions on content parts, not on the message.
* We add cache_control to the last content part of the last message.
*/
-export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
+export function applyCacheControl(
+ messages: ModelMessage[],
+ modelString: string,
+ cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage[] {
// Only apply cache control for Anthropic models
if (!supportsAnthropicCache(modelString)) {
return messages;
@@ -97,7 +120,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
return messages.map((msg, index) => {
if (index === cacheIndex) {
- return addCacheControlToLastContentPart(msg);
+ return addCacheControlToLastContentPart(msg, cacheTtl);
}
return msg;
});
@@ -109,7 +132,8 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
*/
export function createCachedSystemMessage(
systemContent: string,
- modelString: string
+ modelString: string,
+ cacheTtl?: AnthropicCacheTtl | null
): ModelMessage | null {
if (!systemContent || !supportsAnthropicCache(modelString)) {
return null;
@@ -118,13 +142,7 @@ export function createCachedSystemMessage(
return {
role: "system" as const,
content: systemContent,
- providerOptions: {
- anthropic: {
- cacheControl: {
- type: "ephemeral" as const,
- },
- },
- },
+ providerOptions: cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL,
};
}
@@ -145,7 +163,8 @@ export function createCachedSystemMessage(
*/
export function applyCacheControlToTools<T extends Record<string, Tool>>(
tools: T,
- modelString: string
+ modelString: string,
+ cacheTtl?: AnthropicCacheTtl | null
): T {
// Only apply cache control for Anthropic models
if (!supportsAnthropicCache(modelString) || !tools || Object.keys(tools).length === 0) {
@@ -156,6 +175,8 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
const toolKeys = Object.keys(tools);
const lastToolKey = toolKeys[toolKeys.length - 1];
+ const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
+
// Clone tools and add cache control ONLY to the last tool
// Anthropic caches everything up to the cache breakpoint, so marking
// only the last tool will cache all tools
@@ -168,13 +189,13 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
const cachedProviderTool = cloneToolPreservingDescriptors(
existingTool
) as ProviderNativeTool;
- cachedProviderTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+ cachedProviderTool.providerOptions = cacheOpts;
cachedTools[key as keyof T] = cachedProviderTool as unknown as T[keyof T];
} else if (existingTool.execute == null) {
// Some MCP/dynamic tools are valid without execute handlers (provider-/client-executed).
// Keep their runtime shape and attach cache control without forcing recreation.
const cachedDynamicTool = cloneToolPreservingDescriptors(existingTool);
- cachedDynamicTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+ cachedDynamicTool.providerOptions = cacheOpts;
cachedTools[key as keyof T] = cachedDynamicTool as unknown as T[keyof T];
} else {
assert(
@@ -187,7 +208,7 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
description: existingTool.description,
inputSchema: existingTool.inputSchema,
execute: existingTool.execute,
- providerOptions: ANTHROPIC_CACHE_CONTROL,
+ providerOptions: cacheOpts,
});
cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
}
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index e692e36c0f..a90655827f 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -154,6 +154,58 @@ describe("buildProviderOptions - Anthropic", () => {
});
});
});
+
+ describe("Anthropic cache TTL overrides", () => {
+ test("should include cacheControl ttl when configured", () => {
+ const result = buildProviderOptions(
+ "anthropic:claude-sonnet-4-5",
+ "off",
+ undefined,
+ undefined,
+ {
+ anthropic: { cacheTtl: "1h" },
+ }
+ );
+
+ expect(result).toEqual({
+ anthropic: {
+ disableParallelToolUse: false,
+ sendReasoning: true,
+ cacheControl: {
+ type: "ephemeral",
+ ttl: "1h",
+ },
+ },
+ });
+ });
+
+ test("should include cacheControl ttl for Opus 4.6 effort models", () => {
+ const result = buildProviderOptions(
+ "anthropic:claude-opus-4-6",
+ "medium",
+ undefined,
+ undefined,
+ {
+ anthropic: { cacheTtl: "5m" },
+ }
+ );
+
+ expect(result).toEqual({
+ anthropic: {
+ disableParallelToolUse: false,
+ sendReasoning: true,
+ thinking: {
+ type: "adaptive",
+ },
+ cacheControl: {
+ type: "ephemeral",
+ ttl: "5m",
+ },
+ effort: "medium",
+ },
+ });
+ });
+ });
});
describe("buildProviderOptions - OpenAI", () => {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 805e2628e1..82165266a7 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -93,6 +93,9 @@ export function buildProviderOptions(
// Build Anthropic-specific options
if (provider === "anthropic") {
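+    // Resolve the user-configured prompt cache TTL; cacheControl stays undefined
+    // when unset so Anthropic's default 5m TTL applies.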
+ const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+ const cacheControl = cacheTtl ? { type: "ephemeral" as const, ttl: cacheTtl } : undefined;
+
// Opus 4.5+ use the effort parameter for reasoning control.
// Opus 4.6 uses adaptive thinking (model decides when/how much to think).
// Opus 4.5 uses enabled thinking with a budgetTokens ceiling.
@@ -124,6 +127,7 @@ export function buildProviderOptions(
disableParallelToolUse: false,
sendReasoning: true,
...(thinking && { thinking }),
+ ...(cacheControl && { cacheControl }),
effort: effortLevel,
},
};
@@ -140,6 +144,7 @@ export function buildProviderOptions(
anthropic: {
disableParallelToolUse: false, // Always enable concurrent tool execution
sendReasoning: true, // Include reasoning traces in requests sent to the model
+ ...(cacheControl && { cacheControl }),
// Conditionally add thinking configuration (non-Opus 4.5 models)
...(budgetTokens > 0 && {
thinking: {
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 026b899313..ff16b81631 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -618,6 +618,7 @@ export class AIService extends EventEmitter {
providerForMessages: canonicalProviderName,
effectiveThinkingLevel,
modelString,
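+ // Thread the configured Anthropic prompt cache TTL into message preparation.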
+ anthropicCacheTtl: effectiveMuxProviderOptions.anthropic?.cacheTtl,
workspaceId,
});
diff --git a/src/node/services/messagePipeline.ts b/src/node/services/messagePipeline.ts
index 4d7999ef8c..a78ff37014 100644
--- a/src/node/services/messagePipeline.ts
+++ b/src/node/services/messagePipeline.ts
@@ -27,7 +27,7 @@ import {
injectFileChangeNotifications,
injectPostCompactionAttachments,
} from "@/browser/utils/messages/modelMessageTransform";
-import { applyCacheControl } from "@/common/utils/ai/cacheStrategy";
+import { applyCacheControl, type AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
import { log } from "./log";
/** Options for the full message preparation pipeline. */
@@ -58,6 +58,8 @@ export interface PrepareMessagesOptions {
effectiveThinkingLevel: ThinkingLevel;
/** Full model string (used for cache control). */
modelString: string;
+ /** Optional Anthropic cache TTL override for prompt caching. */
+ anthropicCacheTtl?: AnthropicCacheTtl | null;
/** Workspace ID (used only for debug logging). */
workspaceId: string;
}
@@ -98,6 +100,7 @@ export async function prepareMessagesForProvider(
providerForMessages,
effectiveThinkingLevel,
modelString,
+ anthropicCacheTtl,
workspaceId,
} = opts;
@@ -181,7 +184,7 @@ export async function prepareMessagesForProvider(
});
// Apply cache control for Anthropic models AFTER transformation
- const finalMessages = applyCacheControl(transformedMessages, modelString);
+ const finalMessages = applyCacheControl(transformedMessages, modelString, anthropicCacheTtl);
log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages);
diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts
index 11e07f8ad4..b908ee68b9 100644
--- a/src/node/services/providerModelFactory.ts
+++ b/src/node/services/providerModelFactory.ts
@@ -24,6 +24,7 @@ import type { PolicyService } from "@/node/services/policyService";
import type { ProviderService } from "@/node/services/providerService";
import type { CodexOauthService } from "@/node/services/codexOauthService";
import { normalizeGatewayModel } from "@/common/utils/ai/models";
+import type { AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution";
import { resolveProviderCredentials } from "@/node/utils/providerRequirements";
import {
@@ -101,7 +102,16 @@ if (typeof globalFetchWithExtras.certificate === "function") {
* 1. Last tool (caches all tool definitions)
* 2. Last message's last content part (caches entire conversation)
*/
-function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+function wrapFetchWithAnthropicCacheControl(
+ baseFetch: typeof fetch,
+ cacheTtl?: AnthropicCacheTtl | null
+): typeof fetch {
+ // Build the cache_control value once — include ttl only when explicitly set.
+  const cacheControlValue: Record<string, unknown> = { type: "ephemeral" };
+ if (cacheTtl) {
+ cacheControlValue.ttl = cacheTtl;
+ }
+
const cachingFetch = async (
    input: Parameters<typeof fetch>[0],
    init?: Parameters<typeof fetch>[1]
@@ -117,7 +127,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
// Inject cache_control on the last tool if tools array exists
if (Array.isArray(json.tools) && json.tools.length > 0) {
      const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
- lastTool.cache_control ??= { type: "ephemeral" };
+ lastTool.cache_control ??= cacheControlValue;
}
// Inject cache_control on last message's last content part
@@ -139,7 +149,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
if (Array.isArray(json.prompt)) {
        const providerOpts = (lastMsg.providerOptions ?? {}) as Record<string, unknown>;
        const anthropicOpts = (providerOpts.anthropic ?? {}) as Record<string, unknown>;
- anthropicOpts.cacheControl ??= { type: "ephemeral" };
+ anthropicOpts.cacheControl ??= cacheControlValue;
providerOpts.anthropic = anthropicOpts;
lastMsg.providerOptions = providerOpts;
}
@@ -148,7 +158,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
const content = lastMsg.content;
if (Array.isArray(content) && content.length > 0) {
        const lastPart = content[content.length - 1] as Record<string, unknown>;
- lastPart.cache_control ??= { type: "ephemeral" };
+ lastPart.cache_control ??= cacheControlValue;
}
}
@@ -309,6 +319,13 @@ export function parseModelString(modelString: string): [string, string] {
return [providerName, modelId];
}
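+
+/** Narrow an untrusted providers.jsonc value to a supported Anthropic cache TTL; anything else is ignored. */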
+function parseAnthropicCacheTtl(value: unknown): AnthropicCacheTtl | undefined {
+ if (value === "5m" || value === "1h") {
+ return value;
+ }
+ return undefined;
+}
+
// ---------------------------------------------------------------------------
// Model cost tracking
// ---------------------------------------------------------------------------
@@ -435,8 +452,25 @@ export class ProviderModelFactory {
}
// Load providers configuration - the ONLY source of truth
- const providersConfig = this.config.loadProvidersConfig();
- let providerConfig = providersConfig?.[providerName] ?? {};
+ const providersConfig = this.config.loadProvidersConfig() ?? {};
+
+ // Backend config is authoritative for Anthropic prompt cache TTL on any
+ // Anthropic-routed model (direct Anthropic, mux-gateway:anthropic/*,
+ // openrouter:anthropic/*). We still allow request-level values when config
+ // is unset for backward compatibility with older clients.
+ const configAnthropicCacheTtl = parseAnthropicCacheTtl(providersConfig.anthropic?.cacheTtl);
+ const isAnthropicRoutedModel =
+ providerName === "anthropic" || modelId.startsWith("anthropic/");
+ if (isAnthropicRoutedModel && configAnthropicCacheTtl && muxProviderOptions) {
+ muxProviderOptions.anthropic = {
+ ...(muxProviderOptions.anthropic ?? {}),
+ cacheTtl: configAnthropicCacheTtl,
+ };
+ }
+ const effectiveAnthropicCacheTtl =
+ muxProviderOptions?.anthropic?.cacheTtl ?? configAnthropicCacheTtl;
+
+ let providerConfig = providersConfig[providerName] ?? {};
// Providers can be disabled in providers.jsonc without deleting credentials.
if (
@@ -496,7 +530,10 @@ export class ProviderModelFactory {
// (SDK doesn't translate providerOptions to cache_control for these)
// Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
const baseFetch = getProviderFetch(providerConfig);
- const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch);
+ const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(
+ baseFetch,
+ effectiveAnthropicCacheTtl
+ );
const provider = createAnthropic({
...normalizedConfig,
fetch: fetchWithCacheControl,
@@ -1010,7 +1047,7 @@ export class ProviderModelFactory {
const baseFetch = getProviderFetch(providerConfig);
const isAnthropicModel = modelId.startsWith("anthropic/");
const fetchWithCacheControl = isAnthropicModel
- ? wrapFetchWithAnthropicCacheControl(baseFetch)
+ ? wrapFetchWithAnthropicCacheControl(baseFetch, effectiveAnthropicCacheTtl)
: baseFetch;
const fetchWithAutoLogout = wrapFetchWithMuxGatewayAutoLogout(
fetchWithCacheControl,
diff --git a/src/node/services/providerService.test.ts b/src/node/services/providerService.test.ts
index a05b0deffa..d8c24ec662 100644
--- a/src/node/services/providerService.test.ts
+++ b/src/node/services/providerService.test.ts
@@ -121,4 +121,49 @@ describe("ProviderService.setConfig", () => {
expect(afterEnable?.openai?.enabled).toBeUndefined();
});
});
+
+ it("surfaces valid Anthropic cacheTtl", () => {
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-provider-service-"));
+ try {
+ const config = new Config(tmpDir);
+ config.saveProvidersConfig({
+ anthropic: {
+ apiKey: "sk-ant-test",
+ cacheTtl: "1h",
+ },
+ });
+
+ const service = new ProviderService(config);
+ const cfg = service.getConfig();
+
+ expect(cfg.anthropic.apiKeySet).toBe(true);
+ expect(cfg.anthropic.cacheTtl).toBe("1h");
+ expect(Object.prototype.hasOwnProperty.call(cfg.anthropic, "cacheTtl")).toBe(true);
+ } finally {
+ fs.rmSync(tmpDir, { recursive: true, force: true });
+ }
+ });
+
+ it("omits invalid Anthropic cacheTtl", () => {
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-provider-service-"));
+ try {
+ const config = new Config(tmpDir);
+ config.saveProvidersConfig({
+ anthropic: {
+ apiKey: "sk-ant-test",
+ // Intentionally invalid
+ cacheTtl: "24h",
+ },
+ });
+
+ const service = new ProviderService(config);
+ const cfg = service.getConfig();
+
+ expect(cfg.anthropic.apiKeySet).toBe(true);
+ expect(cfg.anthropic.cacheTtl).toBeUndefined();
+ expect(Object.prototype.hasOwnProperty.call(cfg.anthropic, "cacheTtl")).toBe(false);
+ } finally {
+ fs.rmSync(tmpDir, { recursive: true, force: true });
+ }
+ });
});
diff --git a/src/node/services/providerService.ts b/src/node/services/providerService.ts
index 5aa5f6a61c..f393eeca3d 100644
--- a/src/node/services/providerService.ts
+++ b/src/node/services/providerService.ts
@@ -71,6 +71,7 @@ export class ProviderService {
baseUrl?: string;
models?: string[];
serviceTier?: unknown;
+ cacheTtl?: unknown;
/** OpenAI-only: default auth precedence for Codex-OAuth-allowed models. */
codexOauthDefaultAuth?: unknown;
region?: string;
@@ -124,6 +125,12 @@ export class ProviderService {
providerInfo.serviceTier = serviceTier;
}
+ // Anthropic-specific fields
+ const cacheTtl = config.cacheTtl;
+ if (provider === "anthropic" && (cacheTtl === "5m" || cacheTtl === "1h")) {
+ providerInfo.cacheTtl = cacheTtl;
+ }
+
if (provider === "openai") {
providerInfo.codexOauthSet = codexOauthSet;
diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 6ec40761f0..60ce7095e1 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -46,6 +46,7 @@ import type { Runtime } from "@/node/runtime/Runtime";
import {
createCachedSystemMessage,
applyCacheControlToTools,
+ type AnthropicCacheTtl,
} from "@/common/utils/ai/cacheStrategy";
import type { SessionUsageService } from "./sessionUsageService";
import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
@@ -103,6 +104,35 @@ interface StreamRequestConfig {
hasQueuedMessage?: () => boolean;
}
+function isRecord(value: unknown): value is Record<string, unknown> {
+ return typeof value === "object" && value !== null;
+}
+
+function isAnthropicCacheTtl(value: unknown): value is AnthropicCacheTtl {
+ return value === "5m" || value === "1h";
+}
+
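+/**
+ * Read a validated cacheControl.ttl from already-resolved Anthropic provider
+ * options; returns undefined for unset or malformed values so callers fall
+ * back to Anthropic's 5m default.
+ */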
+function getAnthropicCacheTtl(
+  providerOptions?: Record<string, unknown>
+): AnthropicCacheTtl | undefined {
+ if (!providerOptions) {
+ return undefined;
+ }
+
+ const anthropicOptions = providerOptions.anthropic;
+ if (!isRecord(anthropicOptions)) {
+ return undefined;
+ }
+
+ const cacheControl = anthropicOptions.cacheControl;
+ if (!isRecord(cacheControl)) {
+ return undefined;
+ }
+
+ const ttl = cacheControl.ttl;
+ return isAnthropicCacheTtl(ttl) ? ttl : undefined;
+}
+
// Stream state enum for exhaustive checking
enum StreamState {
IDLE = "idle",
@@ -977,9 +1007,10 @@ export class StreamManager extends EventEmitter {
let finalMessages = messages;
let finalTools = tools;
let finalSystem: string | undefined = system;
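+ // Reuse the TTL already resolved into providerOptions so the cached system
+ // message and tool cache breakpoints carry the same cache_control value.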
+ const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);
// For Anthropic models, convert system message to a cached message at the start
- const cachedSystemMessage = createCachedSystemMessage(system, modelString);
+ const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);
if (cachedSystemMessage) {
// Prepend cached system message and set system parameter to undefined
// Note: Must be undefined, not empty string, to avoid Anthropic API error
@@ -989,7 +1020,7 @@ export class StreamManager extends EventEmitter {
// Apply cache control to tools for Anthropic models
if (tools) {
- finalTools = applyCacheControlToTools(tools, modelString);
+ finalTools = applyCacheControlToTools(tools, modelString, anthropicCacheTtl);
}
// Use model's max_output_tokens if available and caller didn't specify.
diff --git a/tests/ipc/streaming/resume.test.ts b/tests/ipc/streaming/resume.test.ts
index a8ff2d96e8..4dac0b138f 100644
--- a/tests/ipc/streaming/resume.test.ts
+++ b/tests/ipc/streaming/resume.test.ts
@@ -28,6 +28,11 @@ describeIntegration("resumeStream", () => {
const { env, workspaceId, cleanup } = await setupWorkspace("anthropic");
const collector1 = createStreamCollector(env.orpc, workspaceId);
collector1.start();
+ // Wait until the onChat subscription is fully established before sending;
+ // otherwise the replay-vs-live race described below can emit the same user
+ // message twice in collector1.
+ await collector1.waitForSubscription(10000);
+
try {
// Ensure the onChat subscription has finished history replay before we send a new message.
// Otherwise the user message can appear twice (once from live events, once from replay).