diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index f6a3ac7eec..b457c17f6f 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -1468,6 +1468,64 @@ export function ProvidersSection() {
         );
       })}
 
+      {/* Anthropic: prompt cache TTL */}
+      {provider === "anthropic" && (
+        <div>
+          <div>
+            <span>Prompt cache TTL</span>
+            <Tooltip>
+              <TooltipTrigger>?</TooltipTrigger>
+              <TooltipContent>
+                <div>Prompt cache TTL</div>
+                <div>
+                  Default is <code>5m</code>. Use{" "}
+                  <code>1h</code> for longer workflows at a
+                  higher cache-write cost.
+                </div>
+              </TooltipContent>
+            </Tooltip>
+          </div>
+          <select
+            value={cacheTtl}
+            onChange={(e) => setCacheTtl(e.target.value as "5m" | "1h")}
+          >
+            <option value="5m">5m</option>
+            <option value="1h">1h</option>
+          </select>
+        </div>
+      )}
+
       {/* OpenAI: ChatGPT OAuth + service tier */}
       {provider === "openai" && (
         <div>
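Before the server-side plumbing, note how the renderer consumes this setting. A minimal sketch of the read path, assuming only the `provider_options_anthropic` localStorage key exercised by the sendOptions test below (the actual `getSendOptionsFromStorage` implementation is not part of this diff, and the variable names here are illustrative):

// Sketch only: mirrors the storage shape asserted in sendOptions.test.ts below.
const raw = window.localStorage.getItem("provider_options_anthropic");
const anthropicOpts = raw ? (JSON.parse(raw) as { cacheTtl?: "5m" | "1h" }) : undefined;
// A persisted { cacheTtl: "1h" } surfaces as
// sendOptions.providerOptions.anthropic.cacheTtl === "1h".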
diff --git a/src/browser/utils/messages/sendOptions.test.ts b/src/browser/utils/messages/sendOptions.test.ts
index b0712d1ed6..624b0ba947 100644
--- a/src/browser/utils/messages/sendOptions.test.ts
+++ b/src/browser/utils/messages/sendOptions.test.ts
@@ -61,4 +61,18 @@ describe("getSendOptionsFromStorage", () => {
     const withThinking = getSendOptionsFromStorage(workspaceId);
     expect(withThinking.system1ThinkingLevel).toBe("high");
   });
+
+  test("includes Anthropic prompt cache TTL from persisted provider options", () => {
+    const workspaceId = "ws-3";
+
+    window.localStorage.setItem(
+      "provider_options_anthropic",
+      JSON.stringify({
+        cacheTtl: "1h",
+      })
+    );
+
+    const options = getSendOptionsFromStorage(workspaceId);
+    expect(options.providerOptions?.anthropic?.cacheTtl).toBe("1h");
+  });
 });
diff --git a/src/common/orpc/schemas/api.test.ts b/src/common/orpc/schemas/api.test.ts
index 00064d18ca..7cec9dd1f5 100644
--- a/src/common/orpc/schemas/api.test.ts
+++ b/src/common/orpc/schemas/api.test.ts
@@ -103,6 +103,7 @@ describe("ProviderConfigInfoSchema conformance", () => {
       baseUrl: "https://custom.endpoint.com",
       models: ["claude-3-opus", "claude-3-sonnet"],
       serviceTier: "flex",
+      cacheTtl: "1h",
       codexOauthSet: true,
       codexOauthDefaultAuth: "apiKey",
       aws: {
@@ -126,6 +127,7 @@
     expect(parsed.baseUrl).toBe(full.baseUrl);
     expect(parsed.models).toEqual(full.models);
     expect(parsed.serviceTier).toBe(full.serviceTier);
+    expect(parsed.cacheTtl).toBe(full.cacheTtl);
     expect(parsed.codexOauthSet).toBe(full.codexOauthSet);
     expect(parsed.codexOauthDefaultAuth).toBe(full.codexOauthDefaultAuth);
     expect(parsed.aws).toEqual(full.aws);
diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts
index 5132928871..aa597fc1ad 100644
--- a/src/common/orpc/schemas/api.ts
+++ b/src/common/orpc/schemas/api.ts
@@ -131,6 +131,8 @@ export const ProviderConfigInfoSchema = z.object({
   models: z.array(z.string()).optional(),
   /** OpenAI-specific fields */
   serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional(),
+  /** Anthropic-specific fields */
+  cacheTtl: z.enum(["5m", "1h"]).optional(),
   /** OpenAI-only: whether Codex OAuth tokens are present in providers.jsonc */
   codexOauthSet: z.boolean().optional(),
   /**
diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts
index 8b5a54386a..a4d1678b1a 100644
--- a/src/common/orpc/schemas/providerOptions.ts
+++ b/src/common/orpc/schemas/providerOptions.ts
@@ -12,6 +12,14 @@ export const MuxProviderOptionsSchema = z.object({
         description:
           "Model IDs with 1M context enabled (e.g. ['anthropic:claude-sonnet-4-20250514'])",
       }),
+      // Anthropic prompt cache TTL. "5m" is the default (free refresh on hit).
+      // "1h" costs 2× base input for cache writes but keeps the cache alive longer —
+      // useful for agentic workflows where turns take >5 minutes.
+      // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+      cacheTtl: z.enum(["5m", "1h"]).nullish().meta({
+        description:
+          'Anthropic prompt cache TTL: "5m" (default, free refresh) or "1h" (2× write cost, longer cache)',
+      }),
     })
     .optional(),
   openai: z
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 10eeeb5631..1bb76b24a4 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -146,6 +146,23 @@ describe("cacheStrategy", () => {
       anthropic: { cacheControl: { type: "ephemeral" } },
     }); // Last part has cache control
   });
+
+  it("should include cache TTL when provided", () => {
+    const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
+    const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet", "1h");
+
+    expect(result[0]).toEqual({
+      ...messages[0],
+      providerOptions: {
+        anthropic: {
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "1h",
+          },
+        },
+      },
+    });
+  });
 });
 
 describe("createCachedSystemMessage", () => {
@@ -194,6 +211,24 @@ describe("cacheStrategy", () => {
       },
     });
   });
+
+  it("should include cache TTL in cached system message when provided", () => {
+    const systemContent = "You are a helpful assistant";
+    const result = createCachedSystemMessage(systemContent, "anthropic:claude-3-5-sonnet", "1h");
+
+    expect(result).toEqual({
+      role: "system",
+      content: systemContent,
+      providerOptions: {
+        anthropic: {
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "1h",
+          },
+        },
+      },
+    });
+  });
 });
 
 describe("applyCacheControlToTools", () => {
@@ -269,6 +304,26 @@ describe("cacheStrategy", () => {
     expect(Object.keys(result)).toEqual(Object.keys(mockTools));
   });
 
+  it("should include cache TTL on the cached tool when provided", () => {
+    const result = applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet", "1h");
+    const keys = Object.keys(mockTools);
+    const lastKey = keys[keys.length - 1];
+    const cachedLastTool = result[lastKey] as unknown as {
+      providerOptions?: {
+        anthropic?: {
+          cacheControl?: {
+            type?: string;
+            ttl?: string;
+          };
+        };
+      };
+    };
+
+    expect(cachedLastTool.providerOptions?.anthropic?.cacheControl).toEqual({
+      type: "ephemeral",
+      ttl: "1h",
+    });
+  });
   it("should not modify original tools object", () => {
     const originalTools = { ...mockTools };
     applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index e730f04207..b47b53b599 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -3,6 +3,14 @@ import assert from "@/common/utils/assert";
 import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors";
 import { normalizeGatewayModel } from "./models";
 
+/**
+ * Anthropic prompt cache TTL value.
+ * "5m" = 5-minute cache (default, free refresh on hit).
+ * "1h" = 1-hour cache (2× base input write cost, longer lived).
+ * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+ */
+export type AnthropicCacheTtl = "5m" | "1h";
+
 /**
  * Check if a model supports Anthropic cache control.
  * Matches:
@@ -24,12 +32,19 @@ export function supportsAnthropicCache(modelString: string): boolean {
   return false;
 }
 
-/** Cache control providerOptions for Anthropic */
-const ANTHROPIC_CACHE_CONTROL = {
-  anthropic: {
-    cacheControl: { type: "ephemeral" as const },
-  },
-};
+/** Build cache control providerOptions for Anthropic with optional TTL. */
+function anthropicCacheControl(cacheTtl?: AnthropicCacheTtl | null) {
+  return {
+    anthropic: {
+      cacheControl: cacheTtl
+        ? { type: "ephemeral" as const, ttl: cacheTtl }
+        : { type: "ephemeral" as const },
+    },
+  };
+}
+
+/** Default cache control (no explicit TTL — Anthropic defaults to 5m). */
+const ANTHROPIC_CACHE_CONTROL = anthropicCacheControl();
 
 type ProviderNativeTool = Extract<Tool, { type: "provider-defined" }>;
 
@@ -45,7 +60,11 @@ function isProviderNativeTool(tool: Tool): tool is ProviderNativeTool {
  * (which the SDK handles correctly). For user/assistant messages with array
  * content, we add providerOptions to the last content part.
  */
-function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+function addCacheControlToLastContentPart(
+  msg: ModelMessage,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage {
+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
   const content = msg.content;
 
   // String content (typically system messages): use message-level providerOptions
@@ -53,7 +72,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (typeof content === "string") {
     return {
       ...msg,
-      providerOptions: ANTHROPIC_CACHE_CONTROL,
+      providerOptions: cacheOpts,
     };
   }
 
@@ -62,7 +81,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (Array.isArray(content) && content.length > 0) {
     const lastIndex = content.length - 1;
     const newContent = content.map((part, i) =>
-      i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+      i === lastIndex ? { ...part, providerOptions: cacheOpts } : part
     );
     // Type assertion needed: ModelMessage types are strict unions but providerOptions
     // on content parts is valid per SDK docs
@@ -81,7 +100,11 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
  * NOTE: The SDK requires providerOptions on content parts, not on the message.
  * We add cache_control to the last content part of the last message.
  */
-export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
+export function applyCacheControl(
+  messages: ModelMessage[],
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage[] {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString)) {
     return messages;
@@ -97,7 +120,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
 
   return messages.map((msg, index) => {
     if (index === cacheIndex) {
-      return addCacheControlToLastContentPart(msg);
+      return addCacheControlToLastContentPart(msg, cacheTtl);
     }
     return msg;
   });
@@ -109,7 +132,8 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
  */
 export function createCachedSystemMessage(
   systemContent: string,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): ModelMessage | null {
   if (!systemContent || !supportsAnthropicCache(modelString)) {
     return null;
@@ -118,13 +142,7 @@ export function createCachedSystemMessage(
   return {
     role: "system" as const,
     content: systemContent,
-    providerOptions: {
-      anthropic: {
-        cacheControl: {
-          type: "ephemeral" as const,
-        },
-      },
-    },
+    providerOptions: cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL,
   };
 }
 
@@ -145,7 +163,8 @@ export function createCachedSystemMessage(
  */
 export function applyCacheControlToTools<T extends Record<string, Tool>>(
   tools: T,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): T {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString) || !tools || Object.keys(tools).length === 0) {
@@ -156,6 +175,8 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
   const toolKeys = Object.keys(tools);
   const lastToolKey = toolKeys[toolKeys.length - 1];
 
+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
+
   // Clone tools and add cache control ONLY to the last tool
   // Anthropic caches everything up to the cache breakpoint, so marking
   // only the last tool will cache all tools
@@ -168,13 +189,13 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
       const cachedProviderTool = cloneToolPreservingDescriptors(
         existingTool
       ) as ProviderNativeTool;
-      cachedProviderTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedProviderTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedProviderTool as unknown as T[keyof T];
     } else if (existingTool.execute == null) {
       // Some MCP/dynamic tools are valid without execute handlers (provider-/client-executed).
       // Keep their runtime shape and attach cache control without forcing recreation.
       const cachedDynamicTool = cloneToolPreservingDescriptors(existingTool);
-      cachedDynamicTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedDynamicTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedDynamicTool as unknown as T[keyof T];
     } else {
       assert(
@@ -187,7 +208,7 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
         description: existingTool.description,
         inputSchema: existingTool.inputSchema,
         execute: existingTool.execute,
-        providerOptions: ANTHROPIC_CACHE_CONTROL,
+        providerOptions: cacheOpts,
       });
       cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
     }
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index e692e36c0f..a90655827f 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -154,6 +154,58 @@ describe("buildProviderOptions - Anthropic", () => {
       });
     });
   });
+
+  describe("Anthropic cache TTL overrides", () => {
+    test("should include cacheControl ttl when configured", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-sonnet-4-5",
+        "off",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "1h" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "1h",
+          },
+        },
+      });
+    });
+
+    test("should include cacheControl ttl for Opus 4.6 effort models", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-opus-4-6",
+        "medium",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "5m" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "adaptive",
+          },
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "5m",
+          },
+          effort: "medium",
+        },
+      });
+    });
+  });
 });
 
 describe("buildProviderOptions - OpenAI", () => {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 805e2628e1..82165266a7 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -93,6 +93,9 @@ export function buildProviderOptions(
 
   // Build Anthropic-specific options
   if (provider === "anthropic") {
+    const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+    const cacheControl = cacheTtl ? { type: "ephemeral" as const, ttl: cacheTtl } : undefined;
+
     // Opus 4.5+ use the effort parameter for reasoning control.
     // Opus 4.6 uses adaptive thinking (model decides when/how much to think).
     // Opus 4.5 uses enabled thinking with a budgetTokens ceiling.
@@ -124,6 +127,7 @@ export function buildProviderOptions(
         disableParallelToolUse: false,
         sendReasoning: true,
         ...(thinking && { thinking }),
+        ...(cacheControl && { cacheControl }),
         effort: effortLevel,
       },
     };
@@ -140,6 +144,7 @@ export function buildProviderOptions(
     anthropic: {
       disableParallelToolUse: false, // Always enable concurrent tool execution
       sendReasoning: true, // Include reasoning traces in requests sent to the model
+      ...(cacheControl && { cacheControl }),
       // Conditionally add thinking configuration (non-Opus 4.5 models)
       ...(budgetTokens > 0 && {
        thinking: {
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 026b899313..ff16b81631 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -618,6 +618,7 @@ export class AIService extends EventEmitter {
       providerForMessages: canonicalProviderName,
       effectiveThinkingLevel,
       modelString,
+      anthropicCacheTtl: effectiveMuxProviderOptions.anthropic?.cacheTtl,
       workspaceId,
     });
 
diff --git a/src/node/services/messagePipeline.ts b/src/node/services/messagePipeline.ts
index 4d7999ef8c..a78ff37014 100644
--- a/src/node/services/messagePipeline.ts
+++ b/src/node/services/messagePipeline.ts
@@ -27,7 +27,7 @@ import {
   injectFileChangeNotifications,
   injectPostCompactionAttachments,
 } from "@/browser/utils/messages/modelMessageTransform";
-import { applyCacheControl } from "@/common/utils/ai/cacheStrategy";
+import { applyCacheControl, type AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { log } from "./log";
 
 /** Options for the full message preparation pipeline. */
@@ -58,6 +58,8 @@ export interface PrepareMessagesOptions {
   effectiveThinkingLevel: ThinkingLevel;
   /** Full model string (used for cache control). */
   modelString: string;
+  /** Optional Anthropic cache TTL override for prompt caching. */
+  anthropicCacheTtl?: AnthropicCacheTtl | null;
   /** Workspace ID (used only for debug logging). */
   workspaceId: string;
 }
@@ -98,6 +100,7 @@ export async function prepareMessagesForProvider(
     providerForMessages,
     effectiveThinkingLevel,
     modelString,
+    anthropicCacheTtl,
     workspaceId,
   } = opts;
 
@@ -181,7 +184,7 @@ export async function prepareMessagesForProvider(
   });
 
   // Apply cache control for Anthropic models AFTER transformation
-  const finalMessages = applyCacheControl(transformedMessages, modelString);
+  const finalMessages = applyCacheControl(transformedMessages, modelString, anthropicCacheTtl);
 
   log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages);
 
diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts
index 11e07f8ad4..b908ee68b9 100644
--- a/src/node/services/providerModelFactory.ts
+++ b/src/node/services/providerModelFactory.ts
@@ -24,6 +24,7 @@ import type { PolicyService } from "@/node/services/policyService";
 import type { ProviderService } from "@/node/services/providerService";
 import type { CodexOauthService } from "@/node/services/codexOauthService";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
+import type { AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution";
 import { resolveProviderCredentials } from "@/node/utils/providerRequirements";
 import {
@@ -101,7 +102,16 @@ if (typeof globalFetchWithExtras.certificate === "function") {
  * 1. Last tool (caches all tool definitions)
  * 2. Last message's last content part (caches entire conversation)
  */
-function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+function wrapFetchWithAnthropicCacheControl(
+  baseFetch: typeof fetch,
+  cacheTtl?: AnthropicCacheTtl | null
+): typeof fetch {
+  // Build the cache_control value once — include ttl only when explicitly set.
+  const cacheControlValue: Record<string, unknown> = { type: "ephemeral" };
+  if (cacheTtl) {
+    cacheControlValue.ttl = cacheTtl;
+  }
+
   const cachingFetch = async (
     input: Parameters<typeof fetch>[0],
     init?: Parameters<typeof fetch>[1]
@@ -117,7 +127,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     // Inject cache_control on the last tool if tools array exists
     if (Array.isArray(json.tools) && json.tools.length > 0) {
       const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
-      lastTool.cache_control ??= { type: "ephemeral" };
+      lastTool.cache_control ??= cacheControlValue;
     }
 
     // Inject cache_control on last message's last content part
@@ -139,7 +149,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     if (Array.isArray(json.prompt)) {
       const providerOpts = (lastMsg.providerOptions ?? {}) as Record<string, unknown>;
       const anthropicOpts = (providerOpts.anthropic ?? {}) as Record<string, unknown>;
-      anthropicOpts.cacheControl ??= { type: "ephemeral" };
+      anthropicOpts.cacheControl ??= cacheControlValue;
       providerOpts.anthropic = anthropicOpts;
       lastMsg.providerOptions = providerOpts;
     }
@@ -148,7 +158,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     const content = lastMsg.content;
     if (Array.isArray(content) && content.length > 0) {
       const lastPart = content[content.length - 1] as Record<string, unknown>;
-      lastPart.cache_control ??= { type: "ephemeral" };
+      lastPart.cache_control ??= cacheControlValue;
     }
 
@@ -309,6 +319,13 @@ export function parseModelString(modelString: string): [string, string] {
   return [providerName, modelId];
 }
 
+function parseAnthropicCacheTtl(value: unknown): AnthropicCacheTtl | undefined {
+  if (value === "5m" || value === "1h") {
+    return value;
+  }
+  return undefined;
+}
+
 // ---------------------------------------------------------------------------
 // Model cost tracking
 // ---------------------------------------------------------------------------
@@ -435,8 +452,25 @@ export class ProviderModelFactory {
     }
 
     // Load providers configuration - the ONLY source of truth
-    const providersConfig = this.config.loadProvidersConfig();
-    let providerConfig = providersConfig?.[providerName] ?? {};
+    const providersConfig = this.config.loadProvidersConfig() ?? {};
+
+    // Backend config is authoritative for Anthropic prompt cache TTL on any
+    // Anthropic-routed model (direct Anthropic, mux-gateway:anthropic/*,
+    // openrouter:anthropic/*). We still allow request-level values when config
+    // is unset for backward compatibility with older clients.
+    const configAnthropicCacheTtl = parseAnthropicCacheTtl(providersConfig.anthropic?.cacheTtl);
+    const isAnthropicRoutedModel =
+      providerName === "anthropic" || modelId.startsWith("anthropic/");
+    if (isAnthropicRoutedModel && configAnthropicCacheTtl && muxProviderOptions) {
+      muxProviderOptions.anthropic = {
+        ...(muxProviderOptions.anthropic ?? {}),
+        cacheTtl: configAnthropicCacheTtl,
+      };
+    }
+    const effectiveAnthropicCacheTtl =
+      muxProviderOptions?.anthropic?.cacheTtl ?? configAnthropicCacheTtl;
+
+    let providerConfig = providersConfig[providerName] ?? {};
 
     // Providers can be disabled in providers.jsonc without deleting credentials.
     if (
@@ -496,7 +530,10 @@ export class ProviderModelFactory {
       // (SDK doesn't translate providerOptions to cache_control for these)
       // Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
       const baseFetch = getProviderFetch(providerConfig);
-      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch);
+      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(
+        baseFetch,
+        effectiveAnthropicCacheTtl
+      );
       const provider = createAnthropic({
         ...normalizedConfig,
         fetch: fetchWithCacheControl,
@@ -1010,7 +1047,7 @@ export class ProviderModelFactory {
       const baseFetch = getProviderFetch(providerConfig);
       const isAnthropicModel = modelId.startsWith("anthropic/");
       const fetchWithCacheControl = isAnthropicModel
-        ? wrapFetchWithAnthropicCacheControl(baseFetch)
+        ? wrapFetchWithAnthropicCacheControl(baseFetch, effectiveAnthropicCacheTtl)
         : baseFetch;
       const fetchWithAutoLogout = wrapFetchWithMuxGatewayAutoLogout(
         fetchWithCacheControl,
diff --git a/src/node/services/providerService.test.ts b/src/node/services/providerService.test.ts
index a05b0deffa..d8c24ec662 100644
--- a/src/node/services/providerService.test.ts
+++ b/src/node/services/providerService.test.ts
@@ -121,4 +121,49 @@ describe("ProviderService.setConfig", () => {
       expect(afterEnable?.openai?.enabled).toBeUndefined();
     });
   });
+
+  it("surfaces valid Anthropic cacheTtl", () => {
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-provider-service-"));
+    try {
+      const config = new Config(tmpDir);
+      config.saveProvidersConfig({
+        anthropic: {
+          apiKey: "sk-ant-test",
+          cacheTtl: "1h",
+        },
+      });
+
+      const service = new ProviderService(config);
+      const cfg = service.getConfig();
+
+      expect(cfg.anthropic.apiKeySet).toBe(true);
+      expect(cfg.anthropic.cacheTtl).toBe("1h");
+      expect(Object.prototype.hasOwnProperty.call(cfg.anthropic, "cacheTtl")).toBe(true);
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  it("omits invalid Anthropic cacheTtl", () => {
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-provider-service-"));
+    try {
+      const config = new Config(tmpDir);
+      config.saveProvidersConfig({
+        anthropic: {
+          apiKey: "sk-ant-test",
+          // Intentionally invalid
+          cacheTtl: "24h",
+        },
+      });
+
+      const service = new ProviderService(config);
+      const cfg = service.getConfig();
+
+      expect(cfg.anthropic.apiKeySet).toBe(true);
+      expect(cfg.anthropic.cacheTtl).toBeUndefined();
+      expect(Object.prototype.hasOwnProperty.call(cfg.anthropic, "cacheTtl")).toBe(false);
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
 });
diff --git a/src/node/services/providerService.ts b/src/node/services/providerService.ts
index 5aa5f6a61c..f393eeca3d 100644
--- a/src/node/services/providerService.ts
+++ b/src/node/services/providerService.ts
@@ -71,6 +71,7 @@ export class ProviderService {
       baseUrl?: string;
       models?: string[];
       serviceTier?: unknown;
+      cacheTtl?: unknown;
       /** OpenAI-only: default auth precedence for Codex-OAuth-allowed models. */
       codexOauthDefaultAuth?: unknown;
       region?: string;
@@ -124,6 +125,12 @@ export class ProviderService {
       providerInfo.serviceTier = serviceTier;
     }
 
+    // Anthropic-specific fields
+    const cacheTtl = config.cacheTtl;
+    if (provider === "anthropic" && (cacheTtl === "5m" || cacheTtl === "1h")) {
+      providerInfo.cacheTtl = cacheTtl;
+    }
+
     if (provider === "openai") {
       providerInfo.codexOauthSet = codexOauthSet;
 
diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 6ec40761f0..60ce7095e1 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -46,6 +46,7 @@ import type { Runtime } from "@/node/runtime/Runtime";
 import {
   createCachedSystemMessage,
   applyCacheControlToTools,
+  type AnthropicCacheTtl,
 } from "@/common/utils/ai/cacheStrategy";
 import type { SessionUsageService } from "./sessionUsageService";
 import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
@@ -103,6 +104,35 @@ interface StreamRequestConfig {
   hasQueuedMessage?: () => boolean;
 }
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
+function isAnthropicCacheTtl(value: unknown): value is AnthropicCacheTtl {
+  return value === "5m" || value === "1h";
+}
+
+function getAnthropicCacheTtl(
+  providerOptions?: Record<string, unknown>
+): AnthropicCacheTtl | undefined {
+  if (!providerOptions) {
+    return undefined;
+  }
+
+  const anthropicOptions = providerOptions.anthropic;
+  if (!isRecord(anthropicOptions)) {
+    return undefined;
+  }
+
+  const cacheControl = anthropicOptions.cacheControl;
+  if (!isRecord(cacheControl)) {
+    return undefined;
+  }
+
+  const ttl = cacheControl.ttl;
+  return isAnthropicCacheTtl(ttl) ? ttl : undefined;
+}
+
 // Stream state enum for exhaustive checking
 enum StreamState {
   IDLE = "idle",
@@ -977,9 +1007,10 @@ export class StreamManager extends EventEmitter {
     let finalMessages = messages;
     let finalTools = tools;
     let finalSystem: string | undefined = system;
+    const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);
 
     // For Anthropic models, convert system message to a cached message at the start
-    const cachedSystemMessage = createCachedSystemMessage(system, modelString);
+    const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);
     if (cachedSystemMessage) {
       // Prepend cached system message and set system parameter to undefined
       // Note: Must be undefined, not empty string, to avoid Anthropic API error
@@ -989,7 +1020,7 @@ export class StreamManager extends EventEmitter {
 
     // Apply cache control to tools for Anthropic models
     if (tools) {
-      finalTools = applyCacheControlToTools(tools, modelString);
+      finalTools = applyCacheControlToTools(tools, modelString, anthropicCacheTtl);
     }
 
     // Use model's max_output_tokens if available and caller didn't specify.
diff --git a/tests/ipc/streaming/resume.test.ts b/tests/ipc/streaming/resume.test.ts
index a8ff2d96e8..4dac0b138f 100644
--- a/tests/ipc/streaming/resume.test.ts
+++ b/tests/ipc/streaming/resume.test.ts
@@ -28,6 +28,11 @@ describeIntegration("resumeStream", () => {
     const { env, workspaceId, cleanup } = await setupWorkspace("anthropic");
     const collector1 = createStreamCollector(env.orpc, workspaceId);
     collector1.start();
+    // Wait until the onChat subscription is fully established before sending.
+    // Without this guard, the initial history replay can race with the live user
+    // message append and emit the same user message twice in collector1.
+    await collector1.waitForSubscription(10000);
+
     try {
       // Ensure the onChat subscription has finished history replay before we send a new message.
       // Otherwise the user message can appear twice (once from live events, once from replay).
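End to end, the TTL now flows: settings UI → providers.jsonc / provider_options_anthropic → MuxProviderOptionsSchema → buildProviderOptions and prepareMessagesForProvider → the cacheStrategy helpers and the Anthropic fetch wrapper. A sketch of how the three cacheStrategy helpers compose for a single Anthropic request; `history` and `toolSet` are hypothetical placeholders, and the `"ai"` import path for the SDK types is an assumption (the diff does not show cacheStrategy.ts's imports):

import type { ModelMessage, Tool } from "ai"; // assumed import path for the SDK types
import {
  applyCacheControl,
  applyCacheControlToTools,
  createCachedSystemMessage,
} from "@/common/utils/ai/cacheStrategy";

declare const history: ModelMessage[]; // hypothetical: conversation so far
declare const toolSet: Record<string, Tool>; // hypothetical: available tools

const model = "anthropic:claude-3-5-sonnet";
const ttl = "1h" as const;

// System prompt becomes a cached leading system message; the plain `system`
// param is then passed as undefined (see streamManager.ts above).
const cachedSystem = createCachedSystemMessage("You are a helpful assistant", model, ttl);

// The last content part of the last cacheable message gets
// { type: "ephemeral", ttl: "1h" }; earlier parts are left untouched.
const messages = applyCacheControl(history, model, ttl);

// Only the last tool is marked; Anthropic caches everything up to that breakpoint.
const tools = applyCacheControlToTools(toolSet, model, ttl);

// For non-Anthropic model strings, applyCacheControl and applyCacheControlToTools
// return their input unchanged, and createCachedSystemMessage returns null.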