From 90e82576bdc4d6a378b248609fa86ced1704d04c Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 13:41:56 +0000
Subject: [PATCH 1/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20configurable?=
 =?UTF-8?q?=20Anthropic=20prompt=20cache=20TTL?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Anthropic cache TTL support (`5m` / `1h`) across provider options, cache
strategy, stream pipeline, and fetch-level cache_control injection, with
tests for TTL propagation.
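
For example (shapes taken from the tests in this patch), a send request whose
mux provider options carry

    { anthropic: { cacheTtl: "1h" } }

is mapped by `buildProviderOptions` to

    cacheControl: { type: "ephemeral", ttl: "1h" }

and the same TTL rides along on every Anthropic cache breakpoint: the cached
system message, the last tool, and the last message content part.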
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `.50`_
---
 src/common/orpc/schemas/providerOptions.ts  |  8 +++
 src/common/utils/ai/cacheStrategy.test.ts   | 55 +++++++++++++++++
 src/common/utils/ai/cacheStrategy.ts        | 67 ++++++++++++++-------
 src/common/utils/ai/providerOptions.test.ts | 52 ++++++++++++++++
 src/common/utils/ai/providerOptions.ts      |  5 ++
 src/node/services/aiService.ts              |  1 +
 src/node/services/messagePipeline.ts        |  7 ++-
 src/node/services/providerModelFactory.ts   | 24 ++++++--
 src/node/services/streamManager.ts          | 35 ++++++++++-
 9 files changed, 221 insertions(+), 33 deletions(-)

diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts
index 8b5a54386a..a4d1678b1a 100644
--- a/src/common/orpc/schemas/providerOptions.ts
+++ b/src/common/orpc/schemas/providerOptions.ts
@@ -12,6 +12,14 @@ export const MuxProviderOptionsSchema = z.object({
         description:
           "Model IDs with 1M context enabled (e.g. ['anthropic:claude-sonnet-4-20250514'])",
       }),
+      // Anthropic prompt cache TTL. "5m" is the default (free refresh on hit).
+      // "1h" costs 2× base input for cache writes but keeps the cache alive longer —
+      // useful for agentic workflows where turns take >5 minutes.
+      // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+      cacheTtl: z.enum(["5m", "1h"]).nullish().meta({
+        description:
+          'Anthropic prompt cache TTL: "5m" (default, free refresh) or "1h" (2× write cost, longer cache)',
+      }),
     })
     .optional(),
   openai: z
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 10eeeb5631..1bb76b24a4 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -146,6 +146,23 @@ describe("cacheStrategy", () => {
         anthropic: { cacheControl: { type: "ephemeral" } },
       }); // Last part has cache control
     });
+
+    it("should include cache TTL when provided", () => {
+      const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
+      const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet", "1h");
+
+      expect(result[0]).toEqual({
+        ...messages[0],
+        providerOptions: {
+          anthropic: {
+            cacheControl: {
+              type: "ephemeral",
+              ttl: "1h",
+            },
+          },
+        },
+      });
+    });
   });

   describe("createCachedSystemMessage", () => {
@@ -194,6 +211,24 @@ describe("cacheStrategy", () => {
         },
       });
     });
+
+    it("should include cache TTL in cached system message when provided", () => {
+      const systemContent = "You are a helpful assistant";
+      const result = createCachedSystemMessage(systemContent, "anthropic:claude-3-5-sonnet", "1h");
+
+      expect(result).toEqual({
+        role: "system",
+        content: systemContent,
+        providerOptions: {
+          anthropic: {
+            cacheControl: {
+              type: "ephemeral",
+              ttl: "1h",
+            },
+          },
+        },
+      });
+    });
   });

   describe("applyCacheControlToTools", () => {
@@ -269,6 +304,26 @@ describe("cacheStrategy", () => {
       expect(Object.keys(result)).toEqual(Object.keys(mockTools));
     });

+    it("should include cache TTL on the cached tool when provided", () => {
+      const result = applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet", "1h");
+      const keys = Object.keys(mockTools);
+      const lastKey = keys[keys.length - 1];
+      const cachedLastTool = result[lastKey] as unknown as {
+        providerOptions?: {
+          anthropic?: {
+            cacheControl?: {
+              type?: string;
+              ttl?: string;
+            };
+          };
+        };
+      };
+
+      expect(cachedLastTool.providerOptions?.anthropic?.cacheControl).toEqual({
+        type: "ephemeral",
+        ttl: "1h",
+      });
+    });
+
     it("should not modify original tools object", () => {
       const originalTools = { ...mockTools };
       applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index e730f04207..b47b53b599 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -3,6 +3,14 @@ import assert from "@/common/utils/assert";
 import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors";
 import { normalizeGatewayModel } from "./models";

+/**
+ * Anthropic prompt cache TTL value.
+ * "5m" = 5-minute cache (default, free refresh on hit).
+ * "1h" = 1-hour cache (2× base input write cost, longer lived).
+ * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+ */
+export type AnthropicCacheTtl = "5m" | "1h";
+
 /**
  * Check if a model supports Anthropic cache control.
  * Matches:
@@ -24,12 +32,19 @@ export function supportsAnthropicCache(modelString: string): boolean {
   return false;
 }

-/** Cache control providerOptions for Anthropic */
-const ANTHROPIC_CACHE_CONTROL = {
-  anthropic: {
-    cacheControl: { type: "ephemeral" as const },
-  },
-};
+/** Build cache control providerOptions for Anthropic with optional TTL. */
+function anthropicCacheControl(cacheTtl?: AnthropicCacheTtl | null) {
+  return {
+    anthropic: {
+      cacheControl: cacheTtl
+        ? { type: "ephemeral" as const, ttl: cacheTtl }
+        : { type: "ephemeral" as const },
+    },
+  };
+}
+
+/** Default cache control (no explicit TTL — Anthropic defaults to 5m). */
+const ANTHROPIC_CACHE_CONTROL = anthropicCacheControl();

 type ProviderNativeTool = Extract;
@@ -45,7 +60,11 @@ function isProviderNativeTool(tool: Tool): tool is ProviderNativeTool {
  * (which the SDK handles correctly). For user/assistant messages with array
  * content, we add providerOptions to the last content part.
  */
-function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+function addCacheControlToLastContentPart(
+  msg: ModelMessage,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage {
+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
   const content = msg.content;

   // String content (typically system messages): use message-level providerOptions
@@ -53,7 +72,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (typeof content === "string") {
     return {
       ...msg,
-      providerOptions: ANTHROPIC_CACHE_CONTROL,
+      providerOptions: cacheOpts,
     };
   }

@@ -62,7 +81,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (Array.isArray(content) && content.length > 0) {
     const lastIndex = content.length - 1;
     const newContent = content.map((part, i) =>
-      i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+      i === lastIndex ? { ...part, providerOptions: cacheOpts } : part
     );
     // Type assertion needed: ModelMessage types are strict unions but providerOptions
     // on content parts is valid per SDK docs
@@ -81,7 +100,11 @@
  * NOTE: The SDK requires providerOptions on content parts, not on the message.
  * We add cache_control to the last content part of the last message.
  */
-export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
+export function applyCacheControl(
+  messages: ModelMessage[],
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage[] {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString)) {
     return messages;
@@ -97,7 +120,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)

   return messages.map((msg, index) => {
     if (index === cacheIndex) {
-      return addCacheControlToLastContentPart(msg);
+      return addCacheControlToLastContentPart(msg, cacheTtl);
     }
     return msg;
   });
@@ -109,7 +132,8 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
  */
 export function createCachedSystemMessage(
   systemContent: string,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): ModelMessage | null {
   if (!systemContent || !supportsAnthropicCache(modelString)) {
     return null;
@@ -118,13 +142,7 @@ export function createCachedSystemMessage(
   return {
     role: "system" as const,
     content: systemContent,
-    providerOptions: {
-      anthropic: {
-        cacheControl: {
-          type: "ephemeral" as const,
-        },
-      },
-    },
+    providerOptions: cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL,
   };
 }
@@ -145,7 +163,8 @@ export function createCachedSystemMessage(
  */
 export function applyCacheControlToTools<T extends Record<string, Tool>>(
   tools: T,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): T {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString) || !tools || Object.keys(tools).length === 0) {
@@ -156,6 +175,8 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
   const toolKeys = Object.keys(tools);
   const lastToolKey = toolKeys[toolKeys.length - 1];

+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
+
   // Clone tools and add cache control ONLY to the last tool
   // Anthropic caches everything up to the cache breakpoint, so marking
   // only the last tool will cache all tools
@@ -168,13 +189,13 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
       const cachedProviderTool = cloneToolPreservingDescriptors(
         existingTool
       ) as ProviderNativeTool;
-      cachedProviderTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedProviderTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedProviderTool as unknown as T[keyof T];
     } else if (existingTool.execute == null) {
       // Some MCP/dynamic tools are valid without execute handlers (provider-/client-executed).
       // Keep their runtime shape and attach cache control without forcing recreation.
       const cachedDynamicTool = cloneToolPreservingDescriptors(existingTool);
-      cachedDynamicTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedDynamicTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedDynamicTool as unknown as T[keyof T];
     } else {
       assert(
@@ -187,7 +208,7 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
         description: existingTool.description,
         inputSchema: existingTool.inputSchema,
         execute: existingTool.execute,
-        providerOptions: ANTHROPIC_CACHE_CONTROL,
+        providerOptions: cacheOpts,
       });
       cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
     }
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index e692e36c0f..a90655827f 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -154,6 +154,58 @@ describe("buildProviderOptions - Anthropic", () => {
       });
     });
   });
+
+  describe("Anthropic cache TTL overrides", () => {
+    test("should include cacheControl ttl when configured", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-sonnet-4-5",
+        "off",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "1h" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "1h",
+          },
+        },
+      });
+    });
+
+    test("should include cacheControl ttl for Opus 4.6 effort models", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-opus-4-6",
+        "medium",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "5m" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "adaptive",
+          },
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "5m",
+          },
+          effort: "medium",
+        },
+      });
+    });
+  });
 });

 describe("buildProviderOptions - OpenAI", () => {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 805e2628e1..82165266a7 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -93,6 +93,9 @@ export function buildProviderOptions(

   // Build Anthropic-specific options
   if (provider === "anthropic") {
+    const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+    const cacheControl = cacheTtl ? { type: "ephemeral" as const, ttl: cacheTtl } : undefined;
+
     // Opus 4.5+ use the effort parameter for reasoning control.
     // Opus 4.6 uses adaptive thinking (model decides when/how much to think).
     // Opus 4.5 uses enabled thinking with a budgetTokens ceiling.
@@ -124,6 +127,7 @@ export function buildProviderOptions(
           disableParallelToolUse: false,
           sendReasoning: true,
           ...(thinking && { thinking }),
+          ...(cacheControl && { cacheControl }),
           effort: effortLevel,
         },
       };
@@ -140,6 +144,7 @@ export function buildProviderOptions(
       anthropic: {
         disableParallelToolUse: false, // Always enable concurrent tool execution
         sendReasoning: true, // Include reasoning traces in requests sent to the model
+        ...(cacheControl && { cacheControl }),
        // Conditionally add thinking configuration (non-Opus 4.5 models)
         ...(budgetTokens > 0 && {
           thinking: {
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 026b899313..ff16b81631 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -618,6 +618,7 @@ export class AIService extends EventEmitter {
         providerForMessages: canonicalProviderName,
         effectiveThinkingLevel,
         modelString,
+        anthropicCacheTtl: effectiveMuxProviderOptions.anthropic?.cacheTtl,
         workspaceId,
       });
diff --git a/src/node/services/messagePipeline.ts b/src/node/services/messagePipeline.ts
index 4d7999ef8c..a78ff37014 100644
--- a/src/node/services/messagePipeline.ts
+++ b/src/node/services/messagePipeline.ts
@@ -27,7 +27,7 @@ import {
   injectFileChangeNotifications,
   injectPostCompactionAttachments,
 } from "@/browser/utils/messages/modelMessageTransform";
-import { applyCacheControl } from "@/common/utils/ai/cacheStrategy";
+import { applyCacheControl, type AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { log } from "./log";

 /** Options for the full message preparation pipeline. */
@@ -58,6 +58,8 @@ export interface PrepareMessagesOptions {
   effectiveThinkingLevel: ThinkingLevel;
   /** Full model string (used for cache control). */
   modelString: string;
+  /** Optional Anthropic cache TTL override for prompt caching. */
+  anthropicCacheTtl?: AnthropicCacheTtl | null;
   /** Workspace ID (used only for debug logging). */
   workspaceId: string;
 }
@@ -98,6 +100,7 @@ export async function prepareMessagesForProvider(
     providerForMessages,
     effectiveThinkingLevel,
     modelString,
+    anthropicCacheTtl,
     workspaceId,
   } = opts;
@@ -181,7 +184,7 @@ export async function prepareMessagesForProvider(
   });

   // Apply cache control for Anthropic models AFTER transformation
-  const finalMessages = applyCacheControl(transformedMessages, modelString);
+  const finalMessages = applyCacheControl(transformedMessages, modelString, anthropicCacheTtl);

   log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages);
diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts
index 11e07f8ad4..0b0051a97d 100644
--- a/src/node/services/providerModelFactory.ts
+++ b/src/node/services/providerModelFactory.ts
@@ -24,6 +24,7 @@ import type { PolicyService } from "@/node/services/policyService";
 import type { ProviderService } from "@/node/services/providerService";
 import type { CodexOauthService } from "@/node/services/codexOauthService";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
+import type { AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution";
 import { resolveProviderCredentials } from "@/node/utils/providerRequirements";
 import {
@@ -101,7 +102,16 @@ if (typeof globalFetchWithExtras.certificate === "function") {
  * 1. Last tool (caches all tool definitions)
  * 2. Last message's last content part (caches entire conversation)
  */
-function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+function wrapFetchWithAnthropicCacheControl(
+  baseFetch: typeof fetch,
+  cacheTtl?: AnthropicCacheTtl | null
+): typeof fetch {
+  // Build the cache_control value once — include ttl only when explicitly set.
+  const cacheControlValue: Record<string, unknown> = { type: "ephemeral" };
+  if (cacheTtl) {
+    cacheControlValue.ttl = cacheTtl;
+  }
+
   const cachingFetch = async (
     input: Parameters<typeof fetch>[0],
     init?: Parameters<typeof fetch>[1]
@@ -117,7 +127,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     // Inject cache_control on the last tool if tools array exists
     if (Array.isArray(json.tools) && json.tools.length > 0) {
       const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
-      lastTool.cache_control ??= { type: "ephemeral" };
+      lastTool.cache_control ??= cacheControlValue;
     }

     // Inject cache_control on last message's last content part
@@ -139,7 +149,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     if (Array.isArray(json.prompt)) {
       const providerOpts = (lastMsg.providerOptions ?? {}) as Record<string, unknown>;
       const anthropicOpts = (providerOpts.anthropic ?? {}) as Record<string, unknown>;
-      anthropicOpts.cacheControl ??= { type: "ephemeral" };
+      anthropicOpts.cacheControl ??= cacheControlValue;
       providerOpts.anthropic = anthropicOpts;
       lastMsg.providerOptions = providerOpts;
     }
@@ -148,7 +158,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
       const content = lastMsg.content;
       if (Array.isArray(content) && content.length > 0) {
         const lastPart = content[content.length - 1] as Record<string, unknown>;
-        lastPart.cache_control ??= { type: "ephemeral" };
+        lastPart.cache_control ??= cacheControlValue;
       }
     }
@@ -496,7 +506,8 @@ export class ProviderModelFactory {
       // (SDK doesn't translate providerOptions to cache_control for these)
       // Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
       const baseFetch = getProviderFetch(providerConfig);
-      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch);
+      const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch, cacheTtl);
       const provider = createAnthropic({
         ...normalizedConfig,
         fetch: fetchWithCacheControl,
@@ -1009,8 +1020,9 @@ export class ProviderModelFactory {
       // Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
       const baseFetch = getProviderFetch(providerConfig);
       const isAnthropicModel = modelId.startsWith("anthropic/");
+      const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
       const fetchWithCacheControl = isAnthropicModel
-        ? wrapFetchWithAnthropicCacheControl(baseFetch)
+        ? wrapFetchWithAnthropicCacheControl(baseFetch, cacheTtl)
         : baseFetch;
       const fetchWithAutoLogout = wrapFetchWithMuxGatewayAutoLogout(
         fetchWithCacheControl,
diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 6ec40761f0..60ce7095e1 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -46,6 +46,7 @@ import type { Runtime } from "@/node/runtime/Runtime";
 import {
   createCachedSystemMessage,
   applyCacheControlToTools,
+  type AnthropicCacheTtl,
 } from "@/common/utils/ai/cacheStrategy";
 import type { SessionUsageService } from "./sessionUsageService";
 import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
@@ -103,6 +104,35 @@ interface StreamRequestConfig {
   hasQueuedMessage?: () => boolean;
 }

+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
+function isAnthropicCacheTtl(value: unknown): value is AnthropicCacheTtl {
+  return value === "5m" || value === "1h";
+}
+
+function getAnthropicCacheTtl(
+  providerOptions?: Record<string, unknown>
+): AnthropicCacheTtl | undefined {
+  if (!providerOptions) {
+    return undefined;
+  }
+
+  const anthropicOptions = providerOptions.anthropic;
+  if (!isRecord(anthropicOptions)) {
+    return undefined;
+  }
+
+  const cacheControl = anthropicOptions.cacheControl;
+  if (!isRecord(cacheControl)) {
+    return undefined;
+  }
+
+  const ttl = cacheControl.ttl;
+  return isAnthropicCacheTtl(ttl) ? ttl : undefined;
+}
+
 // Stream state enum for exhaustive checking
 enum StreamState {
   IDLE = "idle",
@@ -977,9 +1007,10 @@ export class StreamManager extends EventEmitter {
     let finalMessages = messages;
     let finalTools = tools;
     let finalSystem: string | undefined = system;
+    const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);

     // For Anthropic models, convert system message to a cached message at the start
-    const cachedSystemMessage = createCachedSystemMessage(system, modelString);
+    const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);
     if (cachedSystemMessage) {
       // Prepend cached system message and set system parameter to undefined
       // Note: Must be undefined, not empty string, to avoid Anthropic API error
@@ -989,7 +1020,7 @@ export class StreamManager extends EventEmitter {

     // Apply cache control to tools for Anthropic models
     if (tools) {
-      finalTools = applyCacheControlToTools(tools, modelString);
+      finalTools = applyCacheControlToTools(tools, modelString, anthropicCacheTtl);
     }

     // Use model's max_output_tokens if available and caller didn't specify.

From a02975d63b71ea60e18adf26ac8e1faff66df1e4 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 13:58:03 +0000
Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=A4=96=20fix:=20propagate=20Anthropic?=
 =?UTF-8?q?=20cache=20TTL=20through=20stream=20pipeline?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure stream-level system/tool cache markers honor configured Anthropic TTL
even for Anthropic-routed gateway models whose providerOptions are not under
the anthropic namespace.
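
Concretely, the stream-level TTL now resolves as

    anthropicCacheTtlOverride ?? getAnthropicCacheTtl(finalProviderOptions)

so an explicit override from the caller wins over whatever can be sniffed
out of the raw providerOptions shape.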
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `.50`_
---
 src/node/services/streamManager.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 60ce7095e1..79f22451f0 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -1007,7 +1007,8 @@ export class StreamManager extends EventEmitter {
     let finalMessages = messages;
     let finalTools = tools;
     let finalSystem: string | undefined = system;
-    const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);
+    const anthropicCacheTtl =
+      anthropicCacheTtlOverride ?? getAnthropicCacheTtl(finalProviderOptions);

     // For Anthropic models, convert system message to a cached message at the start
     const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);

From fa22fdd3da2766556cbbe119764525cd679ab35e Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 18:49:58 +0000
Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20Anthropic=20pr?=
 =?UTF-8?q?ompt=20cache=20TTL=20setting?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a provider-level Anthropic setting in Providers to configure prompt
cache TTL with defensive value guards and default clearing behavior.

Also add a regression test ensuring persisted Anthropic cache TTL is
propagated into send options from storage.
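
The persisted shape is exactly what the regression test asserts:

    window.localStorage.setItem(
      "provider_options_anthropic",
      JSON.stringify({ cacheTtl: "1h" })
    );

after which `getSendOptionsFromStorage(workspaceId)` reports
`providerOptions.anthropic.cacheTtl === "1h"`.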
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$3.19`_
---
 .../Settings/sections/ProvidersSection.tsx | 60 +++++++++++++++++++
 .../utils/messages/sendOptions.test.ts     | 14 +++++
 2 files changed, 74 insertions(+)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index f6a3ac7eec..38c4fb650c 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -21,6 +21,7 @@ import { getStoredAuthToken } from "@/browser/components/AuthTokenModal";
 import { useAPI } from "@/browser/contexts/API";
 import { useSettings } from "@/browser/contexts/SettingsContext";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
+import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useProvidersConfig } from "@/browser/hooks/useProvidersConfig";
 import {
   formatMuxGatewayBalance,
@@ -161,6 +162,7 @@ export function ProvidersSection() {
   );

   const { providersExpandedProvider, setProvidersExpandedProvider } = useSettings();
+  const { options: providerOptions, setAnthropicOptions } = useProviderOptions();
   const { api } = useAPI();
   const { config, refresh, updateOptimistically } = useProvidersConfig();
@@ -1468,6 +1470,64 @@ export function ProvidersSection() {
                     );
                   })}

+                  {/* Anthropic: prompt cache TTL */}
+                  {provider === "anthropic" && (

   [~58 added JSX lines; the markup was lost in extraction. Surviving text:
    a "?" tooltip trigger, the label "Prompt cache TTL", and the help text
    "Default is 5m. Use 1h for longer workflows at a higher cache-write
    cost."]
+                  )}
+
                   {/* OpenAI: ChatGPT OAuth + service tier */}
                   {provider === "openai" && (
diff --git a/src/browser/utils/messages/sendOptions.test.ts b/src/browser/utils/messages/sendOptions.test.ts
index b0712d1ed6..624b0ba947 100644
--- a/src/browser/utils/messages/sendOptions.test.ts
+++ b/src/browser/utils/messages/sendOptions.test.ts
@@ -61,4 +61,18 @@ describe("getSendOptionsFromStorage", () => {
     const withThinking = getSendOptionsFromStorage(workspaceId);
     expect(withThinking.system1ThinkingLevel).toBe("high");
   });
+
+  test("includes Anthropic prompt cache TTL from persisted provider options", () => {
+    const workspaceId = "ws-3";
+
+    window.localStorage.setItem(
+      "provider_options_anthropic",
+      JSON.stringify({
+        cacheTtl: "1h",
+      })
+    );
+
+    const options = getSendOptionsFromStorage(workspaceId);
+    expect(options.providerOptions?.anthropic?.cacheTtl).toBe("1h");
+  });
 });

From fc7bd4b5cecfab43688058ec45d4d208ddcdd8f8 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Tue, 10 Feb 2026 09:08:22 +0000
Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20persist=20anthropic?=
 =?UTF-8?q?=20cache=20TTL=20in=20providers=20config?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move Anthropic prompt cache TTL persistence from frontend localStorage to
backend providers.jsonc and make backend config authoritative for
Anthropic-routed models.

- expose anthropic cacheTtl in provider config IPC schema
- surface valid cacheTtl in ProviderService getConfig with tests
- inject backend cacheTtl in ProviderModelFactory for anthropic and
  anthropic/* routes
- update Providers settings UI to read/write cacheTtl through provider
  config API
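
An entry in providers.jsonc then carries the TTL next to the provider's other
settings. Illustrative sketch only; this series does not show the full config
file shape, and the surrounding keys are hypothetical:

    {
      "anthropic": {
        // "apiKey" is a hypothetical sibling key; "cacheTtl" is the new field
        "apiKey": "sk-ant-...",
        "cacheTtl": "1h"
      }
    }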
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$4.21`_
---
 .../Settings/sections/ProvidersSection.tsx | 22 +++++----
 src/common/orpc/schemas/api.test.ts        |  2 +
 src/common/orpc/schemas/api.ts             |  2 +
 src/node/services/providerModelFactory.ts  | 37 ++++++++++++---
 src/node/services/providerService.test.ts  | 45 +++++++++++++++++++
 src/node/services/providerService.ts       |  7 +++
 6 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index 38c4fb650c..b457c17f6f 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -21,7 +21,6 @@ import { getStoredAuthToken } from "@/browser/components/AuthTokenModal";
 import { useAPI } from "@/browser/contexts/API";
 import { useSettings } from "@/browser/contexts/SettingsContext";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
-import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useProvidersConfig } from "@/browser/hooks/useProvidersConfig";
 import {
   formatMuxGatewayBalance,
@@ -161,7 +162,6 @@ export function ProvidersSection() {
   );

   const { providersExpandedProvider, setProvidersExpandedProvider } = useSettings();
-  const { options: providerOptions, setAnthropicOptions } = useProviderOptions();
   const { api } = useAPI();
   const { config, refresh, updateOptimistically } = useProvidersConfig();
@@ -1497,22 +1495,22 @@ export function ProvidersSection() {