From 90e82576bdc4d6a378b248609fa86ced1704d04c Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 13:41:56 +0000
Subject: [PATCH 1/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20configurable?=
 =?UTF-8?q?=20Anthropic=20prompt=20cache=20TTL?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Anthropic cache TTL support (`5m` / `1h`) across provider options, cache
strategy, stream pipeline, and fetch-level cache_control injection, with
tests for TTL propagation.
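
For example (shapes taken from the tests in this patch), a send request whose
mux provider options carry

    { anthropic: { cacheTtl: "1h" } }

is mapped by `buildProviderOptions` to

    cacheControl: { type: "ephemeral", ttl: "1h" }

and the same TTL rides along on every Anthropic cache breakpoint: the cached
system message, the last tool, and the last message content part.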
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `.50`_
---
 src/common/orpc/schemas/providerOptions.ts  |  8 +++
 src/common/utils/ai/cacheStrategy.test.ts   | 55 +++++++++++++++++
 src/common/utils/ai/cacheStrategy.ts        | 67 ++++++++++++++-------
 src/common/utils/ai/providerOptions.test.ts | 52 ++++++++++++++++
 src/common/utils/ai/providerOptions.ts      |  5 ++
 src/node/services/aiService.ts              |  1 +
 src/node/services/messagePipeline.ts        |  7 ++-
 src/node/services/providerModelFactory.ts   | 24 ++++++--
 src/node/services/streamManager.ts          | 35 ++++++++++-
 9 files changed, 221 insertions(+), 33 deletions(-)

diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts
index 8b5a54386a..a4d1678b1a 100644
--- a/src/common/orpc/schemas/providerOptions.ts
+++ b/src/common/orpc/schemas/providerOptions.ts
@@ -12,6 +12,14 @@ export const MuxProviderOptionsSchema = z.object({
         description:
           "Model IDs with 1M context enabled (e.g. ['anthropic:claude-sonnet-4-20250514'])",
       }),
+      // Anthropic prompt cache TTL. "5m" is the default (free refresh on hit).
+      // "1h" costs 2× base input for cache writes but keeps the cache alive longer —
+      // useful for agentic workflows where turns take >5 minutes.
+      // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+      cacheTtl: z.enum(["5m", "1h"]).nullish().meta({
+        description:
+          'Anthropic prompt cache TTL: "5m" (default, free refresh) or "1h" (2× write cost, longer cache)',
+      }),
     })
     .optional(),
   openai: z
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 10eeeb5631..1bb76b24a4 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -146,6 +146,23 @@ describe("cacheStrategy", () => {
         anthropic: { cacheControl: { type: "ephemeral" } },
       }); // Last part has cache control
     });
+
+    it("should include cache TTL when provided", () => {
+      const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
+      const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet", "1h");
+
+      expect(result[0]).toEqual({
+        ...messages[0],
+        providerOptions: {
+          anthropic: {
+            cacheControl: {
+              type: "ephemeral",
+              ttl: "1h",
+            },
+          },
+        },
+      });
+    });
   });

   describe("createCachedSystemMessage", () => {
@@ -194,6 +211,24 @@ describe("cacheStrategy", () => {
         },
       });
     });
+
+    it("should include cache TTL in cached system message when provided", () => {
+      const systemContent = "You are a helpful assistant";
+      const result = createCachedSystemMessage(systemContent, "anthropic:claude-3-5-sonnet", "1h");
+
+      expect(result).toEqual({
+        role: "system",
+        content: systemContent,
+        providerOptions: {
+          anthropic: {
+            cacheControl: {
+              type: "ephemeral",
+              ttl: "1h",
+            },
+          },
+        },
+      });
+    });
   });

   describe("applyCacheControlToTools", () => {
@@ -269,6 +304,26 @@ describe("cacheStrategy", () => {
       expect(Object.keys(result)).toEqual(Object.keys(mockTools));
     });

+    it("should include cache TTL on the cached tool when provided", () => {
+      const result = applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet", "1h");
+      const keys = Object.keys(mockTools);
+      const lastKey = keys[keys.length - 1];
+      const cachedLastTool = result[lastKey] as unknown as {
+        providerOptions?: {
+          anthropic?: {
+            cacheControl?: {
+              type?: string;
+              ttl?: string;
+            };
+          };
+        };
+      };
+
+      expect(cachedLastTool.providerOptions?.anthropic?.cacheControl).toEqual({
+        type: "ephemeral",
+        ttl: "1h",
+      });
+    });
+
     it("should not modify original tools object", () => {
       const originalTools = { ...mockTools };
       applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index e730f04207..b47b53b599 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -3,6 +3,14 @@ import assert from "@/common/utils/assert";
 import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors";
 import { normalizeGatewayModel } from "./models";

+/**
+ * Anthropic prompt cache TTL value.
+ * "5m" = 5-minute cache (default, free refresh on hit).
+ * "1h" = 1-hour cache (2× base input write cost, longer lived).
+ * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+ */
+export type AnthropicCacheTtl = "5m" | "1h";
+
 /**
  * Check if a model supports Anthropic cache control.
  * Matches:
@@ -24,12 +32,19 @@ export function supportsAnthropicCache(modelString: string): boolean {
   return false;
 }

-/** Cache control providerOptions for Anthropic */
-const ANTHROPIC_CACHE_CONTROL = {
-  anthropic: {
-    cacheControl: { type: "ephemeral" as const },
-  },
-};
+/** Build cache control providerOptions for Anthropic with optional TTL. */
+function anthropicCacheControl(cacheTtl?: AnthropicCacheTtl | null) {
+  return {
+    anthropic: {
+      cacheControl: cacheTtl
+        ? { type: "ephemeral" as const, ttl: cacheTtl }
+        : { type: "ephemeral" as const },
+    },
+  };
+}
+
+/** Default cache control (no explicit TTL — Anthropic defaults to 5m). */
+const ANTHROPIC_CACHE_CONTROL = anthropicCacheControl();

 type ProviderNativeTool = Extract;
@@ -45,7 +60,11 @@ function isProviderNativeTool(tool: Tool): tool is ProviderNativeTool {
  * (which the SDK handles correctly). For user/assistant messages with array
  * content, we add providerOptions to the last content part.
  */
-function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+function addCacheControlToLastContentPart(
+  msg: ModelMessage,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage {
+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
   const content = msg.content;

   // String content (typically system messages): use message-level providerOptions
@@ -53,7 +72,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (typeof content === "string") {
     return {
       ...msg,
-      providerOptions: ANTHROPIC_CACHE_CONTROL,
+      providerOptions: cacheOpts,
     };
   }

@@ -62,7 +81,7 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
   if (Array.isArray(content) && content.length > 0) {
     const lastIndex = content.length - 1;
     const newContent = content.map((part, i) =>
-      i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+      i === lastIndex ? { ...part, providerOptions: cacheOpts } : part
     );
     // Type assertion needed: ModelMessage types are strict unions but providerOptions
     // on content parts is valid per SDK docs
@@ -81,7 +100,11 @@
  * NOTE: The SDK requires providerOptions on content parts, not on the message.
  * We add cache_control to the last content part of the last message.
  */
-export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
+export function applyCacheControl(
+  messages: ModelMessage[],
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
+): ModelMessage[] {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString)) {
     return messages;
@@ -97,7 +120,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)

   return messages.map((msg, index) => {
     if (index === cacheIndex) {
-      return addCacheControlToLastContentPart(msg);
+      return addCacheControlToLastContentPart(msg, cacheTtl);
     }
     return msg;
   });
@@ -109,7 +132,8 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
  */
 export function createCachedSystemMessage(
   systemContent: string,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): ModelMessage | null {
   if (!systemContent || !supportsAnthropicCache(modelString)) {
     return null;
@@ -118,13 +142,7 @@ export function createCachedSystemMessage(
   return {
     role: "system" as const,
     content: systemContent,
-    providerOptions: {
-      anthropic: {
-        cacheControl: {
-          type: "ephemeral" as const,
-        },
-      },
-    },
+    providerOptions: cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL,
   };
 }
@@ -145,7 +163,8 @@ export function createCachedSystemMessage(
  */
 export function applyCacheControlToTools<T extends Record<string, Tool>>(
   tools: T,
-  modelString: string
+  modelString: string,
+  cacheTtl?: AnthropicCacheTtl | null
 ): T {
   // Only apply cache control for Anthropic models
   if (!supportsAnthropicCache(modelString) || !tools || Object.keys(tools).length === 0) {
@@ -156,6 +175,8 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
   const toolKeys = Object.keys(tools);
   const lastToolKey = toolKeys[toolKeys.length - 1];

+  const cacheOpts = cacheTtl ? anthropicCacheControl(cacheTtl) : ANTHROPIC_CACHE_CONTROL;
+
   // Clone tools and add cache control ONLY to the last tool
   // Anthropic caches everything up to the cache breakpoint, so marking
   // only the last tool will cache all tools
@@ -168,13 +189,13 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
       const cachedProviderTool = cloneToolPreservingDescriptors(
         existingTool
       ) as ProviderNativeTool;
-      cachedProviderTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedProviderTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedProviderTool as unknown as T[keyof T];
     } else if (existingTool.execute == null) {
       // Some MCP/dynamic tools are valid without execute handlers (provider-/client-executed).
       // Keep their runtime shape and attach cache control without forcing recreation.
       const cachedDynamicTool = cloneToolPreservingDescriptors(existingTool);
-      cachedDynamicTool.providerOptions = ANTHROPIC_CACHE_CONTROL;
+      cachedDynamicTool.providerOptions = cacheOpts;
       cachedTools[key as keyof T] = cachedDynamicTool as unknown as T[keyof T];
     } else {
       assert(
@@ -187,7 +208,7 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
         description: existingTool.description,
         inputSchema: existingTool.inputSchema,
         execute: existingTool.execute,
-        providerOptions: ANTHROPIC_CACHE_CONTROL,
+        providerOptions: cacheOpts,
       });
       cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
     }
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index e692e36c0f..a90655827f 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -154,6 +154,58 @@ describe("buildProviderOptions - Anthropic", () => {
       });
     });
   });
+
+  describe("Anthropic cache TTL overrides", () => {
+    test("should include cacheControl ttl when configured", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-sonnet-4-5",
+        "off",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "1h" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "1h",
+          },
+        },
+      });
+    });
+
+    test("should include cacheControl ttl for Opus 4.6 effort models", () => {
+      const result = buildProviderOptions(
+        "anthropic:claude-opus-4-6",
+        "medium",
+        undefined,
+        undefined,
+        {
+          anthropic: { cacheTtl: "5m" },
+        }
+      );
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "adaptive",
+          },
+          cacheControl: {
+            type: "ephemeral",
+            ttl: "5m",
+          },
+          effort: "medium",
+        },
+      });
+    });
+  });
 });

 describe("buildProviderOptions - OpenAI", () => {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 805e2628e1..82165266a7 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -93,6 +93,9 @@ export function buildProviderOptions(

   // Build Anthropic-specific options
   if (provider === "anthropic") {
+    const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+    const cacheControl = cacheTtl ? { type: "ephemeral" as const, ttl: cacheTtl } : undefined;
+
     // Opus 4.5+ use the effort parameter for reasoning control.
     // Opus 4.6 uses adaptive thinking (model decides when/how much to think).
     // Opus 4.5 uses enabled thinking with a budgetTokens ceiling.
@@ -124,6 +127,7 @@ export function buildProviderOptions(
           disableParallelToolUse: false,
           sendReasoning: true,
           ...(thinking && { thinking }),
+          ...(cacheControl && { cacheControl }),
           effort: effortLevel,
         },
       };
@@ -140,6 +144,7 @@ export function buildProviderOptions(
       anthropic: {
         disableParallelToolUse: false, // Always enable concurrent tool execution
         sendReasoning: true, // Include reasoning traces in requests sent to the model
+        ...(cacheControl && { cacheControl }),
        // Conditionally add thinking configuration (non-Opus 4.5 models)
         ...(budgetTokens > 0 && {
           thinking: {
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 026b899313..ff16b81631 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -618,6 +618,7 @@ export class AIService extends EventEmitter {
         providerForMessages: canonicalProviderName,
         effectiveThinkingLevel,
         modelString,
+        anthropicCacheTtl: effectiveMuxProviderOptions.anthropic?.cacheTtl,
         workspaceId,
       });
diff --git a/src/node/services/messagePipeline.ts b/src/node/services/messagePipeline.ts
index 4d7999ef8c..a78ff37014 100644
--- a/src/node/services/messagePipeline.ts
+++ b/src/node/services/messagePipeline.ts
@@ -27,7 +27,7 @@ import {
   injectFileChangeNotifications,
   injectPostCompactionAttachments,
 } from "@/browser/utils/messages/modelMessageTransform";
-import { applyCacheControl } from "@/common/utils/ai/cacheStrategy";
+import { applyCacheControl, type AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { log } from "./log";

 /** Options for the full message preparation pipeline. */
@@ -58,6 +58,8 @@ export interface PrepareMessagesOptions {
   effectiveThinkingLevel: ThinkingLevel;
   /** Full model string (used for cache control). */
   modelString: string;
+  /** Optional Anthropic cache TTL override for prompt caching. */
+  anthropicCacheTtl?: AnthropicCacheTtl | null;
   /** Workspace ID (used only for debug logging). */
   workspaceId: string;
 }
@@ -98,6 +100,7 @@ export async function prepareMessagesForProvider(
     providerForMessages,
     effectiveThinkingLevel,
     modelString,
+    anthropicCacheTtl,
     workspaceId,
   } = opts;
@@ -181,7 +184,7 @@ export async function prepareMessagesForProvider(
   });

   // Apply cache control for Anthropic models AFTER transformation
-  const finalMessages = applyCacheControl(transformedMessages, modelString);
+  const finalMessages = applyCacheControl(transformedMessages, modelString, anthropicCacheTtl);

   log.debug_obj(`${workspaceId}/3_final_messages.json`, finalMessages);
diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts
index 11e07f8ad4..0b0051a97d 100644
--- a/src/node/services/providerModelFactory.ts
+++ b/src/node/services/providerModelFactory.ts
@@ -24,6 +24,7 @@ import type { PolicyService } from "@/node/services/policyService";
 import type { ProviderService } from "@/node/services/providerService";
 import type { CodexOauthService } from "@/node/services/codexOauthService";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
+import type { AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy";
 import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution";
 import { resolveProviderCredentials } from "@/node/utils/providerRequirements";
 import {
@@ -101,7 +102,16 @@ if (typeof globalFetchWithExtras.certificate === "function") {
  * 1. Last tool (caches all tool definitions)
  * 2. Last message's last content part (caches entire conversation)
  */
-function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+function wrapFetchWithAnthropicCacheControl(
+  baseFetch: typeof fetch,
+  cacheTtl?: AnthropicCacheTtl | null
+): typeof fetch {
+  // Build the cache_control value once — include ttl only when explicitly set.
+  const cacheControlValue: Record<string, unknown> = { type: "ephemeral" };
+  if (cacheTtl) {
+    cacheControlValue.ttl = cacheTtl;
+  }
+
   const cachingFetch = async (
     input: Parameters<typeof fetch>[0],
     init?: Parameters<typeof fetch>[1]
@@ -117,7 +127,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     // Inject cache_control on the last tool if tools array exists
     if (Array.isArray(json.tools) && json.tools.length > 0) {
       const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
-      lastTool.cache_control ??= { type: "ephemeral" };
+      lastTool.cache_control ??= cacheControlValue;
     }

     // Inject cache_control on last message's last content part
@@ -139,7 +149,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
     if (Array.isArray(json.prompt)) {
       const providerOpts = (lastMsg.providerOptions ?? {}) as Record<string, unknown>;
       const anthropicOpts = (providerOpts.anthropic ?? {}) as Record<string, unknown>;
-      anthropicOpts.cacheControl ??= { type: "ephemeral" };
+      anthropicOpts.cacheControl ??= cacheControlValue;
       providerOpts.anthropic = anthropicOpts;
       lastMsg.providerOptions = providerOpts;
     }
@@ -148,7 +158,7 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
       const content = lastMsg.content;
       if (Array.isArray(content) && content.length > 0) {
         const lastPart = content[content.length - 1] as Record<string, unknown>;
-        lastPart.cache_control ??= { type: "ephemeral" };
+        lastPart.cache_control ??= cacheControlValue;
       }
     }
@@ -496,7 +506,8 @@ export class ProviderModelFactory {
       // (SDK doesn't translate providerOptions to cache_control for these)
       // Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
       const baseFetch = getProviderFetch(providerConfig);
-      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch);
+      const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
+      const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(baseFetch, cacheTtl);
       const provider = createAnthropic({
         ...normalizedConfig,
         fetch: fetchWithCacheControl,
@@ -1009,8 +1020,9 @@ export class ProviderModelFactory {
       // Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
       const baseFetch = getProviderFetch(providerConfig);
       const isAnthropicModel = modelId.startsWith("anthropic/");
+      const cacheTtl = muxProviderOptions?.anthropic?.cacheTtl;
       const fetchWithCacheControl = isAnthropicModel
-        ? wrapFetchWithAnthropicCacheControl(baseFetch)
+        ? wrapFetchWithAnthropicCacheControl(baseFetch, cacheTtl)
         : baseFetch;
       const fetchWithAutoLogout = wrapFetchWithMuxGatewayAutoLogout(
         fetchWithCacheControl,
diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 6ec40761f0..60ce7095e1 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -46,6 +46,7 @@ import type { Runtime } from "@/node/runtime/Runtime";
 import {
   createCachedSystemMessage,
   applyCacheControlToTools,
+  type AnthropicCacheTtl,
 } from "@/common/utils/ai/cacheStrategy";
 import type { SessionUsageService } from "./sessionUsageService";
 import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
@@ -103,6 +104,35 @@ interface StreamRequestConfig {
   hasQueuedMessage?: () => boolean;
 }

+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
+function isAnthropicCacheTtl(value: unknown): value is AnthropicCacheTtl {
+  return value === "5m" || value === "1h";
+}
+
+function getAnthropicCacheTtl(
+  providerOptions?: Record<string, unknown>
+): AnthropicCacheTtl | undefined {
+  if (!providerOptions) {
+    return undefined;
+  }
+
+  const anthropicOptions = providerOptions.anthropic;
+  if (!isRecord(anthropicOptions)) {
+    return undefined;
+  }
+
+  const cacheControl = anthropicOptions.cacheControl;
+  if (!isRecord(cacheControl)) {
+    return undefined;
+  }
+
+  const ttl = cacheControl.ttl;
+  return isAnthropicCacheTtl(ttl) ? ttl : undefined;
+}
+
 // Stream state enum for exhaustive checking
 enum StreamState {
   IDLE = "idle",
@@ -977,9 +1007,10 @@ export class StreamManager extends EventEmitter {
     let finalMessages = messages;
     let finalTools = tools;
     let finalSystem: string | undefined = system;
+    const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);

     // For Anthropic models, convert system message to a cached message at the start
-    const cachedSystemMessage = createCachedSystemMessage(system, modelString);
+    const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);
     if (cachedSystemMessage) {
       // Prepend cached system message and set system parameter to undefined
       // Note: Must be undefined, not empty string, to avoid Anthropic API error
@@ -989,7 +1020,7 @@ export class StreamManager extends EventEmitter {

     // Apply cache control to tools for Anthropic models
     if (tools) {
-      finalTools = applyCacheControlToTools(tools, modelString);
+      finalTools = applyCacheControlToTools(tools, modelString, anthropicCacheTtl);
     }

     // Use model's max_output_tokens if available and caller didn't specify.

From a02975d63b71ea60e18adf26ac8e1faff66df1e4 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 13:58:03 +0000
Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=A4=96=20fix:=20propagate=20Anthropic?=
 =?UTF-8?q?=20cache=20TTL=20through=20stream=20pipeline?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure stream-level system/tool cache markers honor configured Anthropic TTL
even for Anthropic-routed gateway models whose providerOptions are not under
the anthropic namespace.
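
Concretely, the stream-level TTL now resolves as

    anthropicCacheTtlOverride ?? getAnthropicCacheTtl(finalProviderOptions)

so an explicit override from the caller wins over whatever can be sniffed
out of the raw providerOptions shape.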
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `.50`_
---
 src/node/services/streamManager.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts
index 60ce7095e1..79f22451f0 100644
--- a/src/node/services/streamManager.ts
+++ b/src/node/services/streamManager.ts
@@ -1007,7 +1007,8 @@ export class StreamManager extends EventEmitter {
     let finalMessages = messages;
     let finalTools = tools;
     let finalSystem: string | undefined = system;
-    const anthropicCacheTtl = getAnthropicCacheTtl(finalProviderOptions);
+    const anthropicCacheTtl =
+      anthropicCacheTtlOverride ?? getAnthropicCacheTtl(finalProviderOptions);

     // For Anthropic models, convert system message to a cached message at the start
     const cachedSystemMessage = createCachedSystemMessage(system, modelString, anthropicCacheTtl);

From fa22fdd3da2766556cbbe119764525cd679ab35e Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Mon, 9 Feb 2026 18:49:58 +0000
Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20Anthropic=20pr?=
 =?UTF-8?q?ompt=20cache=20TTL=20setting?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a provider-level Anthropic setting in Providers to configure prompt
cache TTL with defensive value guards and default clearing behavior.

Also add a regression test ensuring persisted Anthropic cache TTL is
propagated into send options from storage.
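
The persisted shape is exactly what the regression test asserts:

    window.localStorage.setItem(
      "provider_options_anthropic",
      JSON.stringify({ cacheTtl: "1h" })
    );

after which `getSendOptionsFromStorage(workspaceId)` reports
`providerOptions.anthropic.cacheTtl === "1h"`.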
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$3.19`_
---
 .../Settings/sections/ProvidersSection.tsx | 60 +++++++++++++++++++
 .../utils/messages/sendOptions.test.ts     | 14 +++++
 2 files changed, 74 insertions(+)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index f6a3ac7eec..38c4fb650c 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -21,6 +21,7 @@ import { getStoredAuthToken } from "@/browser/components/AuthTokenModal";
 import { useAPI } from "@/browser/contexts/API";
 import { useSettings } from "@/browser/contexts/SettingsContext";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
+import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useProvidersConfig } from "@/browser/hooks/useProvidersConfig";
 import {
   formatMuxGatewayBalance,
@@ -161,6 +162,7 @@ export function ProvidersSection() {
   );

   const { providersExpandedProvider, setProvidersExpandedProvider } = useSettings();
+  const { options: providerOptions, setAnthropicOptions } = useProviderOptions();
   const { api } = useAPI();
   const { config, refresh, updateOptimistically } = useProvidersConfig();
@@ -1468,6 +1470,64 @@ export function ProvidersSection() {
                     );
                   })}

+                  {/* Anthropic: prompt cache TTL */}
+                  {provider === "anthropic" && (

   [~58 added JSX lines; the markup was lost in extraction. Surviving text:
    a "?" tooltip trigger, the label "Prompt cache TTL", and the help text
    "Default is 5m. Use 1h for longer workflows at a higher cache-write
    cost."]
+                  )}
+
                   {/* OpenAI: ChatGPT OAuth + service tier */}
                   {provider === "openai" && (
diff --git a/src/browser/utils/messages/sendOptions.test.ts b/src/browser/utils/messages/sendOptions.test.ts
index b0712d1ed6..624b0ba947 100644
--- a/src/browser/utils/messages/sendOptions.test.ts
+++ b/src/browser/utils/messages/sendOptions.test.ts
@@ -61,4 +61,18 @@ describe("getSendOptionsFromStorage", () => {
     const withThinking = getSendOptionsFromStorage(workspaceId);
     expect(withThinking.system1ThinkingLevel).toBe("high");
   });
+
+  test("includes Anthropic prompt cache TTL from persisted provider options", () => {
+    const workspaceId = "ws-3";
+
+    window.localStorage.setItem(
+      "provider_options_anthropic",
+      JSON.stringify({
+        cacheTtl: "1h",
+      })
+    );
+
+    const options = getSendOptionsFromStorage(workspaceId);
+    expect(options.providerOptions?.anthropic?.cacheTtl).toBe("1h");
+  });
 });

From fc7bd4b5cecfab43688058ec45d4d208ddcdd8f8 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski
Date: Tue, 10 Feb 2026 09:08:22 +0000
Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=A4=96=20feat:=20persist=20anthropic?=
 =?UTF-8?q?=20cache=20TTL=20in=20providers=20config?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move Anthropic prompt cache TTL persistence from frontend localStorage to
backend providers.jsonc and make backend config authoritative for
Anthropic-routed models.

- expose anthropic cacheTtl in provider config IPC schema
- surface valid cacheTtl in ProviderService getConfig with tests
- inject backend cacheTtl in ProviderModelFactory for anthropic and
  anthropic/* routes
- update Providers settings UI to read/write cacheTtl through provider
  config API
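
An entry in providers.jsonc then carries the TTL next to the provider's other
settings. Illustrative sketch only; this series does not show the full config
file shape, and the surrounding keys are hypothetical:

    {
      "anthropic": {
        // "apiKey" is a hypothetical sibling key; "cacheTtl" is the new field
        "apiKey": "sk-ant-...",
        "cacheTtl": "1h"
      }
    }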
---
_Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$4.21`_
---
 .../Settings/sections/ProvidersSection.tsx | 22 +++++----
 src/common/orpc/schemas/api.test.ts        |  2 +
 src/common/orpc/schemas/api.ts             |  2 +
 src/node/services/providerModelFactory.ts  | 37 ++++++++++++---
 src/node/services/providerService.test.ts  | 45 +++++++++++++++++++
 src/node/services/providerService.ts       |  7 +++
 6 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index 38c4fb650c..b457c17f6f 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -21,7 +21,6 @@ import { getStoredAuthToken } from "@/browser/components/AuthTokenModal";
 import { useAPI } from "@/browser/contexts/API";
 import { useSettings } from "@/browser/contexts/SettingsContext";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
-import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useProvidersConfig } from "@/browser/hooks/useProvidersConfig";
 import {
   formatMuxGatewayBalance,
@@ -161,7 +162,6 @@ export function ProvidersSection() {
   );

   const { providersExpandedProvider, setProvidersExpandedProvider } = useSettings();
-  const { options: providerOptions, setAnthropicOptions } = useProviderOptions();
   const { api } = useAPI();
   const { config, refresh, updateOptimistically } = useProvidersConfig();
@@ -1497,22 +1495,22 @@ export function ProvidersSection() {