diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx
index a973e24fce..2732b0798c 100644
--- a/src/browser/components/ChatInput/index.tsx
+++ b/src/browser/components/ChatInput/index.tsx
@@ -472,7 +472,7 @@ const ChatInputInner: React.FC = (props) => {
   const workspaceIdForUsage = variant === "workspace" ? props.workspaceId : "";
   const usage = useWorkspaceUsage(workspaceIdForUsage);
   const { has1MContext } = useProviderOptions();
-  const lastUsage = usage?.liveUsage ?? usage?.lastContextUsage;
+  const lastUsage = usage?.currentContextUsage;
   const usageModel = lastUsage?.model ?? null;
   const use1M = has1MContext(usageModel ?? "");
   const contextUsageData = useMemo(() => {
diff --git a/src/browser/components/RightSidebar/CostsTab.tsx b/src/browser/components/RightSidebar/CostsTab.tsx
index ff043de741..5b0637e681 100644
--- a/src/browser/components/RightSidebar/CostsTab.tsx
+++ b/src/browser/components/RightSidebar/CostsTab.tsx
@@ -1,11 +1,8 @@
 import React from "react";
 import { useWorkspaceUsage, useWorkspaceConsumers } from "@/browser/stores/WorkspaceStore";
 import { getModelStats } from "@/common/utils/tokens/modelStats";
-import {
-  sumUsageHistory,
-  formatCostWithDollar,
-  type ChatUsageDisplay,
-} from "@/common/utils/tokens/usageAggregator";
+import { getSessionCostTotal, formatCostWithDollar } from "@/common/utils/tokens/usageAggregator";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
 import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage";
 import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference";
@@ -69,8 +66,7 @@ const CostsTabComponent: React.FC = ({ workspaceId }) => {
   const runtimeConfig = workspaceContext?.workspaceMetadata.get(workspaceId)?.runtimeConfig;

   // Get model from context usage for per-model threshold storage
-  // Use lastContextUsage for context window display (last step's usage)
-  const contextUsageForModel = usage.liveUsage ?? usage.lastContextUsage;
+  const contextUsageForModel = usage.currentContextUsage;
   const currentModel = contextUsageForModel?.model ?? null;
   // Align warning with /compact model resolution so it matches actual compaction behavior.
   const effectiveCompactionModel = resolveCompactionModel(preferredCompactionModel) ?? currentModel;
@@ -81,18 +77,13 @@ const CostsTabComponent: React.FC = ({ workspaceId }) => {

   // Session usage for cost calculation
   // Uses sessionTotal (pre-computed) + liveCostUsage (cumulative during streaming)
-  const sessionUsage = React.useMemo(() => {
-    const parts: ChatUsageDisplay[] = [];
-    if (usage.sessionTotal) parts.push(usage.sessionTotal);
-    if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
-    return parts.length > 0 ? sumUsageHistory(parts) : undefined;
-  }, [usage.sessionTotal, usage.liveCostUsage]);
+  const sessionUsage = React.useMemo(
+    () => getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage),
+    [usage.sessionTotal, usage.liveCostUsage]
+  );

   const hasUsageData =
-    usage &&
-    (usage.sessionTotal !== undefined ||
-      usage.lastContextUsage !== undefined ||
-      usage.liveUsage !== undefined);
+    usage && (usage.sessionTotal !== undefined || usage.currentContextUsage !== undefined);
   const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
   const hasAnyData = hasUsageData || hasConsumerData;

@@ -120,7 +111,7 @@ const CostsTabComponent: React.FC = ({ workspaceId }) => {
           {(() => {
-            const contextUsage = usage.liveUsage ?? usage.lastContextUsage;
+            const contextUsage = usage.currentContextUsage;
             const model = contextUsage?.model ?? "unknown";
             const contextUsageData = contextUsage
@@ -134,12 +125,10 @@ const CostsTabComponent: React.FC = ({ workspaceId }) => {
                 return undefined;

               const thresholdTokens = Math.round((autoCompactThreshold / 100) * maxTokens);
-              const compactionStats = getModelStats(effectiveCompactionModel);
-              const compactionMaxTokens =
-                has1MContext(effectiveCompactionModel) &&
-                supports1MContext(effectiveCompactionModel)
-                  ? 1_000_000
-                  : compactionStats?.max_input_tokens;
+              const compactionMaxTokens = getEffectiveContextLimit(
+                effectiveCompactionModel,
+                has1MContext(effectiveCompactionModel)
+              );

               if (compactionMaxTokens && compactionMaxTokens < thresholdTokens) {
                 return { compactionModelMaxTokens: compactionMaxTokens, thresholdTokens };
diff --git a/src/browser/components/RightSidebar/tabs/TabLabels.tsx b/src/browser/components/RightSidebar/tabs/TabLabels.tsx
index 078a2f8005..3763272c30 100644
--- a/src/browser/components/RightSidebar/tabs/TabLabels.tsx
+++ b/src/browser/components/RightSidebar/tabs/TabLabels.tsx
@@ -13,9 +13,8 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip";
 import { FileIcon } from "../../FileIcon";
 import { formatTabDuration, type ReviewStats } from "./registry";
 import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
-import { cn } from "@/common/lib/utils";
 import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore";
-import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
+import { getSessionCostTotal, getTotalCost } from "@/common/utils/tokens/usageAggregator";
 interface CostsTabLabelProps {
   workspaceId: string;
 }
@@ -29,21 +28,9 @@ export const CostsTabLabel: React.FC = ({ workspaceId }) =>
   const usage = useWorkspaceUsage(workspaceId);

   const sessionCost = React.useMemo(() => {
-    const parts: ChatUsageDisplay[] = [];
-    if (usage.sessionTotal) parts.push(usage.sessionTotal);
-    if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
-    if (parts.length === 0) return null;
-
-    const aggregated = sumUsageHistory(parts);
-    if (!aggregated) return null;
-
-    const total =
-      (aggregated.input.cost_usd ?? 0) +
-      (aggregated.cached.cost_usd ?? 0) +
-      (aggregated.cacheCreate.cost_usd ?? 0) +
-      (aggregated.output.cost_usd ?? 0) +
-      (aggregated.reasoning.cost_usd ?? 0);
-    return total > 0 ? total : null;
+    const aggregated = getSessionCostTotal(usage.sessionTotal, usage.liveCostUsage);
+    const total = getTotalCost(aggregated);
+    return total && total > 0 ? total : null;
   }, [usage.sessionTotal, usage.liveCostUsage]);

   return (
@@ -67,12 +54,7 @@ export const ReviewTabLabel: React.FC = ({ reviewStats }) =
     <>
       Review
       {reviewStats !== null && reviewStats.total > 0 && (
-
+
         {reviewStats.read}/{reviewStats.total}
       )}
diff --git a/src/browser/hooks/useContextSwitchWarning.test.ts b/src/browser/hooks/useContextSwitchWarning.test.ts
index 9037132a3e..42c7944376 100644
--- a/src/browser/hooks/useContextSwitchWarning.test.ts
+++ b/src/browser/hooks/useContextSwitchWarning.test.ts
@@ -8,7 +8,7 @@ import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
 import type { SendMessageOptions } from "@/common/orpc/types";
 import type { DisplayedMessage } from "@/common/types/message";
 import { useContextSwitchWarning } from "./useContextSwitchWarning";
-import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import {
   recordWorkspaceModelChange,
   setWorkspaceModelWithOrigin,
@@ -81,17 +81,21 @@ const createPolicyChurnClient = () => {
   return { client, triggerPolicyEvent };
 };

-const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => ({
-  totalTokens: tokens,
-  lastContextUsage: {
+const buildUsage = (tokens: number, model?: string): WorkspaceUsageState => {
+  const contextUsage = {
     input: { tokens },
     cached: { tokens: 0 },
     cacheCreate: { tokens: 0 },
     output: { tokens: 0 },
     reasoning: { tokens: 0 },
     model,
-  },
-});
+  };
+  return {
+    totalTokens: tokens,
+    lastContextUsage: contextUsage,
+    currentContextUsage: contextUsage,
+  };
+};

 const buildAssistantMessage = (model: string): DisplayedMessage => ({
   type: "assistant",
diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts
index 96bcfbbebe..2ab04db5f3 100644
--- a/src/browser/hooks/useContextSwitchWarning.ts
+++ b/src/browser/hooks/useContextSwitchWarning.ts
@@ -11,6 +11,7 @@ import type { AppRouter } from "@/node/orpc/router";
 import type { SendMessageOptions } from "@/common/orpc/types";
 import type { DisplayedMessage } from "@/common/types/message";
 import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
+import { getContextTokens } from "@/common/utils/tokens/usageAggregator";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
 import { usePolicy } from "@/browser/contexts/PolicyContext";
 import {
@@ -20,7 +21,7 @@ import {
   type ContextSwitchWarning,
 } from "@/browser/utils/compaction/contextSwitchCheck";
 import { getHigherContextCompactionSuggestion } from "@/browser/utils/compaction/suggestion";
-import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import {
   consumeWorkspaceModelChange,
   setWorkspaceModelWithOrigin,
@@ -158,8 +159,8 @@
   }, [workspaceId, pendingModel, use1M, checkOptions]);

   const getCurrentTokens = useCallback(() => {
-    const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage;
-    return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0;
+    const usage = workspaceUsage?.currentContextUsage;
+    return usage ? getContextTokens(usage) : 0;
   }, [workspaceUsage]);

   const tokens = getCurrentTokens();
diff --git a/src/browser/stores/WorkspaceConsumerManager.ts b/src/browser/stores/WorkspaceConsumerManager.ts
index 93bd4d16f1..e020b95fd3 100644
--- a/src/browser/stores/WorkspaceConsumerManager.ts
+++ b/src/browser/stores/WorkspaceConsumerManager.ts
@@ -2,6 +2,7 @@ import type { WorkspaceConsumersState } from "./WorkspaceStore";
 import type { StreamingMessageAggregator } from "@/browser/utils/messages/StreamingMessageAggregator";
 import type { ChatStats } from "@/common/types/chatStats";
 import type { MuxMessage } from "@/common/types/message";
+import { sliceMessagesFromLatestCompactionBoundary } from "@/common/utils/messages/compactionBoundary";

 const TOKENIZER_CANCELLED_MESSAGE = "Cancelled by newer request";

@@ -197,7 +198,9 @@
     // Run in next tick to avoid blocking caller
     void (async () => {
       try {
-        const messages = aggregator.getAllMessages();
+        // Only count tokens for the current compaction epoch — pre-boundary
+        // messages carry stale context and inflate the consumer breakdown.
+        const messages = sliceMessagesFromLatestCompactionBoundary(aggregator.getAllMessages());
         const model = aggregator.getCurrentModel() ?? "unknown";

         // Calculate in piscina pool with timeout protection
diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts
index c42582048e..74974c0a62 100644
--- a/src/browser/stores/WorkspaceStore.ts
+++ b/src/browser/stores/WorkspaceStore.ts
@@ -35,6 +35,7 @@ import type {
 } from "@/common/types/stream";
 import { MapStore } from "./MapStore";
 import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
+import { isDurableCompactionBoundaryMarker } from "@/common/utils/messages/compactionBoundary";
 import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
 import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
 import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
@@ -203,6 +204,9 @@
   liveUsage?: ChatUsageDisplay;
   /** Live cost usage during streaming (cumulative across all steps) */
   liveCostUsage?: ChatUsageDisplay;
+  /** Best-available context usage: live when streaming, else last completed.
+   * Consumers should prefer this over manually deriving liveUsage ?? lastContextUsage. */
+  currentContextUsage?: ChatUsageDisplay;
 }

 /**
@@ -1255,11 +1259,15 @@
         sessionTotal.reasoning.tokens
       : 0;

-    // Get last message's context usage (unchanged from before)
+    // Get last message's context usage — only search within the current
+    // compaction epoch. Pre-boundary messages carry stale contextUsage from
+    // before compaction; including them inflates the usage indicator and
+    // triggers premature auto-compaction.
     const messages = aggregator.getAllMessages();
     const lastContextUsage = (() => {
       for (let i = messages.length - 1; i >= 0; i--) {
         const msg = messages[i];
+        if (isDurableCompactionBoundaryMarker(msg)) break;
         if (msg.role === "assistant") {
           if (msg.metadata?.compacted) continue;
           const rawUsage = msg.metadata?.contextUsage;
@@ -1298,7 +1306,18 @@
       ? createDisplayUsage(rawCumulativeUsage, model, rawCumulativeProviderMetadata)
       : undefined;

-    return { sessionTotal, lastRequest, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
+    // Derived: best-available context usage (live when streaming, else last completed)
+    const currentContextUsage = liveUsage ?? lastContextUsage;
+
+    return {
+      sessionTotal,
+      lastRequest,
+      lastContextUsage,
+      totalTokens,
+      liveUsage,
+      liveCostUsage,
+      currentContextUsage,
+    };
   });
 }
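Reviewer note: `currentContextUsage` folds the `liveUsage ?? lastContextUsage` fallback into the store so consumers stop re-deriving it by hand. A minimal sketch of the consumer-side migration; the hook below is hypothetical, but the `WorkspaceUsageState` fields are the ones declared in the hunk above:

    import { useWorkspaceUsage } from "@/browser/stores/WorkspaceStore";

    // Hypothetical consumer: read the model behind the best-available context usage.
    export function useContextModel(workspaceId: string): string | null {
      const usage = useWorkspaceUsage(workspaceId);
      // Before: const contextUsage = usage.liveUsage ?? usage.lastContextUsage;
      // After: the store derives the fallback once, next to the data it owns.
      const contextUsage = usage.currentContextUsage;
      return contextUsage?.model ?? null;
    }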
diff --git a/src/browser/utils/compaction/autoCompactionCheck.test.ts b/src/browser/utils/compaction/autoCompactionCheck.test.ts
index c37d5f519b..fb1ec3b050 100644
--- a/src/browser/utils/compaction/autoCompactionCheck.test.ts
+++ b/src/browser/utils/compaction/autoCompactionCheck.test.ts
@@ -28,14 +28,14 @@ const createUsageEntry = (
 // Helper to create mock WorkspaceUsageState
 const createMockUsage = (
   lastEntryTokens: number,
-  _historicalTokens?: number, // Kept for backward compat but unused (session-usage.json handles historical)
   model: string = KNOWN_MODELS.SONNET.id,
   liveUsage?: ChatUsageDisplay
 ): WorkspaceUsageState => {
   // Create lastContextUsage representing the most recent context window state
   const lastContextUsage = createUsageEntry(lastEntryTokens, model);

-  return { lastContextUsage, totalTokens: 0, liveUsage };
+  const currentContextUsage = liveUsage ?? lastContextUsage;
+  return { lastContextUsage, totalTokens: 0, liveUsage, currentContextUsage };
 };

 describe("checkAutoCompaction", () => {
@@ -120,7 +120,8 @@
   test("handles historical usage correctly - ignores it in calculation", () => {
     // Scenario: After compaction, historical = 70K, recent = 5K
     // Should calculate based on 5K (2.5%), not 75K (37.5%)
-    const usage = createMockUsage(5_000, 70_000);
+    // (session-usage.json handles historical separately; only lastContextUsage matters)
+    const usage = createMockUsage(5_000);
     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

     expect(result.usagePercentage).toBe(2.5);
@@ -140,6 +141,7 @@
     };
     const usage: WorkspaceUsageState = {
       lastContextUsage: usageEntry,
+      currentContextUsage: usageEntry,
       totalTokens: 0,
     };

@@ -163,6 +165,7 @@
     };
     const usage: WorkspaceUsageState = {
       lastContextUsage: usageEntry,
+      currentContextUsage: usageEntry,
       totalTokens: 0,
     };

@@ -202,7 +205,7 @@
   });

   test("ignores use1M for models that don't support it (GPT)", () => {
-    const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.GPT_MINI.id);
+    const usage = createMockUsage(100_000, KNOWN_MODELS.GPT_MINI.id);
     // GPT Mini has 272k context, so 100k = 36.76%
     const result = checkAutoCompaction(usage, KNOWN_MODELS.GPT_MINI.id, true);

@@ -261,6 +264,7 @@
     };
     const usage: WorkspaceUsageState = {
       lastContextUsage: zeroEntry,
+      currentContextUsage: zeroEntry,
       totalTokens: 0,
     };

@@ -349,7 +353,7 @@
   test("shouldForceCompact uses liveUsage when available", () => {
     // lastUsage at 50%, liveUsage at 75% - should trigger based on live
    const liveUsage = createUsageEntry(150_000); // 75%
-    const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+    const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

     expect(result.shouldForceCompact).toBe(true);
@@ -367,7 +371,7 @@
   test("shouldForceCompact respects 1M context mode", () => {
     // 75% of 1M = 750k tokens
     const liveUsage = createUsageEntry(750_000);
-    const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+    const usage = createMockUsage(50_000, KNOWN_MODELS.SONNET.id, liveUsage);
     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true);

     expect(result.shouldForceCompact).toBe(true);
@@ -378,6 +382,7 @@
     const usage: WorkspaceUsageState = {
       totalTokens: 0,
       liveUsage,
+      currentContextUsage: liveUsage,
     };

     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
@@ -391,6 +396,7 @@
     const usage: WorkspaceUsageState = {
       totalTokens: 0,
       liveUsage,
+      currentContextUsage: liveUsage,
     };

     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
@@ -401,7 +407,7 @@
   test("shouldShowWarning uses max of last and live usage", () => {
     // lastUsage at 50% (below warning), liveUsage at 72% (above warning)
     const liveUsage = createUsageEntry(144_000); // 72%
-    const usage = createMockUsage(100_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+    const usage = createMockUsage(100_000, KNOWN_MODELS.SONNET.id, liveUsage);
     const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

     expect(result.shouldShowWarning).toBe(true); // 72% >= 60%
diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts
index f0c983ba18..4bee176c79 100644
--- a/src/browser/utils/compaction/autoCompactionCheck.ts
+++ b/src/browser/utils/compaction/autoCompactionCheck.ts
@@ -16,23 +16,12 @@
  */

 import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
-import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
+import { getContextTokens } from "@/common/utils/tokens/usageAggregator";
 import {
   DEFAULT_AUTO_COMPACTION_THRESHOLD,
   FORCE_COMPACTION_BUFFER_PERCENT,
 } from "@/common/constants/ui";
-import { getEffectiveContextLimit } from "./contextLimit";
-
-/**
- * Get context window tokens (input only).
- * Output and reasoning tokens are excluded because they represent the model's
- * response, not the context window size. This prevents compaction loops with
- * Extended Thinking models where high reasoning token counts (50k+) would
- * incorrectly inflate context usage calculations.
- */
-function getContextTokens(usage: ChatUsageDisplay): number {
-  return usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens;
-}
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";

 export interface AutoCompactionCheckResult {
   shouldShowWarning: boolean;
@@ -94,8 +83,8 @@
   }

   // Current usage: live when streaming, else last completed
+  const currentUsage = usage.currentContextUsage;
   const lastUsage = usage.lastContextUsage;
-  const currentUsage = usage.liveUsage ?? lastUsage;

   // Usage percentage from current context (live when streaming, otherwise last completed)
   const usagePercentage = currentUsage ? (getContextTokens(currentUsage) / maxTokens) * 100 : 0;
diff --git a/src/browser/utils/compaction/contextSwitchCheck.test.ts b/src/browser/utils/compaction/contextSwitchCheck.test.ts
index 52a5250fc6..82149e70ff 100644
--- a/src/browser/utils/compaction/contextSwitchCheck.test.ts
+++ b/src/browser/utils/compaction/contextSwitchCheck.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from "bun:test";
 import { checkContextSwitch } from "./contextSwitchCheck";
-import { getEffectiveContextLimit } from "./contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";

 const OPTIONS = { providersConfig: null, policy: null };

diff --git a/src/browser/utils/compaction/contextSwitchCheck.ts b/src/browser/utils/compaction/contextSwitchCheck.ts
index 4cccf01ced..6dfa8dbbf5 100644
--- a/src/browser/utils/compaction/contextSwitchCheck.ts
+++ b/src/browser/utils/compaction/contextSwitchCheck.ts
@@ -10,7 +10,7 @@ import type { EffectivePolicy, ProvidersConfigMap } from "@/common/orpc/types";
 import type { DisplayedMessage } from "@/common/types/message";
 import { getPreferredCompactionModel } from "@/browser/utils/messages/compactionModelPreference";
 import { normalizeGatewayModel } from "@/common/utils/ai/models";
-import { getEffectiveContextLimit } from "./contextLimit";
+import { getEffectiveContextLimit } from "@/common/utils/tokens/contextLimit";
 import { getExplicitCompactionSuggestion } from "./suggestion";

 /** Safety buffer - warn if context exceeds 90% of target model's limit */
diff --git a/src/common/utils/messages/compactionBoundary.ts b/src/common/utils/messages/compactionBoundary.ts
index 95082f001b..d5d95e0a70 100644
--- a/src/common/utils/messages/compactionBoundary.ts
+++ b/src/common/utils/messages/compactionBoundary.ts
@@ -1,4 +1,4 @@
-import assert from "node:assert";
+import assert from "@/common/utils/assert";

 import type { MuxMessage } from "@/common/types/message";

diff --git a/src/browser/utils/compaction/contextLimit.ts b/src/common/utils/tokens/contextLimit.ts
similarity index 66%
rename from src/browser/utils/compaction/contextLimit.ts
rename to src/common/utils/tokens/contextLimit.ts
index c20a320f0c..61b09de17d 100644
--- a/src/browser/utils/compaction/contextLimit.ts
+++ b/src/common/utils/tokens/contextLimit.ts
@@ -1,12 +1,12 @@
 /**
- * Shared context limit utilities for compaction logic.
+ * Shared context limit utilities.
  *
- * Used by autoCompactionCheck and contextSwitchCheck to calculate
- * effective context limits accounting for 1M context toggle.
+ * Used by token meter display, auto-compaction, and context-switch checks
+ * to calculate effective context limits accounting for the 1M context toggle.
  */
-import { getModelStats } from "@/common/utils/tokens/modelStats";
-import { supports1MContext } from "@/common/utils/ai/models";
+import { getModelStats } from "./modelStats";
+import { supports1MContext } from "../ai/models";

 /**
  * Get effective context limit for a model, accounting for 1M toggle.
 */
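Since the rename hunk shows only the module header, here is the presumed shape of `getEffectiveContextLimit`, reconstructed from the inline logic it replaces in CostsTab.tsx above and tokenMeterUtils.ts below; the exact return type is an assumption based on the `?? undefined` at the tokenMeterUtils call site:

    import { getModelStats } from "./modelStats";
    import { supports1MContext } from "../ai/models";

    // Sketch, not the verbatim implementation: the 1M toggle only applies to
    // models that support a 1M-token window; otherwise fall back to the model
    // stats table's max_input_tokens (null when the model is unknown).
    export function getEffectiveContextLimit(model: string, use1M: boolean): number | null {
      if (use1M && supports1MContext(model)) return 1_000_000;
      return getModelStats(model)?.max_input_tokens ?? null;
    }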
diff --git a/src/common/utils/tokens/tokenMeterUtils.ts b/src/common/utils/tokens/tokenMeterUtils.ts
index 5943fa99f7..c94e288b44 100644
--- a/src/common/utils/tokens/tokenMeterUtils.ts
+++ b/src/common/utils/tokens/tokenMeterUtils.ts
@@ -1,6 +1,5 @@
 import type { ChatUsageDisplay } from "./usageAggregator";
-import { getModelStats } from "./modelStats";
-import { supports1MContext } from "../ai/models";
+import { getEffectiveContextLimit } from "./contextLimit";

 // NOTE: Provide theme-matching fallbacks so token meters render consistently
 // even if a host environment doesn't define the CSS variables (e.g., an embedded UI).
@@ -63,8 +62,7 @@ export function calculateTokenMeterData(
 ): TokenMeterData {
   if (!usage) return { segments: [], totalTokens: 0, totalPercentage: 0 };

-  const modelStats = getModelStats(model);
-  const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
+  const maxTokens = getEffectiveContextLimit(model, use1M) ?? undefined;

   // Total tokens used in the request.
   // For Anthropic prompt caching, cacheCreate tokens are reported separately but still
diff --git a/src/common/utils/tokens/tokenStatsCalculator.test.ts b/src/common/utils/tokens/tokenStatsCalculator.test.ts
index db7f864da0..8b3564b5ce 100644
--- a/src/common/utils/tokens/tokenStatsCalculator.test.ts
+++ b/src/common/utils/tokens/tokenStatsCalculator.test.ts
@@ -5,7 +5,6 @@ import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import {
   collectUniqueToolNames,
   countEncryptedWebSearchTokens,
-  createDisplayUsage,
   extractSyncMetadata,
   extractToolOutputData,
   getConsumerInfoForToolCall,
@@ -13,6 +12,7 @@
   mergeResults,
   type TokenCountJob,
 } from "./tokenStatsCalculator";
+import { createDisplayUsage } from "./displayUsage";

 describe("createDisplayUsage", () => {
   test("uses usage.reasoningTokens when available", () => {
diff --git a/src/common/utils/tokens/tokenStatsCalculator.ts b/src/common/utils/tokens/tokenStatsCalculator.ts
index 90ae4bc5d1..a1ae920101 100644
--- a/src/common/utils/tokens/tokenStatsCalculator.ts
+++ b/src/common/utils/tokens/tokenStatsCalculator.ts
@@ -17,9 +17,6 @@
 import { createDisplayUsage } from "./displayUsage";
 import type { ChatUsageDisplay } from "./usageAggregator";

-// Re-export for backward compatibility
-export { createDisplayUsage };
-
 /**
  * Helper Functions for Token Counting
  * (Exported for testing)
diff --git a/src/common/utils/tokens/usageAggregator.ts b/src/common/utils/tokens/usageAggregator.ts
index 948827dfef..ebce6d5a0d 100644
--- a/src/common/utils/tokens/usageAggregator.ts
+++ b/src/common/utils/tokens/usageAggregator.ts
@@ -34,6 +34,28 @@
   hasUnknownCosts?: boolean;
 }

+/**
+ * Context window tokens: input + cached + cacheCreate (excludes output/reasoning).
+ * Output and reasoning tokens represent the model's response, not context window size.
+ * This prevents compaction loops with Extended Thinking models where high reasoning
+ * token counts (50k+) would incorrectly inflate context usage calculations.
+ */
+export function getContextTokens(usage: ChatUsageDisplay): number {
+  return usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens;
+}
+
+/**
+ * Combine session total with live streaming cost into a single display-ready total.
+ * Used by CostsTab and CostsTabLabel to avoid duplicating the aggregation pattern.
+ */
+export function getSessionCostTotal(
+  sessionTotal: ChatUsageDisplay | undefined,
+  liveCostUsage: ChatUsageDisplay | undefined
+): ChatUsageDisplay | undefined {
+  const parts = [sessionTotal, liveCostUsage].filter(Boolean) as ChatUsageDisplay[];
+  return parts.length > 0 ? sumUsageHistory(parts) : undefined;
+}
+
 /**
  * Sum multiple ChatUsageDisplay objects into a single cumulative display
  * Used for showing total costs across multiple API responses
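A small worked example of the two new helpers; the figures are invented, and `getTotalCost` (already exported by this module, per the CostsTabLabel import above) is assumed to sum the five cost_usd buckets:

    import {
      getContextTokens,
      getSessionCostTotal,
      getTotalCost,
      type ChatUsageDisplay,
    } from "@/common/utils/tokens/usageAggregator";

    // Illustrative entry: 9k tokens occupy the context window; the 2k generated
    // tokens (output + reasoning) are costed but excluded from context size.
    const entry: ChatUsageDisplay = {
      input: { tokens: 8_000, cost_usd: 0.024 },
      cached: { tokens: 500, cost_usd: 0.0005 },
      cacheCreate: { tokens: 500, cost_usd: 0.002 },
      output: { tokens: 1_500, cost_usd: 0.0225 },
      reasoning: { tokens: 500, cost_usd: 0.0075 },
    };

    getContextTokens(entry); // 9_000 = input + cached + cacheCreate

    // Cost display: persisted session total combined with live streaming cost.
    const sessionUsage = getSessionCostTotal(entry, undefined); // single part, summed as-is
    const totalCost = getTotalCost(sessionUsage); // 0.0565 under the assumed semantics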
diff --git a/src/node/services/compactionHandler.ts b/src/node/services/compactionHandler.ts
index 15de44d519..279f8422f5 100644
--- a/src/node/services/compactionHandler.ts
+++ b/src/node/services/compactionHandler.ts
@@ -513,13 +513,17 @@
         ...event.metadata,
         providerMetadata: undefined,
         contextProviderMetadata: undefined,
+        // contextUsage reflects the pre-compaction context window; keeping it
+        // would inflate the usage indicator until the next real request.
+        contextUsage: undefined,
       },
     };

     assert(
       sanitizedEvent.metadata.providerMetadata === undefined &&
-        sanitizedEvent.metadata.contextProviderMetadata === undefined,
-      "Compaction stream-end event must not carry stale provider metadata"
+        sanitizedEvent.metadata.contextProviderMetadata === undefined &&
+        sanitizedEvent.metadata.contextUsage === undefined,
+      "Compaction stream-end event must not carry stale provider metadata or context usage"
     );

     return sanitizedEvent;
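The sanitize-then-assert pattern in this last hunk is worth naming: overwrite the stale fields explicitly, then assert the invariant so a regression fails loudly. A pared-down sketch with a simplified event type and a hypothetical function name (the real logic lives inside CompactionHandler and its event carries more fields):

    import assert from "@/common/utils/assert";

    interface StreamEndMetadata {
      providerMetadata?: unknown;
      contextProviderMetadata?: unknown;
      contextUsage?: unknown;
    }

    interface StreamEndEvent {
      metadata: StreamEndMetadata;
    }

    function sanitizeStreamEnd(event: StreamEndEvent): StreamEndEvent {
      const sanitized = {
        ...event,
        metadata: {
          ...event.metadata,
          providerMetadata: undefined,
          contextProviderMetadata: undefined,
          contextUsage: undefined, // pre-compaction context window must not leak forward
        },
      };
      assert(
        sanitized.metadata.contextUsage === undefined,
        "stream-end event must not carry stale context usage"
      );
      return sanitized;
    }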