diff --git a/src/__tests__/extension.spec.ts b/src/__tests__/extension.spec.ts
index 5b072672699..dced3367144 100644
--- a/src/__tests__/extension.spec.ts
+++ b/src/__tests__/extension.spec.ts
@@ -238,6 +238,43 @@ describe("extension.ts", () => {
 		authStateChangedHandler = undefined
 	})
 
+	test("API inference logging writes to Debug Console when ROO_CODE_API_LOGGING=true", async () => {
+		const previousValue = process.env.ROO_CODE_API_LOGGING
+		process.env.ROO_CODE_API_LOGGING = "true"
+
+		const consoleLogSpy = vi.spyOn(console, "log").mockImplementation(() => {})
+
+		try {
+			const { activate } = await import("../extension")
+			await activate(mockContext)
+
+			const { ApiInferenceLogger } = await import("../api/logging/ApiInferenceLogger")
+			ApiInferenceLogger.start(
+				{
+					provider: "Test Provider",
+					operation: "createMessage",
+					model: "test-model",
+					taskId: "test-task",
+					requestId: "test-request",
+				},
+				{ messageCount: 1 },
+			)
+
+			// New simplified format: [API][request][Provider][model] + raw payload
+			expect(consoleLogSpy).toHaveBeenCalledWith(
+				"[API][request][Test Provider][test-model]",
+				expect.objectContaining({ messageCount: 1 }),
+			)
+		} finally {
+			consoleLogSpy.mockRestore()
+			if (previousValue === undefined) {
+				delete process.env.ROO_CODE_API_LOGGING
+			} else {
+				process.env.ROO_CODE_API_LOGGING = previousValue
+			}
+		}
+	})
+
 	test("authStateChangedHandler calls BridgeOrchestrator.disconnect when logged-out event fires", async () => {
 		const { CloudService, BridgeOrchestrator } = await import("@roo-code/cloud")
 
diff --git a/src/api/logging/ApiInferenceLogger.ts b/src/api/logging/ApiInferenceLogger.ts
new file mode 100644
index 00000000000..e463ee287a1
--- /dev/null
+++ b/src/api/logging/ApiInferenceLogger.ts
@@ -0,0 +1,349 @@
+/**
+ * Lightweight logger for API inference requests/responses.
+ *
+ * This logger is designed to capture raw inference inputs/outputs across providers
+ * for debugging purposes. It emits structured objects to a configurable sink.
+ *
+ * For streaming requests, only the final assembled response is logged (not individual chunks).
+ *
+ * Enable via environment variable: process.env.ROO_CODE_API_LOGGING === "true"
+ */
+
+export interface ApiInferenceLoggerConfig {
+	enabled: boolean
+	sink: (...args: unknown[]) => void
+}
+
+export interface ApiInferenceContext {
+	provider: string
+	operation: string
+	model?: string
+	taskId?: string
+	requestId?: string
+}
+
+export interface ApiInferenceHandle {
+	success: (responsePayload: unknown) => void
+	error: (errorPayload: unknown) => void
+}
+
+function extractModelFromPayload(payload: unknown): string | undefined {
+	if (!payload || typeof payload !== "object") return undefined
+	const rec = payload as Record<string, unknown>
+	const model = rec["model"]
+	return typeof model === "string" && model.trim().length > 0 ? model : undefined
+}
+
+/**
+ * Configuration for payload size limiting to avoid freezing the Output Channel.
+ */
+const PAYLOAD_LIMITS = {
+	/** Maximum string length before truncation */
+	MAX_STRING_LENGTH: 10_000,
+	/** Maximum array entries to log */
+	MAX_ARRAY_LENGTH: 200,
+	/** Maximum object keys to log */
+	MAX_OBJECT_KEYS: 200,
+}
+
+/**
+ * Regex pattern for detecting base64 image data URLs.
+ */
+const BASE64_IMAGE_PATTERN = /^data:image\/[^;]+;base64,/
+
+/**
+ * Secret field patterns to redact in logged payloads.
+ * Case-insensitive matching is applied.
+ * Note: Patterns are designed to avoid false positives (e.g., "inputTokens" should not be redacted).
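+ * Illustrative example (mirrors the redaction tests in this PR): matching is by key name only, so a payload such as
+ *   { apiKey: "sk-123", usage: { inputTokens: 10 } }
+ * is logged as
+ *   { apiKey: "[REDACTED]", usage: { inputTokens: 10 } }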
+ */
+const SECRET_PATTERNS = [
+	"authorization",
+	"apikey",
+	"api_key",
+	"x-api-key",
+	"access_token",
+	"accesstoken",
+	"bearer",
+	"secret",
+	"password",
+	"credential",
+]
+
+/**
+ * Patterns that indicate a field is NOT a secret (allowlist).
+ * These are checked before secret patterns to prevent false positives.
+ */
+const NON_SECRET_PATTERNS = ["inputtokens", "outputtokens", "cachetokens", "reasoningtokens", "totaltokens"]
+
+/**
+ * Check if a key name looks like a secret field.
+ */
+function isSecretKey(key: string): boolean {
+	const lowerKey = key.toLowerCase()
+	// Check allowlist first to avoid false positives
+	if (NON_SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern))) {
+		return false
+	}
+	return SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern))
+}
+
+/**
+ * Truncate a string if it exceeds the maximum length.
+ * Also replaces base64 image data with a placeholder.
+ */
+function sanitizeString(str: string): string {
+	// Check for base64 image data URLs first
+	if (BASE64_IMAGE_PATTERN.test(str)) {
+		return `[ImageData len=${str.length}]`
+	}
+
+	// Truncate long strings
+	if (str.length > PAYLOAD_LIMITS.MAX_STRING_LENGTH) {
+		return `[Truncated len=${str.length}]`
+	}
+
+	return str
+}
+
+/**
+ * Recursively sanitize and redact secrets from an object.
+ * Applies size limiting to prevent the Output Channel from freezing:
+ * - Strings longer than MAX_STRING_LENGTH are truncated
+ * - Arrays longer than MAX_ARRAY_LENGTH are capped
+ * - Objects with more than MAX_OBJECT_KEYS are capped
+ * - Base64 image data URLs are replaced with placeholders
+ * - Secret fields are redacted
+ * Returns a sanitized copy of the object.
+ */
+function sanitizePayload(obj: unknown, visited = new WeakSet()): unknown {
+	if (obj === null || obj === undefined) {
+		return obj
+	}
+
+	// Handle strings
+	if (typeof obj === "string") {
+		return sanitizeString(obj)
+	}
+
+	// Handle other primitives
+	if (typeof obj !== "object") {
+		return obj
+	}
+
+	// Prevent infinite recursion on circular references
+	if (visited.has(obj as object)) {
+		return "[Circular Reference]"
+	}
+	visited.add(obj as object)
+
+	// Handle arrays with length limiting
+	if (Array.isArray(obj)) {
+		const maxLen = PAYLOAD_LIMITS.MAX_ARRAY_LENGTH
+		if (obj.length > maxLen) {
+			const truncated = obj.slice(0, maxLen).map((item) => sanitizePayload(item, visited))
+			truncated.push(`[...${obj.length - maxLen} more items]`)
+			return truncated
+		}
+		return obj.map((item) => sanitizePayload(item, visited))
+	}
+
+	// Handle objects with key limiting
+	const entries = Object.entries(obj as Record<string, unknown>)
+	const maxKeys = PAYLOAD_LIMITS.MAX_OBJECT_KEYS
+	const result: Record<string, unknown> = {}
+	let keyCount = 0
+
+	for (const [key, value] of entries) {
+		if (keyCount >= maxKeys) {
+			result["[...]"] = `${entries.length - maxKeys} more keys omitted`
+			break
+		}
+
+		if (isSecretKey(key)) {
+			result[key] = "[REDACTED]"
+		} else if (typeof value === "string") {
+			result[key] = sanitizeString(value)
+		} else if (typeof value === "object" && value !== null) {
+			result[key] = sanitizePayload(value, visited)
+		} else {
+			result[key] = value
+		}
+
+		keyCount++
+	}
+
+	return result
+}
+
+/**
+ * Generate a unique request ID.
+ */
+function generateRequestId(): string {
+	return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`
+}
+
+/**
+ * Singleton logger class for API inference logging.
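+ *
+ * Illustrative usage sketch (provider/model names and payload shapes are taken from the tests in this PR,
+ * not additional API surface):
+ *
+ * @example
+ * ApiInferenceLogger.configure({ enabled: true, sink: console.log })
+ * const handle = ApiInferenceLogger.start(
+ *   { provider: "OpenAI", operation: "createMessage", model: "gpt-4" },
+ *   { model: "gpt-4", messages: [{ role: "user", content: "Hello" }] },
+ * )
+ * // Once the (possibly streamed) response has been fully assembled:
+ * handle.success({ text: "Hello world", usage: { inputTokens: 10, outputTokens: 20 } })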
+ */
+class ApiInferenceLoggerSingleton {
+	private enabled = false
+	private sink: ((...args: unknown[]) => void) | null = null
+
+	/**
+	 * Emit an already-formatted log entry.
+	 * This is used by HTTP-level middleware to preserve exact label formats.
+	 */
+	logRaw(label: string, payload: unknown): void {
+		if (!this.isEnabled() || !this.sink) return
+		try {
+			this.sink(label, sanitizePayload(payload))
+		} catch {
+			// Silently ignore logging errors to avoid breaking the application
+		}
+	}
+
+	/**
+	 * Emit an already-formatted error log entry.
+	 */
+	logRawError(label: string, errorPayload: unknown): void {
+		if (!this.isEnabled() || !this.sink) return
+		try {
+			let errorData: unknown
+			if (errorPayload instanceof Error) {
+				errorData = {
+					name: errorPayload.name,
+					message: errorPayload.message,
+					stack: errorPayload.stack,
+				}
+			} else {
+				errorData = sanitizePayload(errorPayload)
+			}
+			this.sink(label, errorData)
+		} catch {
+			// Silently ignore logging errors to avoid breaking the application
+		}
+	}
+
+	/**
+	 * Configure the logger with enabled state and output sink.
+	 * Should be called once during extension activation.
+	 */
+	configure(config: ApiInferenceLoggerConfig): void {
+		this.enabled = config.enabled
+		this.sink = config.enabled ? config.sink : null
+	}
+
+	/**
+	 * Check if logging is currently enabled.
+	 */
+	isEnabled(): boolean {
+		return this.enabled && this.sink !== null
+	}
+
+	/**
+	 * Start logging an API inference request.
+	 * Returns a handle to log the response or error.
+	 *
+	 * @param context - Context information about the request
+	 * @param requestPayload - The request payload to log
+	 * @returns A handle with success() and error() methods
+	 */
+	start(context: ApiInferenceContext, requestPayload: unknown): ApiInferenceHandle {
+		const requestId = context.requestId ?? generateRequestId()
+		const startTime = Date.now()
+		const startTimestamp = new Date().toISOString()
+		const resolvedModel = context.model ?? extractModelFromPayload(requestPayload)
+
+		// Log the request
+		if (this.isEnabled()) {
+			this.logRequest({
+				...context,
+				...(resolvedModel ? { model: resolvedModel } : {}),
+				requestId,
+				timestamp: startTimestamp,
+				payload: requestPayload,
+			})
+		}
+
+		return {
+			success: (responsePayload: unknown) => {
+				if (this.isEnabled()) {
+					const endTime = Date.now()
+					this.logResponse({
+						...context,
+						...(resolvedModel ? { model: resolvedModel } : {}),
+						requestId,
+						timestamp: new Date().toISOString(),
+						durationMs: endTime - startTime,
+						payload: responsePayload,
+					})
+				}
+			},
+			error: (errorPayload: unknown) => {
+				if (this.isEnabled()) {
+					const endTime = Date.now()
+					this.logError({
+						...context,
+						...(resolvedModel ? { model: resolvedModel } : {}),
+						requestId,
+						timestamp: new Date().toISOString(),
+						durationMs: endTime - startTime,
+						error: errorPayload,
+					})
+				}
+			},
+		}
+	}
+
+	/**
+	 * Log a request - outputs only the raw request payload for debugging.
+	 */
+	private logRequest(data: {
+		provider: string
+		operation: string
+		model?: string
+		taskId?: string
+		requestId: string
+		timestamp: string
+		payload: unknown
+	}): void {
+		this.logRaw(`[API][request][${data.provider}][${data.model ?? "unknown"}]`, data.payload)
+	}
+
+	/**
+	 * Log a successful response - outputs only the raw response payload for debugging.
+	 */
+	private logResponse(data: {
+		provider: string
+		operation: string
+		model?: string
+		taskId?: string
+		requestId: string
+		timestamp: string
+		durationMs: number
+		payload: unknown
+	}): void {
+		this.logRaw(`[API][response][${data.provider}][${data.model ?? "unknown"}][${data.durationMs}ms]`, data.payload)
+	}
+
+	/**
+	 * Log an error response - outputs only the error details for debugging.
+	 */
+	private logError(data: {
+		provider: string
+		operation: string
+		model?: string
+		taskId?: string
+		requestId: string
+		timestamp: string
+		durationMs: number
+		error: unknown
+	}): void {
+		this.logRawError(`[API][error][${data.provider}][${data.model ?? "unknown"}][${data.durationMs}ms]`, data.error)
+	}
+}
+
+/**
+ * Singleton instance of the API inference logger.
+ */
+export const ApiInferenceLogger = new ApiInferenceLoggerSingleton()
diff --git a/src/api/logging/__tests__/ApiInferenceLogger.spec.ts b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts
new file mode 100644
index 00000000000..bc32d7ca22f
--- /dev/null
+++ b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts
@@ -0,0 +1,444 @@
+import { ApiInferenceLogger } from "../ApiInferenceLogger"
+
+describe("ApiInferenceLogger", () => {
+	let mockSink: ReturnType<typeof vi.fn>
+
+	beforeEach(() => {
+		mockSink = vi.fn()
+		// Reset the logger to disabled state before each test
+		ApiInferenceLogger.configure({ enabled: false, sink: () => {} })
+	})
+
+	describe("configure", () => {
+		it("should enable logging when configured with enabled=true", () => {
+			ApiInferenceLogger.configure({ enabled: true, sink: mockSink })
+			expect(ApiInferenceLogger.isEnabled()).toBe(true)
+		})
+
+		it("should disable logging when configured with enabled=false", () => {
+			ApiInferenceLogger.configure({ enabled: false, sink: mockSink })
+			expect(ApiInferenceLogger.isEnabled()).toBe(false)
+		})
+
+		it("should not log when disabled", () => {
+			ApiInferenceLogger.configure({ enabled: false, sink: mockSink })
+			const handle = ApiInferenceLogger.start(
+				{ provider: "test", operation: "createMessage" },
+				{ test: "payload" },
+			)
+			handle.success({ response: "data" })
+			expect(mockSink).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("start", () => {
+		beforeEach(() => {
+			ApiInferenceLogger.configure({ enabled: true, sink: mockSink })
+		})
+
+		it("should emit a request log with simplified label format", () => {
+			ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, { model: "gpt-4" })
+
+			expect(mockSink).toHaveBeenCalledTimes(1)
+			expect(mockSink).toHaveBeenCalledWith("[API][request][OpenAI][gpt-4]", { model: "gpt-4" })
+		})
+
+		it("should use context.model in the request label", () => {
+			ApiInferenceLogger.start(
+				{
+					provider: "Anthropic",
+					operation: "createMessage",
+					model: "claude-3",
+					taskId: "task-123",
+					requestId: "req-456",
+				},
+				{ test: "data" },
+			)
+
+			expect(mockSink).toHaveBeenCalledWith(
+				"[API][request][Anthropic][claude-3]",
+				expect.objectContaining({ test: "data" }),
+			)
+		})
+
+		it("should fall back to payload.model when context.model is missing", () => {
+			ApiInferenceLogger.start(
+				{
+					provider: "OpenAI",
+					operation: "createMessage",
+				},
+				{ model: "gpt-4", foo: "bar" },
+			)
+
+			expect(mockSink).toHaveBeenCalledWith(
+				"[API][request][OpenAI][gpt-4]",
+				expect.objectContaining({ model: "gpt-4", foo: "bar" }),
+			)
+		})
+	})
+
+	describe("success", () => {
+		beforeEach(() => {
+			ApiInferenceLogger.configure({ enabled: true, sink: mockSink })
+		})
+
+		it("should emit a response log with simplified label format", ()
=> { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.success({ text: "Hello world", usage: { inputTokens: 10, outputTokens: 20 } }) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[OpenAI\]\[unknown\]\[\d+ms\]$/), + expect.objectContaining({ + text: "Hello world", + usage: { inputTokens: 10, outputTokens: 20 }, + }), + ) + }) + }) + + describe("error", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit an error log with simplified label format", () => { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.error(new Error("API request failed")) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[error\]\[OpenAI\]\[unknown\]\[\d+ms\]$/), + expect.objectContaining({ + name: "Error", + message: "API request failed", + }), + ) + }) + + it("should handle non-Error objects", () => { + const handle = ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + handle.error({ status: 401, message: "Unauthorized" }) + + expect(mockSink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[error\]\[test\]\[unknown\]\[\d+ms\]$/), + expect.objectContaining({ + status: 401, + message: "Unauthorized", + }), + ) + }) + }) + + describe("secret redaction", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should redact Authorization header", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { headers: { Authorization: "Bearer sk-secret-key-12345" } }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + headers: { Authorization: "[REDACTED]" }, + }), + ) + }) + + it("should redact apiKey field", () => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { apiKey: "sk-secret-12345" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + apiKey: "[REDACTED]", + }), + ) + }) + + it("should redact nested secret fields", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + config: { + auth: { + access_token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + api_key: "secret-api-key", + }, + }, + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + config: { + auth: { + access_token: "[REDACTED]", + api_key: "[REDACTED]", + }, + }, + }), + ) + }) + + it("should redact secret fields in arrays", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + items: [{ apiKey: "secret1" }, { apiKey: "secret2" }], + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + items: [{ apiKey: "[REDACTED]" }, { apiKey: "[REDACTED]" }], + }), + ) + }) + + it("should not redact non-secret fields", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { model: "gpt-4", messages: [{ role: "user", content: "Hello" }] }, + ) + + expect(mockSink).toHaveBeenCalledWith("[API][request][test][gpt-4]", { + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + }) + }) + }) + + describe("payload 
size limiting", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should truncate strings longer than 10,000 characters", () => { + const longString = "x".repeat(15_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: longString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + content: "[Truncated len=15000]", + }), + ) + }) + + it("should not truncate strings within the limit", () => { + const normalString = "x".repeat(5_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: normalString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + content: normalString, + }), + ) + }) + + it("should replace base64 image data with placeholder", () => { + const imageData = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { image: imageData }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + image: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + ) + }) + + it("should replace base64 image data for various image types", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + png: "data:image/png;base64,abc123", + jpeg: "data:image/jpeg;base64,abc123", + gif: "data:image/gif;base64,abc123", + webp: "data:image/webp;base64,abc123", + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + png: expect.stringMatching(/^\[ImageData len=\d+\]$/), + jpeg: expect.stringMatching(/^\[ImageData len=\d+\]$/), + gif: expect.stringMatching(/^\[ImageData len=\d+\]$/), + webp: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + ) + }) + + it("should cap arrays longer than 200 entries", () => { + const longArray = Array.from({ length: 250 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: longArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1] as { items: any[] } + + expect(payload.items.length).toBe(201) + expect(payload.items[200]).toBe("[...50 more items]") + }) + + it("should not cap arrays within the limit", () => { + const normalArray = Array.from({ length: 50 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: normalArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1] as { items: any[] } + + expect(payload.items.length).toBe(50) + }) + + it("should cap objects with more than 200 keys", () => { + const bigObject: Record = {} + for (let i = 0; i < 250; i++) { + bigObject[`key${i}`] = i + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, bigObject) + + const call = mockSink.mock.calls[0] + const payload = call[1] as Record + + const keys = Object.keys(payload) + expect(keys.length).toBe(201) + expect(payload["[...]"]).toBe("50 more keys omitted") + }) + + it("should apply size limiting recursively in nested objects", () => { + const nested = { + level1: { + longString: "x".repeat(15_000), + level2: { + imageData: "data:image/png;base64,abc123", + }, + }, + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, nested) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + 
expect.objectContaining({ + level1: expect.objectContaining({ + longString: "[Truncated len=15000]", + level2: expect.objectContaining({ + imageData: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + }), + ) + }) + + it("should apply size limiting recursively in arrays", () => { + const messages = [ + { role: "user", content: "x".repeat(15_000) }, + { role: "assistant", content: "data:image/png;base64,abc123" }, + ] + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { messages }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + messages: [ + expect.objectContaining({ + role: "user", + content: "[Truncated len=15000]", + }), + expect.objectContaining({ + role: "assistant", + content: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + ], + }), + ) + }) + }) + + describe("edge cases", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should handle null and undefined values", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { value: null, other: undefined }) + }).not.toThrow() + + expect(mockSink).toHaveBeenCalledWith( + "[API][request][test][unknown]", + expect.objectContaining({ + value: null, + other: undefined, + }), + ) + }) + + it("should handle empty objects", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + + it("should not throw on circular references", () => { + const obj: any = { name: "test" } + obj.self = obj + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, obj) + }).not.toThrow() + }) + + it("should handle primitive values in payload", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, "string payload" as any) + }).not.toThrow() + }) + + it("should handle functions in payload without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { callback: () => console.log("test") }, + ) + }).not.toThrow() + }) + + it("should handle BigInt values without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { bigValue: BigInt(9007199254740991) }, + ) + }).not.toThrow() + }) + }) + + describe("sink error handling", () => { + it("should not throw if sink throws an error", () => { + const throwingSink = vi.fn(() => { + throw new Error("Sink error") + }) + ApiInferenceLogger.configure({ enabled: true, sink: throwingSink }) + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + }) +}) diff --git a/src/api/logging/__tests__/logging-fetch.spec.ts b/src/api/logging/__tests__/logging-fetch.spec.ts new file mode 100644 index 00000000000..4b329a3f408 --- /dev/null +++ b/src/api/logging/__tests__/logging-fetch.spec.ts @@ -0,0 +1,271 @@ +import { ApiInferenceLogger } from "../ApiInferenceLogger" +import { createLoggingFetch } from "../logging-fetch" + +describe("createLoggingFetch", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: vi.fn() }) + }) + + afterEach(() => { + ApiInferenceLogger.configure({ enabled: false, sink: () => {} }) + }) + + it("logs request and JSON response with expected labels", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const baseFetch: typeof fetch = async (_input, init) => { 
+ expect(init?.method).toBe("POST") + return new Response(JSON.stringify({ ok: true }), { + status: 200, + headers: { "content-type": "application/json" }, + }) + } + + const loggingFetch = createLoggingFetch({ provider: "TestProvider" }, baseFetch) + await loggingFetch("https://example.com/v1/test", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ model: "test-model", foo: "bar" }), + }) + + expect(sink).toHaveBeenCalledWith( + "[API][request][TestProvider][test-model]", + expect.objectContaining({ model: "test-model", foo: "bar" }), + ) + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[TestProvider\]\[test-model\]\[\d+ms\]$/), + expect.objectContaining({ ok: true }), + ) + }) + + it("tees SSE responses so the consumer can still read the stream", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const encoder = new TextEncoder() + + const events = [ + { + id: "1", + model: "test-model", + object: "chat.completion.chunk", + choices: [{ index: 0, delta: { role: "assistant", content: "He" } }], + }, + { + id: "1", + model: "test-model", + object: "chat.completion.chunk", + choices: [{ index: 0, delta: { content: "llo" } }], + }, + { + id: "1", + model: "test-model", + object: "chat.completion.chunk", + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: "tc_1", + type: "function", + function: { name: "do", arguments: '{"a":' }, + }, + ], + }, + }, + ], + }, + { + id: "1", + model: "test-model", + object: "chat.completion.chunk", + choices: [ + { + index: 0, + delta: { + tool_calls: [{ index: 0, id: "tc_1", type: "function", function: { arguments: "1}" } }], + }, + }, + ], + }, + { + id: "1", + model: "test-model", + object: "chat.completion.chunk", + choices: [{ index: 0, delta: {}, finish_reason: "tool_calls" }], + usage: { prompt_tokens: 1, completion_tokens: 2 }, + }, + ] + + const sse = + `: OPENROUTER PROCESSING\n\n` + + events.map((e) => `data: ${JSON.stringify(e)}\n\n`).join("") + + `data: [DONE]\n\n` + const baseFetch: typeof fetch = async () => { + return new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)) + controller.close() + }, + }), + { + status: 200, + headers: { "content-type": "text/event-stream" }, + }, + ) + } + + const loggingFetch = createLoggingFetch({ provider: "TestProvider" }, baseFetch) + const res = await loggingFetch("https://example.com/v1/stream", { + method: "POST", + body: JSON.stringify({ model: "test-model" }), + }) + + // Consumer still receives the body. + const consumerText = await res.text() + expect(consumerText).toContain("data:") + + // Logger gets a streaming label. 
+ expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[TestProvider\]\[test-model\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ + object: "chat.completion", + model: "test-model", + choices: [ + expect.objectContaining({ + index: 0, + finish_reason: "tool_calls", + message: expect.objectContaining({ + role: "assistant", + content: "Hello", + }), + }), + ], + usage: expect.objectContaining({ prompt_tokens: 1, completion_tokens: 2 }), + __sse: expect.objectContaining({ + format: "sse", + __rawSse: expect.anything(), + events: expect.any(Array), + }), + }), + ) + }) + + it("logs non-OpenAI SSE in a structured preview object", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const encoder = new TextEncoder() + const sse = + `event: ping\n` + + `data: {"type":"ping","t":123}\n\n` + + `event: custom\n` + + `data: hello world\n\n` + + `data: [DONE]\n\n` + + const baseFetch: typeof fetch = async () => { + return new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)) + controller.close() + }, + }), + { + status: 200, + headers: { "content-type": "text/event-stream" }, + }, + ) + } + + const loggingFetch = createLoggingFetch({ provider: "TestProvider" }, baseFetch) + const res = await loggingFetch("https://example.com/v1/stream", { + method: "POST", + body: JSON.stringify({ model: "test-model" }), + }) + + // Drain the consumer stream and allow the async logger to finish. + await res.text() + await new Promise((r) => setTimeout(r, 0)) + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[TestProvider\]\[test-model\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ + format: "sse", + __rawSse: expect.anything(), + events: expect.arrayContaining([ + expect.objectContaining({ event: "ping", data: expect.objectContaining({ type: "ping" }) }), + expect.objectContaining({ event: "custom" }), + ]), + }), + ) + }) + + it("assembles Anthropic Messages SSE into a final message object", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const encoder = new TextEncoder() + const sse = + `event: message_start\n` + + `data: {"type":"message_start","message":{"id":"msg_1","type":"message","role":"assistant","model":"claude-test","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"output_tokens":0}}}\n\n` + + `event: content_block_start\n` + + `data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":"","signature":""}}\n\n` + + `event: content_block_delta\n` + + `data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Hello"}}\n\n` + + `event: content_block_delta\n` + + `data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig_123"}}\n\n` + + `event: content_block_stop\n` + + `data: {"type":"content_block_stop","index":0}\n\n` + + `event: message_delta\n` + + `data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":2}}\n\n` + + `event: message_stop\n` + + `data: {"type":"message_stop"}\n\n` + + const baseFetch: typeof fetch = async () => { + return new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)) + controller.close() + }, + }), + { + status: 200, + headers: { "content-type": "text/event-stream" }, + }, + ) + } + + const loggingFetch = createLoggingFetch({ provider: "Anthropic" }, 
baseFetch) + const res = await loggingFetch("https://example.com/v1/messages", { + method: "POST", + body: JSON.stringify({ model: "claude-test" }), + }) + await res.text() + await new Promise((r) => setTimeout(r, 0)) + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[Anthropic\]\[claude-test\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ + type: "message", + id: "msg_1", + model: "claude-test", + role: "assistant", + stop_reason: "end_turn", + usage: expect.objectContaining({ input_tokens: 1, output_tokens: 2 }), + content: [expect.objectContaining({ type: "thinking", thinking: "Hello", signature: "sig_123" })], + __sse: expect.objectContaining({ + format: "sse", + __rawSse: expect.anything(), + events: expect.any(Array), + }), + }), + ) + }) +}) diff --git a/src/api/logging/logging-fetch.ts b/src/api/logging/logging-fetch.ts new file mode 100644 index 00000000000..37c76531cdb --- /dev/null +++ b/src/api/logging/logging-fetch.ts @@ -0,0 +1,758 @@ +import { ApiInferenceLogger } from "./ApiInferenceLogger" + +export type LoggingFetchOptions = { + provider: string + /** + * Maximum number of bytes to buffer from request/response bodies. + * This protects against OOM when logging is enabled. + */ + maxBodySizeBytes?: number +} + +const DEFAULT_MAX_BODY_SIZE_BYTES = 512 * 1024 + +function getMaxBodySizeBytes(maxBodySizeBytes?: number): number { + return typeof maxBodySizeBytes === "number" && maxBodySizeBytes > 0 ? maxBodySizeBytes : DEFAULT_MAX_BODY_SIZE_BYTES +} + +function tryParseJson(text: string): unknown { + try { + return JSON.parse(text) + } catch { + return undefined + } +} + +function extractModelId(payload: unknown): string | undefined { + if (!payload || typeof payload !== "object") return undefined + + const rec = payload as Record + const model = rec["model"] + return typeof model === "string" && model.trim().length > 0 ? 
model : undefined +} + +type OpenAiToolCall = { + id: string + type: "function" + function: { name: string; arguments: string } +} + +type OpenAiAssembledChoice = { + index: number + message: { + role: string + content: string + reasoning?: string + reasoning_details?: unknown + tool_calls?: OpenAiToolCall[] + } + finish_reason: string | null +} + +type OpenAiAssembledResponse = { + id?: string + object: "chat.completion" + created?: number + model?: string + choices: OpenAiAssembledChoice[] + usage?: unknown +} + +type ParsedSsePayload = { + model?: string + payload: unknown +} + +type SseDebugInfo = { + format: "sse" + /** Total SSE blocks (event groups) in the raw stream */ + blocks: number + /** Number of parsed blocks included in `events` (capped) */ + loggedBlocks: number + /** Bounded raw SSE (comments/keepalives stripped) */ + __rawSse: string | TruncatedStringSummary + /** Parsed events (best-effort; data may be JSON or a truncated string summary) */ + events: Array<{ event?: string; data?: unknown }> +} + +const DEFAULT_SSE_PREVIEW_CHARS = 1500 +const DEFAULT_SSE_MAX_EVENTS = 50 + +type TruncatedStringSummary = { + truncated: true + length: number + head: string + tail: string +} + +function summarizeLongString( + text: string, + maxChars: number = DEFAULT_SSE_PREVIEW_CHARS, +): string | TruncatedStringSummary { + if (text.length <= maxChars * 2) return text + return { + truncated: true, + length: text.length, + head: text.slice(0, maxChars), + tail: text.slice(-maxChars), + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +type AnthropicUsage = { + input_tokens?: number + output_tokens?: number + cache_creation_input_tokens?: number + cache_read_input_tokens?: number +} + +type AnthropicContentBlock = + | { type: "text"; text: string; [key: string]: unknown } + | { type: "thinking"; thinking: string; [key: string]: unknown } + | { type: "tool_use"; id?: string; name?: string; input?: unknown; [key: string]: unknown } + | { type: string; [key: string]: unknown } + +type AnthropicAssembledMessage = { + id?: string + type: "message" + role?: string + model?: string + content: AnthropicContentBlock[] + stop_reason?: string | null + stop_sequence?: string | null + usage?: AnthropicUsage +} + +function parseAnthropicStreamingSse(text: string): AnthropicAssembledMessage | undefined { + // Assemble Anthropic Messages SSE into a final message object. + // Reference: https://docs.anthropic.com/en/api/messages-streaming + + const blocks = text + .split(/\n\n+/g) + .map((b) => b.trim()) + .filter(Boolean) + + let message: AnthropicAssembledMessage | undefined + const contentByIndex = new Map() + const toolInputJsonByIndex = new Map() + + type AnthropicUnhandledDelta = { + sseBlockIndex: number + event?: string + delta: Record + } + + function pushUnhandledDelta(index: number, entry: AnthropicUnhandledDelta) { + const current = contentByIndex.get(index) + if (!current) return + const rec = current as Record + const existing = rec["__unhandled_deltas"] + const next = Array.isArray(existing) ? 
(existing as unknown[]).slice(0, DEFAULT_SSE_MAX_EVENTS) : [] + next.push(entry) + rec["__unhandled_deltas"] = next + } + + let sseBlockIndex = 0 + for (const block of blocks) { + let eventName: string | undefined + const dataParts: string[] = [] + + for (const line of block.split("\n")) { + const trimmed = line.trimEnd() + if (trimmed.startsWith(":")) continue + if (trimmed.startsWith("event:")) { + eventName = trimmed.slice("event:".length).trim() || undefined + continue + } + if (trimmed.startsWith("data:")) { + dataParts.push(trimmed.slice("data:".length).trim()) + } + } + + if (dataParts.length === 0) { + sseBlockIndex++ + continue + } + const dataText = dataParts.join("\n") + if (dataText === "[DONE]") break + + const parsed = tryParseJson(dataText) + if (!parsed || !isRecord(parsed)) { + sseBlockIndex++ + continue + } + + const typeFromData = typeof parsed["type"] === "string" ? (parsed["type"] as string) : undefined + const event = eventName ?? typeFromData + if (!event) { + sseBlockIndex++ + continue + } + + if (event === "message_start") { + const m = isRecord(parsed["message"]) ? (parsed["message"] as Record) : undefined + if (!m) continue + + // Preserve the raw message_start.message object as much as possible. + // We'll replace content with the assembled content blocks below. + message = { + ...(m as Omit), + type: "message", + content: [], + } + sseBlockIndex++ + continue + } + + if (!message) { + // Not an Anthropic message stream. + return undefined + } + + if (event === "content_block_start") { + const index = typeof parsed["index"] === "number" ? (parsed["index"] as number) : 0 + const cb = isRecord(parsed["content_block"]) + ? (parsed["content_block"] as Record) + : undefined + if (!cb) continue + + const cbType = typeof cb["type"] === "string" ? (cb["type"] as string) : "unknown" + const block: Record = { ...cb, type: cbType } + if (cbType === "text") { + block["text"] = typeof cb["text"] === "string" ? (cb["text"] as string) : "" + } + if (cbType === "thinking") { + block["thinking"] = typeof cb["thinking"] === "string" ? (cb["thinking"] as string) : "" + } + if (cbType === "tool_use") { + toolInputJsonByIndex.set(index, []) + } + contentByIndex.set(index, block as AnthropicContentBlock) + sseBlockIndex++ + continue + } + + if (event === "content_block_delta") { + const index = typeof parsed["index"] === "number" ? (parsed["index"] as number) : 0 + const delta = isRecord(parsed["delta"]) ? (parsed["delta"] as Record) : undefined + if (!delta) continue + const deltaType = typeof delta["type"] === "string" ? (delta["type"] as string) : "unknown" + + const current = contentByIndex.get(index) + if (!current) { + sseBlockIndex++ + continue + } + + const currentRec = current as Record + if (current.type === "text" && deltaType === "text_delta" && typeof delta["text"] === "string") { + const prev = typeof currentRec["text"] === "string" ? (currentRec["text"] as string) : "" + currentRec["text"] = prev + (delta["text"] as string) + } else if ( + current.type === "thinking" && + deltaType === "thinking_delta" && + typeof delta["thinking"] === "string" + ) { + const prev = typeof currentRec["thinking"] === "string" ? (currentRec["thinking"] as string) : "" + currentRec["thinking"] = prev + (delta["thinking"] as string) + } else if (deltaType === "signature_delta" && typeof delta["signature"] === "string") { + // Interleaved thinking uses signature deltas that must be preserved alongside thinking blocks. 
+ // Some streams initialize signature as "" in content_block_start and then send the real value via signature_delta. + const prev = typeof currentRec["signature"] === "string" ? (currentRec["signature"] as string) : "" + currentRec["signature"] = prev + (delta["signature"] as string) + } else if ( + current.type === "tool_use" && + deltaType === "input_json_delta" && + (typeof delta["partial_json"] === "string" || typeof (delta as any)["partial_json"] === "string") + ) { + const partial = (delta["partial_json"] ?? (delta as any)["partial_json"]) as string + const acc = toolInputJsonByIndex.get(index) + if (acc) acc.push(partial) + } else { + pushUnhandledDelta(index, { + sseBlockIndex, + event, + delta, + }) + } + sseBlockIndex++ + continue + } + + if (event === "content_block_stop") { + const index = typeof parsed["index"] === "number" ? (parsed["index"] as number) : 0 + const current = contentByIndex.get(index) + if (current?.type === "tool_use") { + const parts = toolInputJsonByIndex.get(index) + if (parts && parts.length > 0) { + const joined = parts.join("") + const parsedJson = tryParseJson(joined) + current.input = typeof parsedJson === "undefined" ? joined : parsedJson + } + } + sseBlockIndex++ + continue + } + + if (event === "message_delta") { + const delta = isRecord(parsed["delta"]) ? (parsed["delta"] as Record) : undefined + if (delta) { + // Preserve all message-level delta fields (except content/type) so we don't drop + // new/unknown fields introduced by Anthropic. + for (const [k, v] of Object.entries(delta)) { + if (k === "content" || k === "type") continue + ;(message as unknown as Record)[k] = v + } + } + + if (isRecord(parsed["usage"])) { + message.usage = { ...(message.usage ?? {}), ...(parsed["usage"] as AnthropicUsage) } + } + sseBlockIndex++ + continue + } + + if (event === "message_stop") { + break + } + + sseBlockIndex++ + } + + if (!message) return undefined + + // Finalize content array in index order. + message.content = Array.from(contentByIndex.entries()) + .sort(([a], [b]) => a - b) + .map(([, block]) => block) + + return message +} + +function parseOpenAiStreamingSse(text: string): OpenAiAssembledResponse | undefined { + // Parse SSE text and attempt to assemble an OpenAI-style chat completion response. + // If the stream isn't OpenAI-like, return undefined and fall back to raw text logging. + + const byChoiceIndex = new Map< + number, + { + role?: string + contentParts: string[] + reasoningParts: string[] + finishReason: string | null + toolCallsById: Map + reasoningDetails: Map< + string, + { type: string; index: number; text?: string; summary?: string; data?: string } + > + } + >() + + let sawChoices = false + let id: string | undefined + let created: number | undefined + let model: string | undefined + let usage: unknown = undefined + + const blocks = text.split(/\n\n+/g) + for (const block of blocks) { + if (!block.trim()) continue + + const lines = block.split("\n") + const dataParts: string[] = [] + for (const line of lines) { + const trimmed = line.trimEnd() + // Ignore SSE comment/keepalive lines like ": OPENROUTER PROCESSING". 
+ if (trimmed.startsWith(":")) continue + if (trimmed.startsWith("data:")) { + dataParts.push(trimmed.slice("data:".length).trim()) + } + } + + if (dataParts.length === 0) continue + + const data = dataParts.join("\n") + if (data === "[DONE]") break + + const parsed = tryParseJson(data) + if (!parsed || !isRecord(parsed)) continue + + const choices = parsed["choices"] + if (!Array.isArray(choices)) continue + + sawChoices = true + if (typeof parsed["id"] === "string") id = parsed["id"] as string + if (typeof parsed["created"] === "number") created = parsed["created"] as number + if (typeof parsed["model"] === "string") model = parsed["model"] as string + if ("usage" in parsed) usage = parsed["usage"] + + for (const choiceAny of choices) { + if (!isRecord(choiceAny)) continue + const index = typeof choiceAny["index"] === "number" ? (choiceAny["index"] as number) : 0 + const finishReason = + choiceAny["finish_reason"] === null || typeof choiceAny["finish_reason"] === "string" + ? (choiceAny["finish_reason"] as string | null) + : null + + const delta = isRecord(choiceAny["delta"]) ? (choiceAny["delta"] as Record) : undefined + if (!delta) continue + + let state = byChoiceIndex.get(index) + if (!state) { + state = { + contentParts: [], + reasoningParts: [], + finishReason: null, + toolCallsById: new Map(), + reasoningDetails: new Map(), + } + byChoiceIndex.set(index, state) + } + + if (!state.role && typeof delta["role"] === "string") state.role = delta["role"] as string + + if (typeof delta["content"] === "string") { + state.contentParts.push(delta["content"] as string) + } + + for (const key of ["reasoning", "reasoning_content"] as const) { + if (typeof delta[key] === "string") { + state.reasoningParts.push(delta[key] as string) + break + } + } + + const toolCalls = delta["tool_calls"] + if (Array.isArray(toolCalls)) { + for (const tcAny of toolCalls) { + if (!isRecord(tcAny)) continue + const tcId = typeof tcAny["id"] === "string" ? (tcAny["id"] as string) : undefined + const tcIndex = typeof tcAny["index"] === "number" ? (tcAny["index"] as number) : undefined + const idKey = tcId ?? `index-${tcIndex ?? 0}` + const fn = isRecord(tcAny["function"]) ? (tcAny["function"] as Record) : undefined + const name = typeof fn?.["name"] === "string" ? (fn?.["name"] as string) : undefined + const args = typeof fn?.["arguments"] === "string" ? (fn?.["arguments"] as string) : undefined + + let existing = state.toolCallsById.get(idKey) + if (!existing) { + existing = { id: idKey, name, argsParts: [] } + state.toolCallsById.set(idKey, existing) + } + if (!existing.name && name) existing.name = name + if (args) existing.argsParts.push(args) + } + } + + const reasoningDetailsAny = delta["reasoning_details"] + if (Array.isArray(reasoningDetailsAny)) { + for (const detailAny of reasoningDetailsAny) { + if (!isRecord(detailAny)) continue + const type = typeof detailAny["type"] === "string" ? (detailAny["type"] as string) : "unknown" + const rIndex = typeof detailAny["index"] === "number" ? (detailAny["index"] as number) : 0 + const key = `${type}-${rIndex}` + const existing = state.reasoningDetails.get(key) + const textPart = typeof detailAny["text"] === "string" ? (detailAny["text"] as string) : undefined + const summaryPart = + typeof detailAny["summary"] === "string" ? (detailAny["summary"] as string) : undefined + const dataPart = typeof detailAny["data"] === "string" ? (detailAny["data"] as string) : undefined + + if (existing) { + if (textPart) existing.text = (existing.text ?? 
"") + textPart + if (summaryPart) existing.summary = (existing.summary ?? "") + summaryPart + if (dataPart) existing.data = (existing.data ?? "") + dataPart + } else { + state.reasoningDetails.set(key, { + type, + index: rIndex, + ...(textPart ? { text: textPart } : {}), + ...(summaryPart ? { summary: summaryPart } : {}), + ...(dataPart ? { data: dataPart } : {}), + }) + } + } + } + + if (finishReason !== null) state.finishReason = finishReason + } + } + + if (!sawChoices) return undefined + + const choicesOut: OpenAiAssembledChoice[] = Array.from(byChoiceIndex.entries()) + .sort(([a], [b]) => a - b) + .map(([index, state]) => { + const toolCallsOut: OpenAiToolCall[] = Array.from(state.toolCallsById.values()) + .filter((tc) => typeof tc.name === "string" && tc.name.length > 0) + .map((tc) => ({ + id: tc.id, + type: "function", + function: { name: tc.name!, arguments: tc.argsParts.join("") }, + })) + + const reasoningDetailsOut = + state.reasoningDetails.size > 0 + ? Array.from(state.reasoningDetails.values()).sort((a, b) => a.index - b.index) + : undefined + + return { + index, + message: { + role: state.role ?? "assistant", + content: state.contentParts.join(""), + ...(state.reasoningParts.length > 0 ? { reasoning: state.reasoningParts.join("") } : {}), + ...(reasoningDetailsOut ? { reasoning_details: reasoningDetailsOut } : {}), + ...(toolCallsOut.length > 0 ? { tool_calls: toolCallsOut } : {}), + }, + finish_reason: state.finishReason, + } + }) + + return { + id, + object: "chat.completion", + created, + model, + choices: choicesOut, + ...(typeof usage === "undefined" ? {} : { usage }), + } +} + +function parseSsePayload(text: string): ParsedSsePayload { + // Log SSE in a structured way so callers can see the *actual* wire format. + // We always include `events` + `__rawSse`, and optionally include an `assembled` object + // for convenience when we recognize a stream format. + const stripped = text + .split("\n") + .filter((line) => !line.trimStart().startsWith(":")) + .join("\n") + + const blocks = stripped.split(/\n\n+/g) + const events: Array<{ event?: string; data?: unknown }> = [] + let totalBlocks = 0 + + for (const block of blocks) { + if (!block.trim()) continue + totalBlocks++ + if (events.length >= DEFAULT_SSE_MAX_EVENTS) continue + + let eventName: string | undefined + const dataParts: string[] = [] + for (const line of block.split("\n")) { + const trimmed = line.trimEnd() + if (trimmed.startsWith("event:")) { + eventName = trimmed.slice("event:".length).trim() || undefined + continue + } + if (trimmed.startsWith("data:")) { + dataParts.push(trimmed.slice("data:".length).trim()) + } + } + + if (dataParts.length === 0) continue + const dataText = dataParts.join("\n") + if (dataText === "[DONE]") break + + const maybeJson = tryParseJson(dataText) + const dataPayload = typeof maybeJson === "undefined" ? summarizeLongString(dataText) : maybeJson + events.push({ ...(eventName ? 
{ event: eventName } : {}), data: dataPayload }) + } + + const debug: SseDebugInfo = { + format: "sse", + blocks: totalBlocks, + loggedBlocks: events.length, + __rawSse: summarizeLongString(stripped), + events, + } + + const openAi = parseOpenAiStreamingSse(text) + if (openAi) { + return { + model: openAi.model, + payload: { + ...openAi, + __sse: debug, + }, + } + } + + const anthropic = parseAnthropicStreamingSse(text) + if (anthropic) { + return { + model: anthropic.model, + payload: { + ...anthropic, + __sse: debug, + }, + } + } + + return { payload: debug } +} + +async function readStreamToText(stream: ReadableStream, maxBytes: number): Promise { + const reader = stream.getReader() + const decoder = new TextDecoder() + let bytesRead = 0 + const parts: string[] = [] + + try { + while (true) { + const { done, value } = await reader.read() + if (done) break + if (!value || value.length === 0) continue + + const remaining = maxBytes - bytesRead + if (remaining <= 0) break + + const slice = value.length > remaining ? value.subarray(0, remaining) : value + bytesRead += slice.length + parts.push(decoder.decode(slice, { stream: true })) + + if (bytesRead >= maxBytes) break + } + } finally { + reader.releaseLock() + } + + parts.push(decoder.decode()) + return parts.join("") +} + +async function getRequestBodyForLogging(input: RequestInfo | URL, init: RequestInit | undefined, maxBytes: number) { + // Prefer init.body because that's the most common usage. + const body = init?.body + if (typeof body === "string") { + const maybeJson = tryParseJson(body) + return { payload: maybeJson ?? body, model: extractModelId(maybeJson) } + } + + if (body instanceof URLSearchParams) { + return { payload: body.toString(), model: undefined } + } + + // If fetch was called with a Request and no init, try to read its body. + if (!body && typeof Request !== "undefined" && input instanceof Request) { + try { + const cloned = input.clone() + if (cloned.body) { + const text = await readStreamToText(cloned.body as ReadableStream, maxBytes) + const maybeJson = tryParseJson(text) + return { payload: maybeJson ?? text, model: extractModelId(maybeJson) } + } + } catch { + // Ignore body read errors. + } + } + + if (body) { + return { + payload: `[Unlogged body type=${Object.prototype.toString.call(body)}]`, + model: undefined, + } + } + + return { payload: undefined, model: undefined } +} + +async function logNonStreamingResponse( + response: Response, + provider: string, + model: string, + durationMs: number, + maxBytes: number, +): Promise { + try { + const clone = response.clone() + const contentType = clone.headers.get("content-type") ?? "" + + let payload: unknown = undefined + if (clone.body) { + const text = await readStreamToText(clone.body as ReadableStream, maxBytes) + if (contentType.includes("application/json") || contentType.includes("+json")) { + payload = tryParseJson(text) ?? text + } else { + payload = text + } + } + + ApiInferenceLogger.logRaw(`[API][response][${provider}][${model}][${durationMs}ms]`, payload) + } catch (error) { + ApiInferenceLogger.logRawError(`[API][error][${provider}][${model}][${durationMs}ms]`, error) + } +} + +async function logSseResponse( + stream: ReadableStream, + provider: string, + fallbackModel: string, + startedAt: number, + maxBytes: number, +): Promise { + try { + const text = await readStreamToText(stream, maxBytes) + const { payload, model: modelFromStream } = parseSsePayload(text) + const durationMs = Date.now() - startedAt + const model = modelFromStream ?? 
fallbackModel + ApiInferenceLogger.logRaw(`[API][response][${provider}][${model}][${durationMs}ms][streaming]`, payload) + } catch (error) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${provider}][${fallbackModel}][${durationMs}ms]`, error) + } +} + +export function createLoggingFetch(options: LoggingFetchOptions, baseFetch: typeof fetch = fetch): typeof fetch { + return async (input: RequestInfo | URL, init?: RequestInit): Promise => { + // Fast path: no overhead when disabled. + if (!ApiInferenceLogger.isEnabled()) { + return baseFetch(input, init) + } + + const maxBytes = getMaxBodySizeBytes(options.maxBodySizeBytes) + const startedAt = Date.now() + + const { payload: requestPayload, model: requestModel } = await getRequestBodyForLogging(input, init, maxBytes) + const model = requestModel ?? "unknown" + + ApiInferenceLogger.logRaw(`[API][request][${options.provider}][${model}]`, requestPayload) + + try { + const response = await baseFetch(input, init) + const durationMs = Date.now() - startedAt + + const contentType = response.headers.get("content-type") ?? "" + const isSse = contentType.includes("text/event-stream") + + if (!isSse) { + await logNonStreamingResponse(response, options.provider, model, durationMs, maxBytes) + return response + } + + if (!response.body) { + ApiInferenceLogger.logRaw( + `[API][response][${options.provider}][${model}][${durationMs}ms][streaming]`, + undefined, + ) + return response + } + + // Tee the stream so the consumer can read one branch while we buffer the other. + const [consumerStream, logStream] = response.body.tee() + // Note: returning a new Response means some Response properties (e.g. `url`) are not preserved. + const responseForConsumer = new Response(consumerStream, { + status: response.status, + statusText: response.statusText, + headers: new Headers(response.headers), + }) + void logSseResponse(logStream, options.provider, model, startedAt, maxBytes) + return responseForConsumer + } catch (error) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${options.provider}][${model}][${durationMs}ms]`, error) + throw error + } + } +} diff --git a/src/api/providers/__tests__/anthropic-vertex-logging.spec.ts b/src/api/providers/__tests__/anthropic-vertex-logging.spec.ts new file mode 100644 index 00000000000..45b61e08ce2 --- /dev/null +++ b/src/api/providers/__tests__/anthropic-vertex-logging.spec.ts @@ -0,0 +1,77 @@ +// npx vitest run api/providers/__tests__/anthropic-vertex-logging.spec.ts + +import type { ApiHandlerOptions } from "../../../shared/api" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" + +const mockMessagesCreate = vitest.fn() + +vitest.mock("@anthropic-ai/vertex-sdk", () => { + return { + AnthropicVertex: vitest.fn().mockImplementation(() => ({ + messages: { + create: mockMessagesCreate, + }, + })), + } +}) + +describe("AnthropicVertexHandler API logging", () => { + beforeEach(() => { + vitest.clearAllMocks() + vitest.spyOn(ApiInferenceLogger, "isEnabled").mockReturnValue(true) + vitest.spyOn(ApiInferenceLogger, "logRaw").mockImplementation(() => {}) + vitest.spyOn(ApiInferenceLogger, "logRawError").mockImplementation(() => {}) + }) + + it("logs request + streaming response", async () => { + mockMessagesCreate.mockResolvedValueOnce({ + async *[Symbol.asyncIterator]() { + yield { + type: "message_start", + message: { + usage: { input_tokens: 10, output_tokens: 5 }, + }, + } + yield { + type: "content_block_start", + index: 
0, + content_block: { type: "text", text: "Hello" }, + } + yield { + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: " world" }, + } + }, + }) + + const { AnthropicVertexHandler } = await import("../anthropic-vertex") + const options: ApiHandlerOptions = { + apiModelId: "claude-3-5-sonnet-v2@20241022", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + } + const handler = new AnthropicVertexHandler(options) + + for await (const _ of handler.createMessage("system", [{ role: "user", content: "hi" }])) { + // drain + } + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[Anthropic Vertex\]\[.+\]$/), + expect.objectContaining({ + model: expect.any(String), + stream: true, + }), + ) + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[Anthropic Vertex\]\[.+\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ + type: "message", + model: expect.any(String), + content: expect.any(Array), + }), + ) + }) +}) diff --git a/src/api/providers/__tests__/bedrock-logging.spec.ts b/src/api/providers/__tests__/bedrock-logging.spec.ts new file mode 100644 index 00000000000..8eab46dcee9 --- /dev/null +++ b/src/api/providers/__tests__/bedrock-logging.spec.ts @@ -0,0 +1,121 @@ +// npx vitest run api/providers/__tests__/bedrock-logging.spec.ts + +import { AwsBedrockHandler } from "../bedrock" +import type { ApiHandlerOptions } from "../../../shared/api" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" + +const mockSend = vitest.fn() + +vitest.mock("@aws-sdk/client-bedrock-runtime", () => { + return { + BedrockRuntimeClient: vitest.fn().mockImplementation(() => ({ + send: mockSend, + config: { region: "us-east-1" }, + middlewareStack: { + clone: () => ({ resolve: () => {} }), + use: () => {}, + }, + })), + ConverseStreamCommand: vitest.fn((params) => ({ + ...params, + input: params, + middlewareStack: { + clone: () => ({ resolve: () => {} }), + use: () => {}, + }, + })), + ConverseCommand: vitest.fn((params) => ({ + ...params, + input: params, + middlewareStack: { + clone: () => ({ resolve: () => {} }), + use: () => {}, + }, + })), + } +}) + +vitest.mock("@aws-sdk/credential-providers", () => ({ + fromIni: vitest.fn().mockReturnValue({ + accessKeyId: "profile-access-key", + secretAccessKey: "profile-secret-key", + }), +})) + +vitest.mock("@smithy/smithy-client", () => ({ + throwDefaultError: vitest.fn(), +})) + +describe("AwsBedrockHandler API logging", () => { + beforeEach(() => { + vitest.clearAllMocks() + vitest.spyOn(ApiInferenceLogger, "isEnabled").mockReturnValue(true) + vitest.spyOn(ApiInferenceLogger, "logRaw").mockImplementation(() => {}) + vitest.spyOn(ApiInferenceLogger, "logRawError").mockImplementation(() => {}) + }) + + it("logs request + streaming response", async () => { + mockSend.mockResolvedValueOnce({ + stream: { + [Symbol.asyncIterator]: async function* () { + yield { + contentBlockStart: { + start: { text: "Hello" }, + contentBlockIndex: 0, + }, + } + yield { + contentBlockDelta: { + delta: { text: " world" }, + contentBlockIndex: 0, + }, + } + yield { + metadata: { + usage: { + inputTokens: 10, + outputTokens: 2, + }, + }, + } + }, + }, + }) + + const options: ApiHandlerOptions = { + apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + } + const handler = new AwsBedrockHandler(options) + + const stream = 
handler.createMessage("system", [{ role: "user", content: "hi" }]) + for await (const _ of stream) { + // consume + } + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[Bedrock\]\[anthropic\.claude-3-5-sonnet-20241022-v2:0\]$/), + expect.objectContaining({ + modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + messages: expect.any(Array), + }), + ) + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching( + /^\[API\]\[response\]\[Bedrock\]\[anthropic\.claude-3-5-sonnet-20241022-v2:0\]\[\d+ms\]\[streaming\]$/, + ), + expect.objectContaining({ + model: "anthropic.claude-3-5-sonnet-20241022-v2:0", + message: expect.objectContaining({ + role: "assistant", + content: "Hello world", + }), + usage: expect.objectContaining({ inputTokens: 10, outputTokens: 2 }), + __stream: expect.objectContaining({ format: "bedrock" }), + }), + ) + }) +}) diff --git a/src/api/providers/__tests__/gemini-handler.spec.ts b/src/api/providers/__tests__/gemini-handler.spec.ts index 541ffd5611c..798208dee0f 100644 --- a/src/api/providers/__tests__/gemini-handler.spec.ts +++ b/src/api/providers/__tests__/gemini-handler.spec.ts @@ -2,8 +2,13 @@ import { t } from "i18next" import { GeminiHandler } from "../gemini" import type { ApiHandlerOptions } from "../../../shared/api" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" describe("GeminiHandler backend support", () => { + afterEach(() => { + ApiInferenceLogger.configure({ enabled: false, sink: () => {} }) + }) + it("passes tools for URL context and grounding in config", async () => { const options = { apiProvider: "gemini", @@ -19,6 +24,46 @@ describe("GeminiHandler backend support", () => { expect(config.tools).toEqual([{ urlContext: {} }, { googleSearch: {} }]) }) + it("logs request/response via ApiInferenceLogger when enabled", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const options = { + apiProvider: "gemini", + apiModelId: "gemini-1.5-flash", // just needs to exist for getModel + } as ApiHandlerOptions + const handler = new GeminiHandler(options) + + const mockStream = (async function* () { + yield { + candidates: [{ content: { parts: [{ text: "hi" }] }, finishReason: "STOP" }], + usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 2 }, + responseId: "r_1", + } + })() + + // @ts-ignore access private client + handler["client"].models.generateContentStream = vi.fn().mockResolvedValue(mockStream) + + // Drain + for await (const _ of handler.createMessage("sys", [] as any)) { + // noop + } + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[Gemini\]\[.+\]$/), + expect.objectContaining({ + model: expect.any(String), + contents: expect.any(Array), + config: expect.any(Object), + }), + ) + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[Gemini\]\[.+\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ candidates: expect.any(Array) }), + ) + }) + it("completePrompt passes config overrides without tools when URL context and grounding disabled", async () => { const options = { apiProvider: "gemini", diff --git a/src/api/providers/__tests__/native-ollama-logging.spec.ts b/src/api/providers/__tests__/native-ollama-logging.spec.ts new file mode 100644 index 00000000000..d53256a9c4e --- /dev/null +++ b/src/api/providers/__tests__/native-ollama-logging.spec.ts @@ -0,0 +1,78 @@ +// npx vitest run 
api/providers/__tests__/native-ollama-logging.spec.ts + +import { NativeOllamaHandler } from "../native-ollama" +import type { ApiHandlerOptions } from "../../../shared/api" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" +import { getOllamaModels } from "../fetchers/ollama" + +const mockChat = vitest.fn() +vitest.mock("ollama", () => { + return { + Ollama: vitest.fn().mockImplementation(() => ({ + chat: mockChat, + })), + Message: vitest.fn(), + } +}) + +vitest.mock("../fetchers/ollama", () => ({ + getOllamaModels: vitest.fn(), +})) + +describe("NativeOllamaHandler API logging", () => { + beforeEach(() => { + vitest.clearAllMocks() + vitest.spyOn(ApiInferenceLogger, "isEnabled").mockReturnValue(true) + vitest.spyOn(ApiInferenceLogger, "logRaw").mockImplementation(() => {}) + vitest.spyOn(ApiInferenceLogger, "logRawError").mockImplementation(() => {}) + + vitest.mocked(getOllamaModels).mockResolvedValue({ + llama2: { + contextWindow: 4096, + maxTokens: 4096, + supportsImages: false, + supportsPromptCache: false, + }, + }) + }) + + it("logs request + streaming response", async () => { + mockChat.mockImplementation(async function* () { + yield { message: { content: "Hello" } } + yield { message: { content: " world" }, eval_count: 2, prompt_eval_count: 10 } + }) + + const options: ApiHandlerOptions = { + apiModelId: "llama2", + ollamaModelId: "llama2", + ollamaBaseUrl: "http://localhost:11434", + } + const handler = new NativeOllamaHandler(options) + + const stream = handler.createMessage("system", [{ role: "user", content: "hi" }]) + for await (const _ of stream) { + // consume + } + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[Ollama\]\[llama2\]$/), + expect.objectContaining({ + model: "llama2", + stream: true, + messages: expect.any(Array), + }), + ) + + expect(ApiInferenceLogger.logRaw).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[Ollama\]\[llama2\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ + model: "llama2", + message: expect.objectContaining({ + role: "assistant", + content: "Hello world", + }), + usage: expect.objectContaining({ inputTokens: 10, outputTokens: 2 }), + }), + ) + }) +}) diff --git a/src/api/providers/__tests__/qwen-code-native-tools.spec.ts b/src/api/providers/__tests__/qwen-code-native-tools.spec.ts index d6766dafd6e..88aba1498bc 100644 --- a/src/api/providers/__tests__/qwen-code-native-tools.spec.ts +++ b/src/api/providers/__tests__/qwen-code-native-tools.spec.ts @@ -8,6 +8,10 @@ vi.mock("node:fs", () => ({ }, })) +vi.mock("../../../utils/safeWriteJson", () => ({ + safeWriteJson: vi.fn().mockResolvedValue(undefined), +})) + const mockCreate = vi.fn() vi.mock("openai", () => { return { @@ -28,6 +32,7 @@ import { promises as fs } from "node:fs" import { QwenCodeHandler } from "../qwen-code" import { NativeToolCallParser } from "../../../core/assistant-message/NativeToolCallParser" import type { ApiHandlerOptions } from "../../../shared/api" +import { safeWriteJson } from "../../../utils/safeWriteJson" describe("QwenCodeHandler Native Tools", () => { let handler: QwenCodeHandler @@ -52,6 +57,7 @@ describe("QwenCodeHandler Native Tools", () => { beforeEach(() => { vi.clearAllMocks() + vi.mocked(safeWriteJson).mockResolvedValue(undefined) // Mock credentials file const mockCredentials = { diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts index e277ce53300..9dd32e9a9b8 100644 --- 
a/src/api/providers/__tests__/vscode-lm.spec.ts +++ b/src/api/providers/__tests__/vscode-lm.spec.ts @@ -58,6 +58,7 @@ import * as vscode from "vscode" import { VsCodeLmHandler } from "../vscode-lm" import type { ApiHandlerOptions } from "../../../shared/api" import type { Anthropic } from "@anthropic-ai/sdk" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" const mockLanguageModelChat = { id: "test-model", @@ -141,6 +142,48 @@ describe("VsCodeLmHandler", () => { handler["client"] = mockLanguageModelChat }) + it("should log request/response when ApiInferenceLogger is enabled", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const systemPrompt = "You are a helpful assistant" + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user" as const, + content: "Hello", + }, + ] + + const responseText = "Hello!" + mockLanguageModelChat.sendRequest.mockResolvedValueOnce({ + stream: (async function* () { + yield new vscode.LanguageModelTextPart(responseText) + return + })(), + text: (async function* () { + yield responseText + return + })(), + }) + + for await (const _ of handler.createMessage(systemPrompt, messages)) { + // drain + } + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[VS Code LM\]\[.+\]$/), + expect.objectContaining({ + model: expect.any(String), + system: systemPrompt, + messages: expect.any(Array), + }), + ) + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[response\]\[VS Code LM\]\[.+\]\[\d+ms\]\[streaming\]$/), + expect.objectContaining({ text: expect.any(String), usage: expect.any(Object) }), + ) + }) + it("should stream text responses", async () => { const systemPrompt = "You are a helpful assistant" const messages: Anthropic.Messages.MessageParam[] = [ diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index cbfae08f41e..27ee802ae07 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -25,11 +25,14 @@ import { convertOpenAIToolChoiceToAnthropic, } from "../../core/prompts/tools/native-tools/converters" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" + import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" // https://docs.anthropic.com/en/api/claude-on-vertex-ai export class AnthropicVertexHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Anthropic Vertex" protected options: ApiHandlerOptions private client: AnthropicVertex @@ -70,6 +73,9 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const startedAt = Date.now() + const shouldLog = ApiInferenceLogger.isEnabled() + let { id, info, temperature, maxTokens, reasoning: thinking, betas } = this.getModel() const { supportsPromptCache } = info @@ -94,6 +100,44 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple } : {} + const textParts: string[] = [] + const reasoningParts: string[] = [] + const toolCallsByIndex = new Map< + number, + { + index: number + id?: string + name?: string + argsParts: string[] + } + >() + + let usageSnapshot: + | { + inputTokens: number + outputTokens: number + cacheWriteTokens?: number + cacheReadTokens?: number + } + | undefined + + function 
tryParseJsonString(input: string): unknown { + try { + return JSON.parse(input) + } catch { + return input + } + } + + function getOrCreateToolCall(index: number) { + let current = toolCallsByIndex.get(index) + if (!current) { + current = { index, argsParts: [] } + toolCallsByIndex.set(index, current) + } + return current + } + /** * Vertex API has specific limitations for prompt caching: * 1. Maximum of 4 blocks can have cache_control @@ -124,98 +168,183 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple // and prompt caching const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined - const stream = await this.client.messages.create(params, requestOptions) + if (shouldLog) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${id}]`, { + ...params, + ...(requestOptions ? { __requestOptions: requestOptions } : {}), + }) + } - for await (const chunk of stream) { - switch (chunk.type) { - case "message_start": { - const usage = chunk.message!.usage + let stream: Awaited> + try { + stream = await this.client.messages.create(params, requestOptions) + } catch (error) { + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${id}][${durationMs}ms]`, error) + } + throw error + } - yield { - type: "usage", - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - cacheWriteTokens: usage.cache_creation_input_tokens || undefined, - cacheReadTokens: usage.cache_read_input_tokens || undefined, - } + try { + for await (const chunk of stream) { + switch (chunk.type) { + case "message_start": { + const usage = chunk.message!.usage + usageSnapshot = { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || undefined, + cacheReadTokens: usage.cache_read_input_tokens || undefined, + } - break - } - case "message_delta": { - yield { - type: "usage", - inputTokens: 0, - outputTokens: chunk.usage!.output_tokens || 0, + yield { + type: "usage", + inputTokens: usageSnapshot.inputTokens, + outputTokens: usageSnapshot.outputTokens, + cacheWriteTokens: usageSnapshot.cacheWriteTokens, + cacheReadTokens: usageSnapshot.cacheReadTokens, + } + + break } + case "message_delta": { + const outputTokens = chunk.usage!.output_tokens || 0 + if (usageSnapshot) usageSnapshot.outputTokens = outputTokens + yield { + type: "usage", + inputTokens: 0, + outputTokens, + } - break - } - case "content_block_start": { - switch (chunk.content_block!.type) { - case "text": { - if (chunk.index! > 0) { - yield { type: "text", text: "\n" } + break + } + case "content_block_start": { + switch (chunk.content_block!.type) { + case "text": { + if (chunk.index! > 0) { + textParts.push("\n") + yield { type: "text", text: "\n" } + } + + textParts.push(chunk.content_block!.text) + yield { type: "text", text: chunk.content_block!.text } + break } - - yield { type: "text", text: chunk.content_block!.text } - break - } - case "thinking": { - if (chunk.index! > 0) { - yield { type: "reasoning", text: "\n" } + case "thinking": { + if (chunk.index! 
> 0) { + reasoningParts.push("\n") + yield { type: "reasoning", text: "\n" } + } + + const thinkingText = (chunk.content_block as any).thinking as string + reasoningParts.push(thinkingText) + yield { type: "reasoning", text: thinkingText } + break } - - yield { type: "reasoning", text: (chunk.content_block as any).thinking } - break - } - case "tool_use": { - // Emit initial tool call partial with id and name - yield { - type: "tool_call_partial", - index: chunk.index, - id: chunk.content_block!.id, - name: chunk.content_block!.name, - arguments: undefined, + case "tool_use": { + const tc = getOrCreateToolCall(chunk.index ?? 0) + tc.id = chunk.content_block!.id + tc.name = chunk.content_block!.name + + // Emit initial tool call partial with id and name + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.content_block!.id, + name: chunk.content_block!.name, + arguments: undefined, + } + break } - break } - } - break - } - case "content_block_delta": { - switch (chunk.delta!.type) { - case "text_delta": { - yield { type: "text", text: chunk.delta!.text } - break - } - case "thinking_delta": { - yield { type: "reasoning", text: (chunk.delta as any).thinking } - break - } - case "input_json_delta": { - // Emit tool call partial chunks as arguments stream in - yield { - type: "tool_call_partial", - index: chunk.index, - id: undefined, - name: undefined, - arguments: (chunk.delta as any).partial_json, + break + } + case "content_block_delta": { + switch (chunk.delta!.type) { + case "text_delta": { + textParts.push(chunk.delta!.text) + yield { type: "text", text: chunk.delta!.text } + break + } + case "thinking_delta": { + const thinkingText = (chunk.delta as any).thinking as string + reasoningParts.push(thinkingText) + yield { type: "reasoning", text: thinkingText } + break + } + case "input_json_delta": { + const partial = (chunk.delta as any).partial_json as string + const tc = getOrCreateToolCall(chunk.index ?? 0) + if (typeof partial === "string" && partial.length > 0) tc.argsParts.push(partial) + // Emit tool call partial chunks as arguments stream in + yield { + type: "tool_call_partial", + index: chunk.index, + id: undefined, + name: undefined, + arguments: partial, + } + break } - break } - } - break - } - case "content_block_stop": { - // Block complete - no action needed for now. - // NativeToolCallParser handles tool call completion - // Note: Signature for multi-turn thinking would require using stream.finalMessage() - // after iteration completes, which requires restructuring the streaming approach. - break + break + } + case "content_block_stop": { + // Block complete - no action needed for now. + // NativeToolCallParser handles tool call completion + // Note: Signature for multi-turn thinking would require using stream.finalMessage() + // after iteration completes, which requires restructuring the streaming approach. + break + } } } + } catch (error) { + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${id}][${durationMs}ms]`, error) + } + throw error + } finally { + if (shouldLog) { + const durationMs = Date.now() - startedAt + const toolUseBlocks = Array.from(toolCallsByIndex.values()) + .sort((a, b) => a.index - b.index) + .map((tc) => { + const joined = tc.argsParts.join("") + return { + type: "tool_use" as const, + ...(tc.id ? { id: tc.id } : {}), + ...(tc.name ? { name: tc.name } : {}), + ...(joined.length > 0 ? 
{ input: tryParseJsonString(joined) } : {}), + } + }) + + ApiInferenceLogger.logRaw(`[API][response][${this.providerName}][${id}][${durationMs}ms][streaming]`, { + type: "message", + role: "assistant", + model: id, + content: [ + ...(textParts.length > 0 ? ([{ type: "text", text: textParts.join("") }] as const) : []), + ...(reasoningParts.length > 0 + ? ([{ type: "thinking", thinking: reasoningParts.join("") }] as const) + : []), + ...toolUseBlocks, + ], + ...(usageSnapshot + ? { + usage: { + input_tokens: usageSnapshot.inputTokens, + output_tokens: usageSnapshot.outputTokens, + cache_creation_input_tokens: usageSnapshot.cacheWriteTokens, + cache_read_input_tokens: usageSnapshot.cacheReadTokens, + }, + } + : {}), + }) + } } } @@ -269,6 +398,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple async completePrompt(prompt: string) { try { + const startedAt = Date.now() let { id, info: { supportsPromptCache }, @@ -293,7 +423,15 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple stream: false, } + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${id}]`, params) + } + const response = await this.client.messages.create(params) + if (ApiInferenceLogger.isEnabled()) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw(`[API][response][${this.providerName}][${id}][${durationMs}ms]`, response) + } const content = response.content[0] if (content.type === "text") { @@ -302,6 +440,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple return "" } catch (error) { + const modelId = this.options.apiModelId ?? vertexDefaultModelId + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${modelId}][0ms]`, error) + } if (error instanceof Error) { throw new Error(`Vertex completion error: ${error.message}`) } diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 4faf341d28f..59c239ac78d 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -29,11 +29,13 @@ import { convertOpenAIToolsToAnthropic, convertOpenAIToolChoiceToAnthropic, } from "../../core/prompts/tools/native-tools/converters" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler { private options: ApiHandlerOptions private client: Anthropic - private readonly providerName = "Anthropic" + protected readonly providerName = "Anthropic" constructor(options: ApiHandlerOptions) { super() @@ -45,6 +47,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa this.client = new Anthropic({ baseURL: this.options.anthropicBaseUrl || undefined, [apiKeyFieldName]: this.options.apiKey, + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } @@ -123,59 +126,46 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 - try { - stream = await this.client.messages.create( - { - model: modelId, - max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - thinking, - // Setting cache breakpoint for system prompt so new tasks can reuse it. 
- system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], - messages: sanitizedMessages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [{ type: "text", text: message.content, cache_control: cacheControl }] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: cacheControl } - : content, - ), - } - } - return message - }), - stream: true, - ...nativeToolParams, - }, - (() => { - // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 - // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers - // https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393 - - // Then check for models that support prompt caching - switch (modelId) { - case "claude-sonnet-4-5": - case "claude-sonnet-4-20250514": - case "claude-opus-4-5-20251101": - case "claude-opus-4-1-20250805": - case "claude-opus-4-20250514": - case "claude-3-7-sonnet-20250219": - case "claude-3-5-sonnet-20241022": - case "claude-3-5-haiku-20241022": - case "claude-3-opus-20240229": - case "claude-haiku-4-5-20251001": - case "claude-3-haiku-20240307": - betas.push("prompt-caching-2024-07-31") - return { headers: { "anthropic-beta": betas.join(",") } } - default: - return undefined + // Build the request body for logging and API call + const requestBody = { + model: modelId, + max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + thinking, + // Setting cache breakpoint for system prompt so new tasks can reuse it. + system: [{ text: systemPrompt, type: "text" as const, cache_control: cacheControl }], + messages: sanitizedMessages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [ + { + type: "text" as const, + text: message.content, + cache_control: cacheControl, + }, + ] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), } - })(), - ) + } + return message + }), + stream: true as const, + ...nativeToolParams, + } + + try { + // Determine request options (beta headers) + betas.push("prompt-caching-2024-07-31") + const requestOptions = { headers: { "anthropic-beta": betas.join(",") } } + + stream = await this.client.messages.create(requestBody, requestOptions) } catch (error) { TelemetryService.instance.captureException( new ApiProviderError( @@ -190,16 +180,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa break } default: { + // Build the request body for logging and API call + const requestBody = { + model: modelId, + max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + system: [{ text: systemPrompt, type: "text" as const }], + messages: sanitizedMessages, + stream: true as const, + ...nativeToolParams, + } + try { - stream = (await this.client.messages.create({ - model: modelId, - max_tokens: maxTokens ?? 
ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - system: [{ text: systemPrompt, type: "text" }], - messages: sanitizedMessages, - stream: true, - ...nativeToolParams, - })) as any + stream = (await this.client.messages.create(requestBody)) as any } catch (error) { TelemetryService.instance.captureException( new ApiProviderError( diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index 5aee7267b3b..d402cb975be 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -14,6 +14,8 @@ import { BaseProvider } from "./base-provider" import { handleOpenAIError } from "./utils/openai-error-handler" import { calculateApiCostOpenAI } from "../../shared/cost" import { getApiRequestTimeout } from "./utils/timeout-config" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & { providerName: string @@ -64,6 +66,7 @@ export abstract class BaseOpenAiCompatibleProvider apiKey: this.options.apiKey, defaultHeaders: DEFAULT_HEADERS, timeout: getApiRequestTimeout(), + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } @@ -117,6 +120,10 @@ export abstract class BaseOpenAiCompatibleProvider messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const { id: model, info } = this.getModel() + let lastUsage: OpenAI.CompletionUsage | undefined + const activeToolCallIds = new Set() + const stream = await this.createStream(systemPrompt, messages, metadata) const matcher = new XmlMatcher( @@ -128,16 +135,14 @@ export abstract class BaseOpenAiCompatibleProvider }) as const, ) - let lastUsage: OpenAI.CompletionUsage | undefined - const activeToolCallIds = new Set() - for await (const chunk of stream) { // Check for provider-specific error responses (e.g., MiniMax base_resp) const chunkAny = chunk as any if (chunkAny.base_resp?.status_code && chunkAny.base_resp.status_code !== 0) { - throw new Error( + const error = new Error( `${this.providerName} API Error (${chunkAny.base_resp.status_code}): ${chunkAny.base_resp.status_msg || "Unknown error"}`, ) + throw error } const delta = chunk.choices?.[0]?.delta @@ -164,6 +169,7 @@ export abstract class BaseOpenAiCompatibleProvider // Emit raw tool call chunks - NativeToolCallParser handles state management if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { + // Track tool call IDs for finishReason handling if (toolCall.id) { activeToolCallIds.add(toolCall.id) } diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 64d99b3f0c1..a19857466ad 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -11,6 +11,14 @@ import { isMcpTool } from "../../utils/mcp-name" * Base class for API providers that implements common functionality. */ export abstract class BaseProvider implements ApiHandler { + /** + * The name of this provider, used for logging and error reporting. + */ + protected abstract readonly providerName: string + + /** + * Providers implement inference by streaming chunks from their underlying SDK/transport. 
+ */ abstract createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 761500750d0..fcee6ede2cb 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -45,6 +45,7 @@ import { getModelParams } from "../transform/model-params" import { shouldUseReasoningBudget } from "../../shared/api" import { normalizeToolSchema } from "../../utils/json-schema" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" /************************************************************************************ * @@ -200,7 +201,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH protected options: ProviderSettings private client: BedrockRuntimeClient private arnInfo: any - private readonly providerName = "Bedrock" + protected readonly providerName = "Bedrock" constructor(options: ProviderSettings) { super() @@ -356,6 +357,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH } }, ): ApiStream { + const startedAt = Date.now() + const shouldLog = ApiInferenceLogger.isEnabled() + const modelConfig = this.getModel() const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig)) @@ -483,6 +487,55 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH ...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }), } + if (shouldLog) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${modelConfig.id}]`, payload) + } + + const textParts: string[] = [] + const reasoningParts: string[] = [] + const toolCallsByIndex = new Map< + number, + { + index: number + id?: string + name?: string + argsParts: string[] + } + >() + + let lastUsage: + | { + inputTokens: number + outputTokens: number + cacheReadTokens: number + cacheWriteTokens: number + } + | undefined + + const debugEvents: StreamEvent[] = [] + let totalDebugEvents = 0 + const MAX_DEBUG_EVENTS = 50 + + function appendToolCallPartial(entry: { index: number; id?: string; name?: string; arguments?: string }) { + let current = toolCallsByIndex.get(entry.index) + if (!current) { + current = { index: entry.index, argsParts: [] } + toolCallsByIndex.set(entry.index, current) + } + if (!current.id && entry.id) current.id = entry.id + if (!current.name && entry.name) current.name = entry.name + if (typeof entry.arguments === "string" && entry.arguments.length > 0) + current.argsParts.push(entry.arguments) + } + + function tryParseJsonString(input: string): unknown { + try { + return JSON.parse(input) + } catch { + return input + } + } + // Create AbortController with 10 minute timeout const controller = new AbortController() let timeoutId: NodeJS.Timeout | undefined @@ -519,6 +572,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH continue } + totalDebugEvents++ + if (shouldLog && debugEvents.length < MAX_DEBUG_EVENTS) { + debugEvents.push(streamEvent) + } + // Handle metadata events first if (streamEvent.metadata?.usage) { const usage = (streamEvent.metadata?.usage || {}) as UsageType @@ -528,6 +586,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH const cacheWriteTokens = usage.cacheWriteInputTokens || usage.cacheWriteInputTokenCount || 0 // Always include all available token information + lastUsage = { + inputTokens: 
usage.inputTokens || 0, + outputTokens: usage.outputTokens || 0, + cacheReadTokens: cacheReadTokens, + cacheWriteTokens: cacheWriteTokens, + } + yield { type: "usage", inputTokens: usage.inputTokens || 0, @@ -592,8 +657,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH // Check if this is a reasoning block (AWS SDK structure) if (cbStart.contentBlock?.reasoningContent) { if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) { + reasoningParts.push("\n") yield { type: "reasoning", text: "\n" } } + reasoningParts.push(cbStart.contentBlock.reasoningContent.text || "") yield { type: "reasoning", text: cbStart.contentBlock.reasoningContent.text || "", @@ -605,9 +672,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH else if (cbStart.contentBlock?.type === "thinking" || cbStart.content_block?.type === "thinking") { const contentBlock = cbStart.contentBlock || cbStart.content_block if (cbStart.contentBlockIndex && cbStart.contentBlockIndex > 0) { + reasoningParts.push("\n") yield { type: "reasoning", text: "\n" } } if (contentBlock?.thinking) { + reasoningParts.push(contentBlock.thinking) yield { type: "reasoning", text: contentBlock.thinking, @@ -618,6 +687,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH else if (cbStart.start?.toolUse || cbStart.contentBlock?.toolUse) { const toolUse = cbStart.start?.toolUse || cbStart.contentBlock?.toolUse if (toolUse) { + appendToolCallPartial({ + index: cbStart.contentBlockIndex ?? 0, + id: toolUse.toolUseId, + name: toolUse.name, + arguments: undefined, + }) yield { type: "tool_call_partial", index: cbStart.contentBlockIndex ?? 0, @@ -627,6 +702,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH } } } else if (cbStart.start?.text) { + textParts.push(cbStart.start.text) yield { type: "text", text: cbStart.start.text, @@ -649,6 +725,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH if (delta) { // Check for reasoningContent property (AWS SDK structure) if (delta.reasoningContent?.text) { + reasoningParts.push(delta.reasoningContent.text) yield { type: "reasoning", text: delta.reasoningContent.text, @@ -658,6 +735,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH // Handle tool use input delta if (delta.toolUse?.input) { + appendToolCallPartial({ + index: cbDelta.contentBlockIndex ?? 0, + id: undefined, + name: undefined, + arguments: delta.toolUse.input, + }) yield { type: "tool_call_partial", index: cbDelta.contentBlockIndex ?? 0, @@ -670,11 +753,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH // Handle alternative thinking structure (fallback for older SDK versions) if (delta.type === "thinking_delta" && delta.thinking) { + reasoningParts.push(delta.thinking) yield { type: "reasoning", text: delta.thinking, } } else if (delta.text) { + textParts.push(delta.text) yield { type: "text", text: delta.text, @@ -688,12 +773,51 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH continue } } + + if (shouldLog) { + const durationMs = Date.now() - startedAt + const toolCalls = Array.from(toolCallsByIndex.values()) + .sort((a, b) => a.index - b.index) + .map((tc) => ({ + index: tc.index, + ...(tc.id ? { id: tc.id } : {}), + ...(tc.name ? { name: tc.name } : {}), + ...(tc.argsParts.length > 0 ? 
{ arguments: tryParseJsonString(tc.argsParts.join("")) } : {}), + })) + + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${modelConfig.id}][${durationMs}ms][streaming]`, + { + model: modelConfig.id, + message: { + role: "assistant", + content: textParts.join(""), + ...(reasoningParts.length > 0 ? { reasoning: reasoningParts.join("") } : {}), + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }, + ...(lastUsage ? { usage: lastUsage } : {}), + __stream: { + format: "bedrock", + totalEvents: totalDebugEvents, + events: debugEvents, + }, + }, + ) + } // Clear timeout after stream completes clearTimeout(timeoutId) } catch (error: unknown) { // Clear timeout on error clearTimeout(timeoutId) + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError( + `[API][error][${this.providerName}][${modelConfig.id}][${durationMs}ms]`, + error, + ) + } + // Capture error in telemetry before processing const errorMessage = error instanceof Error ? error.message : String(error) const apiError = new ApiProviderError(errorMessage, this.providerName, modelConfig.id, "createMessage") @@ -748,6 +872,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH async completePrompt(prompt: string): Promise { try { + const startedAt = Date.now() const modelConfig = this.getModel() // For completePrompt, thinking is typically not used, but we should still check @@ -782,9 +907,21 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH inferenceConfig, } + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${modelConfig.id}]`, payload) + } + const command = new ConverseCommand(payload) const response = await this.client.send(command) + if (ApiInferenceLogger.isEnabled()) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${modelConfig.id}][${durationMs}ms]`, + response, + ) + } + if ( response?.output?.message?.content && response.output.message.content.length > 0 && @@ -808,6 +945,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH const apiError = new ApiProviderError(telemetryErrorMessage, this.providerName, model.id, "completePrompt") TelemetryService.instance.captureException(apiError) + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${model.id}][0ms]`, error) + } + // Use the extracted error handling method for all errors const errorResult = this.handleBedrockError(error, false) // false for non-streaming context // Since we're in a non-streaming context, we know the result is a string diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index 99e7c4cc3d4..8b769882b53 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -12,6 +12,8 @@ import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from ". 
import { BaseProvider } from "./base-provider" import { DEFAULT_HEADERS } from "./constants" import { t } from "../../i18n" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1" const CEREBRAS_DEFAULT_TEMPERATURE = 0 @@ -20,6 +22,7 @@ const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration" const CEREBRAS_INTEGRATION_NAME = "roocode" export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Cerebras" private apiKey: string private providerModels: typeof cerebrasModels private defaultProviderModelId: CerebrasModelId @@ -128,7 +131,9 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } try { - const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, { + const fetchFn = ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : fetch + + const response = await fetchFn(`${CEREBRAS_BASE_URL}/chat/completions`, { method: "POST", headers: { ...DEFAULT_HEADERS, @@ -290,7 +295,9 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } try { - const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, { + const fetchFn = ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : fetch + + const response = await fetchFn(`${CEREBRAS_BASE_URL}/chat/completions`, { method: "POST", headers: { ...DEFAULT_HEADERS, diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index cdd1cb3beb7..f09cccf71eb 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -20,6 +20,7 @@ import { t } from "../../i18n" import { ApiHandlerOptions } from "../../shared/api" import { countTokens } from "../../utils/countTokens" import { convertOpenAIToolsToAnthropic } from "../../core/prompts/tools/native-tools/converters" +import { BaseProvider } from "./base-provider" /** * Converts OpenAI tool_choice to Anthropic ToolChoice format @@ -64,7 +65,8 @@ function convertOpenAIToolChoice( return { type: "auto", disable_parallel_tool_use: disableParallelToolUse } } -export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { +export class ClaudeCodeHandler extends BaseProvider implements ApiHandler, SingleCompletionHandler { + protected readonly providerName = "Claude Code" private options: ApiHandlerOptions /** * Store the last thinking block signature for interleaved thinking with tool use. 
@@ -75,6 +77,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { private lastThinkingSignature?: string constructor(options: ApiHandlerOptions) { + super() this.options = options } @@ -114,7 +117,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { return null } - async *createMessage( + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, @@ -182,9 +185,8 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { thinking = { type: "disabled" } } - // Create streaming request using OAuth - const stream = createStreamingMessage({ - accessToken, + // Build request params for logging + const requestParams = { model: modelId, systemPrompt, messages, @@ -195,6 +197,13 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { metadata: { user_id: userId, }, + } + + // HTTP-level request/response logging is handled by logging-fetch injected into + // the Claude Code streaming client. + const stream = createStreamingMessage({ + accessToken, + ...requestParams, }) // Track usage for cost calculation @@ -284,7 +293,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { } } - async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise { + override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise { if (content.length === 0) { return 0 } diff --git a/src/api/providers/fetchers/roo.ts b/src/api/providers/fetchers/roo.ts index 65a2db77c39..21a8033bd53 100644 --- a/src/api/providers/fetchers/roo.ts +++ b/src/api/providers/fetchers/roo.ts @@ -5,6 +5,8 @@ import { parseApiPrice } from "../../../shared/cost" import { DEFAULT_HEADERS } from "../constants" import { resolveVersionedSettings, type VersionedSettings } from "./versionedSettings" +import { ApiInferenceLogger } from "../../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../../logging/logging-fetch" /** * Fetches available models from the Roo Code Cloud provider @@ -35,7 +37,8 @@ export async function getRooModels(baseUrl: string, apiKey?: string): Promise controller.abort(), 10000) try { - const response = await fetch(url, { + const fetchFn = ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: "Roo Code Cloud" }) : fetch + const response = await fetchFn(url, { headers, signal: controller.signal, }) diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 4402e3e0177..a97515643f5 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -27,6 +27,8 @@ import type { ApiStream, GroundingSource } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { handleProviderError } from "./utils/error-handler" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" + import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { BaseProvider } from "./base-provider" @@ -40,7 +42,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl private client: GoogleGenAI private lastThoughtSignature?: string private lastResponseId?: string - private readonly providerName = "Gemini" + protected readonly providerName = "Gemini" constructor({ isVertex, ...options }: GeminiHandlerOptions) { super() @@ -77,6 +79,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const startedAt = Date.now() + const loggingEnabled = ApiInferenceLogger.isEnabled() + const { id: model, info, reasoning: thinkingConfig, maxTokens } = this.getModel() // Reset per-request metadata that we persist into apiConversationHistory. this.lastThoughtSignature = undefined @@ -201,10 +206,21 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl } const params: GenerateContentParameters = { model, contents, config } + if (loggingEnabled) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${model}]`, params) + } try { const result = await this.client.models.generateContentStream(params) + // Accumulators for a coherent final response log. + const assembledParts: Array<{ + thought?: boolean + text?: string + thoughtSignature?: string + functionCall?: { name: string; args: Record } + }> = [] + let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined let pendingGroundingMetadata: GroundingMetadata | undefined let finalResponse: { responseId?: string } | undefined @@ -235,6 +251,17 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl thoughtSignature?: string functionCall?: { name: string; args: Record } }>) { + if (loggingEnabled) { + assembledParts.push({ + ...(typeof part.thought === "boolean" ? { thought: part.thought } : {}), + ...(typeof part.text === "string" ? { text: part.text } : {}), + ...(typeof part.thoughtSignature === "string" + ? { thoughtSignature: part.thoughtSignature } + : {}), + ...(part.functionCall ? { functionCall: part.functionCall } : {}), + }) + } + // Capture thought signatures so they can be persisted into API history. const thoughtSignature = part.thoughtSignature // Persist encrypted reasoning when using reasoning. Both effort-based @@ -331,7 +358,31 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl }), } } + + if (loggingEnabled) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${model}][${durationMs}ms][streaming]`, + { + responseId: this.lastResponseId, + model, + candidates: [ + { + finishReason, + content: { parts: assembledParts }, + ...(pendingGroundingMetadata ? 
{ groundingMetadata: pendingGroundingMetadata } : {}), + }, + ], + ...(lastUsageMetadata ? { usageMetadata: lastUsageMetadata } : {}), + }, + ) + } } catch (error) { + if (loggingEnabled) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${model}][${durationMs}ms]`, error) + } + const errorMessage = error instanceof Error ? error.message : String(error) const apiError = new ApiProviderError(errorMessage, this.providerName, model, "createMessage") TelemetryService.instance.captureException(apiError) @@ -400,6 +451,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { const { id: model, info } = this.getModel() + const startedAt = Date.now() + const loggingEnabled = ApiInferenceLogger.isEnabled() try { const tools: GenerateContentConfig["tools"] = [] @@ -428,8 +481,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl contents: [{ role: "user", parts: [{ text: prompt }] }], config: promptConfig, } + if (loggingEnabled) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${model}]`, request) + } const result = await this.client.models.generateContent(request) + if (loggingEnabled) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw(`[API][response][${this.providerName}][${model}][${durationMs}ms]`, result) + } let text = result.text ?? "" @@ -443,6 +503,11 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl return text } catch (error) { + if (loggingEnabled) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${model}][${durationMs}ms]`, error) + } + const errorMessage = error instanceof Error ? error.message : String(error) const apiError = new ApiProviderError(errorMessage, this.providerName, model, "completePrompt") TelemetryService.instance.captureException(apiError) diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 7b62046b99e..ed40a767e3f 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -9,12 +9,14 @@ import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface" import { handleOpenAIError } from "./utils/openai-error-handler" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler { private client: OpenAI private options: ApiHandlerOptions private modelCache: ModelRecord | null = null - private readonly providerName = "HuggingFace" + protected readonly providerName = "HuggingFace" constructor(options: ApiHandlerOptions) { super() @@ -28,6 +30,7 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion baseURL: "https://router.huggingface.co/v1", apiKey: this.options.huggingFaceApiKey, defaultHeaders: DEFAULT_HEADERS, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) // Try to get cached models first diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 102c108dcee..301ea6d4425 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -17,11 +17,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". import { getModelsFromCache } from "./fetchers/modelCache" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "LM Studio" + protected readonly providerName = "LM Studio" constructor(options: ApiHandlerOptions) { super() @@ -34,6 +36,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan baseURL: (this.options.lmStudioBaseUrl || "http://localhost:1234") + "/v1", apiKey: apiKey, timeout: getApiRequestTimeout(), + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } diff --git a/src/api/providers/minimax.ts b/src/api/providers/minimax.ts index a7cea478ed0..c888ff2b8fc 100644 --- a/src/api/providers/minimax.ts +++ b/src/api/providers/minimax.ts @@ -15,6 +15,8 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { calculateApiCostAnthropic } from "../../shared/cost" import { convertOpenAIToolsToAnthropic } from "../../core/prompts/tools/native-tools/converters" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" /** * Converts OpenAI tool_choice to Anthropic ToolChoice format @@ -51,6 +53,7 @@ function convertOpenAIToolChoice( } export class MiniMaxHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "MiniMax" private options: ApiHandlerOptions private client: Anthropic @@ -73,6 +76,7 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand this.client = new Anthropic({ baseURL, apiKey: options.minimaxApiKey, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) } diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index 95739cdcf73..10b3fa1f3be 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -51,7 +51,7 @@ type MistralTool = { export class MistralHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: Mistral - private readonly providerName = "Mistral" + protected readonly providerName = "Mistral" constructor(options: ApiHandlerOptions) { super() @@ -102,9 +102,6 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand requestOptions.toolChoice = "any" } - // Temporary debug log for QA - // console.log("[MISTRAL DEBUG] Raw API request body:", requestOptions) - let response try { response = await this.client.chat.stream(requestOptions) diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 712b70445cc..ff6d995f491 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -8,6 +8,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { getOllamaModels } from "./fetchers/ollama" import { XmlMatcher } from "../../utils/xml-matcher" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" interface OllamaChatOptions { temperature: number @@ -146,6 +147,7 @@ function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessagePa } export class NativeOllamaHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Ollama" protected options: ApiHandlerOptions private client: Ollama | undefined protected models: Record = {} @@ -205,6 +207,9 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const startedAt = Date.now() + const shouldLog = ApiInferenceLogger.isEnabled() + const client = this.ensureClient() const { id: modelId, info: modelInfo } = await this.fetchModel() const useR1Format = modelId.toLowerCase().includes("deepseek-r1") @@ -229,6 +234,19 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio supportsNativeTools && metadata?.tools && metadata.tools.length > 0 && metadata?.toolProtocol !== "xml" try { + if (shouldLog) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${modelId}]`, { + model: modelId, + messages: ollamaMessages, + stream: true, + options: { + temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), + ...(this.options.ollamaNumCtx !== undefined ? { num_ctx: this.options.ollamaNumCtx } : {}), + }, + ...(useNativeTools ? { tools: this.convertToolsToOllama(metadata.tools) } : {}), + }) + } + // Build options object conditionally const chatOptions: OllamaChatOptions = { temperature: this.options.modelTemperature ?? (useR1Format ? 
DEEP_SEEK_DEFAULT_TEMPERATURE : 0), @@ -253,12 +271,17 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio let totalOutputTokens = 0 // Track tool calls across chunks (Ollama may send complete tool_calls in final chunk) let toolCallIndex = 0 + const toolCalls: Array<{ id: string; name: string; arguments: unknown }> = [] + let assembledText = "" + let assembledReasoning = "" try { for await (const chunk of stream) { if (typeof chunk.message.content === "string" && chunk.message.content.length > 0) { // Process content through matcher for reasoning detection for (const matcherChunk of matcher.update(chunk.message.content)) { + if (matcherChunk.type === "text") assembledText += matcherChunk.text + if (matcherChunk.type === "reasoning") assembledReasoning += matcherChunk.text yield matcherChunk } } @@ -268,6 +291,11 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio for (const toolCall of chunk.message.tool_calls) { // Generate a unique ID for this tool call const toolCallId = `ollama-tool-${toolCallIndex}` + toolCalls.push({ + id: toolCallId, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + }) yield { type: "tool_call_partial", index: toolCallIndex, @@ -292,6 +320,8 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio // Yield any remaining content from the matcher for (const chunk of matcher.final()) { + if (chunk.type === "text") assembledText += chunk.text + if (chunk.type === "reasoning") assembledReasoning += chunk.text yield chunk } @@ -303,11 +333,42 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio outputTokens: totalOutputTokens, } } + + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${modelId}][${durationMs}ms][streaming]`, + { + model: modelId, + message: { + role: "assistant", + content: assembledText, + ...(assembledReasoning.length > 0 ? { reasoning: assembledReasoning } : {}), + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }, + usage: + totalInputTokens > 0 || totalOutputTokens > 0 + ? 
{ inputTokens: totalInputTokens, outputTokens: totalOutputTokens } + : undefined, + }, + ) + } } catch (streamError: any) { + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError( + `[API][error][${this.providerName}][${modelId}][${durationMs}ms]`, + streamError, + ) + } console.error("Error processing Ollama stream:", streamError) throw new Error(`Ollama stream processing error: ${streamError.message || "Unknown error"}`) } } catch (error: any) { + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${modelId}][${durationMs}ms]`, error) + } // Enhance error reporting const statusCode = error.status || error.statusCode const errorMessage = error.message || "Unknown error" @@ -342,6 +403,9 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio async completePrompt(prompt: string): Promise { try { + const startedAt = Date.now() + const shouldLog = ApiInferenceLogger.isEnabled() + const client = this.ensureClient() const { id: modelId } = await this.fetchModel() const useR1Format = modelId.toLowerCase().includes("deepseek-r1") @@ -356,6 +420,15 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio chatOptions.num_ctx = this.options.ollamaNumCtx } + if (shouldLog) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${modelId}]`, { + model: modelId, + messages: [{ role: "user", content: prompt }], + stream: false, + options: chatOptions, + }) + } + const response = await client.chat({ model: modelId, messages: [{ role: "user", content: prompt }], @@ -363,8 +436,20 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio options: chatOptions, }) + if (shouldLog) { + const durationMs = Date.now() - startedAt + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${modelId}][${durationMs}ms]`, + response, + ) + } + return response.message?.content || "" } catch (error) { + const modelId = this.options.ollamaModelId || this.options.apiModelId || "unknown" + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRawError(`[API][error][${this.providerName}][${modelId}][0ms]`, error) + } if (error instanceof Error) { throw new Error(`Ollama completion error: ${error.message}`) } diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 8f9cc2297f8..6bf1e1c9bd6 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -25,13 +25,15 @@ import { getModelParams } from "../transform/model-params" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { isMcpTool } from "../../utils/mcp-name" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" export type OpenAiNativeModel = ReturnType export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "OpenAI Native" + protected readonly providerName = "OpenAI Native" // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined // Complete response output array (includes reasoning items with encrypted_content) @@ -68,7 +70,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio 
this.options.enableResponsesReasoningSummary = true } const apiKey = this.options.openAiNativeApiKey ?? "not-provided" - this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) + this.client = new OpenAI({ + baseURL: this.options.openAiNativeBaseUrl, + apiKey, + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, + }) } private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { @@ -176,7 +182,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) // Make the request (pass systemPrompt and messages for potential retry) - yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) + for await (const chunk of this.executeRequest(requestBody, model, metadata, systemPrompt, messages)) { + yield chunk + } } private buildRequestBody( @@ -471,7 +479,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.abortController = new AbortController() try { - const response = await fetch(url, { + const fetchFn = ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : fetch + const response = await fetchFn(url, { method: "POST", headers: { "Content-Type": "application/json", diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 860fb76a6f0..35759f1e0d0 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -26,6 +26,8 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -33,7 +35,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions protected client: OpenAI - private readonly providerName = "OpenAI" + protected readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { super() @@ -60,6 +62,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl defaultHeaders: headers, defaultQuery: { "api-version": this.options.azureApiVersion || "2024-05-01-preview" }, timeout, + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } else if (isAzureOpenAi) { // Azure API shape slightly differs from the core API shape: @@ -70,6 +73,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl apiVersion: this.options.azureApiVersion || azureOpenAiDefaultApiVersion, defaultHeaders: headers, timeout, + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } else { this.client = new OpenAI({ @@ -77,6 +81,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl apiKey, defaultHeaders: headers, timeout, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) } } @@ -95,6 +100,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + let lastUsage: any + + // Handle O3 family models separately with their own logging if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return @@ -194,7 +202,6 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl }) as const, ) - let lastUsage const activeToolCallIds = new Set() for await (const chunk of stream) { @@ -202,8 +209,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const finishReason = chunk.choices?.[0]?.finish_reason if (delta.content) { - for (const chunk of matcher.update(delta.content)) { - yield chunk + for (const matchedChunk of matcher.update(delta.content)) { + yield matchedChunk } } @@ -214,6 +221,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } + // Use processToolCalls for proper tool_call_end events yield* this.processToolCalls(delta, finishReason, activeToolCallIds) if (chunk.usage) { @@ -221,14 +229,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - for (const chunk of matcher.final()) { - yield chunk + for (const matchedChunk of matcher.final()) { + yield matchedChunk } if (lastUsage) { yield this.processUsageMetrics(lastUsage, modelInfo) } } else { + // Non-streaming path const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: deepseekReasoner @@ -276,6 +285,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl text: message?.content || "", } + lastUsage = response.usage yield this.processUsageMetrics(response.usage, modelInfo) } } @@ -346,6 +356,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelInfo = this.getModel().info const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + let lastUsage: any + if (this.options.openAiStreamingEnabled ?? 
true) { const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) @@ -384,7 +396,32 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl throw handleOpenAIError(error, this.providerName) } - yield* this.handleStreamResponse(stream) + const activeToolCallIds = new Set() + + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta + const finishReason = chunk.choices?.[0]?.finish_reason + + if (delta) { + if (delta.content) { + yield { + type: "text", + text: delta.content, + } + } + + yield* this.processToolCalls(delta, finishReason, activeToolCallIds) + } + + if (chunk.usage) { + lastUsage = chunk.usage + yield { + type: "usage", + inputTokens: chunk.usage.prompt_tokens || 0, + outputTokens: chunk.usage.completion_tokens || 0, + } + } + } } else { const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, @@ -437,35 +474,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl type: "text", text: message?.content || "", } - yield this.processUsageMetrics(response.usage) - } - } - - private async *handleStreamResponse(stream: AsyncIterable): ApiStream { - const activeToolCallIds = new Set() - - for await (const chunk of stream) { - const delta = chunk.choices?.[0]?.delta - const finishReason = chunk.choices?.[0]?.finish_reason - - if (delta) { - if (delta.content) { - yield { - type: "text", - text: delta.content, - } - } - - yield* this.processToolCalls(delta, finishReason, activeToolCallIds) - } - if (chunk.usage) { - yield { - type: "usage", - inputTokens: chunk.usage.prompt_tokens || 0, - outputTokens: chunk.usage.completion_tokens || 0, - } - } + lastUsage = response.usage + yield this.processUsageMetrics(response.usage) } } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 4a8a0780186..42056b4f6ae 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -36,6 +36,8 @@ import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from ". import { handleOpenAIError } from "./utils/openai-error-handler" import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" // Add custom interface for OpenRouter params. type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { @@ -140,7 +142,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH private client: OpenAI protected models: ModelRecord = {} protected endpoints: ModelRecord = {} - private readonly providerName = "OpenRouter" + protected readonly providerName = "OpenRouter" private currentReasoningDetails: any[] = [] constructor(options: ApiHandlerOptions) { @@ -150,7 +152,12 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" const apiKey = this.options.openRouterApiKey ?? "not-provided" - this.client = new OpenAI({ baseURL, apiKey, defaultHeaders: DEFAULT_HEADERS }) + this.client = new OpenAI({ + baseURL, + apiKey, + defaultHeaders: DEFAULT_HEADERS, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, + }) // Load models asynchronously to populate cache before getModel() is called this.loadDynamicModels().catch((error) => { @@ -346,20 +353,19 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }, ) - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") TelemetryService.instance.captureException(apiError) throw handleOpenAIError(error, this.providerName) } + + // Fallback for non-OpenRouter errors + const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") + TelemetryService.instance.captureException(apiError) + throw handleOpenAIError(error, this.providerName) } let lastUsage: CompletionUsage | undefined = undefined - // Accumulator for reasoning_details FROM the API. - // We preserve the original shape of reasoning_details to prevent malformed responses. + // Accumulator for reasoning_details: accumulate text by type-index key const reasoningDetailsAccumulator = new Map< string, { @@ -497,7 +503,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - // After streaming completes, store ONLY the reasoning_details we received from the API. + // After streaming completes, store the accumulated reasoning_details if (reasoningDetailsAccumulator.size > 0) { this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values()) } diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index 8f26273ebaf..b34c0b19358 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -15,6 +15,9 @@ import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" +import { safeWriteJson } from "../../utils/safeWriteJson" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai" const QWEN_OAUTH_TOKEN_ENDPOINT = `${QWEN_OAUTH_BASE_URL}/api/v1/oauth2/token` @@ -52,6 +55,7 @@ function objectToUrlEncoded(data: Record): string { } export class QwenCodeHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Qwen Code" protected options: QwenCodeHandlerOptions private credentials: QwenOAuthCredentials | null = null private client: OpenAI | undefined @@ -69,6 +73,7 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan this.client = new OpenAI({ apiKey: "dummy-key-will-be-replaced", baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1", + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) } return this.client @@ -146,7 +151,7 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan const filePath = getQwenCachedCredentialPath(this.options.qwenCodeOauthPath) try { - await fs.writeFile(filePath, JSON.stringify(newCredentials, null, 2)) + await safeWriteJson(filePath, newCredentials) } catch (error) { console.error("Failed to save refreshed credentials:", error) // Continue with the refreshed token in memory even if file write fails diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 85efeb800f1..44534ff1e3a 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -25,6 +25,8 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". import { toRequestyServiceUrl } from "../../shared/utils/requesty" import { handleOpenAIError } from "./utils/openai-error-handler" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" // Requesty usage includes an extra field for Anthropic use cases. // Safely cast the prompt token details section to the appropriate structure. @@ -61,7 +63,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan protected models: ModelRecord = {} private client: OpenAI private baseURL: string - private readonly providerName = "Requesty" + protected readonly providerName = "Requesty" constructor(options: ApiHandlerOptions) { super() @@ -75,6 +77,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan baseURL: this.baseURL, apiKey: apiKey, defaultHeaders: DEFAULT_HEADERS, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) } diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index bfd99750bfe..d0c2343a7ba 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -125,6 +125,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const { id: model } = this.getModel() + + // Accumulators for final response logging + try { // Reset reasoning_details accumulator for this request this.currentReasoningDetails = [] diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts index 01942e21723..e309b906eec 100644 --- a/src/api/providers/router-provider.ts +++ b/src/api/providers/router-provider.ts @@ -8,6 +8,8 @@ import { BaseProvider } from "./base-provider" import { getModels, getModelsFromCache } from "./fetchers/modelCache" import { DEFAULT_HEADERS } from "./constants" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" type RouterProviderOptions = { name: RouterName @@ -22,6 +24,10 @@ type RouterProviderOptions = { export abstract class RouterProvider extends BaseProvider { protected readonly options: ApiHandlerOptions protected readonly name: RouterName + // Implement abstract providerName from BaseProvider using name + protected get providerName(): string { + return this.name + } protected models: ModelRecord = {} protected readonly modelId?: string protected readonly defaultModelId: string @@ -52,6 +58,7 @@ export abstract class RouterProvider extends BaseProvider { ...DEFAULT_HEADERS, ...(options.openAiHeaders || {}), }, + fetch: ApiInferenceLogger.isEnabled() ? 
createLoggingFetch({ provider: this.providerName }) : undefined, }) } diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index 5c598ccd012..f160ce58ccf 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -10,6 +10,7 @@ import { normalizeToolSchema } from "../../utils/json-schema" import { ApiStream } from "../transform/stream" import { convertToVsCodeLmMessages, extractTextCountFromMessage } from "../transform/vscode-lm-format" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" @@ -61,6 +62,7 @@ function convertToVsCodeLmTools(tools: OpenAI.Chat.ChatCompletionTool[]): vscode * ``` */ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "VS Code LM" protected options: ApiHandlerOptions private client: vscode.LanguageModelChat | null private disposable: vscode.Disposable | null @@ -372,6 +374,21 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan ...convertToVsCodeLmMessages(cleanedMessages), ] + // Inference logging (VS Code LM is not HTTP-based, so we log at provider level) + const modelId = this.getModel().id + const startedAt = Date.now() + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRaw(`[API][request][${this.providerName}][${modelId}]`, { + model: modelId, + system: systemPrompt, + messages: cleanedMessages, + toolProtocol: metadata?.toolProtocol, + tool_choice: metadata?.tool_choice, + parallelToolCalls: metadata?.parallelToolCalls, + hasTools: Boolean(metadata?.tools && metadata.tools.length > 0), + }) + } + // Initialize cancellation token for the request this.currentRequestCancellation = new vscode.CancellationTokenSource() @@ -481,6 +498,16 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan // Count tokens in the accumulated text after stream completion const totalOutputTokens: number = await this.internalCountTokens(accumulatedText) + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRaw( + `[API][response][${this.providerName}][${modelId}][${Date.now() - startedAt}ms][streaming]`, + { + text: accumulatedText, + usage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens }, + }, + ) + } + // Report final usage after stream completion yield { type: "usage", @@ -490,6 +517,13 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan } catch (error: unknown) { this.ensureCleanState() + if (ApiInferenceLogger.isEnabled()) { + ApiInferenceLogger.logRawError( + `[API][error][${this.providerName}][${modelId}][${Date.now() - startedAt}ms]`, + error, + ) + } + if (error instanceof vscode.CancellationError) { throw new Error("Roo Code : Request cancelled by user") } diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index a1377a1317a..7b44fbc1c17 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -15,13 +15,15 @@ import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" +import { createLoggingFetch } from "../logging/logging-fetch" const XAI_DEFAULT_TEMPERATURE = 0 export class XAIHandler extends 
BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "xAI" + protected readonly providerName = "xAI" constructor(options: ApiHandlerOptions) { super() @@ -33,6 +35,7 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler baseURL: "https://api.x.ai/v1", apiKey: apiKey, defaultHeaders: DEFAULT_HEADERS, + fetch: ApiInferenceLogger.isEnabled() ? createLoggingFetch({ provider: this.providerName }) : undefined, }) } diff --git a/src/core/condense/__tests__/condense.spec.ts b/src/core/condense/__tests__/condense.spec.ts index bea7d50ac17..1899961ceca 100644 --- a/src/core/condense/__tests__/condense.spec.ts +++ b/src/core/condense/__tests__/condense.spec.ts @@ -15,6 +15,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts index 3ee36fc5956..4dfec368b44 100644 --- a/src/core/context-management/__tests__/context-management.spec.ts +++ b/src/core/context-management/__tests__/context-management.spec.ts @@ -19,6 +19,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/extension.ts b/src/extension.ts index dcb941fa581..dac9a0f434d 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -1,15 +1,27 @@ import * as vscode from "vscode" import * as dotenvx from "@dotenvx/dotenvx" import * as path from "path" +import * as fs from "fs" -// Load environment variables from .env file +// Load environment variables from local development files if present. +// In packaged production builds these files typically won't exist. try { - // Specify path to .env file in the project root directory + // Specify paths to .env and .env.local files in the project root directory const envPath = path.join(__dirname, "..", ".env") - dotenvx.config({ path: envPath }) + const envLocalPath = path.join(__dirname, "..", ".env.local") + // Load .env first, then .env.local (so .env.local can override) + if (fs.existsSync(envPath)) { + dotenvx.config({ path: envPath }) + } + if (fs.existsSync(envLocalPath)) { + dotenvx.config({ path: envLocalPath, override: true }) + } } catch (e) { - // Silently handle environment loading errors - console.warn("Failed to load environment variables:", e) + // Don't let dotenv errors break extension activation. + // Avoid noisy logs for normal users. + if (process.env.NODE_ENV === "development") { + console.warn("Failed to load environment variables:", e) + } } import type { CloudUserInfo, AuthState } from "@roo-code/types" @@ -19,6 +31,7 @@ import { customToolRegistry } from "@roo-code/core" import "./utils/path" // Necessary to have access to String.prototype.toPosix. 
import { createOutputChannelLogger, createDualLogger } from "./utils/outputChannelLogger" +import { ApiInferenceLogger } from "./api/logging/ApiInferenceLogger" import { Package } from "./shared/package" import { formatLanguage } from "./shared/language" @@ -68,6 +81,13 @@ export async function activate(context: vscode.ExtensionContext) { context.subscriptions.push(outputChannel) outputChannel.appendLine(`${Package.name} extension activated - ${JSON.stringify(Package)}`) + // Configure API inference logger for debugging (enable via ROO_CODE_API_LOGGING=true) + ApiInferenceLogger.configure({ + enabled: process.env.ROO_CODE_API_LOGGING === "true", + // Log only to the Extension Host Debug Console. + sink: (...args: unknown[]) => console.log(...args), + }) + // Set extension path for custom tool registry to find bundled esbuild customToolRegistry.setExtensionPath(context.extensionPath) diff --git a/src/integrations/claude-code/__tests__/streaming-client.spec.ts b/src/integrations/claude-code/__tests__/streaming-client.spec.ts index 8ccb108827d..23f4e2a2834 100644 --- a/src/integrations/claude-code/__tests__/streaming-client.spec.ts +++ b/src/integrations/claude-code/__tests__/streaming-client.spec.ts @@ -1,4 +1,5 @@ import { CLAUDE_CODE_API_CONFIG } from "../streaming-client" +import { ApiInferenceLogger } from "../../../api/logging/ApiInferenceLogger" describe("Claude Code Streaming Client", () => { describe("CLAUDE_CODE_API_CONFIG", () => { @@ -27,12 +28,48 @@ describe("Claude Code Streaming Client", () => { beforeEach(() => { originalFetch = global.fetch + // Ensure we start from disabled state per test + ApiInferenceLogger.configure({ enabled: false, sink: () => {} }) }) afterEach(() => { global.fetch = originalFetch }) + test("should wrap fetch with logging when ApiInferenceLogger is enabled", async () => { + const sink = vi.fn() + ApiInferenceLogger.configure({ enabled: true, sink }) + + const emptyStream = new ReadableStream({ + start(controller) { + controller.close() + }, + }) + + const mockFetch = vi + .fn() + .mockResolvedValue(new Response(emptyStream, { headers: { "content-type": "text/event-stream" } })) + global.fetch = mockFetch + + const { createStreamingMessage } = await import("../streaming-client") + + const stream = createStreamingMessage({ + accessToken: "test-token", + model: "claude-3-5-sonnet-20241022", + systemPrompt: "You are helpful", + messages: [{ role: "user", content: "Hello" }], + }) + + for await (const _ of stream) { + // drain + } + + expect(sink).toHaveBeenCalledWith( + expect.stringMatching(/^\[API\]\[request\]\[Claude Code\]\[.+\]$/), + expect.anything(), + ) + }) + test("should make request with correct headers", async () => { const mockFetch = vi.fn().mockResolvedValue({ ok: true, diff --git a/src/integrations/claude-code/streaming-client.ts b/src/integrations/claude-code/streaming-client.ts index b864995f2cd..75ff13180c9 100644 --- a/src/integrations/claude-code/streaming-client.ts +++ b/src/integrations/claude-code/streaming-client.ts @@ -1,6 +1,8 @@ import type { Anthropic } from "@anthropic-ai/sdk" import type { ClaudeCodeRateLimitInfo } from "@roo-code/types" import { Package } from "../../shared/package" +import { ApiInferenceLogger } from "../../api/logging/ApiInferenceLogger" +import { createLoggingFetch } from "../../api/logging/logging-fetch" /** * Set of content block types that are valid for Anthropic API. 
@@ -370,6 +372,10 @@ export async function* createStreamingMessage(options: StreamMessageOptions): As const { accessToken, model, systemPrompt, messages, maxTokens, thinking, tools, toolChoice, metadata, signal } = options + const fetchFn: typeof fetch = ApiInferenceLogger.isEnabled() + ? createLoggingFetch({ provider: "Claude Code" }, fetch) + : fetch + // Filter out non-Anthropic blocks before processing const sanitizedMessages = filterNonAnthropicBlocks(messages) @@ -430,7 +436,7 @@ export async function* createStreamingMessage(options: StreamMessageOptions): As } // Make the request - const response = await fetch(`${CLAUDE_CODE_API_CONFIG.endpoint}?beta=true`, { + const response = await fetchFn(`${CLAUDE_CODE_API_CONFIG.endpoint}?beta=true`, { method: "POST", headers, body: JSON.stringify(body), @@ -713,6 +719,10 @@ function parseRateLimitHeaders(headers: Headers): ClaudeCodeRateLimitInfo { * Uses a small request to get the response headers containing rate limit data */ export async function fetchRateLimitInfo(accessToken: string): Promise { + const fetchFn: typeof fetch = ApiInferenceLogger.isEnabled() + ? createLoggingFetch({ provider: "Claude Code" }, fetch) + : fetch + // Build minimal request body - use haiku for speed and lowest cost const body = { model: "claude-haiku-4-5", @@ -731,7 +741,7 @@ export async function fetchRateLimitInfo(accessToken: string): Promise
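
For the OpenAI-compatible handlers, OpenRouter, Requesty, Qwen Code, xAI, the router providers, and the Claude Code streaming client, logging is attached at the HTTP layer by passing `createLoggingFetch({ provider })` (optionally with an explicit base fetch, as in the Claude Code hunk) into the client. `src/api/logging/logging-fetch.ts` itself is not part of this section, so the sketch below only reflects the shape its call sites imply: a fetch-compatible wrapper that pulls the model out of the JSON request body and emits the same `[API][request]` / `[API][response]` / `[API][error]` labels; the logged field names are illustrative, not the real implementation. Nothing fires unless `ROO_CODE_API_LOGGING=true` is set (for local development it can live in the newly supported `.env.local`), in which case `extension.ts` enables the logger and routes entries to the Debug Console through the `console.log` sink.

import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" // same module the provider hunks import

type LoggingFetchOptions = { provider: string }

// Sketch only: inferred from the call sites `createLoggingFetch({ provider })` and
// `createLoggingFetch({ provider: "Claude Code" }, fetch)`. The real implementation
// lives in src/api/logging/logging-fetch.ts, which is not shown in this section.
export function createLoggingFetchSketch(
	options: LoggingFetchOptions,
	baseFetch: typeof fetch = fetch,
): typeof fetch {
	return async (input, init) => {
		const startedAt = Date.now()

		// Best-effort extraction of the model and payload from a JSON request body.
		let payload: unknown = undefined
		let model = "unknown"
		if (typeof init?.body === "string") {
			try {
				payload = JSON.parse(init.body)
				const m = (payload as Record<string, unknown>).model
				if (typeof m === "string") model = m
			} catch {
				payload = init.body
			}
		}

		ApiInferenceLogger.logRaw(`[API][request][${options.provider}][${model}]`, payload)

		try {
			const response = await baseFetch(input, init)
			// The real wrapper presumably logs response payloads too; a status code keeps this sketch short.
			ApiInferenceLogger.logRaw(
				`[API][response][${options.provider}][${model}][${Date.now() - startedAt}ms]`,
				{ status: response.status },
			)
			return response
		} catch (error) {
			ApiInferenceLogger.logRawError(
				`[API][error][${options.provider}][${model}][${Date.now() - startedAt}ms]`,
				error,
			)
			throw error
		}
	}
}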
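
Two providers in this patch, native Ollama and VS Code LM, cannot take an injected fetch, so they instrument their call sites directly. The pattern is identical in both hunks: read `ApiInferenceLogger.isEnabled()` once, log an `[API][request][Provider][model]` entry with the outgoing payload before the call, then log an `[API][response]...[Nms]` (with a `[streaming]` marker where applicable) or `[API][error]...[Nms]` entry afterwards, carrying the elapsed time. A condensed sketch of that pattern follows; `callModel` and the provider/model strings are placeholders, not code from the patch.

import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" // path as used by the provider hunks

// Condensed illustration of the manual instrumentation pattern used by the
// native Ollama and VS Code LM handlers. `callModel` stands in for the
// provider's own SDK call and is not part of the patch.
async function loggedCompletion(
	providerName: string,
	modelId: string,
	prompt: string,
	callModel: (model: string, prompt: string) => Promise<string>,
): Promise<string> {
	const startedAt = Date.now()
	const shouldLog = ApiInferenceLogger.isEnabled()

	if (shouldLog) {
		ApiInferenceLogger.logRaw(`[API][request][${providerName}][${modelId}]`, {
			model: modelId,
			messages: [{ role: "user", content: prompt }],
			stream: false,
		})
	}

	try {
		const text = await callModel(modelId, prompt)
		if (shouldLog) {
			ApiInferenceLogger.logRaw(`[API][response][${providerName}][${modelId}][${Date.now() - startedAt}ms]`, {
				message: { role: "assistant", content: text },
			})
		}
		return text
	} catch (error) {
		if (shouldLog) {
			ApiInferenceLogger.logRawError(`[API][error][${providerName}][${modelId}][${Date.now() - startedAt}ms]`, error)
		}
		throw error
	}
}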