From 16549fe4463036194e31be0877114b87586d0f00 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:16:18 +0100 Subject: [PATCH 01/17] Added POC --- .gitignore | 3 + .../opencode/python/voice/requirements.txt | 6 + packages/opencode/python/voice/test_voice.py | 34 +++ .../opencode/python/voice/voice_server.py | 197 +++++++++++++ packages/opencode/src/bus/index.ts | 21 +- .../cli/cmd/tui/component/dialog-status.tsx | 21 ++ .../cli/cmd/tui/component/prompt/index.tsx | 88 ++++++ .../opencode/src/cli/cmd/tui/context/sync.tsx | 25 ++ .../opencode/src/cli/cmd/tui/routes/home.tsx | 15 + .../src/cli/cmd/tui/routes/session/footer.tsx | 13 + packages/opencode/src/cli/cmd/tui/thread.ts | 2 + .../src/cli/cmd/tui/util/voice-recorder.ts | 116 ++++++++ packages/opencode/src/cli/cmd/tui/worker.ts | 20 +- packages/opencode/src/config/config.ts | 1 + packages/opencode/src/server/routes/voice.ts | 265 ++++++++++++++++++ packages/opencode/src/server/server.ts | 9 + packages/opencode/src/voice/audio-buffer.ts | 62 ++++ packages/opencode/src/voice/event.ts | 13 + .../opencode/src/voice/parakeet-engine.ts | 221 +++++++++++++++ packages/opencode/src/voice/service.ts | 89 ++++++ packages/sdk/js/src/v2/gen/sdk.gen.ts | 85 ++++++ packages/sdk/js/src/v2/gen/types.gen.ts | 95 +++++++ 22 files changed, 1384 insertions(+), 17 deletions(-) create mode 100644 packages/opencode/python/voice/requirements.txt create mode 100644 packages/opencode/python/voice/test_voice.py create mode 100644 packages/opencode/python/voice/voice_server.py create mode 100644 packages/opencode/src/cli/cmd/tui/util/voice-recorder.ts create mode 100644 packages/opencode/src/server/routes/voice.ts create mode 100644 packages/opencode/src/voice/audio-buffer.ts create mode 100644 packages/opencode/src/voice/event.ts create mode 100644 packages/opencode/src/voice/parakeet-engine.ts create mode 100644 packages/opencode/src/voice/service.ts diff --git a/.gitignore b/.gitignore index 78a77f819828..27e845334d6a 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,9 @@ a.out target .scripts .direnv/ +packages/opencode/python/*/venv/ +packages/opencode/python/*/__pycache__/ +packages/opencode/python/*/*.pyc # Local dev files opencode-dev diff --git a/packages/opencode/python/voice/requirements.txt b/packages/opencode/python/voice/requirements.txt new file mode 100644 index 000000000000..4b452bf4d520 --- /dev/null +++ b/packages/opencode/python/voice/requirements.txt @@ -0,0 +1,6 @@ +# NeMo toolkit for ASR +nemo_toolkit[asr]>=2.4.0 + +# Core dependencies (usually installed with nemo_toolkit) +torch>=2.0.0 +torchaudio>=2.0.0 diff --git a/packages/opencode/python/voice/test_voice.py b/packages/opencode/python/voice/test_voice.py new file mode 100644 index 000000000000..fa5a14c1058e --- /dev/null +++ b/packages/opencode/python/voice/test_voice.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Test script to verify Parakeet transcription works""" +import sys +import os + +# Suppress NeMo logging +os.environ['NEMO_LOG_LEVEL'] = 'ERROR' + +print("Testing Parakeet transcription...", file=sys.stderr) + +try: + import nemo.collections.asr as nemo_asr + import torch + print(f"✓ Imports successful", file=sys.stderr) + print(f"✓ CUDA available: {torch.cuda.is_available()}", file=sys.stderr) + + # Redirect NeMo logs to stderr + old_stdout = sys.stdout + sys.stdout = sys.stderr + + print("Loading model...", file=sys.stderr) + model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v3") + model = 
model.cpu() + model.eval() + + # Restore stdout + sys.stdout = old_stdout + + print("✓ Model loaded successfully!", file=sys.stderr) + print("✓ Transcription service is ready to use", file=sys.stderr) + +except Exception as e: + print(f"✗ Error: {e}", file=sys.stderr) + sys.exit(1) diff --git a/packages/opencode/python/voice/voice_server.py b/packages/opencode/python/voice/voice_server.py new file mode 100644 index 000000000000..e170822f175b --- /dev/null +++ b/packages/opencode/python/voice/voice_server.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +""" +Parakeet TDT v3 Transcription Server +Reads audio chunks from stdin, outputs transcriptions to stdout +Keeps model loaded in memory for fast inference +""" + +import sys +import json +import base64 +import io +import tempfile +import os +import logging +import warnings +from pathlib import Path + +# Suppress all warnings +warnings.filterwarnings('ignore') + +# Suppress NeMo and other library logging +logging.getLogger('nemo_logger').setLevel(logging.CRITICAL) +logging.getLogger('nemo').setLevel(logging.CRITICAL) +logging.getLogger('lightning').setLevel(logging.CRITICAL) +logging.getLogger('pytorch_lightning').setLevel(logging.CRITICAL) +logging.getLogger('torch').setLevel(logging.CRITICAL) +logging.basicConfig(level=logging.CRITICAL) +os.environ['NEMO_LOG_LEVEL'] = 'CRITICAL' +os.environ['HYDRA_FULL_ERROR'] = '0' +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +try: + import nemo.collections.asr as nemo_asr + import torch +except ImportError as e: + print(json.dumps({"error": f"Failed to import dependencies: {e}"}), file=sys.stderr, flush=True) + sys.exit(1) + + +class TranscriptionServer: + def __init__(self, model_name="nvidia/parakeet-tdt-0.6b-v3", device=None): + self.model_name = model_name + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") + self.model = None + + def initialize(self): + try: + print(json.dumps({"status": "loading", "model": self.model_name, "device": self.device}), flush=True) + + # Redirect stdout at file descriptor level to suppress NeMo logs + stdout_fd = sys.stdout.fileno() + saved_stdout_fd = os.dup(stdout_fd) + devnull_fd = os.open(os.devnull, os.O_WRONLY) + + try: + # Redirect FD 1 (stdout) to /dev/null + os.dup2(devnull_fd, stdout_fd) + + self.model = nemo_asr.models.ASRModel.from_pretrained( + model_name=self.model_name + ) + + if self.device == "cuda" and torch.cuda.is_available(): + self.model = self.model.cuda() + else: + self.model = self.model.cpu() + + self.model.eval() + finally: + # Restore stdout file descriptor + os.dup2(saved_stdout_fd, stdout_fd) + os.close(saved_stdout_fd) + os.close(devnull_fd) + + print(json.dumps({"status": "ready", "device": self.device}), flush=True) + return True + + except Exception as e: + print(json.dumps({"status": "error", "message": str(e)}), flush=True) + return False + + def transcribe_audio(self, audio_data, timestamps=False): + try: + # Create temporary file for audio data + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: + tmp.write(audio_data) + tmp_path = tmp.name + + try: + # Call transcribe with minimal parameters + output = self.model.transcribe([tmp_path], batch_size=1, verbose=False) + + # Extract text - NeMo models typically return a list of strings + text = "" + if output: + if isinstance(output, list) and len(output) > 0: + # If it's a list of strings + if isinstance(output[0], str): + text = output[0] + # If it's a list of objects with text attribute + elif hasattr(output[0], 'text'): + text = 
str(output[0].text) + else: + # Try to convert to string + text = str(output[0]) + elif isinstance(output, str): + text = output + + result = { + "text": text, + } + + if timestamps and output and len(output) > 0 and hasattr(output[0], 'timestamp'): + try: + result["timestamps"] = { + "word": output[0].timestamp.get('word', []), + "segment": output[0].timestamp.get('segment', []), + } + except: + pass + + return result + finally: + # Clean up temp file + try: + os.unlink(tmp_path) + except: + pass + + except Exception as e: + return {"error": str(e)} + + def run(self): + if not self.initialize(): + return 1 + + # Main processing loop + for line in sys.stdin: + try: + line = line.strip() + if not line: + continue + + request = json.loads(line) + command = request.get("command") + + if command == "transcribe": + # Decode base64 audio + audio_base64 = request.get("audio") + if not audio_base64: + print(json.dumps({"error": "No audio data provided"}), flush=True) + continue + + audio_data = base64.b64decode(audio_base64) + timestamps = request.get("timestamps", False) + + # Transcribe + result = self.transcribe_audio(audio_data, timestamps) + + # Ensure result is JSON serializable + safe_result = { + "text": str(result.get("text", "")) if result.get("text") is not None else "", + } + if "timestamps" in result: + safe_result["timestamps"] = result["timestamps"] + + print(json.dumps(safe_result), flush=True) + + elif command == "ping": + print(json.dumps({"status": "alive"}), flush=True) + + elif command == "shutdown": + print(json.dumps({"status": "shutting_down"}), flush=True) + break + + else: + print(json.dumps({"error": f"Unknown command: {command}"}), flush=True) + + except json.JSONDecodeError as e: + print(json.dumps({"error": f"Invalid JSON: {e}"}), flush=True) + except Exception as e: + print(json.dumps({"error": f"Processing error: {e}"}), flush=True) + + return 0 + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Parakeet Transcription Server") + parser.add_argument("--model", default="nvidia/parakeet-tdt-0.6b-v3", help="Model name") + parser.add_argument("--device", choices=["cuda", "cpu"], help="Device to use") + + args = parser.parse_args() + + server = TranscriptionServer(model_name=args.model, device=args.device) + sys.exit(server.run()) diff --git a/packages/opencode/src/bus/index.ts b/packages/opencode/src/bus/index.ts index edb093f19747..a4a9055380df 100644 --- a/packages/opencode/src/bus/index.ts +++ b/packages/opencode/src/bus/index.ts @@ -50,14 +50,25 @@ export namespace Bus { type: def.type, }) const pending = [] - for (const key of [def.type, "*"]) { - const match = state().subscriptions.get(key) - for (const sub of match ?? []) { - pending.push(sub(payload)) + try { + for (const key of [def.type, "*"]) { + const match = state().subscriptions.get(key) + for (const sub of match ?? 
[]) { + pending.push(sub(payload)) + } } + } catch (e) { + // No instance context - skip instance-scoped subscriptions } + const directory = (() => { + try { + return Instance.directory + } catch (e) { + return "" + } + })() GlobalBus.emit("event", { - directory: Instance.directory, + directory, payload, }) return Promise.all(pending) diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx index c08fc99b6e34..57f7161567d1 100644 --- a/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx @@ -89,6 +89,27 @@ export function DialogStatus() { + + + Voice Input + + + • + + + Voice-to-text{" "} + + {sync.data.voice?.available ? "Ready (press \\ to record)" : "Loading model..."} + + + + + {sync.data.lsp.length > 0 && ( {sync.data.lsp.length} LSP Servers diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index e19c8b709824..097232d4a79b 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -31,6 +31,7 @@ import { DialogAlert } from "../../ui/dialog-alert" import { useToast } from "../../ui/toast" import { useKV } from "../../context/kv" import { useTextareaKeybindings } from "../textarea-keybindings" +import { VoiceRecorder, type VoiceRecorderStatus } from "@tui/util/voice-recorder" export type PromptProps = { sessionID?: string @@ -130,6 +131,18 @@ export function Prompt(props: PromptProps) { interrupt: 0, }) + // Voice recording state + const [voiceStatus, setVoiceStatus] = createSignal("idle") + let voiceRecorder: VoiceRecorder | null = null + + onMount(() => { + voiceRecorder = new VoiceRecorder(sdk.client) + }) + + onCleanup(() => { + voiceRecorder?.cancel() + }) + // Initialize agent/model/variant from last user message when session changes let syncedSessionID: string | undefined createEffect(() => { @@ -788,6 +801,66 @@ export function Prompt(props: PromptProps) { e.preventDefault() return } + + // Handle voice input - toggle recording on backslash + if (e.name === "\\") { + // Prevent default to avoid inserting backslash character + e.preventDefault() + + // Only allow voice input if service is available + if (!sync.data.voice?.available) { + return + } + + if (voiceStatus() === "idle" && voiceRecorder) { + // Start recording + try { + await voiceRecorder.startRecording() + setVoiceStatus(voiceRecorder.status) + } catch (err) { + toast.show({ + variant: "error", + message: `Failed to start recording: ${err instanceof Error ? err.message : String(err)}`, + duration: 3000, + }) + } + } else if (voiceStatus() === "recording" && voiceRecorder) { + // Stop recording and transcribe + setVoiceStatus("transcribing") + + try { + const text = await voiceRecorder.stopRecordingAndTranscribe() + setVoiceStatus(voiceRecorder.status) + + if (text) { + // Insert transcribed text at cursor position + input.insertText(text) + setTimeout(() => { + input.getLayoutNode().markDirty() + renderer.requestRender() + }, 0) + } else { + toast.show({ + variant: "warning", + message: "No speech detected", + duration: 3000, + }) + } + } catch (err) { + setVoiceStatus("error") + toast.show({ + variant: "error", + message: `Transcription failed: ${err instanceof Error ? 
err.message : String(err)}`, + duration: 5000, + }) + // Reset status after error + setTimeout(() => setVoiceStatus("idle"), 100) + } + } + + return + } + // Handle clipboard paste (Ctrl+V) - check for images first on Windows // This is needed because Windows terminal doesn't properly send image data // through bracketed paste, so we need to intercept the keypress and @@ -1075,6 +1148,21 @@ export function Prompt(props: PromptProps) { + + + Recording... (\ to stop) + + + + + Transcribing... + + + + + \ voice + + 0}> {keybind.print("variant_cycle")} variants diff --git a/packages/opencode/src/cli/cmd/tui/context/sync.tsx b/packages/opencode/src/cli/cmd/tui/context/sync.tsx index eb8ed2d9bbad..640e0e98ba83 100644 --- a/packages/opencode/src/cli/cmd/tui/context/sync.tsx +++ b/packages/opencode/src/cli/cmd/tui/context/sync.tsx @@ -17,6 +17,7 @@ import type { ProviderListResponse, ProviderAuthMethod, VcsInfo, + VoiceStatusResponse, } from "@opencode-ai/sdk/v2" import { createStore, produce, reconcile } from "solid-js/store" import { useSDK } from "@tui/context/sdk" @@ -71,6 +72,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ [key: string]: McpResource } formatter: FormatterStatus[] + voice: VoiceStatusResponse | undefined vcs: VcsInfo | undefined path: Path }>({ @@ -98,6 +100,7 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ mcp: {}, mcp_resource: {}, formatter: [], + voice: undefined, vcs: undefined, path: { state: "", config: "", worktree: "", directory: "" }, }) @@ -318,6 +321,20 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ break } + case "voice.updated": { + sdk.client.voice + .status() + .then((x) => { + if (x.data) { + setStore("voice", reconcile(x.data)) + } + }) + .catch((e) => { + Log.Default.error("failed to fetch voice status", { error: e }) + }) + break + } + case "vcs.branch.updated": { setStore("vcs", { branch: event.properties.branch }) break @@ -389,6 +406,14 @@ export const { use: useSync, provider: SyncProvider } = createSimpleContext({ sdk.client.mcp.status().then((x) => setStore("mcp", reconcile(x.data!))), sdk.client.experimental.resource.list().then((x) => setStore("mcp_resource", reconcile(x.data ?? 
{}))), sdk.client.formatter.status().then((x) => setStore("formatter", reconcile(x.data!))), + sdk.client.voice + .status() + .then((x) => { + if (x.data) setStore("voice", reconcile(x.data)) + }) + .catch(() => { + // Voice service might not be available - that's okay + }), sdk.client.session.status().then((x) => { setStore("session_status", reconcile(x.data!)) }), diff --git a/packages/opencode/src/cli/cmd/tui/routes/home.tsx b/packages/opencode/src/cli/cmd/tui/routes/home.tsx index 59923c69d94c..94be7856e2b6 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/home.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/home.tsx @@ -127,6 +127,21 @@ export function Home() { {connectedMcpCount()} MCP + + + + + + + + + + + + Voice + + + /status diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx index 8ace2fff3725..3408e7459934 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx @@ -82,6 +82,19 @@ export function Footer() { {mcp()} MCP + + + + + + + + + + + Voice + + /status diff --git a/packages/opencode/src/cli/cmd/tui/thread.ts b/packages/opencode/src/cli/cmd/tui/thread.ts index 05714268545b..e7f245cae4bd 100644 --- a/packages/opencode/src/cli/cmd/tui/thread.ts +++ b/packages/opencode/src/cli/cmd/tui/thread.ts @@ -133,6 +133,8 @@ export const TuiThreadCommand = cmd({ // Start HTTP server for external access const server = await client.call("server", networkOpts) url = server.url + customFetch = createWorkerFetch(client) + events = createEventSource(client) } else { // Use direct RPC communication (no HTTP) url = "http://opencode.internal" diff --git a/packages/opencode/src/cli/cmd/tui/util/voice-recorder.ts b/packages/opencode/src/cli/cmd/tui/util/voice-recorder.ts new file mode 100644 index 000000000000..c3be7f9c06aa --- /dev/null +++ b/packages/opencode/src/cli/cmd/tui/util/voice-recorder.ts @@ -0,0 +1,116 @@ +import { spawn, type ChildProcess } from "child_process" +import { tmpdir } from "os" +import { join } from "path" +import { unlinkSync } from "fs" +import type { OpencodeClient } from "@opencode-ai/sdk/v2" + +export type VoiceRecorderStatus = "idle" | "recording" | "transcribing" | "error" + +export class VoiceRecorder { + private process: ChildProcess | null = null + private tempFile: string | null = null + private client: OpencodeClient + status: VoiceRecorderStatus = "idle" + + constructor(client: OpencodeClient) { + this.client = client + } + + async startRecording(): Promise { + if (this.status !== "idle") { + throw new Error("Already recording or transcribing") + } + + this.tempFile = join(tmpdir(), `voice-input-${Date.now()}.wav`) + this.status = "recording" + + // Start sox recording process - will continue until stopped + this.process = spawn("sox", [ + "-d", // default input device + "-r", + "16000", // sample rate + "-c", + "1", // mono + "-b", + "16", // 16-bit + this.tempFile, + ]) + + this.process.on("error", (err) => { + this.status = "error" + console.error("Recording error:", err) + }) + } + + async stopRecordingAndTranscribe(): Promise { + if (this.status !== "recording" || !this.process || !this.tempFile) { + throw new Error("Not currently recording") + } + + const tempFile = this.tempFile + + // Stop recording by killing sox + this.process.kill("SIGTERM") + this.process = null + + // Wait a moment for file to be flushed + await new Promise((resolve) => setTimeout(resolve, 100)) + + this.status 
= "transcribing" + + try { + // Read audio file + const audioFile = Bun.file(tempFile) + const audioBuffer = await audioFile.arrayBuffer() + + if (audioBuffer.byteLength === 0) { + throw new Error("Audio file is empty - no audio was recorded") + } + + const audioBase64 = Buffer.from(audioBuffer).toString("base64") + + // Transcribe using SDK client + const result = await this.client.voice.transcribe({ + audio: audioBase64, + timestamps: false, + }) + + if (!result.data) { + throw new Error("Transcription failed: No data returned") + } + + const text = result.data.text?.trim() ?? "" + + // Clean up temp file + try { + unlinkSync(tempFile) + } catch { + // Ignore cleanup errors + } + + this.status = "idle" + this.tempFile = null + return text + } catch (err) { + this.status = "error" + // Don't delete file on error for debugging + throw err + } + } + + cancel(): void { + if (this.process) { + this.process.kill("SIGTERM") + this.process = null + } + if (this.tempFile) { + try { + unlinkSync(this.tempFile) + } catch { + // Ignore + } + this.tempFile = null + } + this.status = "idle" + } +} diff --git a/packages/opencode/src/cli/cmd/tui/worker.ts b/packages/opencode/src/cli/cmd/tui/worker.ts index e63f10ba80c9..0b468c22c47a 100644 --- a/packages/opencode/src/cli/cmd/tui/worker.ts +++ b/packages/opencode/src/cli/cmd/tui/worker.ts @@ -10,6 +10,7 @@ import { GlobalBus } from "@/bus/global" import { createOpencodeClient, type Event } from "@opencode-ai/sdk/v2" import type { BunWebSocketData } from "hono/bun" import { Flag } from "@/flag/flag" +import { VoiceService } from "@/voice/service" await Log.init({ print: process.argv.includes("--print-logs"), @@ -32,9 +33,11 @@ process.on("uncaughtException", (e) => { }) }) -// Subscribe to global events and forward them via RPC -GlobalBus.on("event", (event) => { - Rpc.emit("global.event", event) +// Initialize transcription service (non-blocking) +VoiceService.initialize().catch((error) => { + Log.Default.warn("voice service initialization failed", { + error: error instanceof Error ? 
error.message : String(error), + }) }) let server: Bun.Server | undefined @@ -65,14 +68,7 @@ const startEventStream = (directory: string) => { ;(async () => { while (!signal.aborted) { - const events = await Promise.resolve( - sdk.event.subscribe( - {}, - { - signal, - }, - ), - ).catch(() => undefined) + const events = await Promise.resolve(sdk.global.event({ parseAs: "stream" })).catch(() => undefined) if (!events) { await Bun.sleep(250) @@ -80,7 +76,7 @@ const startEventStream = (directory: string) => { } for await (const event of events.stream) { - Rpc.emit("event", event as Event) + Rpc.emit("event", event.payload as Event) } if (!signal.aborted) { diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 020e626cba89..771c8f34149a 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -705,6 +705,7 @@ export namespace Config { variant_cycle: z.string().optional().default("ctrl+t").describe("Cycle model variants"), input_clear: z.string().optional().default("ctrl+c").describe("Clear input field"), input_paste: z.string().optional().default("ctrl+v").describe("Paste from clipboard"), + voice_input: z.string().optional().default("\\").describe("Voice input (tap to record, tap to stop)"), input_submit: z.string().optional().default("return").describe("Submit input"), input_newline: z .string() diff --git a/packages/opencode/src/server/routes/voice.ts b/packages/opencode/src/server/routes/voice.ts new file mode 100644 index 000000000000..26a52582a0d8 --- /dev/null +++ b/packages/opencode/src/server/routes/voice.ts @@ -0,0 +1,265 @@ +import { Hono } from "hono" +import { describeRoute, validator, resolver } from "hono-openapi" +import { upgradeWebSocket } from "hono/bun" +import z from "zod" +import { VoiceService } from "../../voice/service" +import { AudioBuffer } from "../../voice/audio-buffer" +import { errors } from "../error" +import { lazy } from "../../util/lazy" + +export const VoiceRoutes = lazy(() => + new Hono() + .get( + "/status", + describeRoute({ + summary: "Get voice service status", + description: "Check if the voice service is available and ready", + operationId: "voice.status", + responses: { + 200: { + description: "Service status", + content: { + "application/json": { + schema: resolver( + z.object({ + available: z.boolean(), + config: z.object({ + enabled: z.boolean(), + model: z.string(), + device: z.enum(["cuda", "cpu", "auto"]), + maxDuration: z.number(), + chunkDuration: z.number(), + }), + }), + ), + }, + }, + }, + }, + }), + async (c) => { + return c.json({ + available: VoiceService.isAvailable(), + config: VoiceService.getConfig(), + }) + }, + ) + .post( + "/transcribe", + describeRoute({ + summary: "Transcribe audio file", + description: "Submit a base64-encoded audio file for transcription", + operationId: "voice.transcribe", + responses: { + 200: { + description: "Transcription result", + content: { + "application/json": { + schema: resolver( + z.object({ + text: z.string(), + timestamps: z + .object({ + word: z.array(z.object({ start: z.number(), end: z.number(), word: z.string() })), + segment: z.array(z.object({ start: z.number(), end: z.number(), segment: z.string() })), + }) + .optional(), + }), + ), + }, + }, + }, + ...errors(503), + }, + }), + validator( + "json", + z.object({ + audio: z.string().describe("Base64-encoded WAV audio data"), + timestamps: z.boolean().optional().default(false), + }), + ), + async (c) => { + if (!VoiceService.isAvailable()) { + return 
c.json({ error: "Transcription service not available" }, 503) + } + + const { audio, timestamps } = c.req.valid("json") + + try { + const audioBuffer = Buffer.from(audio, "base64") + const result = await VoiceService.transcribe(audioBuffer, timestamps) + return c.json(result) + } catch (error) { + console.error("[Transcription] Error:", error) + return c.json( + { + error: error instanceof Error ? error.message : "Transcription failed", + }, + 500, + ) + } + }, + ) + .get( + "/stream", + describeRoute({ + summary: "Stream audio for transcription", + description: "Establish a WebSocket connection to stream audio chunks and receive real-time transcriptions", + operationId: "voice.stream", + responses: { + 200: { + description: "WebSocket connection established", + content: { + "application/json": { + schema: resolver(z.boolean()), + }, + }, + }, + ...errors(503), + }, + }), + upgradeWebSocket(() => { + if (!VoiceService.isAvailable()) { + throw new Error("Transcription service not available") + } + + const buffer = new AudioBuffer(16000, 1) + const config = VoiceService.getConfig() + let isProcessing = false + let isClosed = false + + return { + onOpen(_event, ws) { + ws.send( + JSON.stringify({ + type: "ready", + maxDuration: config.maxDuration, + }), + ) + }, + + async onMessage(event, ws) { + if (isClosed || isProcessing) return + + try { + const data = event.data + + // Handle text messages (commands) + if (typeof data === "string") { + const msg = JSON.parse(data) + + if (msg.type === "finalize") { + // Transcribe whatever we have buffered + isProcessing = true + + if (!buffer.isEmpty()) { + try { + const wavBuffer = buffer.toWav() + const result = await VoiceService.transcribe(wavBuffer, msg.timestamps || false) + + ws.send( + JSON.stringify({ + type: "transcription", + text: result.text, + timestamps: result.timestamps, + final: true, + }), + ) + } catch (error) { + ws.send( + JSON.stringify({ + type: "error", + message: error instanceof Error ? error.message : "Transcription failed", + }), + ) + } + + buffer.clear() + } + + ws.send(JSON.stringify({ type: "done" })) + isProcessing = false + return + } + + if (msg.type === "clear") { + buffer.clear() + ws.send(JSON.stringify({ type: "cleared" })) + return + } + } + + // Handle binary audio data + if (data instanceof ArrayBuffer || Buffer.isBuffer(data)) { + const chunk = Buffer.isBuffer(data) ? 
data : Buffer.from(data) + buffer.append(chunk) + + // Check if we've exceeded max duration + if (buffer.getDuration() > config.maxDuration) { + ws.send( + JSON.stringify({ + type: "error", + message: `Maximum recording duration (${config.maxDuration}s) exceeded`, + }), + ) + ws.close() + return + } + + // Send progress updates + ws.send( + JSON.stringify({ + type: "progress", + duration: buffer.getDuration(), + }), + ) + + // Optional: Perform intermediate transcription every chunkDuration seconds + if (buffer.getDuration() >= config.chunkDuration && !isProcessing) { + isProcessing = true + + try { + const wavBuffer = buffer.toWav() + const result = await VoiceService.transcribe(wavBuffer, false) + + ws.send( + JSON.stringify({ + type: "transcription", + text: result.text, + final: false, + }), + ) + + // Keep the buffer for the final transcription + } catch (error) { + console.error("[Transcription] Intermediate transcription error:", error) + // Don't fail the whole session on intermediate errors + } + + isProcessing = false + } + } + } catch (error) { + console.error("[Transcription] Message handling error:", error) + ws.send( + JSON.stringify({ + type: "error", + message: error instanceof Error ? error.message : "Unknown error", + }), + ) + } + }, + + onClose() { + isClosed = true + buffer.clear() + }, + + onError(_ws, error) { + console.error("[Transcription] WebSocket error:", error) + }, + } + }), + ), +) diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts index 302c5376d290..0b1bb07f00de 100644 --- a/packages/opencode/src/server/server.ts +++ b/packages/opencode/src/server/server.ts @@ -39,6 +39,8 @@ import { errors } from "./error" import { QuestionRoutes } from "./routes/question" import { PermissionRoutes } from "./routes/permission" import { GlobalRoutes } from "./routes/global" +import { VoiceRoutes } from "./routes/voice" +import { VoiceService } from "../voice/service" import { MDNS } from "./mdns" // @ts-ignore This global is needed to prevent ai-sdk from logging warnings to stdout https://github.com/vercel/ai/blob/2dc67e0ef538307f21368db32d5a12345d98831b/packages/ai/src/logger/log-warnings.ts#L85 @@ -159,6 +161,7 @@ export namespace Server { .route("/permission", PermissionRoutes()) .route("/question", QuestionRoutes()) .route("/provider", ProviderRoutes()) + .route("/voice", VoiceRoutes()) .route("/", FileRoutes()) .route("/mcp", McpRoutes()) .route("/tui", TuiRoutes()) @@ -563,6 +566,11 @@ export namespace Server { export function listen(opts: { port: number; hostname: string; mdns?: boolean; cors?: string[] }) { _corsWhitelist = opts.cors ?? 
[] + // Initialize transcription service (non-blocking) + VoiceService.initialize().catch((error) => { + log.warn("transcription service initialization failed", { error }) + }) + const args = { hostname: opts.hostname, idleTimeout: 0, @@ -596,6 +604,7 @@ export namespace Server { const originalStop = server.stop.bind(server) server.stop = async (closeActiveConnections?: boolean) => { if (shouldPublishMDNS) MDNS.unpublish() + await VoiceService.shutdown() return originalStop(closeActiveConnections) } diff --git a/packages/opencode/src/voice/audio-buffer.ts b/packages/opencode/src/voice/audio-buffer.ts new file mode 100644 index 000000000000..53ba1f1bd8b0 --- /dev/null +++ b/packages/opencode/src/voice/audio-buffer.ts @@ -0,0 +1,62 @@ +export class AudioBuffer { + private chunks: Buffer[] = [] + private totalDuration = 0 // in seconds + private sampleRate: number + private channels: number + + constructor(sampleRate = 16000, channels = 1) { + this.sampleRate = sampleRate + this.channels = channels + } + + append(chunk: Buffer) { + this.chunks.push(chunk) + // Assuming 16-bit PCM audio + const samples = chunk.length / 2 + this.totalDuration += samples / this.sampleRate + } + + getDuration(): number { + return this.totalDuration + } + + getBuffer(): Buffer { + return Buffer.concat(this.chunks) + } + + clear() { + this.chunks = [] + this.totalDuration = 0 + } + + isEmpty(): boolean { + return this.chunks.length === 0 + } + + toWav(): Buffer { + const audioData = this.getBuffer() + const dataLength = audioData.length + const header = Buffer.alloc(44) + + // RIFF header + header.write("RIFF", 0) + header.writeUInt32LE(36 + dataLength, 4) + header.write("WAVE", 8) + + // fmt chunk + header.write("fmt ", 12) + header.writeUInt32LE(16, 16) // chunk size + header.writeUInt16LE(1, 20) // audio format (PCM) + header.writeUInt16LE(this.channels, 22) + header.writeUInt32LE(this.sampleRate, 24) + header.writeUInt32LE(this.sampleRate * this.channels * 2, 28) // byte rate + header.writeUInt16LE(this.channels * 2, 32) // block align + header.writeUInt16LE(16, 34) // bits per sample + + // data chunk + header.write("data", 36) + header.writeUInt32LE(dataLength, 40) + + return Buffer.concat([header, audioData]) + } +} diff --git a/packages/opencode/src/voice/event.ts b/packages/opencode/src/voice/event.ts new file mode 100644 index 000000000000..2ec4c4280e78 --- /dev/null +++ b/packages/opencode/src/voice/event.ts @@ -0,0 +1,13 @@ +import { BusEvent } from "@/bus/bus-event" +import z from "zod" + +export namespace Voice { + export const Event = { + Updated: BusEvent.define( + "voice.updated", + z.object({ + available: z.boolean(), + }), + ), + } +} diff --git a/packages/opencode/src/voice/parakeet-engine.ts b/packages/opencode/src/voice/parakeet-engine.ts new file mode 100644 index 000000000000..4a43d1879460 --- /dev/null +++ b/packages/opencode/src/voice/parakeet-engine.ts @@ -0,0 +1,221 @@ +import { spawn, type ChildProcess } from "child_process" +import { join, dirname } from "path" +import { fileURLToPath } from "url" +import { Bus } from "@/bus" +import { Voice } from "./event" +import { Log } from "@/util/log" + +const getCurrentDir = () => { + if (typeof __dirname !== "undefined") return __dirname + if (typeof import.meta.url !== "undefined") return dirname(fileURLToPath(import.meta.url)) + return process.cwd() +} + +export type VoiceResult = { + text: string + timestamps?: { + word: Array<{ start: number; end: number; word: string }> + segment: Array<{ start: number; end: number; segment: string }> + } 
+} + +export type VoiceStatus = "loading" | "ready" | "error" | "stopped" + +export class ParakeetEngine { + private process: ChildProcess | null = null + private status: VoiceStatus = "stopped" + private model: string + private device: "cuda" | "cpu" | "auto" + private readyPromise: Promise | null = null + private responseHandlers = new Map void>() + private requestId = 0 + private log = Log.create({ service: "voice.parakeet" }) + + constructor(model = "nvidia/parakeet-tdt-0.6b-v3", device: "cuda" | "cpu" | "auto" = "auto") { + this.model = model + this.device = device + } + + async start(): Promise { + if (this.process) { + return this.status === "ready" + } + + this.readyPromise = new Promise((resolve, reject) => { + const currentDir = getCurrentDir() + const pythonScript = join(currentDir, "../../python/voice/voice_server.py") + const pythonBinary = join(currentDir, "../../python/voice/venv/bin/python") + + const args = [pythonScript, "--model", this.model] + + if (this.device !== "auto") { + args.push("--device", this.device) + } + + this.process = spawn(pythonBinary, args, { + stdio: ["pipe", "pipe", "pipe"], + }) + + const state = { resolved: false } + + const timeout = setTimeout(() => { + if (!state.resolved) { + reject(new Error("Initialization timeout")) + } + }, 300000) + + this.process.stdout?.on("data", (data) => { + const lines = data.toString().split("\n") + for (const line of lines) { + if (!line.trim()) continue + + try { + const msg = JSON.parse(line) + + if (msg.status === "loading") { + const previousStatus = this.status + this.status = "loading" + if (previousStatus !== "loading") { + Bus.publish(Voice.Event.Updated, { available: false }) + } + continue + } + if (msg.status === "ready") { + const previousStatus = this.status + this.status = "ready" + if (!state.resolved) { + state.resolved = true + clearTimeout(timeout) + resolve() + } + if (previousStatus !== "ready") { + this.log.info("parakeet engine ready", { + model: this.model, + device: this.device, + }) + Bus.publish(Voice.Event.Updated, { available: true }) + } + continue + } + if (msg.status === "error") { + const previousStatus = this.status + this.status = "error" + if (!state.resolved) { + state.resolved = true + clearTimeout(timeout) + reject(new Error(msg.message)) + } + if (previousStatus !== "error") { + Bus.publish(Voice.Event.Updated, { available: false }) + } + continue + } + if (msg.text !== undefined || msg.error) { + const handler = this.responseHandlers.get(this.requestId - 1) + if (handler) { + handler(msg) + this.responseHandlers.delete(this.requestId - 1) + } + } + } catch (e) { + // Silently skip non-JSON lines + } + } + }) + + this.process.stderr?.on("data", (data) => { + // Suppress stderr - Python NeMo warnings are verbose + }) + + this.process.on("exit", (code) => { + this.status = "stopped" + this.process = null + Bus.publish(Voice.Event.Updated, { available: false }) + if (!state.resolved) { + clearTimeout(timeout) + reject(new Error(`Process exited with code ${code}`)) + } + }) + + this.process.on("error", (err) => { + this.status = "error" + Bus.publish(Voice.Event.Updated, { available: false }) + if (!state.resolved) { + clearTimeout(timeout) + reject(err) + } + }) + }) + + try { + await this.readyPromise + return true + } catch (error) { + return false + } + } + + async transcribe(audioBuffer: Buffer, timestamps = false): Promise { + if (!this.process || this.status !== "ready") { + throw new Error("Voice engine not ready") + } + + return new Promise((resolve, reject) => { + const 
id = this.requestId++ + const audioBase64 = audioBuffer.toString("base64") + + this.responseHandlers.set(id, (result) => { + if ("error" in result) { + reject(new Error(result.error)) + return + } + resolve(result) + }) + + const request = { + command: "transcribe", + audio: audioBase64, + timestamps, + } + + this.process!.stdin?.write(JSON.stringify(request) + "\n") + + setTimeout(() => { + if (this.responseHandlers.has(id)) { + this.responseHandlers.delete(id) + reject(new Error("Voice timeout")) + } + }, 30000) + }) + } + + async stop() { + if (!this.process) return + + this.process.stdin?.write(JSON.stringify({ command: "shutdown" }) + "\n") + + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL") + resolve() + }, 5000) + + this.process?.once("exit", () => { + clearTimeout(timeout) + resolve() + }) + }) + + this.process = null + this.status = "stopped" + Bus.publish(Voice.Event.Updated, { available: false }) + } + + getStatus(): VoiceStatus { + return this.status + } + + isReady(): boolean { + return this.status === "ready" + } +} diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts new file mode 100644 index 000000000000..ad76156514a5 --- /dev/null +++ b/packages/opencode/src/voice/service.ts @@ -0,0 +1,89 @@ +import { ParakeetEngine } from "./parakeet-engine" +import { Bus } from "@/bus" +import { Voice } from "./event" +import { Log } from "@/util/log" + +export type TranscriptionConfig = { + enabled: boolean + model: string + device: "cuda" | "cpu" | "auto" + maxDuration: number + chunkDuration: number +} + +export { Voice } + +class VoiceServiceImpl { + private engine: ParakeetEngine | null = null + private config: TranscriptionConfig = { + enabled: true, + model: "nvidia/parakeet-tdt-0.6b-v3", + device: "auto", + maxDuration: 300, + chunkDuration: 3, + } + private log = Log.create({ service: "voice" }) + + async initialize(config?: Partial): Promise { + this.config = { ...this.config, ...config } + + if (!this.config.enabled) { + this.log.info("voice service disabled by config") + Bus.publish(Voice.Event.Updated, { available: false }) + return false + } + + try { + this.engine = new ParakeetEngine(this.config.model, this.config.device) + const started = await this.engine.start() + + if (!started) { + this.config.enabled = false + this.log.warn("voice engine failed to start") + Bus.publish(Voice.Event.Updated, { available: false }) + return false + } + + this.log.info("voice service initialized successfully") + Bus.publish(Voice.Event.Updated, { available: true }) + return true + } catch (error) { + this.config.enabled = false + this.log.error("voice service initialization error", { + error: error instanceof Error ? 
error.message : String(error), + }) + Bus.publish(Voice.Event.Updated, { available: false }) + return false + } + } + + async transcribe(audioBuffer: Buffer, timestamps = false) { + if (!this.engine || !this.config.enabled) { + throw new Error("Transcription service not available") + } + + if (!this.engine.isReady()) { + throw new Error("Transcription engine not ready") + } + + return this.engine.transcribe(audioBuffer, timestamps) + } + + async shutdown() { + if (this.engine) { + await this.engine.stop() + this.engine = null + } + Bus.publish(Voice.Event.Updated, { available: false }) + } + + isAvailable(): boolean { + return this.config.enabled && this.engine !== null && this.engine.isReady() + } + + getConfig(): TranscriptionConfig { + return { ...this.config } + } +} + +export const VoiceService = new VoiceServiceImpl() diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index d39dd2b3485e..a80daf023acc 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -160,6 +160,9 @@ import type { TuiShowToastResponses, TuiSubmitPromptResponses, VcsGetResponses, + VoiceStatusResponses, + VoiceStreamResponses, + VoiceTranscribeResponses, WorktreeCreateErrors, WorktreeCreateInput, WorktreeCreateResponses, @@ -2059,6 +2062,83 @@ export class Provider extends HeyApiClient { } } +export class Voice extends HeyApiClient { + /** + * Get voice service status + * + * Check if the voice service is available and ready + */ + public status( + parameters?: { + directory?: string + }, + options?: Options, + ) { + const params = buildClientParams([parameters], [{ args: [{ in: "query", key: "directory" }] }]) + return (options?.client ?? this.client).get({ + url: "/voice/status", + ...options, + ...params, + }) + } + + /** + * Transcribe audio file + * + * Submit a base64-encoded audio file for transcription + */ + public transcribe( + parameters?: { + directory?: string + audio?: string + timestamps?: boolean + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "query", key: "directory" }, + { in: "body", key: "audio" }, + { in: "body", key: "timestamps" }, + ], + }, + ], + ) + return (options?.client ?? this.client).post({ + url: "/voice/transcribe", + ...options, + ...params, + headers: { + "Content-Type": "application/json", + ...options?.headers, + ...params.headers, + }, + }) + } + + /** + * Stream audio for transcription + * + * Establish a WebSocket connection to stream audio chunks and receive real-time transcriptions + */ + public stream( + parameters?: { + directory?: string + }, + options?: Options, + ) { + const params = buildClientParams([parameters], [{ args: [{ in: "query", key: "directory" }] }]) + return (options?.client ?? 
this.client).get({ + url: "/voice/stream", + ...options, + ...params, + }) + } +} + export class Find extends HeyApiClient { /** * Find text @@ -3213,6 +3293,11 @@ export class OpencodeClient extends HeyApiClient { return (this._provider ??= new Provider({ client: this.client })) } + private _voice?: Voice + get voice(): Voice { + return (this._voice ??= new Voice({ client: this.client })) + } + private _find?: Find get find(): Find { return (this._find ??= new Find({ client: this.client })) diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 2a63d7212150..6caba9db6033 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -881,6 +881,13 @@ export type EventWorktreeFailed = { } } +export type EventVoiceUpdated = { + type: "voice.updated" + properties: { + available: boolean + } +} + export type Event = | EventInstallationUpdated | EventInstallationUpdateAvailable @@ -924,6 +931,7 @@ export type Event = | EventPtyDeleted | EventWorktreeReady | EventWorktreeFailed + | EventVoiceUpdated export type GlobalEvent = { directory: string @@ -1149,6 +1157,10 @@ export type KeybindsConfig = { * Paste from clipboard */ input_paste?: string + /** + * Voice input (tap to record, tap to stop) + */ + voice_input?: string /** * Submit input */ @@ -4015,6 +4027,89 @@ export type ProviderOauthCallbackResponses = { export type ProviderOauthCallbackResponse = ProviderOauthCallbackResponses[keyof ProviderOauthCallbackResponses] +export type VoiceStatusData = { + body?: never + path?: never + query?: { + directory?: string + } + url: "/voice/status" +} + +export type VoiceStatusResponses = { + /** + * Service status + */ + 200: { + available: boolean + config: { + enabled: boolean + model: string + device: "cuda" | "cpu" | "auto" + maxDuration: number + chunkDuration: number + } + } +} + +export type VoiceStatusResponse = VoiceStatusResponses[keyof VoiceStatusResponses] + +export type VoiceTranscribeData = { + body?: { + /** + * Base64-encoded WAV audio data + */ + audio: string + timestamps?: boolean + } + path?: never + query?: { + directory?: string + } + url: "/voice/transcribe" +} + +export type VoiceTranscribeResponses = { + /** + * Transcription result + */ + 200: { + text: string + timestamps?: { + word: Array<{ + start: number + end: number + word: string + }> + segment: Array<{ + start: number + end: number + segment: string + }> + } + } +} + +export type VoiceTranscribeResponse = VoiceTranscribeResponses[keyof VoiceTranscribeResponses] + +export type VoiceStreamData = { + body?: never + path?: never + query?: { + directory?: string + } + url: "/voice/stream" +} + +export type VoiceStreamResponses = { + /** + * WebSocket connection established + */ + 200: boolean +} + +export type VoiceStreamResponse = VoiceStreamResponses[keyof VoiceStreamResponses] + export type FindTextData = { body?: never path?: never From aafd563a2a1277d7f72e5c775a2b8ed3c0d6b362 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:05:20 +0100 Subject: [PATCH 02/17] Switched to javascript to fit repo better. 
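
This commit replaces the Python/NeMo Parakeet sidecar (voice_server.py, parakeet-engine.ts) with an in-process TypeScript engine (whisper-engine.ts) built on @xenova/transformers and wavefile, both of which are added to bun.lock here. As a rough illustration of the approach only: the sketch below shows how a transformers.js transcription call can be wired up; the model id, function name, and channel handling are illustrative assumptions and not the actual contents of whisper-engine.ts.

    import { pipeline } from "@xenova/transformers"
    import { WaveFile } from "wavefile"

    // Hypothetical helper (assumed shape, not the real engine): decode a 16-bit PCM
    // WAV buffer into floating-point samples at 16 kHz, then run Whisper via
    // transformers.js. "Xenova/whisper-base.en" is an assumed model id.
    async function transcribeWav(buffer: Buffer): Promise<string> {
      const wav = new WaveFile(buffer)
      wav.toBitDepth("32f")     // transformers.js expects floating-point samples
      wav.toSampleRate(16000)   // Whisper models expect 16 kHz audio
      let samples = wav.getSamples()
      if (Array.isArray(samples)) samples = samples[0] // take the first channel if multi-channel
      const transcriber = await pipeline("automatic-speech-recognition", "Xenova/whisper-base.en")
      const output = await transcriber(samples)
      return (output as { text: string }).text
    }
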
--- .gitignore | 4 +- bun.lock | 125 +++-- package.json | 7 +- packages/opencode/package.json | 1 + .../opencode/python/voice/requirements.txt | 6 - packages/opencode/python/voice/test_voice.py | 34 -- .../opencode/python/voice/voice_server.py | 197 ------- packages/opencode/src/cli/cmd/tui/app.tsx | 12 + .../cli/cmd/tui/component/dialog-status.tsx | 42 +- .../cli/cmd/tui/component/dialog-voice.tsx | 147 ++++++ .../cli/cmd/tui/component/prompt/index.tsx | 8 +- .../src/cli/cmd/tui/context/local.tsx | 73 +++ .../opencode/src/cli/cmd/tui/routes/home.tsx | 4 +- .../src/cli/cmd/tui/routes/session/footer.tsx | 4 +- packages/opencode/src/cli/cmd/tui/worker.ts | 8 - packages/opencode/src/config/config.ts | 20 + packages/opencode/src/server/routes/voice.ts | 170 ++++-- packages/opencode/src/voice/event.ts | 29 +- .../opencode/src/voice/parakeet-engine.ts | 221 -------- packages/opencode/src/voice/service.ts | 204 ++++++-- packages/opencode/src/voice/whisper-engine.ts | 151 ++++++ packages/sdk/js/src/v2/gen/sdk.gen.ts | 114 +++- packages/sdk/js/src/v2/gen/types.gen.ts | 158 +++++- packages/sdk/openapi.json | 492 ++++++++++++++++++ 24 files changed, 1590 insertions(+), 641 deletions(-) delete mode 100644 packages/opencode/python/voice/requirements.txt delete mode 100644 packages/opencode/python/voice/test_voice.py delete mode 100644 packages/opencode/python/voice/voice_server.py create mode 100644 packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx delete mode 100644 packages/opencode/src/voice/parakeet-engine.ts create mode 100644 packages/opencode/src/voice/whisper-engine.ts diff --git a/.gitignore b/.gitignore index 27e845334d6a..a8dcd665f2d9 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,7 @@ a.out target .scripts .direnv/ -packages/opencode/python/*/venv/ -packages/opencode/python/*/__pycache__/ -packages/opencode/python/*/*.pyc + # Local dev files opencode-dev diff --git a/bun.lock b/bun.lock index d02afd42d3ef..4a29fcd81709 100644 --- a/bun.lock +++ b/bun.lock @@ -9,7 +9,9 @@ "@opencode-ai/plugin": "workspace:*", "@opencode-ai/script": "workspace:*", "@opencode-ai/sdk": "workspace:*", + "sharp": "0.34.5", "typescript": "catalog:", + "wavefile": "11.0.0", }, "devDependencies": { "@actions/artifact": "5.0.1", @@ -304,6 +306,7 @@ "@solid-primitives/event-bus": "1.1.2", "@solid-primitives/scheduled": "1.5.2", "@standard-schema/spec": "1.0.0", + "@xenova/transformers": "2.17.2", "@zip.js/zip.js": "2.7.62", "ai": "catalog:", "bonjour-service": "1.3.0", @@ -489,6 +492,7 @@ }, }, "trustedDependencies": [ + "protobufjs", "esbuild", "web-tree-sitter", "tree-sitter-bash", @@ -499,6 +503,7 @@ "overrides": { "@types/bun": "catalog:", "@types/node": "catalog:", + "sharp": "0.34.5", }, "catalog": { "@cloudflare/workers-types": "4.20251008.0", @@ -944,47 +949,61 @@ "@hono/zod-validator": ["@hono/zod-validator@0.4.2", "", { "peerDependencies": { "hono": ">=3.9.0", "zod": "^3.19.1" } }, "sha512-1rrlBg+EpDPhzOV4hT9pxr5+xDVmKuz6YJl+la7VCwK6ass5ldyKm5fD+umJdV2zhHD6jROoCCv8NbTwyfhT0g=="], + "@huggingface/jinja": ["@huggingface/jinja@0.2.2", "", {}, "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA=="], + "@ibm/plex": ["@ibm/plex@6.4.1", "", { "dependencies": { "@ibm/telemetry-js": "^1.5.1" } }, "sha512-fnsipQywHt3zWvsnlyYKMikcVI7E2fEwpiPnIHFqlbByXVfQfANAAeJk1IV4mNnxhppUIDlhU0TzwYwL++Rn2g=="], "@ibm/telemetry-js": ["@ibm/telemetry-js@1.10.2", "", { "bin": { "ibmtelemetry": "dist/collect.js" } }, 
"sha512-F8+/NNUwtm8BuFz18O9KPvIFTFDo8GUSoyhPxPjEpk7nEyEzWGfhIiEPhL00B2NdHRLDSljh3AiCfSnL/tutiQ=="], - "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.0.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ=="], + "@img/colour": ["@img/colour@1.0.0", "", {}, "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw=="], + + "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + + "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + + "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], + + "@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], - "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.0.4" }, "os": "darwin", "cpu": "x64" }, "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q=="], + "@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], - "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.0.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg=="], + "@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], - "@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.0.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ=="], + "@img/sharp-libvips-linux-ppc64": ["@img/sharp-libvips-linux-ppc64@1.2.4", "", { "os": "linux", "cpu": "ppc64" }, "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA=="], - "@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.0.5", "", { "os": "linux", "cpu": "arm" }, "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g=="], + "@img/sharp-libvips-linux-riscv64": ["@img/sharp-libvips-linux-riscv64@1.2.4", "", { "os": "linux", "cpu": "none" }, "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA=="], - "@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.0.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA=="], + "@img/sharp-libvips-linux-s390x": 
["@img/sharp-libvips-linux-s390x@1.2.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ=="], - "@img/sharp-libvips-linux-s390x": ["@img/sharp-libvips-linux-s390x@1.0.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA=="], + "@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], - "@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.0.4", "", { "os": "linux", "cpu": "x64" }, "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw=="], + "@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], - "@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.0.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA=="], + "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], - "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.0.4", "", { "os": "linux", "cpu": "x64" }, "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw=="], + "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], - "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.0.5" }, "os": "linux", "cpu": "arm" }, "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ=="], + "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], - "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA=="], + "@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="], - "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.0.4" }, "os": "linux", "cpu": "s390x" }, "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q=="], + "@img/sharp-linux-riscv64": ["@img/sharp-linux-riscv64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-riscv64": "1.2.4" }, "os": "linux", "cpu": "none" }, 
"sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw=="], - "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA=="], + "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="], - "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g=="], + "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], - "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw=="], + "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], - "@img/sharp-wasm32": ["@img/sharp-wasm32@0.33.5", "", { "dependencies": { "@emnapi/runtime": "^1.2.0" }, "cpu": "none" }, "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg=="], + "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], - "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.33.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ=="], + "@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="], - "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.33.5", "", { "os": "win32", "cpu": "x64" }, "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg=="], + "@img/sharp-win32-arm64": ["@img/sharp-win32-arm64@0.34.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g=="], + + "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="], + + "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], "@internationalized/date": ["@internationalized/date@3.10.0", "", { "dependencies": { 
"@swc/helpers": "^0.5.0" } }, "sha512-oxDR/NTEJ1k+UFVQElaNIk65E/Z83HK1z1WI3lQyhTtnNg4R5oVXaPzK3jcpKG8UHKDVuDQHzn+wsxSz8RP3aw=="], @@ -1380,6 +1399,26 @@ "@protobuf-ts/runtime-rpc": ["@protobuf-ts/runtime-rpc@2.11.1", "", { "dependencies": { "@protobuf-ts/runtime": "^2.11.1" } }, "sha512-4CqqUmNA+/uMz00+d3CYKgElXO9VrEbucjnBFEjqI4GuDrEQ32MaI3q+9qPBvIGOlL4PmHXrzM32vBPWRhQKWQ=="], + "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], + + "@protobufjs/base64": ["@protobufjs/base64@1.1.2", "", {}, "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="], + + "@protobufjs/codegen": ["@protobufjs/codegen@2.0.4", "", {}, "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="], + + "@protobufjs/eventemitter": ["@protobufjs/eventemitter@1.1.0", "", {}, "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="], + + "@protobufjs/fetch": ["@protobufjs/fetch@1.1.0", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" } }, "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ=="], + + "@protobufjs/float": ["@protobufjs/float@1.0.2", "", {}, "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="], + + "@protobufjs/inquire": ["@protobufjs/inquire@1.1.0", "", {}, "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="], + + "@protobufjs/path": ["@protobufjs/path@1.1.2", "", {}, "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="], + + "@protobufjs/pool": ["@protobufjs/pool@1.1.0", "", {}, "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="], + + "@protobufjs/utf8": ["@protobufjs/utf8@1.1.0", "", {}, "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="], + "@radix-ui/colors": ["@radix-ui/colors@1.0.1", "", {}, "sha512-xySw8f0ZVsAEP+e7iLl3EvcBXX7gsIlC1Zso/sPBW9gIWerBTgz6axrjU+MZ39wD+WFi5h5zdWpsg3+hwt2Qsg=="], "@radix-ui/primitive": ["@radix-ui/primitive@1.0.1", "", { "dependencies": { "@babel/runtime": "^7.13.10" } }, "sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw=="], @@ -1824,6 +1863,8 @@ "@types/katex": ["@types/katex@0.16.7", "", {}, "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ=="], + "@types/long": ["@types/long@4.0.2", "", {}, "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA=="], + "@types/luxon": ["@types/luxon@3.7.1", "", {}, "sha512-H3iskjFIAn5SlJU7OuxUmTEpebK6TKB8rxZShDslBMZJ5u9S//KM1sbdAisiSrqwLQncVjnpi2OK2J51h+4lsg=="], "@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="], @@ -1922,6 +1963,8 @@ "@webgpu/types": ["@webgpu/types@0.1.54", "", {}, "sha512-81oaalC8LFrXjhsczomEQ0u3jG+TqE6V9QHLA8GNZq/Rnot0KDugu3LhSYSlie8tSdooAN1Hov05asrUUp9qgg=="], + "@xenova/transformers": ["@xenova/transformers@2.17.2", "", { "dependencies": { "@huggingface/jinja": "^0.2.2", "onnxruntime-web": "1.14.0", "sharp": "^0.32.0" }, "optionalDependencies": { "onnxruntime-node": "1.14.0" } }, 
"sha512-lZmHqzrVIkSvZdKZEx7IYY51TK0WDrC8eR0c5IMnBsO8di8are1zzw8BlLhyO2TklZKLN5UffNGs1IJwT6oOqQ=="], + "@zip.js/zip.js": ["@zip.js/zip.js@2.7.62", "", {}, "sha512-OaLvZ8j4gCkLn048ypkZu29KX30r8/OfFF2w4Jo5WXFr+J04J+lzJ5TKZBVgFXhlvSkqNFQdfnY1Q8TMTCyBVA=="], "abbrev": ["abbrev@2.0.0", "", {}, "sha512-6/mh1E2u2YgEsCHdY0Yx5oW+61gZU+1vXaoiHHrpKeuRNNgFvS+/jrwHiQhB5apAf5oB7UB7E19ol2R2LKH8hQ=="], @@ -2176,14 +2219,10 @@ "collapse-white-space": ["collapse-white-space@2.1.0", "", {}, "sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw=="], - "color": ["color@4.2.3", "", { "dependencies": { "color-convert": "^2.0.1", "color-string": "^1.9.0" } }, "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A=="], - "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], "color-name": ["color-name@1.1.4", "", {}, "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="], - "color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="], - "color-support": ["color-support@1.1.3", "", { "bin": { "color-support": "bin.js" } }, "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg=="], "combined-stream": ["combined-stream@1.0.8", "", { "dependencies": { "delayed-stream": "~1.0.0" } }, "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg=="], @@ -2286,7 +2325,7 @@ "destroy": ["destroy@1.2.0", "", {}, "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg=="], - "detect-libc": ["detect-libc@1.0.3", "", { "bin": { "detect-libc": "./bin/detect-libc.js" } }, "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg=="], + "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], "detect-node-es": ["detect-node-es@1.1.0", "", {}, "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ=="], @@ -2478,6 +2517,8 @@ "finity": ["finity@0.5.4", "", {}, "sha512-3l+5/1tuw616Lgb0QBimxfdd2TqaDGpfCBpfX6EqtFmqUV3FtQnVEX4Aa62DagYEqnsTIjZcTfbq9msDbXYgyA=="], + "flatbuffers": ["flatbuffers@1.12.0", "", {}, "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="], + "flattie": ["flattie@1.1.1", "", {}, "sha512-9UbaD6XdAL97+k/n+N7JwX46K/M6Zc6KcFYskrYL8wbBV/Uyk0CTAMY0VT+qiK5PM7AIc9aTWYtq65U7T+aCNQ=="], "follow-redirects": ["follow-redirects@1.15.11", "", {}, "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ=="], @@ -2584,6 +2625,8 @@ "gtoken": ["gtoken@8.0.0", "", { "dependencies": { "gaxios": "^7.0.0", "jws": "^4.0.0" } }, "sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw=="], + "guid-typescript": ["guid-typescript@1.0.9", "", {}, "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="], + "h3": ["h3@2.0.1-rc.4", "", { "dependencies": { "rou3": "^0.7.8", "srvx": "^0.9.1" }, "peerDependencies": { "crossws": "^0.4.1" }, "optionalPeers": ["crossws"] }, 
"sha512-vZq8pEUp6THsXKXrUXX44eOqfChic2wVQ1GlSzQCBr7DeFBkfIZAo2WyNND4GSv54TAa0E4LYIK73WSPdgKUgw=="], "happy-dom": ["happy-dom@20.0.11", "", { "dependencies": { "@types/node": "^20.0.0", "@types/whatwg-mimetype": "^3.0.2", "whatwg-mimetype": "^3.0.0" } }, "sha512-QsCdAUHAmiDeKeaNojb1OHOPF7NjcWPBR7obdu3NwH2a/oyQaLg5d0aaCy/9My6CdPChYF07dvz5chaXBGaD4g=="], @@ -2718,8 +2761,6 @@ "is-array-buffer": ["is-array-buffer@3.0.5", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A=="], - "is-arrayish": ["is-arrayish@0.3.4", "", {}, "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA=="], - "is-async-function": ["is-async-function@2.1.1", "", { "dependencies": { "async-function": "^1.0.0", "call-bound": "^1.0.3", "get-proto": "^1.0.1", "has-tostringtag": "^1.0.2", "safe-regex-test": "^1.1.0" } }, "sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ=="], "is-bigint": ["is-bigint@1.1.0", "", { "dependencies": { "has-bigints": "^1.0.2" } }, "sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ=="], @@ -3196,6 +3237,14 @@ "oniguruma-to-es": ["oniguruma-to-es@4.3.4", "", { "dependencies": { "oniguruma-parser": "^0.12.1", "regex": "^6.0.1", "regex-recursion": "^6.0.2" } }, "sha512-3VhUGN3w2eYxnTzHn+ikMI+fp/96KoRSVK9/kMTcFqj1NRDh2IhQCKvYxDnWePKRXY/AqH+Fuiyb7VHSzBjHfA=="], + "onnx-proto": ["onnx-proto@4.0.4", "", { "dependencies": { "protobufjs": "^6.8.8" } }, "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA=="], + + "onnxruntime-common": ["onnxruntime-common@1.14.0", "", {}, "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew=="], + + "onnxruntime-node": ["onnxruntime-node@1.14.0", "", { "dependencies": { "onnxruntime-common": "~1.14.0" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w=="], + + "onnxruntime-web": ["onnxruntime-web@1.14.0", "", { "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", "long": "^4.0.0", "onnx-proto": "^4.0.4", "onnxruntime-common": "~1.14.0", "platform": "^1.3.6" } }, "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw=="], + "open": ["open@10.1.2", "", { "dependencies": { "default-browser": "^5.2.1", "define-lazy-prop": "^3.0.0", "is-inside-container": "^1.0.0", "is-wsl": "^3.1.0" } }, "sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw=="], "openai": ["openai@5.11.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-+AuTc5pVjlnTuA9zvn8rA/k+1RluPIx9AD4eDcnutv6JNwHHZxIhkFy+tmMKCvmMFDQzfA/r1ujvPWB19DQkYg=="], @@ -3306,6 +3355,8 @@ "planck": ["planck@1.4.2", "", { "peerDependencies": { "stage-js": "^1.0.0-alpha.12" } }, "sha512-mNbhnV3g8X2rwGxzcesjmN8BDA6qfXgQxXVMkWau9MCRlQY0RLNEkyHlVp6yFy/X6qrzAXyNONCnZ1cGDLrNew=="], + "platform": ["platform@1.3.6", "", {}, "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="], + "playwright": ["playwright@1.57.0", "", { "dependencies": { "playwright-core": "1.57.0" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, 
"sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw=="], "playwright-core": ["playwright-core@1.57.0", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ=="], @@ -3352,6 +3403,8 @@ "proto-list": ["proto-list@1.2.4", "", {}, "sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA=="], + "protobufjs": ["protobufjs@6.11.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/long": "^4.0.1", "@types/node": ">=13.7.0", "long": "^4.0.0" }, "bin": { "pbjs": "bin/pbjs", "pbts": "bin/pbts" } }, "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw=="], + "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="], @@ -3534,7 +3587,7 @@ "setprototypeof": ["setprototypeof@1.2.0", "", {}, "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="], - "sharp": ["sharp@0.33.5", "", { "dependencies": { "color": "^4.2.3", "detect-libc": "^2.0.3", "semver": "^7.6.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.33.5", "@img/sharp-darwin-x64": "0.33.5", "@img/sharp-libvips-darwin-arm64": "1.0.4", "@img/sharp-libvips-darwin-x64": "1.0.4", "@img/sharp-libvips-linux-arm": "1.0.5", "@img/sharp-libvips-linux-arm64": "1.0.4", "@img/sharp-libvips-linux-s390x": "1.0.4", "@img/sharp-libvips-linux-x64": "1.0.4", "@img/sharp-libvips-linuxmusl-arm64": "1.0.4", "@img/sharp-libvips-linuxmusl-x64": "1.0.4", "@img/sharp-linux-arm": "0.33.5", "@img/sharp-linux-arm64": "0.33.5", "@img/sharp-linux-s390x": "0.33.5", "@img/sharp-linux-x64": "0.33.5", "@img/sharp-linuxmusl-arm64": "0.33.5", "@img/sharp-linuxmusl-x64": "0.33.5", "@img/sharp-wasm32": "0.33.5", "@img/sharp-win32-ia32": "0.33.5", "@img/sharp-win32-x64": "0.33.5" } }, "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw=="], + "sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", "@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": 
"0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="], "shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="], @@ -3558,8 +3611,6 @@ "signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], - "simple-swizzle": ["simple-swizzle@0.2.4", "", { "dependencies": { "is-arrayish": "^0.3.1" } }, "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw=="], - "simple-xml-to-json": ["simple-xml-to-json@1.2.3", "", {}, "sha512-kWJDCr9EWtZ+/EYYM5MareWj2cRnZGF93YDNpH4jQiHB+hBIZnfPFSQiVMzZOdk+zXWqTZ/9fTeQNu2DqeiudA=="], "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="], @@ -3884,6 +3935,8 @@ "vscode-languageserver-types": ["vscode-languageserver-types@3.17.5", "", {}, "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg=="], + "wavefile": ["wavefile@11.0.0", "", { "bin": { "wavefile": "./bin/wavefile.js" } }, "sha512-/OBiAALgWU24IG7sC84cDO/KfFuvajWc5Uec0oV2zrpOOZZDgGdOwHwgEzOrwh8jkubBk7PtZfQBIcI1OaE5Ng=="], + "web-namespaces": ["web-namespaces@2.0.1", "", {}, "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="], "web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="], @@ -4222,6 +4275,8 @@ "@oslojs/jwt/@oslojs/encoding": ["@oslojs/encoding@0.4.1", "", {}, "sha512-hkjo6MuIK/kQR5CrGNdAPZhS01ZCXuWDRJ187zh6qqF2+yMHZpD9fAYpX8q2bOO6Ryhl3XpCT6kUX76N8hhm4Q=="], + "@parcel/watcher/detect-libc": ["detect-libc@1.0.3", "", { "bin": { "detect-libc": "./bin/detect-libc.js" } }, "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg=="], + "@pierre/diffs/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], "@pierre/diffs/@shikijs/engine-javascript": ["@shikijs/engine-javascript@3.19.0", "", { "dependencies": { "@shikijs/types": "3.19.0", "@shikijs/vscode-textmate": "^10.0.2", "oniguruma-to-es": "^4.3.4" } }, "sha512-ZfWJNm2VMhKkQIKT9qXbs76RRcT0SF/CAvEz0+RkpUDAoDaCx0uFdCGzSRiD9gSlhm6AHkjdieOBJMaO2eC1rQ=="], @@ -4272,8 +4327,6 @@ "@solidjs/start/vite": ["vite@7.1.10", "", { "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", "picomatch": "^4.0.3", "postcss": "^8.5.6", "rollup": "^4.43.0", "tinyglobby": "^0.2.15" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^20.19.0 || >=22.12.0", "jiti": ">=1.21.0", "less": "^4.0.0", "lightningcss": "^1.21.0", "sass": "^1.70.0", "sass-embedded": "^1.70.0", "stylus": ">=0.54.8", "sugarss": "^5.0.0", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, 
"sha512-CmuvUBzVJ/e3HGxhg6cYk88NGgTnBoOo7ogtfJJ0fefUWAxN/WDSUa50o+oVBxuIhO8FoEZW0j2eW7sfjs5EtA=="], - "@tailwindcss/oxide/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], - "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.7.1", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" }, "bundled": true }, "sha512-o1uhUASyo921r2XtHYOHy7gdkGLge8ghBEQHMWmyJFoXlpU58kIrhhN3w26lpQb6dspetweapMn2CSNwQ8I4wg=="], "@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.7.1", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA=="], @@ -4392,8 +4445,6 @@ "lazystream/readable-stream": ["readable-stream@2.3.8", "", { "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", "isarray": "~1.0.0", "process-nextick-args": "~2.0.0", "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" } }, "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA=="], - "lightningcss/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], - "md-to-react-email/marked": ["marked@7.0.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-t8eP0dXRJMtMvBojtkcsA7n48BkauktUKzfkPSCq85ZMTJ0v76Rke4DYz01omYpPTUh4p/f7HePgRo3ebG8+QQ=="], "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], @@ -4414,6 +4465,8 @@ "nypm/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], + "onnxruntime-web/long": ["long@4.0.0", "", {}, "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="], + "opencode/@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.57", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-DREpYqW2pylgaj69gZ+K8u92bo9DaMgFdictYnY+IwYeY3bawQ4zI7l/o1VkDsBDljAx8iYz5lPURwVZNu+Xpg=="], "opencode/@ai-sdk/openai": ["@ai-sdk/openai@2.0.89", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-4+qWkBCbL9HPKbgrUO/F2uXZ8GqrYxHa8SWEYIzxEJ9zvWw3ISr3t1/27O1i8MGSym+PzEyHBT48EV4LAwWaEw=="], @@ -4452,6 +4505,8 @@ "prompts/kleur": ["kleur@3.0.3", "", {}, "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w=="], + "protobufjs/long": ["long@4.0.0", "", {}, "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="], + "raw-body/iconv-lite": ["iconv-lite@0.4.24", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3" } }, "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA=="], "readable-stream/buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="], @@ -4472,10 +4527,6 @@ "send/mime": ["mime@1.6.0", "", { "bin": { "mime": "cli.js" } }, "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="], - 
"sharp/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], - - "sharp/semver": ["semver@7.7.2", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA=="], - "shiki/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], "shiki/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="], diff --git a/package.json b/package.json index 4267ef645661..9507c61a59d4 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,9 @@ "@opencode-ai/plugin": "workspace:*", "@opencode-ai/script": "workspace:*", "@opencode-ai/sdk": "workspace:*", - "typescript": "catalog:" + "sharp": "0.34.5", + "typescript": "catalog:", + "wavefile": "11.0.0" }, "repository": { "type": "git", @@ -96,7 +98,8 @@ ], "overrides": { "@types/bun": "catalog:", - "@types/node": "catalog:" + "@types/node": "catalog:", + "sharp": "0.34.5" }, "patchedDependencies": { "ghostty-web@0.3.0": "patches/ghostty-web@0.3.0.patch" diff --git a/packages/opencode/package.json b/packages/opencode/package.json index 68be07e0c14b..dcceec5e5fd9 100644 --- a/packages/opencode/package.json +++ b/packages/opencode/package.json @@ -89,6 +89,7 @@ "@solid-primitives/event-bus": "1.1.2", "@solid-primitives/scheduled": "1.5.2", "@standard-schema/spec": "1.0.0", + "@xenova/transformers": "2.17.2", "@zip.js/zip.js": "2.7.62", "ai": "catalog:", "bonjour-service": "1.3.0", diff --git a/packages/opencode/python/voice/requirements.txt b/packages/opencode/python/voice/requirements.txt deleted file mode 100644 index 4b452bf4d520..000000000000 --- a/packages/opencode/python/voice/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -# NeMo toolkit for ASR -nemo_toolkit[asr]>=2.4.0 - -# Core dependencies (usually installed with nemo_toolkit) -torch>=2.0.0 -torchaudio>=2.0.0 diff --git a/packages/opencode/python/voice/test_voice.py b/packages/opencode/python/voice/test_voice.py deleted file mode 100644 index fa5a14c1058e..000000000000 --- a/packages/opencode/python/voice/test_voice.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -"""Test script to verify Parakeet transcription works""" -import sys -import os - -# Suppress NeMo logging -os.environ['NEMO_LOG_LEVEL'] = 'ERROR' - -print("Testing Parakeet transcription...", file=sys.stderr) - -try: - import nemo.collections.asr as nemo_asr - import torch - print(f"✓ Imports successful", file=sys.stderr) - print(f"✓ CUDA available: {torch.cuda.is_available()}", file=sys.stderr) - - # Redirect NeMo logs to stderr - old_stdout = sys.stdout - sys.stdout = sys.stderr - - print("Loading model...", file=sys.stderr) - model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v3") - model = model.cpu() - model.eval() - - # Restore stdout - sys.stdout = old_stdout - - print("✓ Model loaded successfully!", file=sys.stderr) - print("✓ Transcription service is ready to use", file=sys.stderr) - -except Exception as e: - print(f"✗ Error: {e}", file=sys.stderr) - sys.exit(1) diff --git a/packages/opencode/python/voice/voice_server.py 
b/packages/opencode/python/voice/voice_server.py deleted file mode 100644 index e170822f175b..000000000000 --- a/packages/opencode/python/voice/voice_server.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python3 -""" -Parakeet TDT v3 Transcription Server -Reads audio chunks from stdin, outputs transcriptions to stdout -Keeps model loaded in memory for fast inference -""" - -import sys -import json -import base64 -import io -import tempfile -import os -import logging -import warnings -from pathlib import Path - -# Suppress all warnings -warnings.filterwarnings('ignore') - -# Suppress NeMo and other library logging -logging.getLogger('nemo_logger').setLevel(logging.CRITICAL) -logging.getLogger('nemo').setLevel(logging.CRITICAL) -logging.getLogger('lightning').setLevel(logging.CRITICAL) -logging.getLogger('pytorch_lightning').setLevel(logging.CRITICAL) -logging.getLogger('torch').setLevel(logging.CRITICAL) -logging.basicConfig(level=logging.CRITICAL) -os.environ['NEMO_LOG_LEVEL'] = 'CRITICAL' -os.environ['HYDRA_FULL_ERROR'] = '0' -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - -try: - import nemo.collections.asr as nemo_asr - import torch -except ImportError as e: - print(json.dumps({"error": f"Failed to import dependencies: {e}"}), file=sys.stderr, flush=True) - sys.exit(1) - - -class TranscriptionServer: - def __init__(self, model_name="nvidia/parakeet-tdt-0.6b-v3", device=None): - self.model_name = model_name - self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") - self.model = None - - def initialize(self): - try: - print(json.dumps({"status": "loading", "model": self.model_name, "device": self.device}), flush=True) - - # Redirect stdout at file descriptor level to suppress NeMo logs - stdout_fd = sys.stdout.fileno() - saved_stdout_fd = os.dup(stdout_fd) - devnull_fd = os.open(os.devnull, os.O_WRONLY) - - try: - # Redirect FD 1 (stdout) to /dev/null - os.dup2(devnull_fd, stdout_fd) - - self.model = nemo_asr.models.ASRModel.from_pretrained( - model_name=self.model_name - ) - - if self.device == "cuda" and torch.cuda.is_available(): - self.model = self.model.cuda() - else: - self.model = self.model.cpu() - - self.model.eval() - finally: - # Restore stdout file descriptor - os.dup2(saved_stdout_fd, stdout_fd) - os.close(saved_stdout_fd) - os.close(devnull_fd) - - print(json.dumps({"status": "ready", "device": self.device}), flush=True) - return True - - except Exception as e: - print(json.dumps({"status": "error", "message": str(e)}), flush=True) - return False - - def transcribe_audio(self, audio_data, timestamps=False): - try: - # Create temporary file for audio data - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: - tmp.write(audio_data) - tmp_path = tmp.name - - try: - # Call transcribe with minimal parameters - output = self.model.transcribe([tmp_path], batch_size=1, verbose=False) - - # Extract text - NeMo models typically return a list of strings - text = "" - if output: - if isinstance(output, list) and len(output) > 0: - # If it's a list of strings - if isinstance(output[0], str): - text = output[0] - # If it's a list of objects with text attribute - elif hasattr(output[0], 'text'): - text = str(output[0].text) - else: - # Try to convert to string - text = str(output[0]) - elif isinstance(output, str): - text = output - - result = { - "text": text, - } - - if timestamps and output and len(output) > 0 and hasattr(output[0], 'timestamp'): - try: - result["timestamps"] = { - "word": output[0].timestamp.get('word', []), - "segment": 
output[0].timestamp.get('segment', []), - } - except: - pass - - return result - finally: - # Clean up temp file - try: - os.unlink(tmp_path) - except: - pass - - except Exception as e: - return {"error": str(e)} - - def run(self): - if not self.initialize(): - return 1 - - # Main processing loop - for line in sys.stdin: - try: - line = line.strip() - if not line: - continue - - request = json.loads(line) - command = request.get("command") - - if command == "transcribe": - # Decode base64 audio - audio_base64 = request.get("audio") - if not audio_base64: - print(json.dumps({"error": "No audio data provided"}), flush=True) - continue - - audio_data = base64.b64decode(audio_base64) - timestamps = request.get("timestamps", False) - - # Transcribe - result = self.transcribe_audio(audio_data, timestamps) - - # Ensure result is JSON serializable - safe_result = { - "text": str(result.get("text", "")) if result.get("text") is not None else "", - } - if "timestamps" in result: - safe_result["timestamps"] = result["timestamps"] - - print(json.dumps(safe_result), flush=True) - - elif command == "ping": - print(json.dumps({"status": "alive"}), flush=True) - - elif command == "shutdown": - print(json.dumps({"status": "shutting_down"}), flush=True) - break - - else: - print(json.dumps({"error": f"Unknown command: {command}"}), flush=True) - - except json.JSONDecodeError as e: - print(json.dumps({"error": f"Invalid JSON: {e}"}), flush=True) - except Exception as e: - print(json.dumps({"error": f"Processing error: {e}"}), flush=True) - - return 0 - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Parakeet Transcription Server") - parser.add_argument("--model", default="nvidia/parakeet-tdt-0.6b-v3", help="Model name") - parser.add_argument("--device", choices=["cuda", "cpu"], help="Device to use") - - args = parser.parse_args() - - server = TranscriptionServer(model_name=args.model, device=args.device) - sys.exit(server.run()) diff --git a/packages/opencode/src/cli/cmd/tui/app.tsx b/packages/opencode/src/cli/cmd/tui/app.tsx index 10d7a25f88f9..2b45103080d1 100644 --- a/packages/opencode/src/cli/cmd/tui/app.tsx +++ b/packages/opencode/src/cli/cmd/tui/app.tsx @@ -12,6 +12,7 @@ import { SyncProvider, useSync } from "@tui/context/sync" import { LocalProvider, useLocal } from "@tui/context/local" import { DialogModel, useConnected } from "@tui/component/dialog-model" import { DialogMcp } from "@tui/component/dialog-mcp" +import { DialogVoice } from "@tui/component/dialog-voice" import { DialogStatus } from "@tui/component/dialog-status" import { DialogThemeList } from "@tui/component/dialog-theme-list" import { DialogHelp } from "./ui/dialog-help" @@ -393,6 +394,17 @@ function App() { dialog.replace(() => ) }, }, + { + title: "Voice Settings", + value: "voice.settings", + category: "Agent", + slash: { + name: "voice", + }, + onSelect: () => { + dialog.replace(() => ) + }, + }, { title: "Agent cycle", value: "agent.cycle", diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx index 57f7161567d1..a5bbd2787054 100644 --- a/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-status.tsx @@ -89,27 +89,6 @@ export function DialogStatus() { - - - Voice Input - - - • - - - Voice-to-text{" "} - - {sync.data.voice?.available ? 
"Ready (press \\ to record)" : "Loading model..."} - - - - - {sync.data.lsp.length > 0 && ( {sync.data.lsp.length} LSP Servers @@ -180,6 +159,27 @@ export function DialogStatus() { + + + Voice + + + • + + + {sync.data.voice?.status === "ready" ? sync.data.voice.model : "..."}{" "} + + {sync.data.voice?.status === "ready" ? "Ready" : "Loading model..."} + + + + + ) } diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx new file mode 100644 index 000000000000..8db96ff213f8 --- /dev/null +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx @@ -0,0 +1,147 @@ +import { createMemo, createSignal, For, Show } from "solid-js" +import { useLocal } from "@tui/context/local" +import { useSync } from "@tui/context/sync" +import { DialogSelect, type DialogSelectRef, type DialogSelectOption } from "@tui/ui/dialog-select" +import { useTheme } from "../context/theme" +import { Keybind } from "@/util/keybind" +import { TextAttributes } from "@opentui/core" +import { useSDK } from "@tui/context/sdk" + +function Status(props: { status: string; loading: boolean }) { + const { theme } = useTheme() + if (props.loading) { + return ⋯ Loading + } + if (props.status === "ready") { + return ✓ Ready + } + if (props.status === "downloading") { + return ⬇ Downloading + } + if (props.status === "loading") { + return ⋯ Loading + } + if (props.status === "disabled") { + return ○ Disabled + } + if (props.status === "idle") { + return ○ Idle + } + return ✗ Error +} + +export function DialogVoice() { + const local = useLocal() + const sync = useSync() + const sdk = useSDK() + const [, setRef] = createSignal>() + const [loading, setLoading] = createSignal(null) + + const voiceData = () => sync.data.voice + const voiceStatus = () => (voiceData() as any)?.status ?? "disabled" + const voiceModel = () => (voiceData() as any)?.model + + const options = createMemo(() => { + const loadingModel = loading() + const currentStatus = voiceStatus() + + const result: DialogSelectOption[] = [] + + // Toggle voice on/off + result.push({ + value: "toggle", + title: currentStatus === "disabled" ? "Enable Voice" : "Disable Voice", + description: "Toggle voice transcription", + footer: , + category: "Control", + }) + + // Model selection + const models = [ + { name: "tiny", size: "75 MB", description: "Fast, lower accuracy" }, + { name: "base", size: "142 MB", description: "Balanced speed and accuracy" }, + { name: "small", size: "466 MB", description: "Better accuracy, slower" }, + ] + + for (const model of models) { + const isCurrent = voiceModel() === model.name + result.push({ + value: `model:${model.name}`, + title: `${model.name} (${model.size})`, + description: model.description + (isCurrent ? " (current)" : ""), + footer: loadingModel === model.name ? ⋯ Loading : isCurrent ? ✓ Active : undefined, + category: "Models", + }) + } + + return result + }) + + const keybinds = createMemo(() => [ + { + keybind: Keybind.parse("space")[0], + title: "select", + onTrigger: async (option: DialogSelectOption) => { + if (loading() !== null) return + + const value = option.value + + if (value === "toggle") { + setLoading("toggle") + try { + const currentStatus = voiceStatus() + if (currentStatus === "disabled" || currentStatus === "idle") { + await sdk.client.voice.enable() + const currentModel = voiceModel() + local.voice.set({ + enabled: true, + model: currentModel ? 
(currentModel as "tiny" | "base" | "small") : "base", + }) + } else { + await sdk.client.voice.disable() + local.voice.set({ enabled: false }) + } + const statusRes = await sdk.client.voice.status() + if (statusRes.data) { + sync.set("voice", statusRes.data) + } + } catch (error) { + console.error("Failed to toggle voice:", error) + } finally { + setLoading(null) + } + return + } + + if (value.startsWith("model:")) { + const modelName = value.replace("model:", "") as "tiny" | "base" | "small" + setLoading(modelName) + try { + await sdk.client.voice.switchModel({ model: modelName }) + local.voice.set({ model: modelName }) + const statusRes = await sdk.client.voice.status() + if (statusRes.data) { + sync.set("voice", statusRes.data) + } + } catch (error) { + console.error("Failed to switch voice model:", error) + } finally { + setLoading(null) + } + } + }, + }, + ]) + + return ( + { + // Don't close on select, only on escape + }} + /> + ) +} diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index 097232d4a79b..0824bfd1701d 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -808,7 +808,7 @@ export function Prompt(props: PromptProps) { e.preventDefault() // Only allow voice input if service is available - if (!sync.data.voice?.available) { + if (sync.data.voice?.status !== "ready") { return } @@ -1148,17 +1148,17 @@ export function Prompt(props: PromptProps) { - + Recording... (\ to stop) - + Transcribing... - + \ voice diff --git a/packages/opencode/src/cli/cmd/tui/context/local.tsx b/packages/opencode/src/cli/cmd/tui/context/local.tsx index d058ce54fb36..17456d77d691 100644 --- a/packages/opencode/src/cli/cmd/tui/context/local.tsx +++ b/packages/opencode/src/cli/cmd/tui/context/local.tsx @@ -374,6 +374,78 @@ export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ }, } + const voice = iife(() => { + const [voiceStore, setVoiceStore] = createStore<{ + ready: boolean + enabled: boolean + model: "tiny" | "base" | "small" + }>({ + ready: false, + enabled: false, + model: "base", + }) + + const file = Bun.file(path.join(Global.Path.state, "voice.json")) + const state = { + pending: false, + } + + function save() { + if (!voiceStore.ready) { + state.pending = true + return + } + state.pending = false + Bun.write( + file, + JSON.stringify({ + enabled: voiceStore.enabled, + model: voiceStore.model, + }), + ) + } + + file + .json() + .then((x) => { + if (typeof x.enabled === "boolean") setVoiceStore("enabled", x.enabled) + if (x.model === "tiny" || x.model === "base" || x.model === "small") setVoiceStore("model", x.model) + }) + .catch(() => {}) + .finally(() => { + setVoiceStore("ready", true) + if (state.pending) save() + }) + + return { + enabled() { + return voiceStore.enabled + }, + model() { + return voiceStore.model + }, + setEnabled(enabled: boolean) { + batch(() => { + setVoiceStore("enabled", enabled) + save() + }) + }, + setModel(model: "tiny" | "base" | "small") { + batch(() => { + setVoiceStore("model", model) + save() + }) + }, + set(opts: { enabled?: boolean; model?: "tiny" | "base" | "small" }) { + batch(() => { + if (opts.enabled !== undefined) setVoiceStore("enabled", opts.enabled) + if (opts.model !== undefined) setVoiceStore("model", opts.model) + save() + }) + }, + } + }) + // Automatically update model when agent changes createEffect(() => { const value = agent.current() @@ 
-396,6 +468,7 @@ export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ model, agent, mcp, + voice, } return result }, diff --git a/packages/opencode/src/cli/cmd/tui/routes/home.tsx b/packages/opencode/src/cli/cmd/tui/routes/home.tsx index 94be7856e2b6..3acb40a96808 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/home.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/home.tsx @@ -131,10 +131,10 @@ export function Home() { - + - + diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx index 3408e7459934..b84663d2966b 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx @@ -85,10 +85,10 @@ export function Footer() { - + - + diff --git a/packages/opencode/src/cli/cmd/tui/worker.ts b/packages/opencode/src/cli/cmd/tui/worker.ts index 0b468c22c47a..e5f282e2b553 100644 --- a/packages/opencode/src/cli/cmd/tui/worker.ts +++ b/packages/opencode/src/cli/cmd/tui/worker.ts @@ -10,7 +10,6 @@ import { GlobalBus } from "@/bus/global" import { createOpencodeClient, type Event } from "@opencode-ai/sdk/v2" import type { BunWebSocketData } from "hono/bun" import { Flag } from "@/flag/flag" -import { VoiceService } from "@/voice/service" await Log.init({ print: process.argv.includes("--print-logs"), @@ -33,13 +32,6 @@ process.on("uncaughtException", (e) => { }) }) -// Initialize transcription service (non-blocking) -VoiceService.initialize().catch((error) => { - Log.Default.warn("voice service initialization failed", { - error: error instanceof Error ? error.message : String(error), - }) -}) - let server: Bun.Server | undefined const eventStream = { diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 771c8f34149a..3929cebc757c 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -483,6 +483,25 @@ export namespace Config { export const Mcp = z.discriminatedUnion("type", [McpLocal, McpRemote]) export type Mcp = z.infer + export const Voice = z + .object({ + enabled: z.boolean().optional().describe("Enable or disable voice transcription"), + model: z + .enum(["tiny", "base", "small"]) + .optional() + .default("base") + .describe("Whisper model size: tiny (75MB), base (142MB), or small (466MB)"), + device: z + .enum(["cpu", "gpu", "auto"]) + .optional() + .default("auto") + .describe("Device to run the model on: cpu, gpu, or auto"), + }) + .meta({ + ref: "VoiceConfig", + }) + export type Voice = z.infer + export const PermissionAction = z.enum(["ask", "allow", "deny"]).meta({ ref: "PermissionActionConfig", }) @@ -980,6 +999,7 @@ export namespace Config { ) .optional() .describe("MCP (Model Context Protocol) server configurations"), + voice: Voice.optional().describe("Voice transcription configuration"), formatter: z .union([ z.literal(false), diff --git a/packages/opencode/src/server/routes/voice.ts b/packages/opencode/src/server/routes/voice.ts index 26a52582a0d8..5ba935ed72c9 100644 --- a/packages/opencode/src/server/routes/voice.ts +++ b/packages/opencode/src/server/routes/voice.ts @@ -2,7 +2,7 @@ import { Hono } from "hono" import { describeRoute, validator, resolver } from "hono-openapi" import { upgradeWebSocket } from "hono/bun" import z from "zod" -import { VoiceService } from "../../voice/service" +import { VoiceService, Voice } from "../../voice/service" import { AudioBuffer } from "../../voice/audio-buffer" import { 
errors } from "../error" import { lazy } from "../../util/lazy" @@ -13,23 +13,99 @@ export const VoiceRoutes = lazy(() => "/status", describeRoute({ summary: "Get voice service status", - description: "Check if the voice service is available and ready", + description: "Check the current status of the voice transcription service", operationId: "voice.status", responses: { 200: { description: "Service status", + content: { + "application/json": { + schema: resolver(Voice.Status), + }, + }, + }, + }, + }), + async (c) => { + return c.json(VoiceService.getStatus()) + }, + ) + .post( + "/enable", + describeRoute({ + summary: "Enable voice transcription", + description: "Enable voice transcription with optional model selection", + operationId: "voice.enable", + responses: { + 200: { + description: "Enable result", content: { "application/json": { schema: resolver( z.object({ - available: z.boolean(), - config: z.object({ - enabled: z.boolean(), - model: z.string(), - device: z.enum(["cuda", "cpu", "auto"]), - maxDuration: z.number(), - chunkDuration: z.number(), - }), + success: z.boolean(), + }), + ), + }, + }, + }, + }, + }), + validator( + "json", + z.object({ + model: z.enum(["tiny", "base", "small"]).optional(), + }), + ), + async (c) => { + const { model } = c.req.valid("json") + const success = await VoiceService.enable(model) + return c.json({ success }) + }, + ) + .post( + "/disable", + describeRoute({ + summary: "Disable voice transcription", + description: "Disable voice transcription service", + operationId: "voice.disable", + responses: { + 200: { + description: "Disabled successfully", + content: { + "application/json": { + schema: resolver(z.object({ success: z.boolean() })), + }, + }, + }, + }, + }), + async (c) => { + await VoiceService.disable() + return c.json({ success: true }) + }, + ) + .get( + "/models", + describeRoute({ + summary: "List available models", + description: "Get list of available Whisper models", + operationId: "voice.models", + responses: { + 200: { + description: "Available models", + content: { + "application/json": { + schema: resolver( + z.object({ + available: z.array( + z.object({ + name: z.enum(["tiny", "base", "small"]), + size: z.string(), + }), + ), + downloaded: z.array(z.enum(["tiny", "base", "small"])), + current: z.enum(["tiny", "base", "small"]), }), ), }, @@ -38,10 +114,43 @@ export const VoiceRoutes = lazy(() => }, }), async (c) => { - return c.json({ - available: VoiceService.isAvailable(), - config: VoiceService.getConfig(), - }) + const available = await VoiceService.getAvailableModels() + const downloaded = await VoiceService.getDownloadedModels() + const current = VoiceService.getCurrentModel() + return c.json({ available, downloaded, current }) + }, + ) + .post( + "/switch-model", + describeRoute({ + summary: "Switch to a different model", + description: "Switch the voice transcription model", + operationId: "voice.switchModel", + responses: { + 200: { + description: "Model switch result", + content: { + "application/json": { + schema: resolver( + z.object({ + success: z.boolean(), + }), + ), + }, + }, + }, + }, + }), + validator( + "json", + z.object({ + model: z.enum(["tiny", "base", "small"]), + }), + ), + async (c) => { + const { model } = c.req.valid("json") + const success = await VoiceService.switchModel(model) + return c.json({ success }) }, ) .post( @@ -58,11 +167,13 @@ export const VoiceRoutes = lazy(() => schema: resolver( z.object({ text: z.string(), - timestamps: z - .object({ - word: z.array(z.object({ start: z.number(), 
end: z.number(), word: z.string() })),
-                        segment: z.array(z.object({ start: z.number(), end: z.number(), segment: z.string() })),
-                      })
+                      chunks: z
+                        .array(
+                          z.object({
+                            text: z.string(),
+                            timestamp: z.tuple([z.number(), z.number()]),
+                          }),
+                        )
                        .optional(),
                    }),
                  ),
@@ -80,8 +191,8 @@
      }),
    ),
    async (c) => {
-      if (!VoiceService.isAvailable()) {
-        return c.json({ error: "Transcription service not available" }, 503)
+      if (!VoiceService.isReady()) {
+        return c.json({ error: "Transcription service not ready" }, 503)
      }

      const { audio, timestamps } = c.req.valid("json")
@@ -120,12 +231,13 @@
      },
    }),
    upgradeWebSocket(() => {
-      if (!VoiceService.isAvailable()) {
-        throw new Error("Transcription service not available")
+      if (!VoiceService.isReady()) {
+        throw new Error("Transcription service not ready")
      }

      const buffer = new AudioBuffer(16000, 1)
-      const config = VoiceService.getConfig()
+      const maxDuration = 300
+      const chunkDuration = 3
      let isProcessing = false
      let isClosed = false
@@ -134,7 +246,7 @@
          ws.send(
            JSON.stringify({
              type: "ready",
-              maxDuration: config.maxDuration,
+              maxDuration,
            }),
          )
        },
@@ -162,7 +274,7 @@
                JSON.stringify({
                  type: "transcription",
                  text: result.text,
-                  timestamps: result.timestamps,
+                  chunks: result.chunks,
                  final: true,
                }),
              )
@@ -196,11 +308,11 @@
            buffer.append(chunk)

            // Check if we've exceeded max duration
-            if (buffer.getDuration() > config.maxDuration) {
+            if (buffer.getDuration() > maxDuration) {
              ws.send(
                JSON.stringify({
                  type: "error",
-                  message: `Maximum recording duration (${config.maxDuration}s) exceeded`,
+                  message: `Maximum recording duration (${maxDuration}s) exceeded`,
                }),
              )
              ws.close()
@@ -216,7 +328,7 @@
            )

            // Optional: Perform intermediate transcription every chunkDuration seconds
-            if (buffer.getDuration() >= config.chunkDuration && !isProcessing) {
+            if (buffer.getDuration() >= chunkDuration && !isProcessing) {
              isProcessing = true
              try {
diff --git a/packages/opencode/src/voice/event.ts b/packages/opencode/src/voice/event.ts
index 2ec4c4280e78..08ad2f9b6783 100644
--- a/packages/opencode/src/voice/event.ts
+++ b/packages/opencode/src/voice/event.ts
@@ -2,11 +2,38 @@ import { BusEvent } from "@/bus/bus-event"
 import z from "zod"

 export namespace Voice {
+  export const Status = z
+    .discriminatedUnion("status", [
+      z.object({
+        status: z.literal("disabled"),
+      }),
+      z.object({
+        status: z.literal("idle"),
+      }),
+      z.object({
+        status: z.literal("downloading"),
+        progress: z.number(),
+      }),
+      z.object({
+        status: z.literal("loading"),
+      }),
+      z.object({
+        status: z.literal("ready"),
+        model: z.string(),
+      }),
+      z.object({
+        status: z.literal("error"),
+        error: z.string(),
+      }),
+    ])
+    .meta({ ref: "VoiceStatus" })
+  export type Status = z.infer<typeof Status>
+
   export const Event = {
     Updated: BusEvent.define(
       "voice.updated",
       z.object({
-        available: z.boolean(),
+        status: Status,
       }),
     ),
   }
diff --git a/packages/opencode/src/voice/parakeet-engine.ts b/packages/opencode/src/voice/parakeet-engine.ts
deleted file mode 100644
index 4a43d1879460..000000000000
--- a/packages/opencode/src/voice/parakeet-engine.ts
+++ /dev/null
@@ -1,221 +0,0 @@
-import { spawn, type ChildProcess } from "child_process"
-import { join, dirname } from "path"
-import { fileURLToPath } from "url"
-import { Bus } from "@/bus"
-import { Voice } from "./event"
-import { Log } from
"@/util/log" - -const getCurrentDir = () => { - if (typeof __dirname !== "undefined") return __dirname - if (typeof import.meta.url !== "undefined") return dirname(fileURLToPath(import.meta.url)) - return process.cwd() -} - -export type VoiceResult = { - text: string - timestamps?: { - word: Array<{ start: number; end: number; word: string }> - segment: Array<{ start: number; end: number; segment: string }> - } -} - -export type VoiceStatus = "loading" | "ready" | "error" | "stopped" - -export class ParakeetEngine { - private process: ChildProcess | null = null - private status: VoiceStatus = "stopped" - private model: string - private device: "cuda" | "cpu" | "auto" - private readyPromise: Promise | null = null - private responseHandlers = new Map void>() - private requestId = 0 - private log = Log.create({ service: "voice.parakeet" }) - - constructor(model = "nvidia/parakeet-tdt-0.6b-v3", device: "cuda" | "cpu" | "auto" = "auto") { - this.model = model - this.device = device - } - - async start(): Promise { - if (this.process) { - return this.status === "ready" - } - - this.readyPromise = new Promise((resolve, reject) => { - const currentDir = getCurrentDir() - const pythonScript = join(currentDir, "../../python/voice/voice_server.py") - const pythonBinary = join(currentDir, "../../python/voice/venv/bin/python") - - const args = [pythonScript, "--model", this.model] - - if (this.device !== "auto") { - args.push("--device", this.device) - } - - this.process = spawn(pythonBinary, args, { - stdio: ["pipe", "pipe", "pipe"], - }) - - const state = { resolved: false } - - const timeout = setTimeout(() => { - if (!state.resolved) { - reject(new Error("Initialization timeout")) - } - }, 300000) - - this.process.stdout?.on("data", (data) => { - const lines = data.toString().split("\n") - for (const line of lines) { - if (!line.trim()) continue - - try { - const msg = JSON.parse(line) - - if (msg.status === "loading") { - const previousStatus = this.status - this.status = "loading" - if (previousStatus !== "loading") { - Bus.publish(Voice.Event.Updated, { available: false }) - } - continue - } - if (msg.status === "ready") { - const previousStatus = this.status - this.status = "ready" - if (!state.resolved) { - state.resolved = true - clearTimeout(timeout) - resolve() - } - if (previousStatus !== "ready") { - this.log.info("parakeet engine ready", { - model: this.model, - device: this.device, - }) - Bus.publish(Voice.Event.Updated, { available: true }) - } - continue - } - if (msg.status === "error") { - const previousStatus = this.status - this.status = "error" - if (!state.resolved) { - state.resolved = true - clearTimeout(timeout) - reject(new Error(msg.message)) - } - if (previousStatus !== "error") { - Bus.publish(Voice.Event.Updated, { available: false }) - } - continue - } - if (msg.text !== undefined || msg.error) { - const handler = this.responseHandlers.get(this.requestId - 1) - if (handler) { - handler(msg) - this.responseHandlers.delete(this.requestId - 1) - } - } - } catch (e) { - // Silently skip non-JSON lines - } - } - }) - - this.process.stderr?.on("data", (data) => { - // Suppress stderr - Python NeMo warnings are verbose - }) - - this.process.on("exit", (code) => { - this.status = "stopped" - this.process = null - Bus.publish(Voice.Event.Updated, { available: false }) - if (!state.resolved) { - clearTimeout(timeout) - reject(new Error(`Process exited with code ${code}`)) - } - }) - - this.process.on("error", (err) => { - this.status = "error" - Bus.publish(Voice.Event.Updated, { 
available: false }) - if (!state.resolved) { - clearTimeout(timeout) - reject(err) - } - }) - }) - - try { - await this.readyPromise - return true - } catch (error) { - return false - } - } - - async transcribe(audioBuffer: Buffer, timestamps = false): Promise { - if (!this.process || this.status !== "ready") { - throw new Error("Voice engine not ready") - } - - return new Promise((resolve, reject) => { - const id = this.requestId++ - const audioBase64 = audioBuffer.toString("base64") - - this.responseHandlers.set(id, (result) => { - if ("error" in result) { - reject(new Error(result.error)) - return - } - resolve(result) - }) - - const request = { - command: "transcribe", - audio: audioBase64, - timestamps, - } - - this.process!.stdin?.write(JSON.stringify(request) + "\n") - - setTimeout(() => { - if (this.responseHandlers.has(id)) { - this.responseHandlers.delete(id) - reject(new Error("Voice timeout")) - } - }, 30000) - }) - } - - async stop() { - if (!this.process) return - - this.process.stdin?.write(JSON.stringify({ command: "shutdown" }) + "\n") - - await new Promise((resolve) => { - const timeout = setTimeout(() => { - this.process?.kill("SIGKILL") - resolve() - }, 5000) - - this.process?.once("exit", () => { - clearTimeout(timeout) - resolve() - }) - }) - - this.process = null - this.status = "stopped" - Bus.publish(Voice.Event.Updated, { available: false }) - } - - getStatus(): VoiceStatus { - return this.status - } - - isReady(): boolean { - return this.status === "ready" - } -} diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index ad76156514a5..12caa16d7b1e 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -1,88 +1,190 @@ -import { ParakeetEngine } from "./parakeet-engine" +import { WhisperEngine, type WhisperModelSize } from "./whisper-engine" import { Bus } from "@/bus" import { Voice } from "./event" import { Log } from "@/util/log" - -export type TranscriptionConfig = { - enabled: boolean - model: string - device: "cuda" | "cpu" | "auto" - maxDuration: number - chunkDuration: number -} +import { Global } from "@/global" +import path from "path" +import { Config } from "@/config/config" export { Voice } class VoiceServiceImpl { - private engine: ParakeetEngine | null = null - private config: TranscriptionConfig = { - enabled: true, - model: "nvidia/parakeet-tdt-0.6b-v3", - device: "auto", - maxDuration: 300, - chunkDuration: 3, - } + private engine: WhisperEngine | null = null private log = Log.create({ service: "voice" }) + private currentModel: WhisperModelSize = "base" + private enabled = false + + private publishStatus() { + const status = (() => { + if (!this.enabled) return { status: "disabled" as const } + if (!this.engine) return { status: "idle" as const } + + const engineStatus = this.engine.getStatus() + if (engineStatus === "idle") return { status: "idle" as const } + if (engineStatus === "downloading") { + return { status: "downloading" as const, progress: this.engine.getDownloadProgress() } + } + if (engineStatus === "loading") return { status: "loading" as const } + if (engineStatus === "ready") return { status: "ready" as const, model: this.currentModel } + return { status: "error" as const, error: "Engine failed to initialize" } + })() + + Bus.publish(Voice.Event.Updated, { status }) + } - async initialize(config?: Partial): Promise { - this.config = { ...this.config, ...config } + async initialize(): Promise { + const cfg = await Config.get() + const file = 
Bun.file(path.join(Global.Path.state, "voice.json")) + const local = await file.json().catch(() => ({})) - if (!this.config.enabled) { - this.log.info("voice service disabled by config") - Bus.publish(Voice.Event.Updated, { available: false }) + this.log.info("voice initialization", { local, config: cfg.voice }) + + this.enabled = local.enabled ?? cfg.voice?.enabled ?? false + this.currentModel = local.model ?? cfg.voice?.model ?? "base" + + this.log.info("voice enabled state", { enabled: this.enabled, model: this.currentModel }) + + this.publishStatus() + + if (!this.enabled) { + this.log.info("voice service disabled") + return + } + + this.log.info("voice service initialized", { model: this.currentModel, enabled: this.enabled }) + + await this.enable(this.currentModel) + } + + async enable(model?: WhisperModelSize): Promise { + if (model) { + this.currentModel = model + } + + this.enabled = true + this.publishStatus() + + if (this.engine) { + return this.engine.isReady() + } + + this.log.info("enabling voice engine", { model: this.currentModel }) + this.engine = new WhisperEngine(this.currentModel, "auto") + this.publishStatus() + + const started = await this.engine.start() + this.publishStatus() + + if (!started) { + this.log.warn("voice engine failed to start") return false } - try { - this.engine = new ParakeetEngine(this.config.model, this.config.device) - const started = await this.engine.start() + this.log.info("voice service enabled successfully") + return true + } - if (!started) { - this.config.enabled = false - this.log.warn("voice engine failed to start") - Bus.publish(Voice.Event.Updated, { available: false }) - return false - } + async disable(): Promise { + this.enabled = false + if (this.engine) { + await this.engine.stop() + this.engine = null + } + this.publishStatus() + this.log.info("voice service disabled") + } - this.log.info("voice service initialized successfully") - Bus.publish(Voice.Event.Updated, { available: true }) + async switchModel(model: WhisperModelSize): Promise { + if (model === this.currentModel && this.engine?.isReady()) { return true - } catch (error) { - this.config.enabled = false - this.log.error("voice service initialization error", { - error: error instanceof Error ? 
error.message : String(error), - }) - Bus.publish(Voice.Event.Updated, { available: false }) - return false } + + this.log.info("switching voice model", { from: this.currentModel, to: model }) + this.currentModel = model + + if (this.engine) { + await this.engine.stop() + this.engine = null + } + + if (!this.enabled) { + return true + } + + return this.enable(model) } async transcribe(audioBuffer: Buffer, timestamps = false) { - if (!this.engine || !this.config.enabled) { - throw new Error("Transcription service not available") + if (!this.enabled) { + throw new Error("Voice transcription is disabled") + } + + if (!this.engine) { + const started = await this.enable() + if (!started || !this.engine) { + throw new Error("Failed to start voice engine") + } } if (!this.engine.isReady()) { - throw new Error("Transcription engine not ready") + throw new Error("Voice engine not ready") } return this.engine.transcribe(audioBuffer, timestamps) } async shutdown() { - if (this.engine) { - await this.engine.stop() - this.engine = null + await this.disable() + } + + isEnabled(): boolean { + return this.enabled + } + + isReady(): boolean { + return this.enabled && this.engine !== null && this.engine.isReady() + } + + getStatus(): Voice.Status { + if (!this.enabled) return { status: "disabled" } + if (!this.engine) return { status: "idle" } + + const engineStatus = this.engine.getStatus() + if (engineStatus === "idle") return { status: "idle" } + if (engineStatus === "downloading") { + return { status: "downloading", progress: this.engine.getDownloadProgress() } } - Bus.publish(Voice.Event.Updated, { available: false }) + if (engineStatus === "loading") return { status: "loading" } + if (engineStatus === "ready") return { status: "ready", model: this.currentModel } + return { status: "error", error: "Engine failed to initialize" } + } + + getCurrentModel(): WhisperModelSize { + return this.currentModel } - isAvailable(): boolean { - return this.config.enabled && this.engine !== null && this.engine.isReady() + async getAvailableModels(): Promise> { + return [ + { name: "tiny", size: "75 MB" }, + { name: "base", size: "142 MB" }, + { name: "small", size: "466 MB" }, + ] } - getConfig(): TranscriptionConfig { - return { ...this.config } + async getDownloadedModels(): Promise { + const cacheDir = path.join(Global.Path.cache, "voice-models") + const downloaded: WhisperModelSize[] = [] + + const models: WhisperModelSize[] = ["tiny", "base", "small"] + for (const model of models) { + const modelPath = path.join(cacheDir, `Xenova_whisper-${model}.en`) + const exists = await Bun.file(path.join(modelPath, "config.json")).exists() + if (exists) { + downloaded.push(model) + } + } + + return downloaded } } diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts new file mode 100644 index 000000000000..adea0a1d6be7 --- /dev/null +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -0,0 +1,151 @@ +import { pipeline, type PipelineType } from "@xenova/transformers" +import { Log } from "@/util/log" +import { Global } from "@/global" +import path from "path" +import fs from "fs/promises" +import os from "os" +import { WaveFile } from "wavefile" +import { exec } from "child_process" +import { promisify } from "util" + +const execAsync = promisify(exec) + +export type WhisperModelSize = "tiny" | "base" | "small" + +export type WhisperEngineStatus = "idle" | "downloading" | "loading" | "ready" | "error" + +export class WhisperEngine { + private transcriber: any = null + 
private status: WhisperEngineStatus = "idle" + private log = Log.create({ service: "voice-whisper" }) + private downloadProgress = 0 + + constructor( + private modelSize: WhisperModelSize = "base", + private device: "cpu" | "gpu" | "auto" = "auto", + ) {} + + async start(): Promise { + if (this.status === "ready") return true + if (this.status === "downloading" || this.status === "loading") return false + + this.status = "downloading" + this.log.info("initializing whisper engine", { modelSize: this.modelSize, device: this.device }) + + const modelId = `Xenova/whisper-${this.modelSize}.en` + const cacheDir = path.join(Global.Path.cache, "voice-models") + + try { + this.status = "loading" + + process.env.ORT_LOGGING_LEVEL = "4" + + const originalStderrWrite = process.stderr.write.bind(process.stderr) + let stderrBuffer = "" + + process.stderr.write = ((chunk: any): boolean => { + const str = chunk.toString() + stderrBuffer += str + return true + }) as any + + try { + this.transcriber = await pipeline("automatic-speech-recognition", modelId, { + quantized: true, + device: this.device === "auto" ? undefined : this.device, + cache_dir: cacheDir, + progress_callback: (progress: any) => { + if (progress.status === "downloading") { + const percent = progress.progress ? Math.round(progress.progress) : 0 + if (percent !== this.downloadProgress) { + this.downloadProgress = percent + this.log.debug("model download progress", { percent }) + } + } + }, + } as any) + } finally { + process.stderr.write = originalStderrWrite + } + + this.status = "ready" + this.log.info("whisper engine ready", { modelSize: this.modelSize }) + return true + } catch (error) { + this.status = "error" + this.log.error("failed to initialize whisper engine", { + error: error instanceof Error ? error.message : String(error), + }) + return false + } + } + + async transcribe( + audioBuffer: Buffer, + timestamps = false, + ): Promise<{ text: string; chunks?: Array<{ text: string; timestamp: [number, number] }> }> { + if (!this.isReady()) { + throw new Error("Whisper engine not ready") + } + + const tempInput = path.join(os.tmpdir(), `opencode-audio-${Date.now()}.webm`) + const tempWav = path.join(os.tmpdir(), `opencode-audio-${Date.now()}.wav`) + + try { + await fs.writeFile(tempInput, audioBuffer) + + await execAsync(`ffmpeg -i "${tempInput}" -ar 16000 -ac 1 -f wav "${tempWav}" -y -loglevel quiet`) + + const wavBuffer = await fs.readFile(tempWav) + const wav = new WaveFile(wavBuffer) + + wav.toBitDepth("32f") + wav.toSampleRate(16000) + + let audioData = wav.getSamples() + if (Array.isArray(audioData)) { + if (audioData.length > 1) { + const SCALING_FACTOR = Math.sqrt(2) + for (let i = 0; i < audioData[0].length; ++i) { + audioData[0][i] = (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2 + } + } + audioData = audioData[0] + } + + const result = await this.transcriber(audioData, { + return_timestamps: timestamps, + language: "en", + task: "transcribe", + chunk_length_s: 30, + stride_length_s: 5, + }) + + return { + text: result.text.trim(), + ...(timestamps && result.chunks ? 
{ chunks: result.chunks } : {}), + } + } finally { + await fs.unlink(tempInput).catch(() => {}) + await fs.unlink(tempWav).catch(() => {}) + } + } + + async stop() { + this.transcriber = null + this.status = "idle" + this.log.info("whisper engine stopped") + } + + isReady(): boolean { + return this.status === "ready" && this.transcriber !== null + } + + getStatus(): WhisperEngineStatus { + return this.status + } + + getDownloadProgress(): number { + return this.downloadProgress + } +} diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index a80daf023acc..aee5c24c3263 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -160,8 +160,12 @@ import type { TuiShowToastResponses, TuiSubmitPromptResponses, VcsGetResponses, + VoiceDisableResponses, + VoiceEnableResponses, + VoiceModelsResponses, VoiceStatusResponses, VoiceStreamResponses, + VoiceSwitchModelResponses, VoiceTranscribeResponses, WorktreeCreateErrors, WorktreeCreateInput, @@ -2066,7 +2070,7 @@ export class Voice extends HeyApiClient { /** * Get voice service status * - * Check if the voice service is available and ready + * Check the current status of the voice transcription service */ public status( parameters?: { @@ -2082,6 +2086,114 @@ export class Voice extends HeyApiClient { }) } + /** + * Enable voice transcription + * + * Enable voice transcription with optional model selection + */ + public enable( + parameters?: { + directory?: string + model?: "tiny" | "base" | "small" + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "query", key: "directory" }, + { in: "body", key: "model" }, + ], + }, + ], + ) + return (options?.client ?? this.client).post({ + url: "/voice/enable", + ...options, + ...params, + headers: { + "Content-Type": "application/json", + ...options?.headers, + ...params.headers, + }, + }) + } + + /** + * Disable voice transcription + * + * Disable voice transcription service + */ + public disable( + parameters?: { + directory?: string + }, + options?: Options, + ) { + const params = buildClientParams([parameters], [{ args: [{ in: "query", key: "directory" }] }]) + return (options?.client ?? this.client).post({ + url: "/voice/disable", + ...options, + ...params, + }) + } + + /** + * List available models + * + * Get list of available Whisper models + */ + public models( + parameters?: { + directory?: string + }, + options?: Options, + ) { + const params = buildClientParams([parameters], [{ args: [{ in: "query", key: "directory" }] }]) + return (options?.client ?? this.client).get({ + url: "/voice/models", + ...options, + ...params, + }) + } + + /** + * Switch to a different model + * + * Switch the voice transcription model + */ + public switchModel( + parameters?: { + directory?: string + model?: "tiny" | "base" | "small" + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "query", key: "directory" }, + { in: "body", key: "model" }, + ], + }, + ], + ) + return (options?.client ?? 
this.client).post({ + url: "/voice/switch-model", + ...options, + ...params, + headers: { + "Content-Type": "application/json", + ...options?.headers, + ...params.headers, + }, + }) + } + /** * Transcribe audio file * diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 6caba9db6033..8764d7bf5c42 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -881,10 +881,33 @@ export type EventWorktreeFailed = { } } +export type VoiceStatus = + | { + status: "disabled" + } + | { + status: "idle" + } + | { + status: "downloading" + progress: number + } + | { + status: "loading" + } + | { + status: "ready" + model: string + } + | { + status: "error" + error: string + } + export type EventVoiceUpdated = { type: "voice.updated" properties: { - available: boolean + status: VoiceStatus } } @@ -1607,6 +1630,24 @@ export type McpRemoteConfig = { timeout?: number } +/** + * Voice transcription configuration + */ +export type VoiceConfig = { + /** + * Enable or disable voice transcription + */ + enabled?: boolean + /** + * Whisper model size: tiny (75MB), base (142MB), or small (466MB) + */ + model?: "tiny" | "base" | "small" + /** + * Device to run the model on: cpu, gpu, or auto + */ + device?: "cpu" | "gpu" | "auto" +} + /** * @deprecated Always uses stretch layout. */ @@ -1737,6 +1778,7 @@ export type Config = { enabled: boolean } } + voice?: VoiceConfig formatter?: | false | { @@ -4040,19 +4082,99 @@ export type VoiceStatusResponses = { /** * Service status */ + 200: VoiceStatus +} + +export type VoiceStatusResponse = VoiceStatusResponses[keyof VoiceStatusResponses] + +export type VoiceEnableData = { + body?: { + model?: "tiny" | "base" | "small" + } + path?: never + query?: { + directory?: string + } + url: "/voice/enable" +} + +export type VoiceEnableResponses = { + /** + * Enable result + */ 200: { - available: boolean - config: { - enabled: boolean - model: string - device: "cuda" | "cpu" | "auto" - maxDuration: number - chunkDuration: number - } + success: boolean } } -export type VoiceStatusResponse = VoiceStatusResponses[keyof VoiceStatusResponses] +export type VoiceEnableResponse = VoiceEnableResponses[keyof VoiceEnableResponses] + +export type VoiceDisableData = { + body?: never + path?: never + query?: { + directory?: string + } + url: "/voice/disable" +} + +export type VoiceDisableResponses = { + /** + * Disabled successfully + */ + 200: { + success: boolean + } +} + +export type VoiceDisableResponse = VoiceDisableResponses[keyof VoiceDisableResponses] + +export type VoiceModelsData = { + body?: never + path?: never + query?: { + directory?: string + } + url: "/voice/models" +} + +export type VoiceModelsResponses = { + /** + * Available models + */ + 200: { + available: Array<{ + name: "tiny" | "base" | "small" + size: string + }> + downloaded: Array<"tiny" | "base" | "small"> + current: "tiny" | "base" | "small" + } +} + +export type VoiceModelsResponse = VoiceModelsResponses[keyof VoiceModelsResponses] + +export type VoiceSwitchModelData = { + body?: { + model: "tiny" | "base" | "small" + } + path?: never + query?: { + directory?: string + } + url: "/voice/switch-model" +} + +export type VoiceSwitchModelResponses = { + /** + * Model switch result + */ + 200: { + success: boolean + } +} + +export type VoiceSwitchModelResponse = VoiceSwitchModelResponses[keyof VoiceSwitchModelResponses] export type VoiceTranscribeData = { body?: { @@ -4075,18 +4197,10 @@ export type VoiceTranscribeResponses = { */ 
200: { text: string - timestamps?: { - word: Array<{ - start: number - end: number - word: string - }> - segment: Array<{ - start: number - end: number - segment: string - }> - } + chunks?: Array<{ + text: string + timestamp: [number, number] + }> } } diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index cf2f29d8589c..197d0e6bc676 100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -3876,6 +3876,372 @@ ] } }, + "/voice/status": { + "get": { + "operationId": "voice.status", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Get voice service status", + "description": "Check the current status of the voice transcription service", + "responses": { + "200": { + "description": "Service status", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VoiceStatus" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.status({\n ...\n})" + } + ] + } + }, + "/voice/enable": { + "post": { + "operationId": "voice.enable", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Enable voice transcription", + "description": "Enable voice transcription with optional model selection", + "responses": { + "200": { + "description": "Enable result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + }, + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": ["tiny", "base", "small"] + } + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.enable({\n ...\n})" + } + ] + } + }, + "/voice/disable": { + "post": { + "operationId": "voice.disable", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Disable voice transcription", + "description": "Disable voice transcription service", + "responses": { + "200": { + "description": "Disabled successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.disable({\n ...\n})" + } + ] + } + }, + "/voice/models": { + "get": { + "operationId": "voice.models", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "List available models", + "description": "Get list of available Whisper models", + "responses": { + "200": { + "description": "Available models", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "available": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "enum": ["tiny", "base", "small"] + }, + "size": { + "type": "string" + } + }, + "required": ["name", "size"] + } + }, + "downloaded": { + "type": "array", + "items": { + "type": 
"string", + "enum": ["tiny", "base", "small"] + } + }, + "current": { + "type": "string", + "enum": ["tiny", "base", "small"] + } + }, + "required": ["available", "downloaded", "current"] + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.models({\n ...\n})" + } + ] + } + }, + "/voice/switch-model": { + "post": { + "operationId": "voice.switchModel", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Switch to a different model", + "description": "Switch the voice transcription model", + "responses": { + "200": { + "description": "Model switch result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + } + }, + "required": ["success"] + } + } + } + } + }, + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": ["tiny", "base", "small"] + } + }, + "required": ["model"] + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.switchModel({\n ...\n})" + } + ] + } + }, + "/voice/transcribe": { + "post": { + "operationId": "voice.transcribe", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Transcribe audio file", + "description": "Submit a base64-encoded audio file for transcription", + "responses": { + "200": { + "description": "Transcription result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "chunks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "timestamp": { + "type": "array", + "prefixItems": [ + { + "type": "number" + }, + { + "type": "number" + } + ] + } + }, + "required": ["text", "timestamp"] + } + } + }, + "required": ["text"] + } + } + } + } + }, + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "audio": { + "description": "Base64-encoded WAV audio data", + "type": "string" + }, + "timestamps": { + "default": false, + "type": "boolean" + } + }, + "required": ["audio"] + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.transcribe({\n ...\n})" + } + ] + } + }, + "/voice/stream": { + "get": { + "operationId": "voice.stream", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + } + ], + "summary": "Stream audio for transcription", + "description": "Establish a WebSocket connection to stream audio chunks and receive real-time transcriptions", + "responses": { + "200": { + "description": "WebSocket connection established", + "content": { + "application/json": { + "schema": { + "type": "boolean" + } + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.voice.stream({\n ...\n})" + } + ] + } + }, "/find": { "get": { "operationId": "find.text", @@ -8210,6 +8576,98 @@ }, "required": ["type", 
"properties"] }, + "VoiceStatus": { + "anyOf": [ + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "disabled" + } + }, + "required": ["status"] + }, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "idle" + } + }, + "required": ["status"] + }, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "downloading" + }, + "progress": { + "type": "number" + } + }, + "required": ["status", "progress"] + }, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "loading" + } + }, + "required": ["status"] + }, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "ready" + }, + "model": { + "type": "string" + } + }, + "required": ["status", "model"] + }, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "const": "error" + }, + "error": { + "type": "string" + } + }, + "required": ["status", "error"] + } + ] + }, + "Event.voice.updated": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "voice.updated" + }, + "properties": { + "type": "object", + "properties": { + "status": { + "$ref": "#/components/schemas/VoiceStatus" + } + }, + "required": ["status"] + } + }, + "required": ["type", "properties"] + }, "Event": { "anyOf": [ { @@ -8337,6 +8795,9 @@ }, { "$ref": "#/components/schemas/Event.worktree.failed" + }, + { + "$ref": "#/components/schemas/Event.voice.updated" } ] }, @@ -8646,6 +9107,11 @@ "default": "ctrl+v", "type": "string" }, + "voice_input": { + "description": "Voice input (tap to record, tap to stop)", + "default": "\\", + "type": "string" + }, "input_submit": { "description": "Submit input", "default": "return", @@ -9410,6 +9876,29 @@ "required": ["type", "url"], "additionalProperties": false }, + "VoiceConfig": { + "description": "Voice transcription configuration", + "type": "object", + "properties": { + "enabled": { + "description": "Enable or disable voice transcription", + "type": "boolean" + }, + "model": { + "description": "Whisper model size: tiny (75MB), base (142MB), or small (466MB)", + "default": "base", + "type": "string", + "enum": ["tiny", "base", "small"] + }, + "device": { + "description": "Device to run the model on: cpu, gpu, or auto", + "default": "auto", + "type": "string", + "enum": ["cpu", "gpu", "auto"] + } + }, + "additionalProperties": false + }, "LayoutConfig": { "description": "@deprecated Always uses stretch layout.", "type": "string", @@ -9647,6 +10136,9 @@ ] } }, + "voice": { + "$ref": "#/components/schemas/VoiceConfig" + }, "formatter": { "anyOf": [ { From d9fd0f4394e6a88bb69a2d3c1e2ea8ba8a62746a Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:46:05 +0100 Subject: [PATCH 03/17] Fixed voice settings. 
--- .../cli/cmd/tui/component/dialog-voice.tsx | 26 +----- .../src/cli/cmd/tui/context/local.tsx | 82 +++---------------- .../opencode/src/cli/cmd/tui/routes/home.tsx | 2 +- .../src/cli/cmd/tui/routes/session/footer.tsx | 2 +- packages/opencode/src/voice/service.ts | 28 +++++-- packages/opencode/src/voice/whisper-engine.ts | 72 +++++++++++++--- 6 files changed, 95 insertions(+), 117 deletions(-) diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx index 8db96ff213f8..befd6ee24789 100644 --- a/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx @@ -5,7 +5,6 @@ import { DialogSelect, type DialogSelectRef, type DialogSelectOption } from "@tu import { useTheme } from "../context/theme" import { Keybind } from "@/util/keybind" import { TextAttributes } from "@opentui/core" -import { useSDK } from "@tui/context/sdk" function Status(props: { status: string; loading: boolean }) { const { theme } = useTheme() @@ -33,7 +32,6 @@ function Status(props: { status: string; loading: boolean }) { export function DialogVoice() { const local = useLocal() const sync = useSync() - const sdk = useSDK() const [, setRef] = createSignal>() const [loading, setLoading] = createSignal(null) @@ -89,22 +87,7 @@ export function DialogVoice() { if (value === "toggle") { setLoading("toggle") try { - const currentStatus = voiceStatus() - if (currentStatus === "disabled" || currentStatus === "idle") { - await sdk.client.voice.enable() - const currentModel = voiceModel() - local.voice.set({ - enabled: true, - model: currentModel ? (currentModel as "tiny" | "base" | "small") : "base", - }) - } else { - await sdk.client.voice.disable() - local.voice.set({ enabled: false }) - } - const statusRes = await sdk.client.voice.status() - if (statusRes.data) { - sync.set("voice", statusRes.data) - } + await local.voice.toggle() } catch (error) { console.error("Failed to toggle voice:", error) } finally { @@ -117,12 +100,7 @@ export function DialogVoice() { const modelName = value.replace("model:", "") as "tiny" | "base" | "small" setLoading(modelName) try { - await sdk.client.voice.switchModel({ model: modelName }) - local.voice.set({ model: modelName }) - const statusRes = await sdk.client.voice.status() - if (statusRes.data) { - sync.set("voice", statusRes.data) - } + await local.voice.switchModel(modelName) } catch (error) { console.error("Failed to switch voice model:", error) } finally { diff --git a/packages/opencode/src/cli/cmd/tui/context/local.tsx b/packages/opencode/src/cli/cmd/tui/context/local.tsx index 17456d77d691..c11361bff4a6 100644 --- a/packages/opencode/src/cli/cmd/tui/context/local.tsx +++ b/packages/opencode/src/cli/cmd/tui/context/local.tsx @@ -374,77 +374,19 @@ export const { use: useLocal, provider: LocalProvider } = createSimpleContext({ }, } - const voice = iife(() => { - const [voiceStore, setVoiceStore] = createStore<{ - ready: boolean - enabled: boolean - model: "tiny" | "base" | "small" - }>({ - ready: false, - enabled: false, - model: "base", - }) - - const file = Bun.file(path.join(Global.Path.state, "voice.json")) - const state = { - pending: false, - } - - function save() { - if (!voiceStore.ready) { - state.pending = true - return + const voice = { + async toggle() { + const status = sync.data.voice + if (status?.status === "ready") { + await sdk.client.voice.disable() + } else { + await sdk.client.voice.enable() } - state.pending = false - 
Bun.write( - file, - JSON.stringify({ - enabled: voiceStore.enabled, - model: voiceStore.model, - }), - ) - } - - file - .json() - .then((x) => { - if (typeof x.enabled === "boolean") setVoiceStore("enabled", x.enabled) - if (x.model === "tiny" || x.model === "base" || x.model === "small") setVoiceStore("model", x.model) - }) - .catch(() => {}) - .finally(() => { - setVoiceStore("ready", true) - if (state.pending) save() - }) - - return { - enabled() { - return voiceStore.enabled - }, - model() { - return voiceStore.model - }, - setEnabled(enabled: boolean) { - batch(() => { - setVoiceStore("enabled", enabled) - save() - }) - }, - setModel(model: "tiny" | "base" | "small") { - batch(() => { - setVoiceStore("model", model) - save() - }) - }, - set(opts: { enabled?: boolean; model?: "tiny" | "base" | "small" }) { - batch(() => { - if (opts.enabled !== undefined) setVoiceStore("enabled", opts.enabled) - if (opts.model !== undefined) setVoiceStore("model", opts.model) - save() - }) - }, - } - }) + }, + async switchModel(model: "tiny" | "base" | "small") { + await sdk.client.voice.switchModel({ model }) + }, + } // Automatically update model when agent changes createEffect(() => { diff --git a/packages/opencode/src/cli/cmd/tui/routes/home.tsx b/packages/opencode/src/cli/cmd/tui/routes/home.tsx index 3acb40a96808..c837eec1b7e8 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/home.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/home.tsx @@ -128,7 +128,7 @@ export function Home() { {connectedMcpCount()} MCP - + diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx index b84663d2966b..d20f1753e7b7 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/footer.tsx @@ -82,7 +82,7 @@ export function Footer() { {mcp()} MCP - + diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index 12caa16d7b1e..2a0b4d2c595e 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -14,6 +14,17 @@ class VoiceServiceImpl { private currentModel: WhisperModelSize = "base" private enabled = false + private async saveToDisk() { + await Config.updateGlobal({ + voice: { + enabled: this.enabled, + model: this.currentModel, + device: "auto", + }, + }) + this.log.debug("voice settings saved to config", { enabled: this.enabled, model: this.currentModel }) + } + private publishStatus() { const status = (() => { if (!this.enabled) return { status: "disabled" as const } @@ -33,14 +44,12 @@ class VoiceServiceImpl { } async initialize(): Promise { - const cfg = await Config.get() - const file = Bun.file(path.join(Global.Path.state, "voice.json")) - const local = await file.json().catch(() => ({})) + const cfg = await Config.getGlobal() - this.log.info("voice initialization", { local, config: cfg.voice }) + this.log.info("voice initialization", { config: cfg.voice }) - this.enabled = local.enabled ?? cfg.voice?.enabled ?? false - this.currentModel = local.model ?? cfg.voice?.model ?? "base" + this.enabled = cfg.voice?.enabled ?? false + this.currentModel = cfg.voice?.model ?? 
"base" this.log.info("voice enabled state", { enabled: this.enabled, model: this.currentModel }) @@ -62,13 +71,14 @@ class VoiceServiceImpl { } this.enabled = true + await this.saveToDisk() this.publishStatus() if (this.engine) { return this.engine.isReady() } - this.log.info("enabling voice engine", { model: this.currentModel }) + this.log.debug("enabling voice engine", { model: this.currentModel }) this.engine = new WhisperEngine(this.currentModel, "auto") this.publishStatus() @@ -80,12 +90,13 @@ class VoiceServiceImpl { return false } - this.log.info("voice service enabled successfully") + this.log.debug("voice service enabled successfully") return true } async disable(): Promise { this.enabled = false + await this.saveToDisk() if (this.engine) { await this.engine.stop() this.engine = null @@ -101,6 +112,7 @@ class VoiceServiceImpl { this.log.info("switching voice model", { from: this.currentModel, to: model }) this.currentModel = model + await this.saveToDisk() if (this.engine) { await this.engine.stop() diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index adea0a1d6be7..38de0cf99888 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -7,9 +7,63 @@ import os from "os" import { WaveFile } from "wavefile" import { exec } from "child_process" import { promisify } from "util" +import { openSync, closeSync } from "fs" +import { dlopen, FFIType, suffix } from "bun:ffi" const execAsync = promisify(exec) +// Suppress ONNX runtime warnings globally +process.env.ORT_LOGGING_LEVEL = "4" +process.env.ONNX_LOGGING_LEVEL = "4" + +// HACK: Suppress ONNX Runtime warnings that bypass JavaScript stderr +// +// ONNX Runtime emits warnings directly to file descriptor 2 (stderr) from C++ code +// during model loading, specifically "CleanUnusedInitializersAndNodeArgs" warnings. +// These warnings: +// - Don't respect ORT_LOGGING_LEVEL environment variable +// - Can't be suppressed via process.stderr.write override +// - Are not actionable for end users (they're about internal graph optimization) +// - Clutter the terminal output when enabling voice mode +// +// See: https://github.com/microsoft/onnxruntime/issues/19141 +// +// This workaround uses FFI to call dup2() syscall to temporarily redirect stderr +// to /dev/null at the OS level during model initialization, then restores it. +// This is the only reliable way to suppress these warnings without patching ONNX Runtime. 
+// +// TODO: Remove this hack if/when ONNX Runtime properly respects logging levels + +const libc = dlopen("/lib/x86_64-linux-gnu/libc.so.6", { + dup: { + args: [FFIType.i32], + returns: FFIType.i32, + }, + dup2: { + args: [FFIType.i32, FFIType.i32], + returns: FFIType.i32, + }, +}) + +function redirectStderr() { + try { + const devNull = openSync("/dev/null", "w") + const stderrBackup = libc.symbols.dup(2) + + libc.symbols.dup2(devNull, 2) + closeSync(devNull) + + return () => { + libc.symbols.dup2(stderrBackup, 2) + try { + closeSync(stderrBackup) + } catch {} + } + } catch (error) { + return () => {} + } +} + export type WhisperModelSize = "tiny" | "base" | "small" export type WhisperEngineStatus = "idle" | "downloading" | "loading" | "ready" | "error" @@ -30,7 +84,7 @@ export class WhisperEngine { if (this.status === "downloading" || this.status === "loading") return false this.status = "downloading" - this.log.info("initializing whisper engine", { modelSize: this.modelSize, device: this.device }) + this.log.debug("initializing whisper engine", { modelSize: this.modelSize, device: this.device }) const modelId = `Xenova/whisper-${this.modelSize}.en` const cacheDir = path.join(Global.Path.cache, "voice-models") @@ -38,16 +92,8 @@ export class WhisperEngine { try { this.status = "loading" - process.env.ORT_LOGGING_LEVEL = "4" - - const originalStderrWrite = process.stderr.write.bind(process.stderr) - let stderrBuffer = "" - - process.stderr.write = ((chunk: any): boolean => { - const str = chunk.toString() - stderrBuffer += str - return true - }) as any + // Redirect stderr to suppress ONNX warnings during model loading + const restoreStderr = redirectStderr() try { this.transcriber = await pipeline("automatic-speech-recognition", modelId, { @@ -65,11 +111,11 @@ export class WhisperEngine { }, } as any) } finally { - process.stderr.write = originalStderrWrite + restoreStderr() } this.status = "ready" - this.log.info("whisper engine ready", { modelSize: this.modelSize }) + this.log.debug("whisper engine ready", { modelSize: this.modelSize }) return true } catch (error) { this.status = "error" From 27805fda4397002cdd5bdbdfe9f441cc8b82506d Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:35:20 +0100 Subject: [PATCH 04/17] Updated keybinding references. 
--- packages/opencode/src/cli/cmd/tui/app.tsx | 2 +- .../src/cli/cmd/tui/component/prompt/index.tsx | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/opencode/src/cli/cmd/tui/app.tsx b/packages/opencode/src/cli/cmd/tui/app.tsx index 2b45103080d1..fada76d8daca 100644 --- a/packages/opencode/src/cli/cmd/tui/app.tsx +++ b/packages/opencode/src/cli/cmd/tui/app.tsx @@ -395,7 +395,7 @@ function App() { }, }, { - title: "Voice Settings", + title: "Voice settings", value: "voice.settings", category: "Agent", slash: { diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index b8fe0a69686a..a32ac9d06aeb 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -805,9 +805,9 @@ export function Prompt(props: PromptProps) { return } - // Handle voice input - toggle recording on backslash - if (e.name === "\\") { - // Prevent default to avoid inserting backslash character + // Handle voice input - toggle recording + if (keybind.match("voice_input", e)) { + // Prevent default to avoid inserting the keybind character e.preventDefault() // Only allow voice input if service is available @@ -1157,7 +1157,9 @@ export function Prompt(props: PromptProps) { - Recording... (\ to stop) + + Recording... ({keybind.print("voice_input")} to stop) + @@ -1167,7 +1169,7 @@ export function Prompt(props: PromptProps) { - \ voice + {keybind.print("voice_input")} voice 0}> From 989980e52e9a8f5b9024993373c99da1a06ce7eb Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:37:06 +0100 Subject: [PATCH 05/17] Removed double information. --- packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx index befd6ee24789..8494568bd8de 100644 --- a/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/dialog-voice.tsx @@ -66,7 +66,7 @@ export function DialogVoice() { result.push({ value: `model:${model.name}`, title: `${model.name} (${model.size})`, - description: model.description + (isCurrent ? " (current)" : ""), + description: model.description, footer: loadingModel === model.name ? ⋯ Loading : isCurrent ? ✓ Active : undefined, category: "Models", }) From 1ac3804a3e8c70e2f92c42f92eb1f5d11c2b3612 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:51:20 +0100 Subject: [PATCH 06/17] Changed code to fit repo guidelines better. 
--- .../cli/cmd/tui/component/prompt/index.tsx | 23 +++++++++++-------- packages/opencode/src/voice/service.ts | 11 +++------ packages/opencode/src/voice/whisper-engine.ts | 21 +++++++++-------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx index a32ac9d06aeb..cb564a1125d4 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/index.tsx @@ -827,7 +827,10 @@ export function Prompt(props: PromptProps) { duration: 3000, }) } - } else if (voiceStatus() === "recording" && voiceRecorder) { + return + } + + if (voiceStatus() === "recording" && voiceRecorder) { // Stop recording and transcribe setVoiceStatus("transcribing") @@ -835,20 +838,21 @@ export function Prompt(props: PromptProps) { const text = await voiceRecorder.stopRecordingAndTranscribe() setVoiceStatus(voiceRecorder.status) - if (text) { - // Insert transcribed text at cursor position - input.insertText(text) - setTimeout(() => { - input.getLayoutNode().markDirty() - renderer.requestRender() - }, 0) - } else { + if (!text) { toast.show({ variant: "warning", message: "No speech detected", duration: 3000, }) + return } + + // Insert transcribed text at cursor position + input.insertText(text) + setTimeout(() => { + input.getLayoutNode().markDirty() + renderer.requestRender() + }, 0) } catch (err) { setVoiceStatus("error") toast.show({ @@ -859,6 +863,7 @@ export function Prompt(props: PromptProps) { // Reset status after error setTimeout(() => setVoiceStatus("idle"), 100) } + return } return diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index 2a0b4d2c595e..94086a6b8cbb 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -46,22 +46,17 @@ class VoiceServiceImpl { async initialize(): Promise { const cfg = await Config.getGlobal() - this.log.info("voice initialization", { config: cfg.voice }) - this.enabled = cfg.voice?.enabled ?? false this.currentModel = cfg.voice?.model ?? 
"base" - this.log.info("voice enabled state", { enabled: this.enabled, model: this.currentModel }) + this.log.debug("voice service initialized", { enabled: this.enabled, model: this.currentModel }) this.publishStatus() if (!this.enabled) { - this.log.info("voice service disabled") return } - this.log.info("voice service initialized", { model: this.currentModel, enabled: this.enabled }) - await this.enable(this.currentModel) } @@ -102,7 +97,7 @@ class VoiceServiceImpl { this.engine = null } this.publishStatus() - this.log.info("voice service disabled") + this.log.debug("voice service disabled") } async switchModel(model: WhisperModelSize): Promise { @@ -110,7 +105,7 @@ class VoiceServiceImpl { return true } - this.log.info("switching voice model", { from: this.currentModel, to: model }) + this.log.debug("switching voice model", { from: this.currentModel, to: model }) this.currentModel = model await this.saveToDisk() diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index 38de0cf99888..1cb00d56b5a4 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -148,16 +148,19 @@ export class WhisperEngine { wav.toBitDepth("32f") wav.toSampleRate(16000) - let audioData = wav.getSamples() - if (Array.isArray(audioData)) { - if (audioData.length > 1) { - const SCALING_FACTOR = Math.sqrt(2) - for (let i = 0; i < audioData[0].length; ++i) { - audioData[0][i] = (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2 - } + const rawAudioData = wav.getSamples() + const audioData = (() => { + if (!Array.isArray(rawAudioData)) return rawAudioData + + if (rawAudioData.length === 1) return rawAudioData[0] + + // Mix stereo to mono + const SCALING_FACTOR = Math.sqrt(2) + for (let i = 0; i < rawAudioData[0].length; ++i) { + rawAudioData[0][i] = (SCALING_FACTOR * (rawAudioData[0][i] + rawAudioData[1][i])) / 2 } - audioData = audioData[0] - } + return rawAudioData[0] + })() const result = await this.transcriber(audioData, { return_timestamps: timestamps, From cc330c8332328dd6bf3dbcb7917d71595da2bb90 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 18:32:33 +0100 Subject: [PATCH 07/17] Updated voice model cache directory. 
--- packages/opencode/src/voice/service.ts | 2 +- packages/opencode/src/voice/whisper-engine.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index 94086a6b8cbb..0cf268b80dab 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -179,7 +179,7 @@ class VoiceServiceImpl { } async getDownloadedModels(): Promise { - const cacheDir = path.join(Global.Path.cache, "voice-models") + const cacheDir = path.join(Global.Path.cache, "models") const downloaded: WhisperModelSize[] = [] const models: WhisperModelSize[] = ["tiny", "base", "small"] diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index 1cb00d56b5a4..4542ee2fb9f0 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -87,7 +87,7 @@ export class WhisperEngine { this.log.debug("initializing whisper engine", { modelSize: this.modelSize, device: this.device }) const modelId = `Xenova/whisper-${this.modelSize}.en` - const cacheDir = path.join(Global.Path.cache, "voice-models") + const cacheDir = path.join(Global.Path.cache, "models") try { this.status = "loading" From 95746fac18ac9029c7db4fde4ce4acf6cb946224 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 18:44:19 +0100 Subject: [PATCH 08/17] fix: move voice dependencies to correct package location --- bun.lock | 14 +++++++++----- package.json | 4 +--- packages/opencode/package.json | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/bun.lock b/bun.lock index 6b7cad274266..911ddd36515d 100644 --- a/bun.lock +++ b/bun.lock @@ -9,9 +9,7 @@ "@opencode-ai/plugin": "workspace:*", "@opencode-ai/script": "workspace:*", "@opencode-ai/sdk": "workspace:*", - "sharp": "0.34.5", "typescript": "catalog:", - "wavefile": "11.0.0", }, "devDependencies": { "@actions/artifact": "5.0.1", @@ -327,12 +325,14 @@ "opentui-spinner": "0.0.6", "partial-json": "0.1.7", "remeda": "catalog:", + "sharp": "0.34.5", "solid-js": "catalog:", "strip-ansi": "7.1.2", "tree-sitter-bash": "0.25.0", "turndown": "7.2.0", "ulid": "catalog:", "vscode-jsonrpc": "8.2.1", + "wavefile": "11.0.0", "web-tree-sitter": "0.25.10", "xdg-basedir": "5.1.0", "yargs": "18.0.0", @@ -2330,7 +2330,7 @@ "destroy": ["destroy@1.2.0", "", {}, "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg=="], - "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "detect-libc": ["detect-libc@1.0.3", "", { "bin": { "detect-libc": "./bin/detect-libc.js" } }, "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg=="], "detect-node-es": ["detect-node-es@1.1.0", "", {}, "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ=="], @@ -4256,8 +4256,6 @@ "@oslojs/jwt/@oslojs/encoding": ["@oslojs/encoding@0.4.1", "", {}, "sha512-hkjo6MuIK/kQR5CrGNdAPZhS01ZCXuWDRJ187zh6qqF2+yMHZpD9fAYpX8q2bOO6Ryhl3XpCT6kUX76N8hhm4Q=="], - "@parcel/watcher/detect-libc": ["detect-libc@1.0.3", "", { "bin": { "detect-libc": "./bin/detect-libc.js" } }, "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg=="], - "@pierre/diffs/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": 
"3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], "@pierre/diffs/@shikijs/engine-javascript": ["@shikijs/engine-javascript@3.19.0", "", { "dependencies": { "@shikijs/types": "3.19.0", "@shikijs/vscode-textmate": "^10.0.2", "oniguruma-to-es": "^4.3.4" } }, "sha512-ZfWJNm2VMhKkQIKT9qXbs76RRcT0SF/CAvEz0+RkpUDAoDaCx0uFdCGzSRiD9gSlhm6AHkjdieOBJMaO2eC1rQ=="], @@ -4306,6 +4304,8 @@ "@solidjs/start/vite": ["vite@7.1.10", "", { "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", "picomatch": "^4.0.3", "postcss": "^8.5.6", "rollup": "^4.43.0", "tinyglobby": "^0.2.15" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^20.19.0 || >=22.12.0", "jiti": ">=1.21.0", "less": "^4.0.0", "lightningcss": "^1.21.0", "sass": "^1.70.0", "sass-embedded": "^1.70.0", "stylus": ">=0.54.8", "sugarss": "^5.0.0", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-CmuvUBzVJ/e3HGxhg6cYk88NGgTnBoOo7ogtfJJ0fefUWAxN/WDSUa50o+oVBxuIhO8FoEZW0j2eW7sfjs5EtA=="], + "@tailwindcss/oxide/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.8.1", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" }, "bundled": true }, "sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg=="], "@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.8.1", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg=="], @@ -4412,6 +4412,8 @@ "lazystream/readable-stream": ["readable-stream@2.3.8", "", { "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", "isarray": "~1.0.0", "process-nextick-args": "~2.0.0", "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" } }, "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA=="], + "lightningcss/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "md-to-react-email/marked": ["marked@7.0.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-t8eP0dXRJMtMvBojtkcsA7n48BkauktUKzfkPSCq85ZMTJ0v76Rke4DYz01omYpPTUh4p/f7HePgRo3ebG8+QQ=="], "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], @@ -4492,6 +4494,8 @@ "send/mime": ["mime@1.6.0", "", { "bin": { "mime": "cli.js" } }, "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="], + "sharp/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "shiki/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, 
"sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], "shiki/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="], diff --git a/package.json b/package.json index 9507c61a59d4..22138fabb39a 100644 --- a/package.json +++ b/package.json @@ -76,9 +76,7 @@ "@opencode-ai/plugin": "workspace:*", "@opencode-ai/script": "workspace:*", "@opencode-ai/sdk": "workspace:*", - "sharp": "0.34.5", - "typescript": "catalog:", - "wavefile": "11.0.0" + "typescript": "catalog:" }, "repository": { "type": "git", diff --git a/packages/opencode/package.json b/packages/opencode/package.json index 62f062cebe84..5bd8ba83a1e0 100644 --- a/packages/opencode/package.json +++ b/packages/opencode/package.json @@ -91,6 +91,8 @@ "@standard-schema/spec": "1.0.0", "@xenova/transformers": "2.17.2", "@zip.js/zip.js": "2.7.62", + "sharp": "0.34.5", + "wavefile": "11.0.0", "ai": "catalog:", "bonjour-service": "1.3.0", "bun-pty": "0.4.4", From 54d74caa774b6225ba863a39c1097482ed97f38c Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 18:47:08 +0100 Subject: [PATCH 09/17] chore: remove unnecessary blank line from .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index a8dcd665f2d9..78a77f819828 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,6 @@ target .scripts .direnv/ - # Local dev files opencode-dev logs/ From 7356edf44410ce670e15b051190e6f582347ac3b Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 19:35:09 +0100 Subject: [PATCH 10/17] fix: make ONNX stderr suppression platform-agnostic - Only attempt FFI stderr redirection on Linux (process.platform check) - Move dlopen call inside redirectStderr function with try/catch - Gracefully skip stderr suppression on Windows/macOS - Allows voice feature to work cross-platform without E2E test failures --- packages/opencode/src/voice/whisper-engine.ts | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index 4542ee2fb9f0..86f9927b7fb7 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -32,21 +32,27 @@ process.env.ONNX_LOGGING_LEVEL = "4" // to /dev/null at the OS level during model initialization, then restores it. // This is the only reliable way to suppress these warnings without patching ONNX Runtime. 
// +// Platform support: Linux only (gracefully skipped on macOS/Windows) // TODO: Remove this hack if/when ONNX Runtime properly respects logging levels -const libc = dlopen("/lib/x86_64-linux-gnu/libc.so.6", { - dup: { - args: [FFIType.i32], - returns: FFIType.i32, - }, - dup2: { - args: [FFIType.i32, FFIType.i32], - returns: FFIType.i32, - }, -}) - function redirectStderr() { + // Only attempt stderr redirection on Linux + if (process.platform !== "linux") { + return () => {} + } + try { + const libc = dlopen("/lib/x86_64-linux-gnu/libc.so.6", { + dup: { + args: [FFIType.i32], + returns: FFIType.i32, + }, + dup2: { + args: [FFIType.i32, FFIType.i32], + returns: FFIType.i32, + }, + }) + const devNull = openSync("/dev/null", "w") const stderrBackup = libc.symbols.dup(2) @@ -60,6 +66,7 @@ function redirectStderr() { } catch {} } } catch (error) { + // FFI loading failed - continue without stderr suppression return () => {} } } From dba2413cad6f2f814cb06758cedc8d6757503039 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:19:42 +0100 Subject: [PATCH 11/17] switch to @huggingface/transformers and remove ONNX suppression hack Replaces @xenova/transformers with @huggingface/transformers 3.8.1 which properly respects ONNX logging configuration. Removes 63 lines of complex FFI stderr redirection code in favor of simple env.backends.onnx.logSeverityLevel configuration. --- bun.lock | 49 ++++++--- packages/opencode/package.json | 2 +- packages/opencode/src/voice/whisper-engine.ts | 102 ++++-------------- 3 files changed, 55 insertions(+), 98 deletions(-) diff --git a/bun.lock b/bun.lock index 911ddd36515d..5af4809cca87 100644 --- a/bun.lock +++ b/bun.lock @@ -289,6 +289,7 @@ "@gitlab/gitlab-ai-provider": "3.3.1", "@hono/standard-validator": "0.1.5", "@hono/zod-validator": "catalog:", + "@huggingface/transformers": "3.8.1", "@modelcontextprotocol/sdk": "1.25.2", "@octokit/graphql": "9.0.2", "@octokit/rest": "catalog:", @@ -305,7 +306,6 @@ "@solid-primitives/event-bus": "1.1.2", "@solid-primitives/scheduled": "1.5.2", "@standard-schema/spec": "1.0.0", - "@xenova/transformers": "2.17.2", "@zip.js/zip.js": "2.7.62", "ai": "catalog:", "bonjour-service": "1.3.0", @@ -504,6 +504,7 @@ "overrides": { "@types/bun": "catalog:", "@types/node": "catalog:", + "onnxruntime-web": "1.18.0", "sharp": "0.34.5", }, "catalog": { @@ -950,7 +951,9 @@ "@hono/zod-validator": ["@hono/zod-validator@0.4.2", "", { "peerDependencies": { "hono": ">=3.9.0", "zod": "^3.19.1" } }, "sha512-1rrlBg+EpDPhzOV4hT9pxr5+xDVmKuz6YJl+la7VCwK6ass5ldyKm5fD+umJdV2zhHD6jROoCCv8NbTwyfhT0g=="], - "@huggingface/jinja": ["@huggingface/jinja@0.2.2", "", {}, "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA=="], + "@huggingface/jinja": ["@huggingface/jinja@0.5.4", "", {}, "sha512-VoQJywjpjy2D88Oj0BTHRuS8JCbUgoOg5t1UGgbtGh2fRia9Dx/k6Wf8FqrEWIvWK9fAkfJeeLB9fcSpCNPCpw=="], + + "@huggingface/transformers": ["@huggingface/transformers@3.8.1", "", { "dependencies": { "@huggingface/jinja": "^0.5.3", "onnxruntime-node": "1.21.0", "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4", "sharp": "^0.34.1" } }, "sha512-tsTk4zVjImqdqjS8/AOZg2yNLd1z9S5v+7oUPpXaasDRwEDhB+xnglK1k5cad26lL5/ZIaeREgWWy0bs9y9pPA=="], "@ibm/plex": ["@ibm/plex@6.4.1", "", { "dependencies": { "@ibm/telemetry-js": "^1.5.1" } }, "sha512-fnsipQywHt3zWvsnlyYKMikcVI7E2fEwpiPnIHFqlbByXVfQfANAAeJk1IV4mNnxhppUIDlhU0TzwYwL++Rn2g=="], @@ -1870,8 +1873,6 @@ "@types/katex": ["@types/katex@0.16.7", 
"", {}, "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ=="], - "@types/long": ["@types/long@4.0.2", "", {}, "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA=="], - "@types/luxon": ["@types/luxon@3.7.1", "", {}, "sha512-H3iskjFIAn5SlJU7OuxUmTEpebK6TKB8rxZShDslBMZJ5u9S//KM1sbdAisiSrqwLQncVjnpi2OK2J51h+4lsg=="], "@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="], @@ -1970,8 +1971,6 @@ "@webgpu/types": ["@webgpu/types@0.1.54", "", {}, "sha512-81oaalC8LFrXjhsczomEQ0u3jG+TqE6V9QHLA8GNZq/Rnot0KDugu3LhSYSlie8tSdooAN1Hov05asrUUp9qgg=="], - "@xenova/transformers": ["@xenova/transformers@2.17.2", "", { "dependencies": { "@huggingface/jinja": "^0.2.2", "onnxruntime-web": "1.14.0", "sharp": "^0.32.0" }, "optionalDependencies": { "onnxruntime-node": "1.14.0" } }, "sha512-lZmHqzrVIkSvZdKZEx7IYY51TK0WDrC8eR0c5IMnBsO8di8are1zzw8BlLhyO2TklZKLN5UffNGs1IJwT6oOqQ=="], - "@zip.js/zip.js": ["@zip.js/zip.js@2.7.62", "", {}, "sha512-OaLvZ8j4gCkLn048ypkZu29KX30r8/OfFF2w4Jo5WXFr+J04J+lzJ5TKZBVgFXhlvSkqNFQdfnY1Q8TMTCyBVA=="], "abbrev": ["abbrev@2.0.0", "", {}, "sha512-6/mh1E2u2YgEsCHdY0Yx5oW+61gZU+1vXaoiHHrpKeuRNNgFvS+/jrwHiQhB5apAf5oB7UB7E19ol2R2LKH8hQ=="], @@ -2114,6 +2113,8 @@ "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], + "boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="], + "bottleneck": ["bottleneck@2.19.5", "", {}, "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="], "bowser": ["bowser@2.13.1", "", {}, "sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw=="], @@ -2332,6 +2333,8 @@ "detect-libc": ["detect-libc@1.0.3", "", { "bin": { "detect-libc": "./bin/detect-libc.js" } }, "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg=="], + "detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="], + "detect-node-es": ["detect-node-es@1.1.0", "", {}, "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ=="], "deterministic-object-hash": ["deterministic-object-hash@2.0.2", "", { "dependencies": { "base-64": "^1.0.0" } }, "sha512-KxektNH63SrbfUyDiwXqRb1rLwKt33AmMv+5Nhsw1kqZ13SJBRTgZHtGbE+hH3a1mVW1cz+4pqSWVPAtLVXTzQ=="], @@ -2426,6 +2429,8 @@ "es-to-primitive": ["es-to-primitive@1.3.0", "", { "dependencies": { "is-callable": "^1.2.7", "is-date-object": "^1.0.5", "is-symbol": "^1.0.4" } }, "sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g=="], + "es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="], + "esast-util-from-estree": ["esast-util-from-estree@2.0.0", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "devlop": "^1.0.0", "estree-util-visit": "^2.0.0", "unist-util-position-from-estree": "^2.0.0" } }, "sha512-4CyanoAudUSBAn5K13H4JhsMH6L9ZP7XbLVe/dKybkxMO7eDyLsT8UHl9TRNrU2Gr9nz+FovfSIjuXWJ81uVwQ=="], "esast-util-from-js": ["esast-util-from-js@2.0.1", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "acorn": "^8.0.0", 
"esast-util-from-estree": "^2.0.0", "vfile-message": "^4.0.0" } }, "sha512-8Ja+rNJ0Lt56Pcf3TAmpBZjmx8ZcK5Ts4cAzIOjsjevg9oSXJnl6SUQ2EevU8tv3h6ZLWmoKL5H4fgWvdvfETw=="], @@ -2610,6 +2615,8 @@ "glob-to-regexp": ["glob-to-regexp@0.4.1", "", {}, "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw=="], + "global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="], + "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="], "globby": ["globby@11.0.4", "", { "dependencies": { "array-union": "^2.1.0", "dir-glob": "^3.0.1", "fast-glob": "^3.1.1", "ignore": "^5.1.4", "merge2": "^1.3.0", "slash": "^3.0.0" } }, "sha512-9O4MVG9ioZJ08ffbcyVYyLOJLk5JQ688pJ4eMGLpdWLHq/Wr1D9BlriLQyL0E+jbkuePVZXYFj47QM/v093wHg=="], @@ -2892,6 +2899,8 @@ "json-schema-typed": ["json-schema-typed@8.0.2", "", {}, "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA=="], + "json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="], + "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], "jsonc-parser": ["jsonc-parser@3.3.1", "", {}, "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ=="], @@ -2998,6 +3007,8 @@ "marked-shiki": ["marked-shiki@1.2.1", "", { "peerDependencies": { "marked": ">=7.0.0", "shiki": ">=1.0.0" } }, "sha512-yHxYQhPY5oYaIRnROn98foKhuClark7M373/VpLxiy5TrDu9Jd/LsMwo8w+U91Up4oDb9IXFrP0N1MFRz8W/DQ=="], + "matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="], + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], "md-to-react-email": ["md-to-react-email@5.0.0", "", { "dependencies": { "marked": "7.0.4" }, "peerDependencies": { "react": "18.x" } }, "sha512-GdBrBUbAAJHypnuyofYGfVos8oUslxHx69hs3CW9P0L8mS1sT6GnJuMBTlz/Fw+2widiwdavcu9UwyLF/BzZ4w=="], @@ -3234,13 +3245,11 @@ "oniguruma-to-es": ["oniguruma-to-es@4.3.4", "", { "dependencies": { "oniguruma-parser": "^0.12.1", "regex": "^6.0.1", "regex-recursion": "^6.0.2" } }, "sha512-3VhUGN3w2eYxnTzHn+ikMI+fp/96KoRSVK9/kMTcFqj1NRDh2IhQCKvYxDnWePKRXY/AqH+Fuiyb7VHSzBjHfA=="], - "onnx-proto": ["onnx-proto@4.0.4", "", { "dependencies": { "protobufjs": "^6.8.8" } }, "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA=="], + "onnxruntime-common": ["onnxruntime-common@1.21.0", "", {}, "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ=="], - "onnxruntime-common": ["onnxruntime-common@1.14.0", "", {}, "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew=="], + "onnxruntime-node": ["onnxruntime-node@1.21.0", "", { "dependencies": { "global-agent": "^3.0.0", "onnxruntime-common": "1.21.0", "tar": 
"^7.0.1" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw=="], - "onnxruntime-node": ["onnxruntime-node@1.14.0", "", { "dependencies": { "onnxruntime-common": "~1.14.0" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w=="], - - "onnxruntime-web": ["onnxruntime-web@1.14.0", "", { "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", "long": "^4.0.0", "onnx-proto": "^4.0.4", "onnxruntime-common": "~1.14.0", "platform": "^1.3.6" } }, "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw=="], + "onnxruntime-web": ["onnxruntime-web@1.18.0", "", { "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", "long": "^5.2.3", "onnxruntime-common": "1.18.0", "platform": "^1.3.6", "protobufjs": "^7.2.4" } }, "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ=="], "open": ["open@10.1.2", "", { "dependencies": { "default-browser": "^5.2.1", "define-lazy-prop": "^3.0.0", "is-inside-container": "^1.0.0", "is-wsl": "^3.1.0" } }, "sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw=="], @@ -3400,7 +3409,7 @@ "proto-list": ["proto-list@1.2.4", "", {}, "sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA=="], - "protobufjs": ["protobufjs@6.11.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/long": "^4.0.1", "@types/node": ">=13.7.0", "long": "^4.0.0" }, "bin": { "pbjs": "bin/pbjs", "pbts": "bin/pbts" } }, "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw=="], + "protobufjs": ["protobufjs@7.5.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="], "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], @@ -3530,6 +3539,8 @@ "rimraf": ["rimraf@5.0.10", "", { "dependencies": { "glob": "^10.3.7" }, "bin": { "rimraf": "dist/esm/bin.mjs" } }, "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ=="], + "roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="], + "rollup": ["rollup@4.57.0", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.57.0", 
"@rollup/rollup-android-arm64": "4.57.0", "@rollup/rollup-darwin-arm64": "4.57.0", "@rollup/rollup-darwin-x64": "4.57.0", "@rollup/rollup-freebsd-arm64": "4.57.0", "@rollup/rollup-freebsd-x64": "4.57.0", "@rollup/rollup-linux-arm-gnueabihf": "4.57.0", "@rollup/rollup-linux-arm-musleabihf": "4.57.0", "@rollup/rollup-linux-arm64-gnu": "4.57.0", "@rollup/rollup-linux-arm64-musl": "4.57.0", "@rollup/rollup-linux-loong64-gnu": "4.57.0", "@rollup/rollup-linux-loong64-musl": "4.57.0", "@rollup/rollup-linux-ppc64-gnu": "4.57.0", "@rollup/rollup-linux-ppc64-musl": "4.57.0", "@rollup/rollup-linux-riscv64-gnu": "4.57.0", "@rollup/rollup-linux-riscv64-musl": "4.57.0", "@rollup/rollup-linux-s390x-gnu": "4.57.0", "@rollup/rollup-linux-x64-gnu": "4.57.0", "@rollup/rollup-linux-x64-musl": "4.57.0", "@rollup/rollup-openbsd-x64": "4.57.0", "@rollup/rollup-openharmony-arm64": "4.57.0", "@rollup/rollup-win32-arm64-msvc": "4.57.0", "@rollup/rollup-win32-ia32-msvc": "4.57.0", "@rollup/rollup-win32-x64-gnu": "4.57.0", "@rollup/rollup-win32-x64-msvc": "4.57.0", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-e5lPJi/aui4TO1LpAXIRLySmwXSE8k3b9zoGfd42p67wzxog4WHjiZF3M2uheQih4DGyc25QEV4yRBbpueNiUA=="], "rou3": ["rou3@0.7.12", "", {}, "sha512-iFE4hLDuloSWcD7mjdCDhx2bKcIsYbtOTpfH5MHHLSKMOUyjqQXTeZVa289uuwEGEKFoE/BAPbhaU4B774nceg=="], @@ -3562,10 +3573,14 @@ "semver": ["semver@7.7.3", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q=="], + "semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="], + "send": ["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], "seq-queue": ["seq-queue@0.0.5", "", {}, "sha512-hr3Wtp/GZIc/6DAGPDcV4/9WoZhjrkXsi5B/07QgX8tsdc6ilr7BFM6PM6rbdAX1kFSDYeZGLipIZZKyQP0O5Q=="], + "serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="], + "seroval": ["seroval@1.3.2", "", {}, "sha512-RbcPH1n5cfwKrru7v7+zrZvjLurgHhGyso3HTyGtRivGWgYjbOmGuivCQaORNELjNONoK35nj28EoWul9sb1zQ=="], "seroval-plugins": ["seroval-plugins@1.3.3", "", { "peerDependencies": { "seroval": "^1.0" } }, "sha512-16OL3NnUBw8JG1jBLUoZJsLnQq0n5Ua6aHalhJK4fMQkz1lqR7Osz1sA30trBtd9VUDc2NgkuRCn8+/pBwqZ+w=="], @@ -4414,6 +4429,8 @@ "lightningcss/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "matcher/escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="], + "md-to-react-email/marked": ["marked@7.0.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-t8eP0dXRJMtMvBojtkcsA7n48BkauktUKzfkPSCq85ZMTJ0v76Rke4DYz01omYpPTUh4p/f7HePgRo3ebG8+QQ=="], "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], @@ -4434,7 
+4451,7 @@ "nypm/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "onnxruntime-web/long": ["long@4.0.0", "", {}, "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="], + "onnxruntime-web/onnxruntime-common": ["onnxruntime-common@1.18.0", "", {}, "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q=="], "opencode/@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.57", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-DREpYqW2pylgaj69gZ+K8u92bo9DaMgFdictYnY+IwYeY3bawQ4zI7l/o1VkDsBDljAx8iYz5lPURwVZNu+Xpg=="], @@ -4474,8 +4491,6 @@ "prompts/kleur": ["kleur@3.0.3", "", {}, "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w=="], - "protobufjs/long": ["long@4.0.0", "", {}, "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="], - "raw-body/iconv-lite": ["iconv-lite@0.4.24", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3" } }, "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA=="], "readable-stream/buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="], @@ -4484,6 +4499,8 @@ "rimraf/glob": ["glob@10.5.0", "", { "dependencies": { "foreground-child": "^3.1.0", "jackspeak": "^3.1.2", "minimatch": "^9.0.4", "minipass": "^7.1.2", "package-json-from-dist": "^1.0.0", "path-scurry": "^1.11.1" }, "bin": { "glob": "dist/esm/bin.mjs" } }, "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg=="], + "roarr/sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="], + "router/path-to-regexp": ["path-to-regexp@8.3.0", "", {}, "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA=="], "safe-array-concat/isarray": ["isarray@2.0.5", "", {}, "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw=="], @@ -4494,6 +4511,8 @@ "send/mime": ["mime@1.6.0", "", { "bin": { "mime": "cli.js" } }, "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="], + "serialize-error/type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="], + "sharp/detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], "shiki/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], diff --git a/packages/opencode/package.json b/packages/opencode/package.json index 5bd8ba83a1e0..d32ad2cbd434 100644 --- a/packages/opencode/package.json +++ b/packages/opencode/package.json @@ -89,7 +89,7 @@ "@solid-primitives/event-bus": "1.1.2", "@solid-primitives/scheduled": "1.5.2", "@standard-schema/spec": "1.0.0", - "@xenova/transformers": "2.17.2", + "@huggingface/transformers": 
"3.8.1", "@zip.js/zip.js": "2.7.62", "sharp": "0.34.5", "wavefile": "11.0.0", diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index 86f9927b7fb7..d67026b05dd5 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -1,4 +1,4 @@ -import { pipeline, type PipelineType } from "@xenova/transformers" +import { pipeline, env } from "@huggingface/transformers" import { Log } from "@/util/log" import { Global } from "@/global" import path from "path" @@ -7,69 +7,10 @@ import os from "os" import { WaveFile } from "wavefile" import { exec } from "child_process" import { promisify } from "util" -import { openSync, closeSync } from "fs" -import { dlopen, FFIType, suffix } from "bun:ffi" -const execAsync = promisify(exec) - -// Suppress ONNX runtime warnings globally -process.env.ORT_LOGGING_LEVEL = "4" -process.env.ONNX_LOGGING_LEVEL = "4" - -// HACK: Suppress ONNX Runtime warnings that bypass JavaScript stderr -// -// ONNX Runtime emits warnings directly to file descriptor 2 (stderr) from C++ code -// during model loading, specifically "CleanUnusedInitializersAndNodeArgs" warnings. -// These warnings: -// - Don't respect ORT_LOGGING_LEVEL environment variable -// - Can't be suppressed via process.stderr.write override -// - Are not actionable for end users (they're about internal graph optimization) -// - Clutter the terminal output when enabling voice mode -// -// See: https://github.com/microsoft/onnxruntime/issues/19141 -// -// This workaround uses FFI to call dup2() syscall to temporarily redirect stderr -// to /dev/null at the OS level during model initialization, then restores it. -// This is the only reliable way to suppress these warnings without patching ONNX Runtime. -// -// Platform support: Linux only (gracefully skipped on macOS/Windows) -// TODO: Remove this hack if/when ONNX Runtime properly respects logging levels - -function redirectStderr() { - // Only attempt stderr redirection on Linux - if (process.platform !== "linux") { - return () => {} - } +env.backends.onnx.logSeverityLevel = 4 - try { - const libc = dlopen("/lib/x86_64-linux-gnu/libc.so.6", { - dup: { - args: [FFIType.i32], - returns: FFIType.i32, - }, - dup2: { - args: [FFIType.i32, FFIType.i32], - returns: FFIType.i32, - }, - }) - - const devNull = openSync("/dev/null", "w") - const stderrBackup = libc.symbols.dup(2) - - libc.symbols.dup2(devNull, 2) - closeSync(devNull) - - return () => { - libc.symbols.dup2(stderrBackup, 2) - try { - closeSync(stderrBackup) - } catch {} - } - } catch (error) { - // FFI loading failed - continue without stderr suppression - return () => {} - } -} +const execAsync = promisify(exec) export type WhisperModelSize = "tiny" | "base" | "small" @@ -99,27 +40,24 @@ export class WhisperEngine { try { this.status = "loading" - // Redirect stderr to suppress ONNX warnings during model loading - const restoreStderr = redirectStderr() - - try { - this.transcriber = await pipeline("automatic-speech-recognition", modelId, { - quantized: true, - device: this.device === "auto" ? undefined : this.device, - cache_dir: cacheDir, - progress_callback: (progress: any) => { - if (progress.status === "downloading") { - const percent = progress.progress ? 
Math.round(progress.progress) : 0 - if (percent !== this.downloadProgress) { - this.downloadProgress = percent - this.log.debug("model download progress", { percent }) + this.transcriber = await pipeline("automatic-speech-recognition", modelId, { + session_options: { + log_severity_level: 4, + }, + dtype: "fp32", + quantized: true, + device: this.device === "auto" ? undefined : this.device, + cache_dir: cacheDir, + progress_callback: (progress: any) => { + if (progress.status === "downloading") { + const percent = progress.progress ? Math.round(progress.progress) : 0 + if (percent !== this.downloadProgress) { + this.downloadProgress = percent + this.log.debug("model download progress", { percent }) } - }, - } as any) - } finally { - restoreStderr() - } + } + }, + } as any) this.status = "ready" this.log.debug("whisper engine ready", { modelSize: this.modelSize }) From e02a2ac0660e78929a45d33e0aa793dcf44acdb2 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:30:46 +0100 Subject: [PATCH 12/17] fix: remove language config from Whisper English-only model. --- packages/opencode/src/voice/whisper-engine.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index d67026b05dd5..5a7ca2ec9667 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -109,8 +109,6 @@ export class WhisperEngine { const result = await this.transcriber(audioData, { return_timestamps: timestamps, - language: "en", - task: "transcribe", chunk_length_s: 30, stride_length_s: 5, }) From e926dc29201ccea5739615f59afdfa8581a1ef05 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:32:32 +0100 Subject: [PATCH 13/17] fix: remove unnecessary bus changes. --- packages/opencode/src/bus/index.ts | 21 +++++---------------- packages/opencode/src/voice/service.ts | 10 ++++++++-- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/packages/opencode/src/bus/index.ts b/packages/opencode/src/bus/index.ts index a4a9055380df..edb093f19747 100644 --- a/packages/opencode/src/bus/index.ts +++ b/packages/opencode/src/bus/index.ts @@ -50,25 +50,14 @@ export namespace Bus { type: def.type, }) const pending = [] - try { - for (const key of [def.type, "*"]) { - const match = state().subscriptions.get(key) - for (const sub of match ??
[]) { + pending.push(sub(payload)) } - } catch (e) { - // No instance context - skip instance-scoped subscriptions } - const directory = (() => { - try { - return Instance.directory - } catch (e) { - return "" - } - })() GlobalBus.emit("event", { - directory, + directory: Instance.directory, payload, }) return Promise.all(pending) diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index 0cf268b80dab..f22bb144f334 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -1,5 +1,5 @@ import { WhisperEngine, type WhisperModelSize } from "./whisper-engine" -import { Bus } from "@/bus" +import { GlobalBus } from "@/bus/global" import { Voice } from "./event" import { Log } from "@/util/log" import { Global } from "@/global" @@ -40,7 +40,13 @@ class VoiceServiceImpl { return { status: "error" as const, error: "Engine failed to initialize" } })() - Bus.publish(Voice.Event.Updated, { status }) + GlobalBus.emit("event", { + directory: "", + payload: { + type: Voice.Event.Updated.type, + properties: { status }, + }, + }) } async initialize(): Promise { From f4eb9683be84379eb38c367b84232d823a5c76de Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:47:32 +0100 Subject: [PATCH 14/17] chore: removed `Xenova` prefix from models --- packages/opencode/src/voice/whisper-engine.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index 5a7ca2ec9667..e045a9994043 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -34,7 +34,7 @@ export class WhisperEngine { this.status = "downloading" this.log.debug("initializing whisper engine", { modelSize: this.modelSize, device: this.device }) - const modelId = `Xenova/whisper-${this.modelSize}.en` + const modelId = `whisper-${this.modelSize}.en` const cacheDir = path.join(Global.Path.cache, "models") try { From 1ce16610c44033ce5956e5f44367a8606d42d7db Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:49:04 +0100 Subject: [PATCH 15/17] chore: removed `Xenova` prefix from model loading --- packages/opencode/src/voice/service.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opencode/src/voice/service.ts b/packages/opencode/src/voice/service.ts index f22bb144f334..198f941f3a5f 100644 --- a/packages/opencode/src/voice/service.ts +++ b/packages/opencode/src/voice/service.ts @@ -190,7 +190,7 @@ class VoiceServiceImpl { const models: WhisperModelSize[] = ["tiny", "base", "small"] for (const model of models) { - const modelPath = path.join(cacheDir, `Xenova_whisper-${model}.en`) + const modelPath = path.join(cacheDir, `whisper-${model}.en`) const exists = await Bun.file(path.join(modelPath, "config.json")).exists() if (exists) { downloaded.push(model) From 4756aced56507702ed4786eab5e43e80c992b7e8 Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:55:14 +0100 Subject: [PATCH 16/17] chore: removed top level log override. 
--- packages/opencode/src/voice/whisper-engine.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/opencode/src/voice/whisper-engine.ts b/packages/opencode/src/voice/whisper-engine.ts index e045a9994043..10aa1a1831c2 100644 --- a/packages/opencode/src/voice/whisper-engine.ts +++ b/packages/opencode/src/voice/whisper-engine.ts @@ -1,4 +1,4 @@ -import { pipeline, env } from "@huggingface/transformers" +import { pipeline } from "@huggingface/transformers" import { Log } from "@/util/log" import { Global } from "@/global" import path from "path" @@ -8,8 +8,6 @@ import { WaveFile } from "wavefile" import { exec } from "child_process" import { promisify } from "util" -env.backends.onnx.logSeverityLevel = 4 - const execAsync = promisify(exec) export type WhisperModelSize = "tiny" | "base" | "small" From 52ef762ce79e839c35fa63495fc2adfb87bcc08d Mon Sep 17 00:00:00 2001 From: cmdNiels <73907007+cmdNiels@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:10:11 +0100 Subject: [PATCH 17/17] chore: updated bun.lock to cleanly resolve conflicts --- bun.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bun.lock b/bun.lock index 211da71af095..13829de68a4e 100644 --- a/bun.lock +++ b/bun.lock @@ -504,7 +504,6 @@ "overrides": { "@types/bun": "catalog:", "@types/node": "catalog:", - "onnxruntime-web": "1.18.0", "sharp": "0.34.5", }, "catalog": { @@ -2527,7 +2526,7 @@ "finity": ["finity@0.5.4", "", {}, "sha512-3l+5/1tuw616Lgb0QBimxfdd2TqaDGpfCBpfX6EqtFmqUV3FtQnVEX4Aa62DagYEqnsTIjZcTfbq9msDbXYgyA=="], - "flatbuffers": ["flatbuffers@1.12.0", "", {}, "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="], + "flatbuffers": ["flatbuffers@25.9.23", "", {}, "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ=="], "flattie": ["flattie@1.1.1", "", {}, "sha512-9UbaD6XdAL97+k/n+N7JwX46K/M6Zc6KcFYskrYL8wbBV/Uyk0CTAMY0VT+qiK5PM7AIc9aTWYtq65U7T+aCNQ=="], @@ -3249,7 +3248,7 @@ "onnxruntime-node": ["onnxruntime-node@1.21.0", "", { "dependencies": { "global-agent": "^3.0.0", "onnxruntime-common": "1.21.0", "tar": "^7.0.1" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw=="], - "onnxruntime-web": ["onnxruntime-web@1.18.0", "", { "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", "long": "^5.2.3", "onnxruntime-common": "1.18.0", "platform": "^1.3.6", "protobufjs": "^7.2.4" } }, "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ=="], + "onnxruntime-web": ["onnxruntime-web@1.22.0-dev.20250409-89f8206ba4", "", { "dependencies": { "flatbuffers": "^25.1.24", "guid-typescript": "^1.0.9", "long": "^5.2.3", "onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4", "platform": "^1.3.6", "protobufjs": "^7.2.4" } }, "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ=="], "open": ["open@10.1.2", "", { "dependencies": { "default-browser": "^5.2.1", "define-lazy-prop": "^3.0.0", "is-inside-container": "^1.0.0", "is-wsl": "^3.1.0" } }, "sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw=="], @@ -4453,7 +4452,7 @@ "nypm/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "onnxruntime-web/onnxruntime-common": ["onnxruntime-common@1.18.0", "", {}, 
"sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q=="], + "onnxruntime-web/onnxruntime-common": ["onnxruntime-common@1.22.0-dev.20250409-89f8206ba4", "", {}, "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ=="], "opencode/@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.58", "", { "dependencies": { "@ai-sdk/provider": "2.0.1", "@ai-sdk/provider-utils": "3.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-CkNW5L1Arv8gPtPlEmKd+yf/SG9ucJf0XQdpMG8OiYEtEMc2smuCA+tyCp8zI7IBVg/FE7nUfFHntQFaOjRwJQ=="],