// src/voice/voiceToolCallAgents.ts

import { clamp } from "../utils.ts"; import { getResolvedBrowserTaskConfig } from "../settings/agentStack.ts"; import { buildMinecraftSessionScopeKey, findConflictingMinecraftSession, findReusableMinecraftSession, isMinecraftSessionAuthorized, resolveMinecraftSessionById } from "../agents/minecraft/minecraftSessionAccess.ts"; import { isAbortError } from "../tools/abortError.ts"; import { runBrowserBrowseTask } from "../tools/browserTaskRuntime.ts"; import { runOpenAiComputerUseTask } from "../tools/openAiComputerUseRuntime.ts"; import { normalizeInlineText } from "./voiceSessionHelpers.ts"; import { stopBrowserSessionStreamPublish, startBrowserSessionStreamPublish } from "./voiceBrowserStreamPublish.ts"; import { ensureStreamPublishState } from "./voiceStreamPublish.ts"; import type { VoiceRealtimeToolSettings, VoiceSession, VoiceToolRuntimeSessionLike } from "./voiceSessionTypes.ts"; import type { VoiceToolCallArgs, VoiceToolCallManager } from "./voiceToolCallTypes.ts";

/** Session value accepted by the tool handlers: a `VoiceSession` or a `VoiceToolRuntimeSessionLike`. */
type ToolRuntimeSession =
  | VoiceSession
  | VoiceToolRuntimeSessionLike;

/** Options bag shared by the voice agent tool handlers; every field is optional. */
type VoiceAgentToolOptions = {
  /** Voice session the tool call belongs to, if any. */
  session?: ToolRuntimeSession | null;
  /** Realtime tool settings forwarded to the runtimes and session factories. */
  settings?: VoiceRealtimeToolSettings | null;
  /** Arguments supplied with the tool call. */
  args?: VoiceToolCallArgs;
};

/** `VoiceAgentToolOptions` plus an optional abort signal that handlers pass on to the work they start. */
type VoiceBrowserToolOptions = VoiceAgentToolOptions & {
  signal?: AbortSignal;
};

/**
 * Removes a sub-agent session from the session store once a turn reports completion.
 *
 * @param manager - The `subAgentSessions` store; when it is absent or has no `remove`, nothing happens.
 * @param sessionId - Id handed to the store's `remove`.
 * @param sessionCompleted - Removal is attempted only when this is truthy.
 */
function maybeRemoveCompletedVoiceSession(
  manager: VoiceToolCallManager["subAgentSessions"],
  sessionId: string,
  sessionCompleted?: boolean
) {
  if (sessionCompleted) {
    manager?.remove?.(sessionId);
  }
}

/**
 * Builds the error text used when another Minecraft session already occupies the scope.
 *
 * @param sessionId - Id of the conflicting session, embedded in the message.
 * @returns The human-readable error message.
 */
function buildMinecraftBusyError(sessionId: string): string {
  // Must be a template literal so `${sessionId}` is interpolated.
  return `Minecraft companion is already active in session '${sessionId}' for another user.`;
}

/**
 * Runs a browser "browse" instruction requested through the voice tool surface.
 *
 * The request is served by the first route that applies:
 * 1. `args.session_id` is a non-empty string and `manager.subAgentSessions` exists:
 *    continue that session (after existence and ownership checks).
 * 2. `manager.createBrowserAgentSession` returns a session: register it and run
 *    its first turn.
 * 3. Otherwise run a one-shot task with `manager.browserManager` and `manager.llm`,
 *    using the OpenAI computer-use runtime or the local browse task depending on
 *    the resolved browser task config.
 *
 * @param manager - Tool-call manager providing the session store, factories and runtimes.
 * @param options - `session`, `settings`, `args` (`query`, optional `session_id`) and an optional abort `signal`.
 * @returns An object with `ok` and `text`; failures carry `error`, unfinished
 *   sessions carry `session_id`, and one-shot runs carry `steps` / `hit_step_limit`.
 *   Errors thrown while running a turn or task are caught and reported via `error`.
 */
export async function executeVoiceBrowserBrowseTool(
  manager: VoiceToolCallManager,
  { session, settings, args, signal }: VoiceBrowserToolOptions
) {
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  const instruction = normalizeInlineText(args?.query, 500);
  if (!instruction) {
    return { ok: false, text: "", error: "query_required" };
  }

  // Route 1: continue an existing sub-agent session.
  const sessionId = typeof args?.session_id === "string" ? String(args.session_id).trim() : "";
  if (sessionId && manager.subAgentSessions) {
    const existingSession = manager.subAgentSessions.get(sessionId);
    if (!existingSession) {
      return { ok: false, text: "", error: `Browser session '${sessionId}' not found or expired.` };
    }
    // A session with a recorded owner may only be continued by that same caller.
    if (
      existingSession.ownerUserId &&
      existingSession.ownerUserId !== session?.lastRealtimeToolCallerUserId
    ) {
      return { ok: false, text: "", error: `Not authorized to continue browser session '${sessionId}'.` };
    }
    try {
      const turnResult = await existingSession.runTurn(instruction, { signal });
      maybeRemoveCompletedVoiceSession(
        manager.subAgentSessions,
        existingSession.id,
        turnResult.sessionCompleted
      );
      if (turnResult.isError) return { ok: false, text: "", error: turnResult.errorMessage };
      // Only hand the session id back while the session can still be continued.
      return turnResult.sessionCompleted
        ? { ok: true, text: turnResult.text.trim() || "Browser browse completed." }
        : {
            ok: true,
            text: turnResult.text.trim() || "Browser browse completed.",
            session_id: existingSession.id
          };
    } catch (error: unknown) {
      return { ok: false, text: "", error: describeError(error) };
    }
  }

  // Route 2: start a new sub-agent session when the manager can create one.
  if (manager.createBrowserAgentSession && manager.subAgentSessions) {
    const newSession = manager.createBrowserAgentSession({
      settings,
      guildId: session?.guildId || "",
      channelId: session?.textChannelId || "",
      userId: session?.lastRealtimeToolCallerUserId || null,
      source: "voice_realtime_tool_browser_browse"
    });
    if (newSession) {
      manager.subAgentSessions.register(newSession);
      try {
        const turnResult = await newSession.runTurn(instruction, { signal });
        maybeRemoveCompletedVoiceSession(
          manager.subAgentSessions,
          newSession.id,
          turnResult.sessionCompleted
        );
        if (turnResult.isError) {
          // The caller has not seen this id yet, so include it if the session survives the error.
          return turnResult.sessionCompleted
            ? { ok: false, text: "", error: turnResult.errorMessage }
            : { ok: false, text: "", error: turnResult.errorMessage, session_id: newSession.id };
        }
        return turnResult.sessionCompleted
          ? { ok: true, text: turnResult.text.trim() || "Browser browse completed." }
          : {
              ok: true,
              text: turnResult.text.trim() || "Browser browse completed.",
              session_id: newSession.id
            };
      } catch (error: unknown) {
        return { ok: false, text: "", error: describeError(error) };
      }
    }
  }

  // Route 3: one-shot task without a persistent sub-agent session.
  if (!manager.browserManager) return { ok: false, text: "", error: "browser_unavailable" };
  if (!manager.llm) return { ok: false, text: "", error: "llm_unavailable" };

  const browserTaskConfig = getResolvedBrowserTaskConfig(settings);
  // Unset or non-numeric config values fall back to 15 steps / 30s before being passed through `clamp`.
  const maxSteps = clamp(Number(browserTaskConfig.maxStepsPerTask) || 15, 1, 30);
  const stepTimeoutMs = clamp(Number(browserTaskConfig.stepTimeoutMs) || 30_000, 5_000, 120_000);
  const computerUseClient =
    browserTaskConfig.runtime === "openai_computer_use"
      ? manager.llm.getComputerUseClient(browserTaskConfig.openaiComputerUse.client)
      : null;
  if (browserTaskConfig.runtime === "openai_computer_use" && !computerUseClient?.client) {
    return { ok: false, text: "", error: "openai_computer_use_unavailable" };
  }

  try {
    // The timestamp suffix gives each one-shot run its own session key.
    const sessionKey = `voice:${String(session?.id || session?.guildId || "unknown")}:${Date.now()}`;
    const trace = {
      guildId: session?.guildId,
      channelId: session?.textChannelId,
      userId: session?.lastRealtimeToolCallerUserId || null,
      source: "voice_realtime_tool_browser_browse"
    };
    const result =
      browserTaskConfig.runtime === "openai_computer_use"
        ? await runOpenAiComputerUseTask({
            openai: computerUseClient.client,
            provider: computerUseClient.provider || "openai",
            browserManager: manager.browserManager,
            store: manager.store,
            sessionKey,
            instruction,
            model: browserTaskConfig.openaiComputerUse.model,
            headed: browserTaskConfig.headed,
            maxSteps,
            stepTimeoutMs,
            sessionTimeoutMs: browserTaskConfig.sessionTimeoutMs,
            trace,
            logSource: "voice_realtime_tool_browser_browse",
            signal
          })
        : await runBrowserBrowseTask({
            llm: manager.llm,
            browserManager: manager.browserManager,
            store: manager.store,
            sessionKey,
            instruction,
            provider: browserTaskConfig.localAgent.provider,
            model: browserTaskConfig.localAgent.model,
            headed: browserTaskConfig.headed,
            maxSteps,
            stepTimeoutMs,
            sessionTimeoutMs: browserTaskConfig.sessionTimeoutMs,
            trace,
            logSource: "voice_realtime_tool_browser_browse",
            signal
          });

    return {
      ok: true,
      text: result.text,
      steps: result.steps,
      hit_step_limit: result.hitStepLimit
    };
  } catch (error: unknown) {
    // Aborts get a fixed message; everything else reports the underlying error text.
    const message = isAbortError(error) ? "Browser session cancelled." : describeError(error);
    return { ok: false, text: "", error: message };
  }
}

/**
 * Asks `startBrowserSessionStreamPublish` to publish the browser session named by
 * `args.session_id` for the guild of the current voice session.
 *
 * @param manager - Tool-call manager forwarded to the publish helper.
 * @param options - `session` (must carry a `guildId`), `args.session_id` and an optional abort `signal`.
 * @returns `{ ok: true, text: "", started, reused, session_id }` on success, otherwise
 *   `{ ok: false, text: "", error }`. Errors thrown by the publish helper are caught and reported.
 */
export async function executeVoiceShareBrowserSessionTool(
  manager: VoiceToolCallManager,
  { session, args, signal }: VoiceBrowserToolOptions
) {
  // Every failure shares the same envelope; only the error text differs.
  const failure = (error: string) => ({ ok: false, text: "", error });

  const browserSessionId = normalizeInlineText(args?.session_id, 220);
  if (!browserSessionId) return failure("session_id_required");
  if (!session?.guildId) return failure("voice_session_missing");

  try {
    const outcome = await startBrowserSessionStreamPublish(manager, {
      guildId: session.guildId,
      browserSessionId,
      requesterUserId: session.lastRealtimeToolCallerUserId || null,
      source: "voice_realtime_tool_share_browser_session",
      signal
    });

    if (outcome?.ok) {
      return {
        ok: true,
        text: "",
        started: Boolean(outcome.started),
        reused: Boolean(outcome.reused),
        session_id: browserSessionId
      };
    }
    return failure(String(outcome?.error || "browser_stream_publish_failed"));
  } catch (error: unknown) {
    return failure(error instanceof Error ? error.message : String(error));
  }
}

/**
 * Stops the active video share for the guild of the current voice session.
 *
 * The stream-publish state is read via `ensureStreamPublishState` from the entry
 * in `manager.sessions` keyed by the trimmed guild id. A `"browser_session"`
 * source is stopped through `stopBrowserSessionStreamPublish`; any other source
 * kind is stopped through `manager.stopMusicStreamPublish` when the manager
 * provides that method.
 *
 * @param manager - Tool-call manager holding the voice sessions and optional stop hook.
 * @param options - Only `session` is used; it must carry a `guildId`.
 * @returns `{ ok, text: "", stopped, source_kind }` after a stop attempt, or
 *   `{ ok: false, text: "", error }` when there is no session, no active share,
 *   or no way to stop it.
 */
export async function executeVoiceStopVideoShareTool(
  manager: VoiceToolCallManager,
  { session }: VoiceBrowserToolOptions
) {
  if (!session?.guildId) {
    return { ok: false, text: "", error: "voice_session_missing" };
  }

  const state = ensureStreamPublishState(
    manager.sessions.get(String(session.guildId || "").trim()) || null
  );
  if (!state?.active || !state.sourceKind) {
    return { ok: false, text: "", error: "video_share_inactive" };
  }

  if (state.sourceKind === "browser_session") {
    const result = await stopBrowserSessionStreamPublish(manager, {
      guildId: session.guildId,
      reason: "voice_realtime_tool_stop_video_share"
    });
    return {
      ok: Boolean(result?.ok),
      text: "",
      stopped: Boolean(result?.ok),
      source_kind: state.sourceKind
    };
  }

  // The stop hook is optional on the manager, so probe for it before binding.
  const stopMusicStreamPublish =
    "stopMusicStreamPublish" in manager && typeof manager.stopMusicStreamPublish === "function"
      ? manager.stopMusicStreamPublish.bind(manager)
      : null;
  if (!stopMusicStreamPublish) {
    return { ok: false, text: "", error: "stream_publish_stop_unavailable" };
  }

  // Await the hook like the browser path does: a synchronous result passes through
  // unchanged, while a promise result would otherwise be inspected before it
  // settles and always report `ok: false`.
  // NOTE(review): whether the hook is async is not visible from this file — confirm.
  const result = await stopMusicStreamPublish({
    guildId: session.guildId,
    reason: "voice_realtime_tool_stop_video_share"
  });
  return {
    ok: Boolean(result?.ok),
    text: "",
    stopped: Boolean(result?.ok),
    source_kind: state.sourceKind
  };
}

// ── Minecraft Task (voice surface) ──────────────────────────────────────────

/**
 * Handles a Minecraft task tool call coming from the voice surface.
 *
 * `args.action` (default `"run"`) selects the behaviour:
 * - `"status"`: send a `{ command: "status" }` turn to the resolved session.
 * - `"cancel"`: cancel the resolved session and remove it from the store.
 * - anything else: requires `args.task`; continues the resolved session, or, when
 *   none exists and no conflicting session is found, creates and registers a new
 *   one via `manager.createMinecraftSession`.
 *
 * A session is resolved either by `args.session_id` (with an authorization check)
 * or by looking up a reusable session for the caller and the guild/channel scope key.
 *
 * @param manager - Tool-call manager providing the session store and session factory.
 * @param options - `session`, `settings`, `args` (`task`, `action`, `mode`, `server`,
 *   `constraints`, `session_id`) and an optional abort `signal`.
 * @returns An object with `ok` and `text`; failures carry `error`, and sessions that
 *   remain open carry `session_id`. Errors thrown while running a turn are caught
 *   and reported via `error`.
 */
export async function executeVoiceMinecraftTaskTool(
  manager: VoiceToolCallManager,
  { session, settings, args, signal }: VoiceBrowserToolOptions
) {
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  const task = normalizeInlineText(args?.task, 2000);
  const action = normalizeInlineText(args?.action, 20) || "run";
  const mode = normalizeInlineText(args?.mode, 20) || undefined;
  const server =
    args?.server && typeof args.server === "object"
      ? args.server as Record<string, unknown>
      : undefined;
  const sessionId = typeof args?.session_id === "string" ? String(args.session_id).trim() : "";
  const scopeKey = buildMinecraftSessionScopeKey({
    guildId: session?.guildId || null,
    channelId: session?.textChannelId || null
  });

  // Serialized payload for a task turn; called inside `try` so a stringify failure is reported, not thrown.
  const buildTurnInput = () =>
    JSON.stringify({ task, mode, constraints: args?.constraints ?? undefined, server });

  // Returns null when there is no session store; otherwise `{ session, error }`, where an
  // explicit `session_id` must exist and be authorized, and the fallback is a reusable session.
  const resolveExistingSession = () => {
    if (!manager.subAgentSessions) return null;
    if (sessionId) {
      const requested = resolveMinecraftSessionById(manager.subAgentSessions, sessionId);
      if (!requested) {
        return { session: null, error: `Minecraft session '${sessionId}' not found or expired.` };
      }
      if (!isMinecraftSessionAuthorized(requested, session?.lastRealtimeToolCallerUserId)) {
        return { session: null, error: `Not authorized to continue Minecraft session '${sessionId}'.` };
      }
      return { session: requested, error: null };
    }
    const reusable = findReusableMinecraftSession(manager.subAgentSessions, {
      ownerUserId: session?.lastRealtimeToolCallerUserId || null,
      scopeKey
    });
    return { session: reusable, error: null };
  };

  // Status can work without a task
  if (action === "status") {
    const resolved = resolveExistingSession();
    if (resolved?.error) return { ok: false, text: "", error: resolved.error };
    const existing = resolved?.session;
    if (existing) {
      try {
        const result = await existing.runTurn(JSON.stringify({ command: "status" }), { signal });
        return { ok: true, text: result.text.trim() || "Minecraft status retrieved." };
      } catch (error: unknown) {
        return { ok: false, text: "", error: describeError(error) };
      }
    }
    return { ok: false, text: "", error: "No active Minecraft session." };
  }

  // Cancel
  if (action === "cancel") {
    if (!manager.subAgentSessions) {
      return { ok: false, text: "", error: "session_management_unavailable" };
    }
    const resolved = resolveExistingSession();
    if (resolved?.error) return { ok: false, text: "", error: resolved.error };
    const existing = resolved?.session;
    if (!existing) return { ok: false, text: "", error: "No active Minecraft session." };
    existing.cancel?.("Voice cancel");
    manager.subAgentSessions.remove?.(existing.id);
    return { ok: true, text: `Minecraft session '${existing.id}' cancelled.` };
  }

  if (!task) return { ok: false, text: "", error: "task_required" };

  // Session continuation
  if (manager.subAgentSessions) {
    const resolved = resolveExistingSession();
    if (resolved?.error) return { ok: false, text: "", error: resolved.error };
    const existing = resolved?.session;
    if (existing) {
      try {
        const turnResult = await existing.runTurn(buildTurnInput(), { signal });
        maybeRemoveCompletedVoiceSession(
          manager.subAgentSessions,
          existing.id,
          turnResult.sessionCompleted
        );
        if (turnResult.isError) return { ok: false, text: "", error: turnResult.errorMessage };
        return turnResult.sessionCompleted
          ? { ok: true, text: turnResult.text.trim() || "Minecraft task completed." }
          : {
              ok: true,
              text: turnResult.text.trim() || "Minecraft task completed.",
              session_id: existing.id
            };
      } catch (error: unknown) {
        return { ok: false, text: "", error: describeError(error) };
      }
    }

    // Nothing to continue: refuse to start a new session if a conflicting one is found for this scope.
    const conflictingSession = findConflictingMinecraftSession(manager.subAgentSessions, {
      ownerUserId: session?.lastRealtimeToolCallerUserId || null,
      scopeKey
    });
    if (conflictingSession) {
      return { ok: false, text: "", error: buildMinecraftBusyError(conflictingSession.id) };
    }
  }

  // New session
  if (manager.createMinecraftSession && manager.subAgentSessions) {
    const newSession = await manager.createMinecraftSession({
      settings,
      guildId: session?.guildId || "",
      channelId: session?.textChannelId || "",
      userId: session?.lastRealtimeToolCallerUserId || null,
      source: "voice_realtime_tool_minecraft_task"
    });
    if (newSession) {
      manager.subAgentSessions.register(newSession);
      try {
        const turnResult = await newSession.runTurn(buildTurnInput(), { signal });
        maybeRemoveCompletedVoiceSession(
          manager.subAgentSessions,
          newSession.id,
          turnResult.sessionCompleted
        );
        if (turnResult.isError) {
          // The caller has not seen this id yet, so include it if the session survives the error.
          return turnResult.sessionCompleted
            ? { ok: false, text: "", error: turnResult.errorMessage }
            : { ok: false, text: "", error: turnResult.errorMessage, session_id: newSession.id };
        }
        return turnResult.sessionCompleted
          ? { ok: true, text: turnResult.text.trim() || "Minecraft task completed." }
          : {
              ok: true,
              text: turnResult.text.trim() || "Minecraft task completed.",
              session_id: newSession.id
            };
      } catch (error: unknown) {
        return { ok: false, text: "", error: describeError(error) };
      }
    }
  }

  return { ok: false, text: "", error: "minecraft_unavailable" };
}