import { shouldRegisterRealtimeTools } from "./voiceConfigResolver.ts"; import { resolveVoiceSettingsSnapshot } from "./voiceSessionHelpers.ts"; import type { VoiceMcpServerStatus, VoicePendingToolCallState, VoiceRealtimeToolSettings, VoiceSession, VoiceToolRuntimeSessionLike } from "./voiceSessionTypes.ts"; import type { VoiceToolCallManager } from "./voiceToolCallTypes.ts"; import { buildRealtimeFunctionTools, ensureSessionToolRuntimeState, getVoiceMcpServerStatuses, parseRealtimeToolArguments, recordVoiceToolCallEvent, resolveRealtimeToolDescriptor, summarizeVoiceToolOutput } from "./voiceToolCallToolRegistry.ts"; import { executeLocalVoiceToolCall, executeMcpVoiceToolCall } from "./voiceToolCallDispatch.ts"; import { buildVoiceReplyScopeKey } from "../tools/activeReplyRegistry.ts"; import { isAbortError } from "../tools/abortError.ts"; import { shouldRequestVoiceToolFollowup } from "../tools/sharedToolSchemas.ts";
/** Either of the two session shapes the voice tool runtime accepts. */
type ToolRuntimeSession =
  | VoiceSession
  | VoiceToolRuntimeSessionLike;

/**
 * The session's (non-null) realtime client, widened with an optional
 * `sendFunctionCallOutput` method used to hand a tool result back for a call id.
 */
type RealtimeFunctionOutputClient = NonNullable<VoiceSession["realtimeClient"]> & {
  sendFunctionCallOutput?: (payload: { callId: string; output: string }) => void;
};

/**
 * Session view used while a tool call executes: adds the optional per-call
 * abort-controller map (keyed by string) and the widened realtime client.
 */
type ToolExecutionSession = ToolRuntimeSession & {
  realtimePendingToolAbortControllers?: Map<string, AbortController>;
  realtimeClient?: RealtimeFunctionOutputClient | null;
};
/**
 * Type guard for plain "record-like" values.
 *
 * @param value - Any value.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isToolOutputRecord(value: unknown): value is Record<string, unknown> {
  // `typeof null` is "object", so null has to be excluded explicitly.
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Shapes a tool result before it is returned to the model so that failures
 * always carry `is_error: true`.
 *
 * @param output - Raw value produced by (or substituted for) the tool call.
 * @param success - Whether the tool call completed without throwing.
 * @returns `output` unchanged when the call succeeded and the output does not
 *   itself report `ok === false`; otherwise a value flagged with `is_error: true`.
 */
function normalizeRealtimeToolOutputForModel(output: unknown, success: boolean) {
  if (!isToolOutputRecord(output)) {
    // Non-record values cannot self-report failure; only `success` matters here.
    if (success) {
      return output;
    }
    const resultText = output == null ? null : String(output);
    return { ok: false, is_error: true, result: resultText };
  }

  // A record may declare failure itself via `ok: false`, even if nothing threw.
  const reportsFailure = output.ok === false;
  if (success && !reportsFailure) {
    return output;
  }

  // Already flagged: hand back the same object rather than a copy.
  if (output.is_error === true) {
    return output;
  }
  return { ...output, is_error: true };
}
/**
 * Runs one pending realtime tool call end to end: resolves the tool, executes
 * it (MCP or local), records/logs the outcome, returns the output to the
 * realtime client when it supports `sendFunctionCallOutput`, and updates the
 * session's per-call bookkeeping.
 *
 * Returns early (doing nothing) when the session is missing or ending, or when
 * the pending call has no usable call id.
 *
 * Errors thrown by the tool itself are not rethrown: they are converted into an
 * `{ ok: false, ... }` output and reported as a failed call. An aborted call is
 * reported with the error message `"cancelled_by_user"`.
 *
 * @param manager - Tool call manager providing the store, client, reply
 *   registry, instruction manager and follow-up scheduler used below.
 * @param params.session - Session the call belongs to.
 * @param params.settings - Optional settings override passed to
 *   `resolveVoiceSettingsSnapshot`.
 * @param params.pendingCall - The call to execute (`callId`, `name`,
 *   `argumentsText`, `responseId`, `sourceEventType` are read).
 */
export async function executeRealtimeFunctionCall(
  manager: VoiceToolCallManager,
  {
    session,
    settings,
    pendingCall
  }: {
    session?: ToolRuntimeSession | null;
    settings?: VoiceRealtimeToolSettings | null;
    pendingCall: VoicePendingToolCallState;
  }
) {
  if (!session || session.ending) return;
  const runtimeSession = session as ToolExecutionSession;
  // Identifiers are trimmed and length-capped (180 / 120 chars) before use.
  const callId = String(pendingCall?.callId || "").trim().slice(0, 180);
  const toolName = String(pendingCall?.name || "").trim().slice(0, 120);
  if (!callId) return;

  const startedAtMs = Date.now();
  const resolvedSettings = resolveVoiceSettingsSnapshot(manager.store, session, settings);
  const callArgs = parseRealtimeToolArguments(manager, pendingCall?.argumentsText || "");
  const toolDescriptor = resolveRealtimeToolDescriptor(manager, session, toolName);
  const resolvedToolName = toolName || toolDescriptor?.name || "unknown_tool";
  const toolType = toolDescriptor?.toolType === "mcp" ? "mcp" : "function";
  const activeReply =
    manager.activeReplies?.begin(
      buildVoiceReplyScopeKey(session.id),
      "voice-tool",
      [resolvedToolName]
    ) || null;

  // The tool observes a signal that fires when either this call's own
  // controller or the active reply's controller aborts.
  const abortController = new AbortController();
  const toolSignal = activeReply
    ? AbortSignal.any([abortController.signal, activeReply.abortController.signal])
    : abortController.signal;
  if (!(runtimeSession.realtimePendingToolAbortControllers instanceof Map)) {
    runtimeSession.realtimePendingToolAbortControllers = new Map();
  }
  runtimeSession.realtimePendingToolAbortControllers.set(callId, abortController);

  manager.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: manager.client.user?.id || null,
    content: "realtime_tool_call_started",
    metadata: {
      sessionId: session.id,
      callId,
      toolName: resolvedToolName || null,
      toolType,
      arguments: callArgs
    }
  });

  let success = false;
  let output: unknown = null;
  let errorMessage = "";
  try {
    // Fix: this message must be a template literal (backticks were missing).
    if (!toolDescriptor) throw new Error(`unknown_tool:${toolName || "unnamed"}`);
    output = toolDescriptor.toolType === "mcp"
      ? await executeMcpVoiceToolCall(manager, {
        session,
        settings: resolvedSettings,
        toolDescriptor,
        args: callArgs,
        signal: toolSignal
      })
      : await executeLocalVoiceToolCall(manager, {
        session,
        settings: resolvedSettings,
        toolName: toolDescriptor.name,
        args: callArgs,
        signal: toolSignal
      });
    success = true;
  } catch (error) {
    // Any failure while the signal is aborted is treated as a cancellation,
    // even if the thrown value is not itself an abort error.
    if (isAbortError(error) || toolSignal.aborted) {
      errorMessage = "cancelled_by_user";
      output = {
        ok: false,
        cancelled: true,
        error: { message: "Tool call cancelled by user." }
      };
    } else {
      errorMessage = String(error?.message || error);
      output = { ok: false, error: { message: errorMessage } };
    }
  } finally {
    // Always release the per-call abort controller and the active-reply entry.
    runtimeSession.realtimePendingToolAbortControllers?.delete(callId);
    manager.activeReplies?.clear(activeReply);
  }

  const runtimeMs = Math.max(0, Date.now() - startedAtMs);
  const normalizedOutput = normalizeRealtimeToolOutputForModel(output, success);
  const outputSummary = summarizeVoiceToolOutput(manager, resolvedToolName, normalizedOutput);
  const responseHadAssistantOutput =
    typeof manager.hasRealtimeAssistantOutputForResponse === "function" && pendingCall.responseId
      ? manager.hasRealtimeAssistantOutputForResponse(session, pendingCall.responseId)
      : false;
  const requestFollowup = shouldRequestVoiceToolFollowup(resolvedToolName, {
    toolType,
    hasSpokenText: responseHadAssistantOutput
  });

  recordVoiceToolCallEvent(manager, {
    session,
    event: {
      callId,
      toolName: resolvedToolName,
      toolType,
      arguments: callArgs,
      startedAt: new Date(startedAtMs).toISOString(),
      completedAt: new Date().toISOString(),
      runtimeMs,
      success,
      outputSummary,
      error: success ? null : errorMessage,
      sourceEventType: String(pendingCall?.sourceEventType || "")
    }
  });
  manager.instructionManager.scheduleRealtimeInstructionRefresh?.({
    session,
    settings: resolvedSettings,
    reason: "tool_result",
    speakerUserId: session.lastRealtimeToolCallerUserId || null,
    transcript: ""
  });

  // Delivering the output is best-effort: a send failure is logged and the
  // remaining bookkeeping below still runs.
  try {
    if (typeof runtimeSession.realtimeClient?.sendFunctionCallOutput === "function") {
      let serializedOutput = "";
      if (typeof normalizedOutput === "string") {
        serializedOutput = normalizedOutput;
      } else {
        try {
          serializedOutput = JSON.stringify(normalizedOutput ?? null);
        } catch {
          // JSON.stringify threw; fall back to plain string coercion.
          serializedOutput = String(normalizedOutput ?? "");
        }
      }
      runtimeSession.realtimeClient.sendFunctionCallOutput({ callId, output: serializedOutput });
    }
  } catch (sendError) {
    manager.store.logAction({
      kind: "voice_error",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: manager.client.user?.id || null,
      // Fix: this log content must be a template literal (backticks were missing).
      content: `realtime_tool_output_send_failed: ${String(sendError?.message || sendError)}`,
      metadata: { sessionId: session.id, callId, toolName: resolvedToolName || null }
    });
  }

  manager.store.logAction({
    kind: success ? "voice_runtime" : "voice_error",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: manager.client.user?.id || null,
    content: success ? "realtime_tool_call_completed" : "realtime_tool_call_failed",
    metadata: {
      sessionId: session.id,
      callId,
      toolName: resolvedToolName || null,
      toolType,
      runtimeMs,
      outputSummary,
      error: success ? null : errorMessage
    }
  });

  if (session.realtimePendingToolCalls instanceof Map) session.realtimePendingToolCalls.delete(callId);
  if (session.realtimeCompletedToolCallIds instanceof Map) {
    session.realtimeCompletedToolCallIds.set(callId, Date.now());
    // Keep only the 256 most recent completions, then drop any older than 10 minutes.
    const completedRows = [...session.realtimeCompletedToolCallIds.entries()]
      .sort((a, b) => a[1] - b[1])
      .slice(-256);
    session.realtimeCompletedToolCallIds = new Map(
      completedRows.filter(([, completedAtMs]) => Date.now() - completedAtMs <= 10 * 60 * 1000)
    );
  }
  if (session.realtimeToolCallExecutions instanceof Map) session.realtimeToolCallExecutions.delete(callId);
  // Schedule the follow-up response only once no tool executions remain tracked.
  if (!(session.realtimeToolCallExecutions instanceof Map) || session.realtimeToolCallExecutions.size <= 0) {
    manager.scheduleRealtimeToolFollowupResponse({
      session,
      userId: session.lastRealtimeToolCallerUserId || null,
      startedAtMs,
      requestFollowup,
      toolName: resolvedToolName || null
    });
  }
}
/**
 * Synchronizes the tool list registered on the session's realtime client with
 * what the current settings call for.
 *
 * Does nothing when the session is missing or ending, or when the realtime
 * client has no callable `updateTools`.
 *
 * - If `shouldRegisterRealtimeTools` returns false and the session previously
 *   had tools (non-empty definitions or a non-empty hash), an empty tool list
 *   is pushed and the session's tool fields are reset.
 * - Otherwise the tool list is rebuilt and pushed only when its serialized
 *   hash differs from `session.lastRealtimeToolHash`.
 *
 * Errors raised while pushing an update are logged as `voice_error` and are
 * not rethrown.
 *
 * @param manager - Tool call manager providing the store and client used for logging.
 * @param params.session - Session whose realtime client should be updated.
 * @param params.settings - Optional settings override passed to
 *   `resolveVoiceSettingsSnapshot`.
 * @param params.reason - Label recorded in log metadata; defaults to
 *   `"voice_context_refresh"`.
 */
export async function refreshRealtimeTools(
  manager: VoiceToolCallManager,
  {
    session,
    settings,
    reason = "voice_context_refresh"
  }: {
    session?: ToolRuntimeSession | null;
    settings?: VoiceRealtimeToolSettings | null;
    reason?: string;
  } = {}
) {
  if (!session || session.ending) return;
  const realtimeClient = session.realtimeClient;
  const updateTools =
    realtimeClient && "updateTools" in realtimeClient && typeof realtimeClient.updateTools === "function"
      ? realtimeClient.updateTools.bind(realtimeClient)
      : null;
  if (!updateTools) return;

  // An empty-string reason falls back to the default label as well.
  const refreshReason = String(reason || "voice_context_refresh");
  const resolvedSettings = resolveVoiceSettingsSnapshot(manager.store, session, settings);

  if (!shouldRegisterRealtimeTools({ session, settings: resolvedSettings })) {
    const hadRealtimeTools =
      (Array.isArray(session.realtimeToolDefinitions) && session.realtimeToolDefinitions.length > 0) ||
      Boolean(String(session.lastRealtimeToolHash || ""));
    // Nothing was registered before, so there is nothing to clear.
    if (!hadRealtimeTools) return;

    try {
      updateTools({
        tools: [],
        toolChoice: "auto"
      });
      session.realtimeToolDefinitions = [];
      session.lastRealtimeToolHash = "";
      session.lastRealtimeToolRefreshAt = Date.now();
      manager.store.logAction({
        kind: "voice_runtime",
        guildId: session.guildId,
        channelId: session.textChannelId,
        userId: manager.client.user?.id || null,
        content: "realtime_tools_cleared",
        metadata: {
          sessionId: session.id,
          reason: refreshReason
        }
      });
    } catch (error) {
      manager.store.logAction({
        kind: "voice_error",
        guildId: session.guildId,
        channelId: session.textChannelId,
        userId: manager.client.user?.id || null,
        content: `realtime_tools_update_failed: ${String(error?.message || error)}`,
        metadata: { sessionId: session.id, reason: refreshReason }
      });
    }
    return;
  }

  ensureSessionToolRuntimeState(manager, session);

  // Index the previous MCP statuses by server name so selected fields can be
  // carried over onto the freshly fetched statuses.
  const previousMcpStatuses = new Map<string, VoiceMcpServerStatus>();
  for (const entry of Array.isArray(session.mcpStatus) ? session.mcpStatus : []) {
    const serverName = String(entry?.serverName || "");
    if (serverName) previousMcpStatuses.set(serverName, entry);
  }
  session.mcpStatus = getVoiceMcpServerStatuses(manager).map((entry) => {
    const previous = previousMcpStatuses.get(String(entry.serverName || ""));
    return {
      ...entry,
      // lastError comes from the previous snapshot only; the timestamps prefer
      // the previous value and fall back to the fresh entry.
      lastError: previous?.lastError || null,
      lastConnectedAt: previous?.lastConnectedAt || entry.lastConnectedAt || null,
      lastCallAt: previous?.lastCallAt || entry.lastCallAt || null
    };
  });

  const tools = buildRealtimeFunctionTools(manager, { session, settings: resolvedSettings });
  const nextToolHash = JSON.stringify(
    tools.map((tool) => ({
      name: tool.name,
      toolType: tool.toolType,
      serverName: tool.serverName || null,
      description: tool.description,
      parameters: tool.parameters
    }))
  );
  // Skip the client update when the serialized tool list is unchanged.
  if (String(session.lastRealtimeToolHash || "") === nextToolHash) return;

  try {
    updateTools({
      tools: tools.map((tool) => ({
        type: "function",
        name: tool.name,
        description: tool.description,
        parameters: tool.parameters
      })),
      toolChoice: "auto"
    });
    session.realtimeToolDefinitions = tools;
    session.lastRealtimeToolHash = nextToolHash;
    session.lastRealtimeToolRefreshAt = Date.now();
    manager.store.logAction({
      kind: "voice_runtime",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: manager.client.user?.id || null,
      content: "realtime_tools_updated",
      metadata: {
        sessionId: session.id,
        reason: refreshReason,
        localToolCount: tools.filter((tool) => tool.toolType === "function").length,
        mcpToolCount: tools.filter((tool) => tool.toolType === "mcp").length,
        toolNames: tools.map((tool) => tool.name)
      }
    });
  } catch (error) {
    manager.store.logAction({
      kind: "voice_error",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: manager.client.user?.id || null,
      // Fix: this log content must be a template literal (backticks were
      // missing), matching the identical log in the clear-tools branch.
      content: `realtime_tools_update_failed: ${String(error?.message || error)}`,
      metadata: { sessionId: session.id, reason: refreshReason }
    });
  }
}
