JCV's Portfolio

src/voice/voiceInterruptClassifier.ts

import { buildSingleTurnPromptLog } from "../promptLogging.ts"; import { getPromptBotName } from "../prompts/promptCore.ts"; import { applyOrchestratorOverrideSettings, getResolvedVoiceInterruptClassifierBinding } from "../settings/agentStack.ts"; import { isCancelIntent } from "../tools/cancelDetection.ts"; import { defaultVoiceReplyDecisionModel, normalizeVoiceReplyDecisionProvider, resolveVoiceReplyDecisionMaxOutputTokens } from "./voiceDecisionRuntime.ts"; import { VOICE_INTERRUPT_CLASSIFIER_TIMEOUT_MS, STT_REPLY_MAX_CHARS, STT_TRANSCRIPT_MAX_CHARS } from "./voiceSessionManager.constants.ts"; import { normalizeInlineText, normalizeVoiceText } from "./voiceSessionHelpers.ts"; import type { LoggedVoicePromptBundle, VoiceInterruptOverlapBurstEntry, VoiceSession } from "./voiceSessionTypes.ts";

/** Opaque settings bag handed through to binding resolution and the LLM call; may be `null`. */
type InterruptClassifierSettings = Record<string, unknown> | null;

/**
 * Structural subset of a store: just an optional `logAction` sink that accepts
 * a log entry with required `content` and optional routing ids / metadata.
 */
type InterruptClassifierStoreLike = {
  logAction?: (entry: {
    kind?: string;
    guildId?: string | null;
    channelId?: string | null;
    userId?: string | null;
    content: string;
    metadata?: Record<string, unknown>;
  }) => void;
};

/** Shape of what `llm.generate` resolves to; `text` may be missing or `null`. */
type InterruptClassifierGenerateResult = {
  text?: string | null;
};

/**
 * Dependencies the interrupt classifier receives from its host.
 *
 * Both `store` and `llm` (and `llm.generate` itself) are optional, so callers
 * must check for their presence before use.
 */
type InterruptClassifierHost = {
  store?: InterruptClassifierStoreLike | null;
  llm?: {
    // The return type needs an explicit type argument: a bare `Promise` is
    // rejected by the compiler, and the resolved value is the generate result.
    generate?: (args: {
      settings: InterruptClassifierSettings;
      systemPrompt: string;
      userPrompt: string;
      contextMessages: unknown[];
      trace?: {
        guildId?: string | null;
        channelId?: string | null;
        userId?: string | null;
        source?: string | null;
      };
      signal?: AbortSignal;
    }) => Promise<InterruptClassifierGenerateResult>;
  } | null;
};

/** Outcome of classifying one overlap burst. */
type VoiceInterruptClassifierResult = {
  /** Whether the burst should cut off the current utterance or be disregarded. */
  decision: "interrupt" | "ignore";
  /** Label for the path that produced the decision (e.g. "empty_burst", "takeover_heuristic"). */
  source: string;
  /** Time attributed to the decision in milliseconds; the heuristic paths report 0. */
  latencyMs: number;
  /** Prompt bundle for logging, or `null` when no prompt was built. */
  promptLog: LoggedVoicePromptBundle | null;
  /** Optional raw model output. */
  rawOutput?: string | null;
  /** Optional error label (e.g. "llm_generate_unavailable"). */
  error?: string | null;
};

/**
 * Runs `normalizeVoiceText` over a burst transcript with a character cap.
 *
 * @param text - Raw transcript text.
 * @param maxChars - Cap handed to `normalizeVoiceText`; defaults to `STT_TRANSCRIPT_MAX_CHARS`.
 * @returns Whatever `normalizeVoiceText` yields for the given text and cap.
 */
function normalizeBurstText(text: string, maxChars = STT_TRANSCRIPT_MAX_CHARS) {
  const cleaned = normalizeVoiceText(text, maxChars);
  return cleaned;
}

/**
 * Counts whitespace-separated tokens in `text` after normalizing it with the
 * `STT_REPLY_MAX_CHARS` cap.
 *
 * @param text - Transcript text to measure.
 * @returns Number of non-empty tokens; 0 when the normalized text is empty.
 */
function countTokens(text: string) {
  const cleaned = normalizeBurstText(text, STT_REPLY_MAX_CHARS);
  if (!cleaned) return 0;

  let tokenTotal = 0;
  for (const piece of cleaned.split(/\s+/u)) {
    // Leading/trailing whitespace produces empty pieces; those are not tokens.
    if (piece) tokenTotal += 1;
  }
  return tokenTotal;
}

/**
 * Decides whether a transcript is an unmistakable attempt to take the floor.
 *
 * The text is passed through `normalizeInlineText` (200-char cap), lowercased,
 * and has whitespace runs collapsed to single spaces before matching.
 *
 * @param text - Transcript text to inspect.
 * @returns `true` for a cancel intent or an explicit floor-taking phrase,
 *   `false` otherwise (including when nothing is left after normalization).
 */
function isObviousInterruptTakeoverText(text: string) {
  const lowered = normalizeInlineText(text, 200).toLowerCase();
  const phrase = lowered.replace(/\s+/gu, " ").trim();
  if (phrase.length === 0) return false;

  // Anything `isCancelIntent` recognises counts immediately (per the original
  // note: "stop", "cancel", "nevermind", "abort", etc. — defined in cancelDetection).
  if (isCancelIntent(phrase)) return true;

  // Beyond cancel intent, only phrases that are unambiguously about taking the
  // conversational floor are matched. Bare words such as "wait" or "hold on"
  // come up constantly in gaming sessions ("oh wait, I don't have
  // teleportation potions") and would cause false-positive interruptions.
  const floorTakingPhrases =
    /\b(?:let me talk|lemme talk|can i talk|can i say something|let me finish|shut up|be quiet|shush|hush)\b/u;
  return floorTakingPhrases.test(phrase);
}

/**
 * Reports whether any entry in an overlap burst carries an obvious takeover
 * phrase, as judged by `isObviousInterruptTakeoverText`.
 *
 * @param entries - Burst entries; a non-array value is treated as an empty burst.
 * @returns `true` as soon as one entry's transcript qualifies, else `false`.
 */
export function hasObviousInterruptTakeoverBurst(entries: VoiceInterruptOverlapBurstEntry[]) {
  // Defensive: callers may hand over something that is not an array at runtime.
  if (!Array.isArray(entries)) return false;

  const isTakeover = (entry: VoiceInterruptOverlapBurstEntry) =>
    isObviousInterruptTakeoverText(entry.transcript);
  return entries.some(isTakeover);
}

/**
 * Reports whether at least one burst entry looks like meaningful speech.
 *
 * An entry qualifies when its normalized transcript (capped at
 * `STT_REPLY_MAX_CHARS`) is non-empty and any of these hold, checked in order:
 * it is an obvious takeover phrase, it contains a question mark, it has at
 * least 6 tokens, or it is at least 28 characters long.
 *
 * @param entries - Burst entries to inspect.
 * @returns `true` if any entry qualifies, else `false`.
 */
function hasClearlySemanticBurst(entries: VoiceInterruptOverlapBurstEntry[]) {
  const looksSemantic = (entry: VoiceInterruptOverlapBurstEntry) => {
    const spoken = normalizeBurstText(entry.transcript, STT_REPLY_MAX_CHARS);
    return (
      Boolean(spoken) &&
      (isObviousInterruptTakeoverText(spoken) ||
        /[?]/u.test(spoken) ||
        countTokens(spoken) >= 6 ||
        spoken.length >= 28)
    );
  };
  return entries.some(looksSemantic);
}

/**
 * Extracts a decision keyword from classifier output.
 *
 * The text is passed through `normalizeInlineText` (80-char cap) and
 * uppercased, then searched for the keywords by substring.
 *
 * @param text - Raw classifier output.
 * @returns "interrupt" or "ignore" when the matching keyword is present,
 *   `null` when the normalized text is empty or contains neither keyword.
 */
function parseInterruptDecision(text: string): "interrupt" | "ignore" | null {
  const verdictText = normalizeInlineText(text, 80).toUpperCase();
  if (verdictText.length === 0) return null;

  // Order matters: INTERRUPT is checked first, so output containing both
  // keywords resolves to "interrupt".
  // NOTE(review): substring matching means e.g. "IGNORE, do not interrupt"
  // also resolves to "interrupt" — confirm the prompt constrains the output.
  const keywordDecisions = [
    ["INTERRUPT", "interrupt"],
    ["IGNORE", "ignore"]
  ] as const;
  for (const [keyword, decision] of keywordDecisions) {
    if (verdictText.includes(keyword)) return decision;
  }
  return null;
}

export async function classifyVoiceInterruptBurst( host: InterruptClassifierHost, { session, settings, interruptedUtteranceText, entries, traceUserId = null, skipLlm = false }: { session: Pick<VoiceSession, "id" | "guildId" | "textChannelId">; settings: InterruptClassifierSettings; interruptedUtteranceText: string; entries: VoiceInterruptOverlapBurstEntry[]; traceUserId?: string | null; skipLlm?: boolean; } ): Promise { const normalizedEntries = (Array.isArray(entries) ? entries : []) .map((entry) => ({ ...entry, transcript: normalizeBurstText(entry.transcript, STT_REPLY_MAX_CHARS) })) .filter((entry) => Boolean(entry.transcript)); if (normalizedEntries.length === 0) { return { decision: "ignore", source: "empty_burst", latencyMs: 0, promptLog: null }; } if (normalizedEntries.every((entry) => isLikelyLowSignalOverlapText(entry.transcript))) { return { decision: "ignore", source: "low_signal_heuristic", latencyMs: 0, promptLog: null }; } if (normalizedEntries.some((entry) => isObviousInterruptTakeoverText(entry.transcript))) { return { decision: "interrupt", source: "takeover_heuristic", latencyMs: 0, promptLog: null }; }

if (skipLlm) { return { decision: "ignore", source: "classifier_disabled", latencyMs: 0, promptLog: null }; }

const binding = getResolvedVoiceInterruptClassifierBinding(settings); const llmProvider = normalizeVoiceReplyDecisionProvider(binding?.provider || "openai"); const llmModel = String(binding?.model || defaultVoiceReplyDecisionModel(llmProvider)).trim() || defaultVoiceReplyDecisionModel(llmProvider); const maxOutputTokens = resolveVoiceReplyDecisionMaxOutputTokens(llmProvider, llmModel); const { systemPrompt, userPrompt, promptLog } = buildInterruptClassifierPrompt({ settings, interruptedUtteranceText, entries: normalizedEntries });

if (!host.llm?.generate) { return { decision: hasClearlySemanticBurst(normalizedEntries) ? "interrupt" : "ignore", source: "llm_unavailable_fallback", latencyMs: 0, promptLog, error: "llm_generate_unavailable" }; }