import { normalizeVoiceRuntimeEventContext } from "../voice/voiceSessionHelpers.ts";
import { VOICE_TINY_REPLY_POLICY_LINE } from "./voiceLivePolicy.ts";

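/** Conversation-state hints from the live session: overall attention mode and whether the current speaker already has an active thread with the assistant. */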
type VoiceAdmissionPolicyContext = {
  attentionMode?: "ACTIVE" | "AMBIENT";
  currentSpeakerActive?: boolean;
};

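/** Inputs used to assemble the admission-policy prompt lines for a single voice turn. */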
type VoiceAdmissionPolicyOptions = {
  inputKind?: "transcript" | "event";
  speakerName?: string;
  directAddressed?: boolean;
  nameCueDetected?: boolean;
  isEagerTurn?: boolean;
  ambientReplyEagerness?: number;
  responseWindowEagerness?: number;
  participantCount?: number;
  conversationContext?: VoiceAdmissionPolicyContext | null;
  runtimeEventContext?: unknown;
  pendingCommandFollowupSignal?: boolean;
  musicActive?: boolean;
  musicWakeLatched?: boolean;
};

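/**
 * Maps a 0-100 screen-watch eagerness value to a prose bias line describing how
 * readily the assistant should react to on-screen moments.
 */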
export function getScreenWatchCommentaryTier(eagerness: number): string {
  if (eagerness <= 0) {
    return "Screen watch commentary bias: minimal. Prefer silence unless the moment clearly calls for a reaction or someone directly asks for one.";
  }
  if (eagerness <= 15) {
    return "Screen watch commentary bias: very low. Wait for unusually clear, notable, or surprising visual moments before reacting.";
  }
  if (eagerness <= 35) {
    return "Screen watch commentary bias: selective. Favor reacting to meaningful changes or clearly notable moments over routine visual churn.";
  }
  if (eagerness <= 50) {
    return "Screen watch commentary bias: moderate-low. A visible moment that is interesting, funny, or newly notable is often worth a short reaction.";
  }
  if (eagerness <= 65) {
    return "Screen watch commentary bias: moderate. Fresh visual developments are reasonably likely to merit a short reaction, without turning into constant narration.";
  }
  if (eagerness <= 80) {
    return "Screen watch commentary bias: high. When the screen presents a fresh watchable moment, lean toward reacting rather than staying silent.";
  }
  return "Screen watch commentary bias: very high. Frequent short reactions are appropriate when the screen keeps presenting fresh moments worth calling out.";
}

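/**
 * Maps a 0-100 ambient-reply eagerness value to a prose bias line describing how
 * readily the assistant should join room conversation unprompted.
 */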
function getAmbientReplyTier(eagerness: number): string {
  if (eagerness <= 0) {
    return "Ambient reply bias: minimal. Prefer silence unless someone clearly engages you or the moment strongly warrants a reply.";
  }
  if (eagerness <= 25) {
    return "Ambient reply bias: low. Favor replies when directly engaged or when you have something clearly worth adding.";
  }
  if (eagerness <= 50) {
    return "Ambient reply bias: moderate. Contribute when you genuinely have something to add, without treating every room moment as a prompt to speak.";
  }
  if (eagerness <= 75) {
    return "Ambient reply bias: high. Lean toward joining room conversation when the moment plausibly invites your participation.";
  }
  return "Ambient reply bias: very high. In shared room conversation, participation is strongly favored unless silence is clearly better.";
}

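/**
 * Maps a 0-100 response-window eagerness value to a prose bias line describing how
 * strongly recent engagement should pull nearby follow-up turns toward the assistant.
 */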
function getResponseWindowTier(eagerness: number): string {
  if (eagerness <= 15) {
    return "Response-window bias: narrow. Recent engagement is only a weak follow-up signal unless the speaker clearly reconnects to you.";
  }
  if (eagerness <= 45) {
    return "Response-window bias: moderate. Recent engagement matters, but it does not by itself make the next turn yours.";
  }
  if (eagerness <= 75) {
    return "Response-window bias: warm. If you were just engaged, plausible follow-ups are often still for you.";
  }
  return "Response-window bias: sticky. Recent engagement remains a strong signal that nearby follow-ups may still be for you.";
}

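/**
 * Builds the ordered list of admission-policy prompt lines for one voice turn:
 * eagerness biases, room and addressing context, input-type cues, a transcript-quality
 * reminder, and the output guidance (including the [SKIP] escape hatch).
 */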
export function buildVoiceAdmissionPolicyLines({
  inputKind = "transcript",
  speakerName = "unknown",
  directAddressed = false,
  nameCueDetected = false,
  isEagerTurn: _isEagerTurn = false,
  ambientReplyEagerness = 0,
  responseWindowEagerness = 0,
  participantCount = 0,
  conversationContext = null,
  runtimeEventContext = null,
  pendingCommandFollowupSignal = false,
  musicActive = false,
  musicWakeLatched = false
}: VoiceAdmissionPolicyOptions = {}) {
  const lines: string[] = [];
  const normalizedInputKind =
    String(inputKind || "").trim().toLowerCase() === "event" ? "event" : "transcript";
  const normalizedSpeakerName = String(speakerName || "").trim() || "unknown";
  const normalizedDirectAddressed = Boolean(directAddressed);
  const normalizedNameCueDetected = Boolean(nameCueDetected);
  const normalizedParticipantCount = Math.max(0, Math.floor(Number(participantCount) || 0));
  const normalizedAmbientEagerness = Math.max(0, Math.min(100, Number(ambientReplyEagerness) || 0));
  const normalizedResponseWindowEagerness = Math.max(
    0,
    Math.min(100, Number(responseWindowEagerness) || 0)
  );
  const normalizedRuntimeEventContext = normalizeVoiceRuntimeEventContext(runtimeEventContext);
  const currentSpeakerActive = Boolean(conversationContext?.currentSpeakerActive);
  // --- Eagerness settings ---
  lines.push(`Voice ambient-reply eagerness: ${normalizedAmbientEagerness}/100.`);
  lines.push(getAmbientReplyTier(normalizedAmbientEagerness));
  lines.push(`Response-window eagerness: ${normalizedResponseWindowEagerness}/100.`);
  lines.push(getResponseWindowTier(normalizedResponseWindowEagerness));
  // --- Room context signals ---
  if (normalizedParticipantCount <= 1) {
    lines.push("Room: 1:1 — you are the only other presence, so speech is more likely meant for you.");
  } else {
    lines.push(`Room: ${normalizedParticipantCount} humans present.`);
  }
  if (normalizedDirectAddressed) {
    lines.push(`Addressing: ${normalizedSpeakerName} used your name or wake phrase directly.`);
  } else if (normalizedNameCueDetected) {
    lines.push(`Addressing: your name appeared in ${normalizedSpeakerName}'s transcript, though not as a direct wake phrase.`);
  } else {
    lines.push(`Addressing: no direct address or name cue detected from ${normalizedSpeakerName}.`);
  }
  if (currentSpeakerActive) {
    lines.push("Thread state: you are actively in a thread with this speaker.");
  }
  if (musicActive) {
    lines.push("Music is currently playing. Music audio often bleeds into the mic and produces garbled or nonsensical transcripts.");
    if (musicWakeLatched) {
      lines.push("Music wake latch is active — someone recently addressed you over music.");
    }
  }
  if (pendingCommandFollowupSignal) {
    lines.push("Signal: this may be a same-speaker command follow-up (e.g. music disambiguation).");
  }
  // --- Input type context ---
  if (normalizedInputKind === "event") {
    lines.push("This is a voice-room event cue, not literal quoted speech.");
    if (
      normalizedRuntimeEventContext?.category === "membership" &&
      normalizedRuntimeEventContext.eventType === "join" &&
      normalizedRuntimeEventContext.actorRole === "self"
    ) {
      lines.push("Event: you just entered the channel.");
    } else if (
      normalizedRuntimeEventContext?.category === "membership" &&
      normalizedRuntimeEventContext.eventType === "join"
    ) {
      lines.push("Event: someone joined the channel.");
    } else if (
      normalizedRuntimeEventContext?.category === "membership" &&
      normalizedRuntimeEventContext.eventType === "leave"
    ) {
      lines.push("Event: someone left the channel.");
    } else if (normalizedRuntimeEventContext?.category === "screen_share") {
      lines.push("Event: screen-watch state change.");
      if (normalizedRuntimeEventContext.hasVisibleFrame) {
        lines.push("A visible screen frame is attached.");
      }
    }
  }
  // --- Transcript quality reminder ---
  lines.push("Transcripts come from speech-to-text and can be garbled, nonsensical, or misheard.");

  // --- Output guidance ---
  lines.push(VOICE_TINY_REPLY_POLICY_LINE);
  lines.push("Output exactly [SKIP] when silence is best.");

  return lines;
}
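// Illustrative usage sketch (hypothetical values, not part of this module's API):
// const lines = buildVoiceAdmissionPolicyLines({
//   speakerName: "Ada",
//   directAddressed: true,
//   participantCount: 2,
//   ambientReplyEagerness: 40,
// });
// lines.forEach((line) => console.log(line));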
