// src/voice/replyManager.ts

import { estimateUsdCost } from "../llm/pricing.ts"; import type { ActiveReplyRegistry } from "../tools/activeReplyRegistry.ts"; import { buildVoiceReplyScopeKey } from "../tools/activeReplyRegistry.ts"; import { getReplyGenerationSettings, getVoiceRuntimeConfig } from "../settings/agentStack.ts"; import type { AssistantOutputState, ReplyOutputLockState, TtsPlaybackState } from "./assistantOutputState.ts"; import { TTS_PLAYBACK_STATE, buildReplyOutputLockState, createAssistantOutputState, getAssistantOutputActivityAt, normalizeAssistantOutputState, normalizeTtsPlaybackState, patchAssistantOutputState, syncAssistantOutputStateRecord } from "./assistantOutputState.ts"; import { ACTIVITY_TOUCH_THROTTLE_MS, BOT_TURN_SILENCE_RESET_MS, CLANKVOX_TTS_TELEMETRY_STALE_MS, MAX_RESPONSE_SILENCE_RETRIES, RESPONSE_DONE_SILENCE_GRACE_MS, RESPONSE_SILENCE_RETRY_DELAY_MS, STT_REPLY_MAX_CHARS } from "./voiceSessionManager.constants.ts"; import { getRealtimeCommitMinimumBytes, isRealtimeMode, normalizeVoiceText, parseResponseDoneId, parseResponseDoneModel, parseResponseDoneStatus, parseResponseDoneUsage, resolveRealtimeProvider } from "./voiceSessionHelpers.ts"; import { createVoiceOutputLease, hasActiveVoiceOutputLease, normalizeVoiceOutputLeaseMode } from "./voiceOutputLease.ts"; import type { MusicPlaybackPhase, VoiceSession } from "./voiceSessionTypes.ts"; import { musicPhaseIsActive, musicPhaseShouldLockOutput } from "./voiceSessionTypes.ts"; import type { BargeInController, ReplyInterruptionPolicy } from "./bargeInController.ts"; import type { DeferredActionQueue } from "./deferredActionQueue.ts"; import { getMusicResumeStateSnapshot, hasKnownMusicResumeState, noteMusicResumeRequest, setKnownMusicQueuePausedState } from "./musicResumeState.ts"; import { touchMusicWakeLatch } from "./musicWakeLatch.ts";

// Raw settings object as read from the store; shape is not validated here.
type ReplyManagerSettings = Record<string, unknown> | null;

// Arguments for the silent-response recovery path (watchdog fire or a
// response that completed without producing any audio).
interface SilentResponseRecoveryArgs { session: VoiceSession; userId?: string | null; trigger?: string; responseId?: string | null; responseStatus?: string | null; }

// Minimal store surface the ReplyManager needs: settings access plus the
// structured action/telemetry log sink used throughout this file.
type ReplyManagerStoreLike = { getSettings: () => ReplyManagerSettings; logAction: (entry: { kind: string; guildId?: string | null; channelId?: string | null; userId?: string | null; content: string; metadata?: Record<string, unknown>; usdCost?: number; }) => void; };

interface ReplyManagerHost { client: { user?: { id?: string | null; } | null; }; store: ReplyManagerStoreLike; activeReplies?: ActiveReplyRegistry | null; musicPlayer?: { resume?: () => void; } | null; bargeInController: Pick< BargeInController, "clearBargeInOutputSuppression" | "isBargeInOutputSuppressed"

; touchActivity: (guildId: string, settings?: ReplyManagerSettings) => void; logVoiceLatencyStage: (payload: Record<string, unknown>) => void; normalizeReplyInterruptionPolicy: ( rawPolicy?: ReplyInterruptionPolicy | Record<string, unknown> | null ) => ReplyInterruptionPolicy | null; resolveReplyInterruptionPolicy: (args: { session: VoiceSession; userId?: string | null; policy?: ReplyInterruptionPolicy | Record<string, unknown> | null; source?: string | null; }) => ReplyInterruptionPolicy | null; setActiveReplyInterruptionPolicy: ( session: VoiceSession, policy?: ReplyInterruptionPolicy | Record<string, unknown> | null ) => void; maybeClearActiveReplyInterruptionPolicy: (session: VoiceSession) => void; deferredActionQueue: Pick< DeferredActionQueue, | "getDeferredQueuedUserTurns" | "scheduleDeferredVoiceActionRecheck" | "recheckDeferredVoiceActions" | "clearAllDeferredVoiceActions" ; hasDeferredTurnBlockingActiveCapture: (session: VoiceSession) => boolean; endSession: (args: { guildId: string; reason?: string; announcement?: string; settings?: ReplyManagerSettings; }) => Promise; scheduleBotSpeechMusicUnduck: ( session: VoiceSession, settings?: ReplyManagerSettings, delayMs?: number ) => void; getMusicPhase: (session: VoiceSession) => MusicPlaybackPhase; setMusicPhase: (session: VoiceSession, phase: MusicPlaybackPhase) => void; haltSessionOutputForMusicPlayback: (session: VoiceSession, reason?: string) => void; drainPendingRealtimeAssistantUtterances: (session: VoiceSession, reason?: string) => boolean; }

export class ReplyManager {
  // Per-session resume/refresh retry timers. WeakMap so entries disappear
  // with the session; timer handles are cleared explicitly before re-arming.
  // Fix: `ReturnType` requires a type argument — restore `typeof setTimeout`.
  private readonly wakeWordMusicResumeTimers = new WeakMap<VoiceSession, ReturnType<typeof setTimeout>>();
  private readonly passiveMusicWakeRefreshTimers = new WeakMap<VoiceSession, ReturnType<typeof setTimeout>>();

// All external collaborators are injected through the host facade.
constructor(private readonly host: ReplyManagerHost) {}

/**
 * Create and attach a voice-output lease to the session, logging the grant.
 * Returns the lease, or null when createVoiceOutputLease declined to issue
 * one (session.outputLease is overwritten either way).
 */
private grantOutputLease(
  session: VoiceSession,
  {
    mode = null,
    requestId = null,
    source = "voice_reply"
  }: { mode?: unknown; requestId?: number | null; source?: string | null } = {}
) {
  const grantedLease = createVoiceOutputLease({ mode, requestId, source });
  session.outputLease = grantedLease;
  if (!grantedLease) return null;
  this.host.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: this.botUserId,
    content: "voice_output_lease_granted",
    metadata: {
      sessionId: session.id,
      requestId: grantedLease.requestId,
      leaseMode: grantedLease.mode,
      source: grantedLease.source,
      expiresAt: grantedLease.expiresAt
    }
  });
  return grantedLease;
}

/**
 * Release the session's voice-output lease, logging the release.
 * When a requestId is supplied and the lease carries a positive id, only a
 * matching lease is released — a mismatch means a newer request owns output.
 * Returns true when a lease was actually cleared.
 */
private clearOutputLease(
  session: VoiceSession,
  reason = "released",
  { requestId = null }: { requestId?: number | null } = {}
) {
  const activeLease = session?.outputLease;
  if (!activeLease) return false;
  const requestedId = Number(requestId || 0) || null;
  const leaseRequestId = Number(activeLease.requestId || 0);
  if (requestedId && leaseRequestId > 0 && requestedId !== leaseRequestId) {
    return false;
  }
  session.outputLease = null;
  this.host.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: this.botUserId,
    content: "voice_output_lease_released",
    metadata: {
      sessionId: session.id,
      requestId: leaseRequestId || null,
      leaseMode: activeLease.mode,
      source: activeLease.source,
      reason: String(reason || "released")
    }
  });
  return true;
}

// Cancel any pending wake-word music-resume timer for this session.
private clearWakeWordMusicResumeTimer(session: VoiceSession) {
  const pendingTimer = this.wakeWordMusicResumeTimers.get(session);
  if (!pendingTimer) return;
  clearTimeout(pendingTimer);
  this.wakeWordMusicResumeTimers.delete(session);
}

// Cancel any pending passive music wake-latch refresh timer for this session.
private clearPassiveMusicWakeRefreshTimer(session: VoiceSession) {
  const pendingTimer = this.passiveMusicWakeRefreshTimers.get(session);
  if (!pendingTimer) return;
  clearTimeout(pendingTimer);
  this.passiveMusicWakeRefreshTimers.delete(session);
}

/**
 * After a wake-word turn paused music ("paused_wake_word" phase), schedule
 * an attempt to resume it once the bot is fully quiet. While TTS audio is
 * buffered, the bot turn is open, turn processing is pending, or a deferred
 * action holds capture, the attempt re-polls every 200ms. Only the latest
 * scheduled timer for a session survives.
 */
schedulePausedReplyMusicResume( session: VoiceSession, delayMs = BOT_TURN_SILENCE_RESET_MS ) { this.clearWakeWordMusicResumeTimer(session); const normalizedDelayMs = Math.max(0, Math.round(Number(delayMs) || 0));

const attemptResume = () => {
  this.wakeWordMusicResumeTimers.delete(session);
  if (session.ending) return;
  // Bail unless we are still paused specifically because of a wake word.
  if (this.host.getMusicPhase(session) !== "paused_wake_word") return;
  if (
    this.hasBufferedTtsPlayback(session) ||
    Boolean(session.botTurnOpen) ||
    this.hasPendingTurnProcessingWork(session) ||
    this.host.hasDeferredTurnBlockingActiveCapture(session)
  ) {
    // Bot still busy speaking/processing — poll again shortly.
    const retryTimer = setTimeout(attemptResume, 200);
    this.wakeWordMusicResumeTimers.set(session, retryTimer);
    return;
  }
  if (!hasKnownMusicResumeState(session)) {
    // Nothing resumable: drop back to idle and log a queue snapshot for diagnosis.
    const snapshot = getMusicResumeStateSnapshot(session);
    this.host.setMusicPhase(session, "idle");
    setKnownMusicQueuePausedState(session, false);
    this.host.store.logAction({
      kind: "voice_runtime",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: this.host.client.user?.id || null,
      content: "voice_music_resume_unavailable",
      metadata: {
        sessionId: session.id,
        source: "music_resumed_after_wake_word",
        phase: "paused_wake_word",
        hasQueuedTrack: snapshot.hasQueuedTrack,
        hasRememberedTrack: snapshot.hasRememberedTrack,
        queueNowPlayingIndex: snapshot.queueNowPlayingIndex,
        queueTrackId: snapshot.queueTrackId,
        rememberedTrackId: snapshot.rememberedTrackId,
        rememberedTrackUrl: snapshot.rememberedTrackUrl
      }
    });
    return;
  }
  noteMusicResumeRequest(session, "music_resumed_after_wake_word");
  this.host.musicPlayer?.resume?.();
};

const timer = setTimeout(attemptResume, normalizedDelayMs);
this.wakeWordMusicResumeTimers.set(session, timer);

}

/**
 * While music is actively playing, schedule a refresh of the passive music
 * wake latch once the bot finishes speaking (so a follow-up command can be
 * issued without the wake word — presumably; confirm against latch usage).
 * Re-polls every 200ms while the bot is still busy. Only the latest
 * scheduled timer for a session survives.
 */
schedulePassiveMusicWakeLatchRefresh( session: VoiceSession, settings = session?.settingsSnapshot || this.host.store.getSettings(), userId: string | null = null, delayMs = BOT_TURN_SILENCE_RESET_MS ) { if (!session || session.ending) return; this.clearPassiveMusicWakeRefreshTimer(session); const normalizedDelayMs = Math.max(0, Math.round(Number(delayMs) || 0)); const normalizedUserId = String(userId || "").trim() || null;

const attemptRefresh = () => {
  this.passiveMusicWakeRefreshTimers.delete(session);
  if (session.ending) return;
  const musicPhase = this.host.getMusicPhase(session);
  // Only refresh while music is genuinely active (not wake-word paused).
  if (!musicPhaseIsActive(musicPhase) || musicPhase === "paused_wake_word") return;
  if (
    this.hasBufferedTtsPlayback(session) ||
    Boolean(session.botTurnOpen) ||
    this.hasPendingTurnProcessingWork(session) ||
    this.host.hasDeferredTurnBlockingActiveCapture(session)
  ) {
    // Bot still busy speaking/processing — poll again shortly.
    const retryTimer = setTimeout(attemptRefresh, 200);
    this.passiveMusicWakeRefreshTimers.set(session, retryTimer);
    return;
  }
  const latchedUntil = touchMusicWakeLatch(session, settings, normalizedUserId);
  this.host.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: normalizedUserId,
    content: "voice_music_wake_latch_refreshed_after_reply",
    metadata: {
      sessionId: session.id,
      musicPhase,
      latchedUntil: latchedUntil > 0 ? new Date(latchedUntil).toISOString() : null
    }
  });
};

const timer = setTimeout(attemptRefresh, normalizedDelayMs);
this.passiveMusicWakeRefreshTimers.set(session, timer);

}

/**
 * Guarantee session.assistantOutput holds a normalized AssistantOutputState,
 * seeding a fresh one when absent. Returns null for missing/ending sessions.
 */
ensureAssistantOutputState(session: VoiceSession): AssistantOutputState | null {
  if (!session || session.ending) return null;
  const now = Date.now();
  const currentState =
    session.assistantOutput && typeof session.assistantOutput === "object"
      ? session.assistantOutput
      : null;
  if (!currentState) {
    const seededState = createAssistantOutputState({ now, trigger: "session_seed" });
    session.assistantOutput = seededState;
    return seededState;
  }
  const normalizedState = normalizeAssistantOutputState(currentState, { now });
  session.assistantOutput = normalizedState;
  return normalizedState;
}

/**
 * Merge TTS telemetry fields into the session's assistant-output record.
 * Returns the updated state, or null when no state exists for the session.
 */
patchAssistantOutputTelemetry(
  session: VoiceSession,
  metadata: {
    trigger?: string | null;
    requestId?: number | null;
    ttsPlaybackState?: string | null;
    ttsBufferedSamples?: number | null;
  } = {}
) {
  const currentState = this.ensureAssistantOutputState(session);
  if (!currentState) return null;
  const patchedState = patchAssistantOutputState(currentState, {
    now: Date.now(),
    trigger: metadata.trigger,
    requestId: metadata.requestId,
    ttsPlaybackState: metadata.ttsPlaybackState,
    ttsBufferedSamples: metadata.ttsBufferedSamples
  });
  session.assistantOutput = patchedState;
  return patchedState;
}

/**
 * Effective TTS playback state for the session. Prefers the live clankvox
 * report when present: stale telemetry forces IDLE, a positive buffered
 * sample count forces BUFFERED, otherwise the reported state is normalized.
 * Falls back to the supplied stored state when no live report exists.
 */
getSessionTtsPlaybackState( session: VoiceSession, fallbackState: AssistantOutputState | null = null ): TtsPlaybackState { if (!session || session.ending) return TTS_PLAYBACK_STATE.IDLE; const telemetryFresh = this.isClankvoxTtsTelemetryFresh(session); const bufferedSamples = this.getBufferedTtsSamples(session); const voxPlaybackState = session.voxClient?.getTtsPlaybackState?.(); if (typeof voxPlaybackState === "string") { if (!telemetryFresh) { return TTS_PLAYBACK_STATE.IDLE; } if (bufferedSamples > 0) { return TTS_PLAYBACK_STATE.BUFFERED; } return normalizeTtsPlaybackState(voxPlaybackState); } return normalizeTtsPlaybackState(fallbackState?.ttsPlaybackState); }

/** Raw playback state reported by the clankvox client, normalized; null when unavailable. */
getClankvoxReportedTtsPlaybackState(session: VoiceSession) {
  if (!session || session.ending) return null;
  const reportedState = session.voxClient?.getTtsPlaybackState?.();
  return typeof reportedState === "string" ? normalizeTtsPlaybackState(reportedState) : null;
}

/**
 * TTS buffer depth (samples) reported by the clankvox client, clamped to a
 * non-negative integer. Returns null when the session/client is gone, the
 * report is non-numeric, or (with requireFresh) the telemetry is stale.
 */
getClankvoxReportedTtsBufferedSamples(
  session: VoiceSession,
  { requireFresh = false }: { requireFresh?: boolean } = {}
) {
  if (!session || session.ending) return null;
  if (requireFresh && !this.isClankvoxTtsTelemetryFresh(session)) return null;
  const voxClient = session.voxClient;
  if (!voxClient || typeof voxClient !== "object") return null;
  const reportedDepth =
    typeof voxClient.getTtsBufferDepthSamples === "function"
      ? Number(voxClient.getTtsBufferDepthSamples())
      : Number(voxClient.ttsBufferDepthSamples || 0);
  if (!Number.isFinite(reportedDepth)) return null;
  return Math.max(0, Math.round(reportedDepth));
}

/**
 * Age (ms) of the clankvox TTS telemetry, or null when the session is gone
 * or the client never reported a positive update timestamp.
 */
getClankvoxTtsTelemetryAgeMs(session: VoiceSession) {
  if (!session || session.ending) return null;
  const reportedAt = Number(session.voxClient?.getTtsTelemetryUpdatedAt?.() || 0);
  if (!Number.isFinite(reportedAt) || reportedAt <= 0) return null;
  return Math.max(0, Date.now() - reportedAt);
}

/** Telemetry counts as fresh when its age is unknown (benefit of the doubt) or recent. */
isClankvoxTtsTelemetryFresh(session: VoiceSession) {
  const ageMs = this.getClankvoxTtsTelemetryAgeMs(session);
  return ageMs == null ? true : ageMs <= CLANKVOX_TTS_TELEMETRY_STALE_MS;
}

/**
 * Extra debug fields for output-lock logging. Populated only when the lock
 * reason is "bot_audio_buffered" (the case worth diagnosing); an empty
 * object otherwise so callers can spread it unconditionally.
 */
getOutputLockDebugMetadata(session: VoiceSession, outputLockReason: string | null = null) { if (!session || session.ending) return {}; if (String(outputLockReason || "").trim() !== "bot_audio_buffered") return {}; const assistantOutput = this.ensureAssistantOutputState(session); const reportedTtsPlaybackState = this.getClankvoxReportedTtsPlaybackState(session) || assistantOutput?.ttsPlaybackState || null; const reportedTtsBufferedSamples = this.getClankvoxReportedTtsBufferedSamples(session); const telemetryAgeMs = this.getClankvoxTtsTelemetryAgeMs(session); return { outputLockPhase: assistantOutput?.phase || null, outputLockAssistantReason: assistantOutput?.reason || null, outputLockAssistantLastTrigger: assistantOutput?.lastTrigger || null, outputLockTtsPlaybackState: reportedTtsPlaybackState, outputLockTtsBufferedSamples: reportedTtsBufferedSamples, outputLockTtsTelemetryAgeMs: telemetryAgeMs == null ? null : Math.round(telemetryAgeMs), outputLockTtsTelemetryFresh: this.isClankvoxTtsTelemetryFresh(session) }; }

/**
 * Check whether the OpenAI realtime active response is stale (no recent
 * activity for longer than RESPONSE_DONE_SILENCE_GRACE_MS). This is a
 * pure read — no side effects. Call clearStaleRealtimeResponse() to
 * actually clear it.
 * Use isStaleRealtimeResponseAt() to pass a shared snapshot timestamp
 * when the caller also uses that timestamp for derivation.
 */ isStaleRealtimeResponse(session: VoiceSession) { return this.isStaleRealtimeResponseAt(session, Date.now()); }

/**
 * Staleness check against a caller-supplied timestamp. A response is stale
 * when the realtime client reports it active but no request, audio delta,
 * or assistant-output activity has occurred for at least
 * RESPONSE_DONE_SILENCE_GRACE_MS.
 */
isStaleRealtimeResponseAt(session: VoiceSession, now: number) { if (!session || session.ending) return false; if (!this.isRealtimeResponseActive(session)) return false;

// Most recent signal that the response is still making progress.
const lastRelevantAt = Math.max(
  0,
  Number(session.lastResponseRequestAt || 0),
  Number(session.lastAudioDeltaAt || 0),
  getAssistantOutputActivityAt(session.assistantOutput)
);
// No activity ever recorded — cannot judge staleness.
if (!lastRelevantAt) return false;
const staleAgeMs = Math.max(0, now - lastRelevantAt);
return staleAgeMs >= RESPONSE_DONE_SILENCE_GRACE_MS;

}

/** Current realtime active response ID as a string, or null when absent. */
getActiveResponseId(session: VoiceSession): string | null {
  const client = session?.realtimeClient;
  if (!client || typeof client !== "object" || !("activeResponseId" in client)) {
    return null;
  }
  return client.activeResponseId ? String(client.activeResponseId) : null;
}

/**
 * Clear a stale OpenAI realtime active response. Idempotent and
 * best-effort — safe to call even if the response was already cleared
 * or the client state changed since the staleness check.
 * When expectedResponseId is provided, the clear is skipped if the
 * current active response ID no longer matches (a fresh response
 * started since the staleness check).
 */ clearStaleRealtimeResponse(session: VoiceSession, expectedResponseId: string | null = null) { if (!session || session.ending) return false;
const realtimeClient = session.realtimeClient;
if (
  !realtimeClient ||
  typeof realtimeClient !== "object" ||
  !("clearActiveResponse" in realtimeClient) ||
  typeof realtimeClient.clearActiveResponse !== "function"
) {
  return false;
}

// If we captured a specific response ID at check time, only clear
// if it still matches. A different ID means a new response started.
if (expectedResponseId) {
  const currentId = this.getActiveResponseId(session);
  if (currentId && currentId !== expectedResponseId) return false;
}

// Recompute the age purely for the log entry below.
const lastRelevantAt = Math.max(
  0,
  Number(session.lastResponseRequestAt || 0),
  Number(session.lastAudioDeltaAt || 0)
);
const staleAgeMs = Math.max(0, Date.now() - lastRelevantAt);

try {
  realtimeClient.clearActiveResponse();
  this.host.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: this.botUserId,
    content: "openai_realtime_active_response_cleared_stale",
    metadata: {
      sessionId: session.id,
      staleAgeMs
    }
  });
  return true;
} catch {
  // Best-effort: a throwing client just means nothing was cleared.
  return false;
}

}

/**
 * Recompute the derived assistant-output state from live signals (audio
 * deltas, pending response, tool-call executions, TTS buffer depth, the
 * realtime client's active-response flag) and store it on the session.
 * Side effects: clears a stale realtime active response after derivation,
 * and kicks the deferred queued-user-turn recheck when output goes idle.
 */
syncAssistantOutputState(session: VoiceSession, trigger = "state_sync") { const state = this.ensureAssistantOutputState(session); if (!state) return null; const now = Date.now(); const previousPhase = String(state.phase || "idle");

const liveAudioStreaming = this.hasRecentAssistantAudioDelta(session);
const pendingResponse =
  session?.pendingResponse && typeof session.pendingResponse === "object"
    ? session.pendingResponse
    : null;
const awaitingToolOutputs =
  session.awaitingToolOutputs ||
  (session.realtimeToolCallExecutions instanceof Map && session.realtimeToolCallExecutions.size > 0);
const bufferedSamples = this.getBufferedTtsSamples(session);

let ttsPlaybackState = this.getSessionTtsPlaybackState(session, state);

let bufferedBotSpeech = ttsPlaybackState === TTS_PLAYBACK_STATE.BUFFERED || bufferedSamples > 0;
const openAiActiveResponse = this.isRealtimeResponseActive(session);
const bufferedStateAgeMs = Math.max(0, now - Number(state.phaseEnteredAt || 0));

// Compute stale-response eligibility BEFORE derivation, since derivation
// will overwrite phaseEnteredAt and make the staleness check see "just started".
// Capture the responseId now so clearStaleRealtimeResponse only clears
// this specific response, not a fresh one that started in the meantime.
const staleResponseId = openAiActiveResponse
  ? this.getActiveResponseId(session)
  : null;
const staleResponseEligible =
  openAiActiveResponse &&
  !pendingResponse &&
  !awaitingToolOutputs &&
  !liveAudioStreaming &&
  !bufferedBotSpeech &&
  this.isStaleRealtimeResponseAt(session, now);

// Demote a lingering "buffered" playback state to idle once the buffer has
// drained and nothing else is producing audio for a full grace period.
if (
  bufferedBotSpeech &&
  bufferedSamples <= 0 &&
  !liveAudioStreaming &&
  !pendingResponse &&
  !openAiActiveResponse &&
  bufferedStateAgeMs >= RESPONSE_DONE_SILENCE_GRACE_MS
) {
  ttsPlaybackState = TTS_PLAYBACK_STATE.IDLE;
  bufferedBotSpeech = false;
}

const nextState = syncAssistantOutputStateRecord(state, {
  now,
  trigger,
  liveAudioStreaming,
  pendingResponse: Boolean(pendingResponse),
  openAiActiveResponse: staleResponseEligible ? false : openAiActiveResponse,
  awaitingToolOutputs,
  requestId: pendingResponse?.requestId || state.requestId || null,
  ttsPlaybackState,
  ttsBufferedSamples: bufferedSamples
});
session.assistantOutput = nextState;

// Clear stale OpenAI active response AFTER phase derivation.
// We pre-computed eligibility above so the derivation already excluded
// the stale signal; now perform the actual side-effect cleanup.
// Pass the captured responseId so we only clear the response we deemed
// stale, not a fresh one that may have started since the check.
if (staleResponseEligible) {
  this.clearStaleRealtimeResponse(session, staleResponseId);
}

// Output just went idle with queued user turns waiting — process them now.
if (
  previousPhase !== "idle" &&
  nextState.phase === "idle" &&
  this.host.deferredActionQueue.getDeferredQueuedUserTurns(session).length > 0
) {
  this.host.deferredActionQueue.scheduleDeferredVoiceActionRecheck(session, {
    type: "queued_user_turns",
    delayMs: 0,
    reason: "assistant_output_idle"
  });
}
return nextState;

}

/** True when an assistant audio delta arrived within the last 200ms. */
hasRecentAssistantAudioDelta(session: VoiceSession) {
  if (!session || session.ending) return false;
  const elapsedMs = Date.now() - Number(session.lastAudioDeltaAt || 0);
  return elapsedMs < 200;
}

/**
 * Buffered TTS samples for the session; 0 when the vox client is missing,
 * dead, reporting stale telemetry, or reporting a non-numeric depth.
 * Fix: a non-finite report (NaN/Infinity) previously slipped through
 * `Math.max(0, NaN)` and leaked NaN to callers — now guarded with
 * Number.isFinite, matching getClankvoxReportedTtsBufferedSamples.
 */
getBufferedTtsSamples(session: VoiceSession) {
  if (!session || session.ending) return 0;
  const voxClient = session.voxClient;
  if (!voxClient || typeof voxClient !== "object") return 0;
  if ("isAlive" in voxClient && voxClient.isAlive === false) return 0;
  const rawBufferedSamples =
    typeof voxClient.getTtsBufferDepthSamples === "function"
      ? Number(voxClient.getTtsBufferDepthSamples())
      : Number(voxClient.ttsBufferDepthSamples || 0);
  // Guard against NaN/Infinity from a misbehaving client.
  if (!Number.isFinite(rawBufferedSamples)) return 0;
  const bufferedSamples = Math.max(0, rawBufferedSamples);
  if (bufferedSamples <= 0) return 0;
  // Stale telemetry means the depth can't be trusted — treat as drained.
  return this.isClankvoxTtsTelemetryFresh(session) ? bufferedSamples : 0;
}

/** True when TTS audio is buffered or the playback state reports BUFFERED. */
hasBufferedTtsPlayback(session: VoiceSession) {
  const outputState = this.ensureAssistantOutputState(session);
  if (this.getBufferedTtsSamples(session) > 0) return true;
  return this.getSessionTtsPlaybackState(session, outputState) === TTS_PLAYBACK_STATE.BUFFERED;
}

/**
 * True while any stage of turn processing is still in flight: active
 * realtime/brain turns, a pending flush timer or response, active drains,
 * buffered realtime input bytes, or queued realtime / file-ASR turns.
 */
private hasPendingTurnProcessingWork(session: VoiceSession) { if (!session || session.ending) return false; if (session.activeRealtimeTurn && typeof session.activeRealtimeTurn === "object") return true; if (session.inFlightAcceptedBrainTurn && typeof session.inFlightAcceptedBrainTurn === "object") return true; if (session.responseFlushTimer) return true; if (session.pendingResponse && typeof session.pendingResponse === "object") return true; if (session.realtimeTurnDrainActive) return true; if (session.fileAsrTurnDrainActive) return true; if (Math.max(0, Number(session.pendingRealtimeInputBytes || 0)) > 0) return true; if (Array.isArray(session.pendingRealtimeTurns) && session.pendingRealtimeTurns.length > 0) return true; if (Array.isArray(session.pendingFileAsrTurnsQueue) && session.pendingFileAsrTurnsQueue.length > 0) return true; return false; }

/**
 * Stop in-flight bot audio and reset playback telemetry/state. While music
 * is active only TTS playback is stopped (so the music stream survives);
 * otherwise all playback is stopped.
 * Fix: the comment delimiters were garbled (`/* ignore /` … `/ ignore *\/`),
 * which turned the entire else-branch into a comment and silently dropped
 * the stopPlayback() call — both branches are restored here.
 */
resetBotAudioPlayback(session: VoiceSession) {
  if (!session) return;
  if (musicPhaseIsActive(this.host.getMusicPhase(session))) {
    try {
      session.voxClient?.stopTtsPlayback?.();
    } catch {
      /* ignore */
    }
  } else {
    try {
      session.voxClient?.stopPlayback?.();
    } catch {
      /* ignore */
    }
  }
  session.voxClient?.clearTtsPlaybackTelemetry?.();
  this.patchAssistantOutputTelemetry(session, {
    trigger: "reset_bot_audio_playback",
    ttsPlaybackState: TTS_PLAYBACK_STATE.IDLE,
    ttsBufferedSamples: 0
  });
  this.syncAssistantOutputState(session, "reset_bot_audio_playback");
  this.host.maybeClearActiveReplyInterruptionPolicy(session);
}

/**
 * Build the reply-output lock state used to decide whether a new reply may
 * start speaking. Missing/ending sessions are always locked.
 */
getReplyOutputLockState(session: VoiceSession): ReplyOutputLockState { if (!session || session.ending) { return { locked: true, reason: "session_inactive", phase: "idle", musicActive: false, botTurnOpen: false, bufferedBotSpeech: false, pendingResponse: false, openAiActiveResponse: false, awaitingToolOutputs: false, streamBufferedBytes: 0 }; }

// NOTE(review): stream buffering is not tracked here, so this is always
// 0 — confirm whether downstream lock logic still uses the field.
const streamBufferedBytes = 0;
const musicActive = musicPhaseShouldLockOutput(this.host.getMusicPhase(session));
// Resync first so the lock decision uses freshly derived state.
const assistantOutput = this.syncAssistantOutputState(session, "reply_output_lock");
const botTurnOpen = Boolean(session.botTurnOpen);
const pendingResponse = Boolean(session.pendingResponse && typeof session.pendingResponse === "object");
const openAiActiveResponse = this.isRealtimeResponseActive(session);
const awaitingToolOutputs =
  session.awaitingToolOutputs ||
  (session.realtimeToolCallExecutions instanceof Map && session.realtimeToolCallExecutions.size > 0);
return buildReplyOutputLockState({
  assistantOutput,
  musicActive,
  botTurnOpen,
  pendingResponse,
  openAiActiveResponse,
  awaitingToolOutputs,
  streamBufferedBytes
});

}

/**
 * Record that bot audio is being emitted: touches guild activity (throttled
 * by ACTIVITY_TOUCH_THROTTLE_MS), stamps first-audio latency telemetry on
 * the pending response, releases the output lease once audio flows, opens
 * the bot turn, and (re)arms the silence timer that eventually closes it.
 */
markBotTurnOut(session: VoiceSession, settings = session.settingsSnapshot) { const now = Date.now(); if (now - Number(session.lastBotActivityTouchAt || 0) >= ACTIVITY_TOUCH_THROTTLE_MS) { this.host.touchActivity(session.guildId, settings); session.lastBotActivityTouchAt = now; }

const pendingResponse =
  session.pendingResponse && typeof session.pendingResponse === "object"
    ? session.pendingResponse
    : null;
const pendingLatencyContext =
  pendingResponse?.latencyContext && typeof pendingResponse.latencyContext === "object"
    ? pendingResponse.latencyContext
    : null;
// First audio for this pending response: stamp it and emit the latency stage.
if (pendingLatencyContext && Number(pendingLatencyContext.audioStartedAtMs || 0) <= 0) {
  pendingLatencyContext.audioStartedAtMs = now;
  this.host.logVoiceLatencyStage({
    session,
    userId: this.botUserId,
    stage: "audio_started",
    source: pendingLatencyContext.source || pendingResponse?.source || "realtime",
    captureReason: pendingLatencyContext.captureReason || null,
    requestId: pendingResponse?.requestId || null,
    queueWaitMs: pendingLatencyContext.queueWaitMs,
    pendingQueueDepth: pendingLatencyContext.pendingQueueDepth,
    finalizedAtMs: pendingLatencyContext.finalizedAtMs,
    asrStartedAtMs: pendingLatencyContext.asrStartedAtMs,
    asrCompletedAtMs: pendingLatencyContext.asrCompletedAtMs,
    generationStartedAtMs: pendingLatencyContext.generationStartedAtMs,
    replyRequestedAtMs:
      Number(pendingLatencyContext.replyRequestedAtMs || 0) ||
      Number(pendingResponse?.requestedAt || 0) ||
      0,
    audioStartedAtMs: now
  });
}
// Audio is flowing, so the output lease has served its purpose — release it.
if (
  pendingResponse &&
  hasActiveVoiceOutputLease({
    session,
    pendingResponse,
    requestId: Number(pendingResponse.requestId || 0) || null,
    now
  })
) {
  this.clearOutputLease(session, "audio_started", {
    requestId: Number(pendingResponse.requestId || 0) || null
  });
}

if (pendingResponse?.musicWakeRefreshAfterSpeech) {
  this.schedulePassiveMusicWakeLatchRefresh(
    session,
    settings,
    pendingResponse?.userId || null
  );
}

if (!session.botTurnOpen) {
  session.botTurnOpen = true;
  session.botTurnOpenAt = now;
  session.lastAssistantReplyAt = now;
  this.host.store.logAction({
    kind: "voice_turn_out",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: this.botUserId,
    content: "bot_audio_started",
    metadata: {
      sessionId: session.id
    }
  });
}
if (session.botTurnResetTimer) {
  clearTimeout(session.botTurnResetTimer);
}

// Close the bot turn after a stretch of silence; each call re-arms this.
session.botTurnResetTimer = setTimeout(() => {
  session.botTurnOpen = false;
  session.botTurnOpenAt = 0;
  session.botTurnResetTimer = null;
  this.syncAssistantOutputState(session, "bot_turn_reset");
  this.host.maybeClearActiveReplyInterruptionPolicy(session);
}, BOT_TURN_SILENCE_RESET_MS);

}

/**
 * Whether audio has arrived for the given pending response: the last audio
 * delta must be no older than the response's request timestamp.
 */
pendingResponseHasAudio(session: VoiceSession, pendingResponse = session?.pendingResponse) {
  if (!session || !pendingResponse) return false;
  const requestedAt = Number(pendingResponse.requestedAt || 0);
  if (!requestedAt) return false;
  const lastDeltaAt = Number(session.lastAudioDeltaAt || 0);
  return lastDeltaAt >= requestedAt;
}

/**
 * Decide whether in-flight reply generations should survive a completed
 * pending response: only when a voice reply generation is still registered
 * for this session AND the response came from a busy-utterance or
 * streamed-chunk source.
 */
private shouldPreserveActiveRepliesForCompletedPendingResponse(
  session: VoiceSession,
  pendingResponse: VoiceSession["pendingResponse"]
) {
  if (!session || !pendingResponse || !this.host.activeReplies) return false;
  let generationActive = false;
  try {
    generationActive = this.host.activeReplies.has(buildVoiceReplyScopeKey(session.id));
  } catch {
    generationActive = false;
  }
  if (!generationActive) return false;
  const normalizedSource = String(pendingResponse.source || "").trim().toLowerCase();
  if (normalizedSource.endsWith(":busy_utterance")) return true;
  return /(^|:)stream_chunk_\d+$/.test(normalizedSource);
}

/** Cancel both the response-silence watchdog and the response-done grace timer. */
clearResponseSilenceTimers(session: VoiceSession) {
  if (!session) return;
  const { responseWatchdogTimer, responseDoneGraceTimer } = session;
  if (responseWatchdogTimer) {
    clearTimeout(responseWatchdogTimer);
    session.responseWatchdogTimer = null;
  }
  if (responseDoneGraceTimer) {
    clearTimeout(responseDoneGraceTimer);
    session.responseDoneGraceTimer = null;
  }
}

/**
 * Core teardown for session.pendingResponse. Optionally aborts in-flight
 * reply generations and pending tool calls, releases the output lease,
 * clears a now-moot music reply handoff, resyncs derived output state, and
 * optionally clears the interruption policy / rechecks deferred actions.
 */
private resetPendingResponse({ session, abortActiveReplies = false, abortPendingToolCalls = false, trigger = "pending_response_cleared", recheckDeferredActions = false, clearActiveReplyInterruptionPolicy = true }: { session: VoiceSession; abortActiveReplies?: boolean; abortPendingToolCalls?: boolean; trigger?: string; recheckDeferredActions?: boolean; clearActiveReplyInterruptionPolicy?: boolean; }) { if (!session) return; this.clearResponseSilenceTimers(session); if (abortActiveReplies && this.host.activeReplies) { const voiceReplyScopeKey = buildVoiceReplyScopeKey(session.id); this.host.activeReplies.abortAll(voiceReplyScopeKey, "Pending response cleared"); }

if (abortPendingToolCalls && session.realtimePendingToolAbortControllers) {
  for (const controller of session.realtimePendingToolAbortControllers.values()) {
    try {
      controller.abort("Pending response cleared");
    } catch {
      // ignore
    }
  }
  session.realtimePendingToolAbortControllers.clear();
}

this.clearOutputLease(session, trigger, {
  requestId: Number(session.pendingResponse?.requestId || 0) || null
});
session.pendingResponse = null;
// A "duck" handoff only matters while bot speech is active or queued;
// with everything quiet, drop the handoff bookkeeping.
if (
  session.music?.replyHandoffMode === "duck" &&
  !session.botTurnOpen &&
  !session.botSpeechMusicDucked &&
  !this.hasBufferedTtsPlayback(session)
) {
  session.music.replyHandoffMode = null;
  session.music.replyHandoffRequestedByUserId = null;
  session.music.replyHandoffSource = null;
  session.music.replyHandoffAt = 0;
}
this.syncAssistantOutputState(session, trigger);
if (clearActiveReplyInterruptionPolicy) {
  this.host.maybeClearActiveReplyInterruptionPolicy(session);
}
if (recheckDeferredActions) {
  this.host.deferredActionQueue.recheckDeferredVoiceActions({
    session,
    reason: "pending_response_cleared"
  });
}

}

/**
 * Hard-clear the pending response: aborts active reply generations and
 * pending tool calls, then rechecks deferred voice actions.
 */
clearPendingResponse(session: VoiceSession) {
  this.resetPendingResponse({
    session,
    abortActiveReplies: true,
    abortPendingToolCalls: true,
    trigger: "pending_response_cleared",
    recheckDeferredActions: true
  });
}

/**
 * Gracefully settle the pending response without aborting in-flight reply
 * generations or tool calls, and without rechecking deferred actions.
 */
settlePendingResponse(
  session: VoiceSession,
  trigger = "pending_response_settled",
  {
    clearActiveReplyInterruptionPolicy = true
  }: { clearActiveReplyInterruptionPolicy?: boolean } = {}
) {
  this.resetPendingResponse({
    session,
    abortActiveReplies: false,
    abortPendingToolCalls: false,
    trigger,
    recheckDeferredActions: false,
    clearActiveReplyInterruptionPolicy
  });
}

/**
 * Ask the realtime client whether a response is currently in progress.
 * Defensive against missing/misshapen clients and throwing implementations.
 */
isRealtimeResponseActive(session: VoiceSession) {
  if (!session || !isRealtimeMode(session.mode)) return false;
  const realtimeClient = session.realtimeClient;
  const hasCheck =
    realtimeClient &&
    typeof realtimeClient === "object" &&
    "isResponseInProgress" in realtimeClient &&
    typeof realtimeClient.isResponseInProgress === "function";
  if (!hasCheck) return false;
  try {
    return Boolean(realtimeClient.isResponseInProgress.call(realtimeClient));
  } catch {
    return false;
  }
}

/**
 * Arm a one-shot watchdog for a realtime response request. When it fires
 * and the same request is still pending: clear it if audio did arrive,
 * otherwise run the silent-response recovery path. Re-arming replaces any
 * previously armed timer.
 */
armResponseSilenceWatchdog({ session, requestId, userId = null }: { session: VoiceSession; requestId: number; userId?: string | null; }) { if (!session || session.ending) return; if (!isRealtimeMode(session.mode)) return; if (!Number.isFinite(Number(requestId)) || Number(requestId) <= 0) return;

if (session.responseWatchdogTimer) {
  clearTimeout(session.responseWatchdogTimer);
}

session.responseWatchdogTimer = setTimeout(() => {
  session.responseWatchdogTimer = null;
  if (!session || session.ending) return;
  const pending = session.pendingResponse;
  if (!pending) return;
  // A different request is pending now — this watchdog is stale.
  if (Number(pending.requestId || 0) !== Number(requestId)) return;
  if (this.pendingResponseHasAudio(session, pending)) {
    // Audio arrived after all; normal cleanup instead of recovery.
    this.clearPendingResponse(session);
    return;
  }
  this.spawnSilentResponseRecovery({
    session,
    userId: pending.userId || userId,
    trigger: "watchdog"
  });
}, RESPONSE_SILENCE_RETRY_DELAY_MS);

}

/**
 * Fire-and-forget wrapper around handleSilentResponse. If recovery itself
 * throws: releases the handlingSilence latch, logs the failure, and — when
 * the session is still a live realtime session with a pending response —
 * either clears it (audio arrived after all) or re-arms the silence
 * watchdog so recovery can be retried.
 */
private spawnSilentResponseRecovery({ session, userId = null, trigger = "watchdog", responseId = null, responseStatus = null }: SilentResponseRecoveryArgs) {
  void Promise.resolve(
    this.handleSilentResponse({ session, userId, trigger, responseId, responseStatus })
  ).catch((error: unknown) => {
    const active = session.pendingResponse;
    if (active) {
      active.handlingSilence = false;
    }
    const resolvedUserId = active?.userId || userId || this.botUserId;
    this.host.store.logAction({
      kind: "voice_error",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: resolvedUserId,
      // FIX: this message was a bare expression missing its template-literal
      // backticks (a syntax error); restored as a template string, matching
      // the other voice_error log lines in this file.
      content: `response_silent_recovery_failed: ${String((error as Error)?.message || error)}`,
      metadata: {
        sessionId: session.id,
        requestId: Number(active?.requestId || 0) || null,
        trigger,
        responseId,
        responseStatus
      }
    });
    if (session.ending || !isRealtimeMode(session.mode) || !active) return;
    if (this.pendingResponseHasAudio(session, active)) {
      this.clearPendingResponse(session);
      return;
    }
    this.armResponseSilenceWatchdog({
      session,
      requestId: active.requestId,
      userId: active.userId || userId
    });
  });
}

/**
 * Requests a new realtime audio response and installs it as the session's
 * tracked `pendingResponse`. Carries forward retry state, interruption
 * policy, output-lease mode, utterance text, and latency context from any
 * previous pending response unless the caller supplies overrides (an
 * explicit `undefined` means "inherit"; an explicit value/null replaces).
 * Grants the output lease, arms the silence watchdog, and syncs assistant
 * output state. Returns true when a tracked response was installed; false
 * when skipped (session ending, non-realtime mode, or a response is
 * already active on the realtime client).
 */
createTrackedAudioResponse({ session, userId = null, source = "turn_flush", resetRetryState = false, emitCreateEvent = true, interruptionPolicy = undefined, outputLeaseMode = undefined, utteranceText = undefined, latencyContext = undefined, musicWakeRefreshAfterSpeech = false }: { session: VoiceSession; userId?: string | null; source?: string; resetRetryState?: boolean; emitCreateEvent?: boolean; interruptionPolicy?: ReplyInterruptionPolicy | Record<string, unknown> | null; outputLeaseMode?: string | null; utteranceText?: string | null; latencyContext?: Record<string, unknown> | null; musicWakeRefreshAfterSpeech?: boolean; }) { if (!session || session.ending) return false; if (!isRealtimeMode(session.mode)) return false; this.host.deferredActionQueue.clearAllDeferredVoiceActions(session); if (emitCreateEvent && this.isRealtimeResponseActive(session)) { this.host.store.logAction({ kind: "voice_runtime", guildId: session.guildId, channelId: session.textChannelId, userId: this.botUserId, content: "response_create_skipped_active_response", metadata: { sessionId: session.id, source: String(source || "turn_flush") } }); return false; } if (emitCreateEvent) { session.realtimeClient.createAudioResponse(); }

const now = Date.now();
// Reset per-utterance assistant-audio item tracking for the new response.
// NOTE(review): this isRealtimeMode guard is always true here (the method
// already returned above when not realtime) — harmless but redundant.
if (isRealtimeMode(session.mode)) {
  session.lastRealtimeAssistantAudioItemId = null;
  session.lastRealtimeAssistantAudioItemContentIndex = 0;
  session.lastRealtimeAssistantAudioItemReceivedMs = 0;
}
// Monotonically increasing request id; watchdog/grace timers key off it.
const requestId = Number(session.nextResponseRequestId || 0) + 1;
session.nextResponseRequestId = requestId;
const previous = session.pendingResponse;
// `undefined` means "inherit from the previous pending response / session";
// an explicit value (including null) replaces it.
const shouldApplyFallbackInterruptionPolicy = interruptionPolicy === undefined;
const interruptionPolicySeed =
  shouldApplyFallbackInterruptionPolicy
    ? previous?.interruptionPolicy || session.activeReplyInterruptionPolicy
    : interruptionPolicy;
// Inherited seeds are fully re-resolved; explicit seeds are only normalized.
const normalizedInterruptionPolicy = shouldApplyFallbackInterruptionPolicy
  ? this.host.resolveReplyInterruptionPolicy({
      session,
      userId: userId || previous?.userId || null,
      policy: interruptionPolicySeed,
    })
  : this.host.normalizeReplyInterruptionPolicy(interruptionPolicySeed);
const outputLeaseModeSeed =
  outputLeaseMode === undefined
    ? previous?.outputLeaseMode || null
    : outputLeaseMode;
const normalizedOutputLeaseMode = normalizeVoiceOutputLeaseMode(outputLeaseModeSeed);
const utteranceTextSeed = utteranceText === undefined ? previous?.utteranceText || "" : utteranceText || "";
const normalizedUtteranceText =
  normalizeVoiceText(utteranceTextSeed, STT_REPLY_MAX_CHARS) || null;
const latencyContextSeed =
  latencyContext === undefined
    ? previous?.latencyContext || null
    : latencyContext;
// Sanitize the latency context: clamp timestamps to >= 0, default the
// request time to `now`, and round/clamp the queue metrics (null when absent).
const normalizedLatencyContext =
  latencyContextSeed && typeof latencyContextSeed === "object"
    ? {
      finalizedAtMs: Math.max(0, Number(latencyContextSeed.finalizedAtMs || 0)),
      asrStartedAtMs: Math.max(0, Number(latencyContextSeed.asrStartedAtMs || 0)),
      asrCompletedAtMs: Math.max(0, Number(latencyContextSeed.asrCompletedAtMs || 0)),
      generationStartedAtMs: Math.max(0, Number(latencyContextSeed.generationStartedAtMs || 0)),
      replyRequestedAtMs: Math.max(
        0,
        Number(latencyContextSeed.replyRequestedAtMs || 0)
      ) || now,
      audioStartedAtMs: Math.max(0, Number(latencyContextSeed.audioStartedAtMs || 0)),
      source: String(latencyContextSeed.source || source || "turn_flush"),
      captureReason: String(latencyContextSeed.captureReason || "").trim() || null,
      queueWaitMs: Number.isFinite(Number(latencyContextSeed.queueWaitMs))
        ? Math.max(0, Math.round(Number(latencyContextSeed.queueWaitMs)))
        : null,
      pendingQueueDepth: Number.isFinite(Number(latencyContextSeed.pendingQueueDepth))
        ? Math.max(0, Math.round(Number(latencyContextSeed.pendingQueueDepth)))
        : null
    }
    : null;

// Fresh tracking record; audio byte/chunk counters start at zero and are
// incremented elsewhere as audio deltas arrive.
session.pendingResponse = {
  requestId,
  userId: userId || previous?.userId || null,
  requestedAt: now,
  retryCount: resetRetryState ? 0 : Number(previous?.retryCount || 0),
  hardRecoveryAttempted: resetRetryState ? false : Boolean(previous?.hardRecoveryAttempted),
  source: String(source || "turn_flush"),
  handlingSilence: false,
  audioReceivedAt: 0,
  interruptionPolicy: normalizedInterruptionPolicy,
  outputLeaseMode: normalizedOutputLeaseMode,
  utteranceText: normalizedUtteranceText,
  latencyContext: normalizedLatencyContext,
  musicWakeRefreshAfterSpeech: Boolean(musicWakeRefreshAfterSpeech),
  audioDeliveredBytes: 0,
  audioDeliveredChunks: 0,
  audioSuppressedBytes: 0,
  audioSuppressedChunks: 0,
  firstAudioAt: 0
};
this.grantOutputLease(session, {
  mode: normalizedOutputLeaseMode,
  requestId,
  source: String(source || "turn_flush")
});
session.lastResponseRequestAt = now;
this.host.setActiveReplyInterruptionPolicy(session, normalizedInterruptionPolicy);
// Replace any stale silence timers with a watchdog for this request.
this.clearResponseSilenceTimers(session);
this.armResponseSilenceWatchdog({
  session,
  requestId,
  userId: session.pendingResponse.userId
});
this.syncAssistantOutputState(session, "response_requested");
return true;

}

/**
 * Recovers a realtime response that produced no audio. Escalation ladder:
 *   1. up to MAX_RESPONSE_SILENCE_RETRIES plain response re-creates,
 *   2. one "hard recovery" that first commits any sufficiently large
 *      buffered input audio before re-creating the response,
 *   3. final fallback: log `response_silent_fallback` and drop the
 *      pending response.
 * If a retry/recovery attempt itself throws, the pending response is
 * cleared and the whole session is ended ("response_stalled").
 */
async handleSilentResponse({ session, userId = null, trigger = "watchdog", responseId = null, responseStatus = null }: SilentResponseRecoveryArgs) {
  if (!session || session.ending) return;
  if (!isRealtimeMode(session.mode)) return;
  const pending = session.pendingResponse;
  if (!pending) return;
  // A recovery pass is already in flight for this response.
  if (pending.handlingSilence) return;
  if (this.pendingResponseHasAudio(session, pending)) {
    // Audio showed up in the meantime — nothing to recover.
    this.clearPendingResponse(session);
    return;
  }

  pending.handlingSilence = true;
  this.clearResponseSilenceTimers(session);

  // A deferred turn-blocking capture is still active; back off and let the
  // re-armed watchdog try again later.
  if (this.host.hasDeferredTurnBlockingActiveCapture(session)) {
    pending.handlingSilence = false;
    this.armResponseSilenceWatchdog({
      session,
      requestId: pending.requestId,
      userId: pending.userId || userId
    });
    return;
  }

  const resolvedUserId = pending.userId || userId || this.botUserId;
  // Release the latch only if the same request is still the pending one
  // (a successful retry installs a NEW pendingResponse with a new id).
  const setHandlingDone = () => {
    const active = session.pendingResponse;
    if (active && Number(active.requestId || 0) === Number(pending.requestId || 0)) {
      active.handlingSilence = false;
    }
  };

  if (pending.retryCount < MAX_RESPONSE_SILENCE_RETRIES) {
    pending.retryCount += 1;
    this.host.store.logAction({
      kind: "voice_error",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: resolvedUserId,
      content: "response_silent_retry",
      metadata: {
        sessionId: session.id,
        requestId: pending.requestId,
        retryCount: pending.retryCount,
        maxRetries: MAX_RESPONSE_SILENCE_RETRIES,
        responseRequestedAt: pending.requestedAt,
        trigger,
        responseId,
        responseStatus
      }
    });

    try {
      const created = this.createTrackedAudioResponse({
        session,
        userId: resolvedUserId,
        source: "silent_retry",
        resetRetryState: false
      });
      if (!created) {
        // Creation was skipped; keep watching the existing request.
        this.armResponseSilenceWatchdog({
          session,
          requestId: pending.requestId,
          userId: pending.userId || userId
        });
      }
    } catch (error) {
      this.host.store.logAction({
        kind: "voice_error",
        guildId: session.guildId,
        channelId: session.textChannelId,
        userId: resolvedUserId,
        // FIX: under strict mode the catch variable is `unknown`, so the
        // original `error?.message` did not type-check; cast first, as the
        // sibling spawnSilentResponseRecovery handler does.
        content: `response_retry_failed: ${String((error as Error)?.message || error)}`,
        metadata: {
          sessionId: session.id,
          requestId: pending.requestId
        }
      });
      this.clearPendingResponse(session);
      await this.host.endSession({
        guildId: session.guildId,
        reason: "response_stalled",
        announcement: "voice output stalled and stayed silent, leaving vc.",
        settings: session.settingsSnapshot
      });
    } finally {
      setHandlingDone();
    }
    return;
  }

  if (!pending.hardRecoveryAttempted) {
    pending.hardRecoveryAttempted = true;
    this.host.store.logAction({
      kind: "voice_error",
      guildId: session.guildId,
      channelId: session.textChannelId,
      userId: resolvedUserId,
      content: "response_silent_hard_recovery",
      metadata: {
        sessionId: session.id,
        requestId: pending.requestId,
        retryCount: pending.retryCount,
        trigger,
        responseId,
        responseStatus
      }
    });

    try {
      // Commit any buffered input audio first — but only if it clears the
      // provider's minimum commit size, otherwise the commit would fail.
      const pendingInputBytes = Math.max(0, Number(session.pendingRealtimeInputBytes || 0));
      const minCommitBytes = getRealtimeCommitMinimumBytes(
        session.mode,
        Number(session.realtimeInputSampleRateHz) || 24000
      );
      if (pendingInputBytes >= minCommitBytes) {
        session.realtimeClient.commitInputAudioBuffer();
        session.pendingRealtimeInputBytes = 0;
      }
      const created = this.createTrackedAudioResponse({
        session,
        userId: resolvedUserId,
        source: "hard_recovery",
        resetRetryState: false
      });
      if (!created) {
        this.armResponseSilenceWatchdog({
          session,
          requestId: pending.requestId,
          userId: pending.userId || userId
        });
      }
    } catch (error) {
      this.host.store.logAction({
        kind: "voice_error",
        guildId: session.guildId,
        channelId: session.textChannelId,
        userId: resolvedUserId,
        // FIX: same strict-mode `unknown` catch-variable fix as above.
        content: `response_hard_recovery_failed: ${String((error as Error)?.message || error)}`,
        metadata: {
          sessionId: session.id,
          requestId: pending.requestId
        }
      });
      this.clearPendingResponse(session);
      await this.host.endSession({
        guildId: session.guildId,
        reason: "response_stalled",
        announcement: "voice output stalled and stayed silent, leaving vc.",
        settings: session.settingsSnapshot
      });
    } finally {
      setHandlingDone();
    }
    return;
  }

  // Retries and hard recovery are exhausted: log and give up on this response.
  this.host.store.logAction({
    kind: "voice_error",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: resolvedUserId,
    content: "response_silent_fallback",
    metadata: {
      sessionId: session.id,
      requestId: pending.requestId,
      retryCount: pending.retryCount,
      hardRecoveryAttempted: pending.hardRecoveryAttempted,
      trigger,
      responseId,
      responseStatus
    }
  });
  this.clearPendingResponse(session);
}

/**
 * Handles the provider's `response.done` event: clears barge-in output
 * suppression, estimates realtime usage cost, emits `*_response_done` and
 * `bot_utterance_completed` telemetry, then settles or clears the pending
 * response. A response that finished WITHOUT audio gets a short grace
 * window (RESPONSE_DONE_SILENCE_GRACE_MS) before silent-response recovery
 * is spawned, since audio deltas can still trail the done event.
 */
handleResponseDone({ session, event, settings = null, runtimeLabel = "openai_realtime" }: { session: VoiceSession; event: Record<string, unknown>; settings?: ReplyManagerSettings; runtimeLabel?: string; }) { if (session.ending) return; const hadBargeSuppression = this.host.bargeInController.isBargeInOutputSuppressed(session); if (hadBargeSuppression) { this.host.bargeInController.clearBargeInOutputSuppression(session, "response_done"); } const pending = session.pendingResponse; const responseId = parseResponseDoneId(event); const responseStatus = parseResponseDoneStatus(event); const responseUsage = parseResponseDoneUsage(event); const resolvedSettings = settings || session.settingsSnapshot || this.host.store.getSettings(); const voiceRuntime = getVoiceRuntimeConfig(resolvedSettings); const replyGeneration = getReplyGenerationSettings(resolvedSettings); const realtimeProvider = resolveRealtimeProvider(session.mode); const realtimeClientSessionModel = session.realtimeClient && typeof session.realtimeClient === "object" && "sessionConfig" in session.realtimeClient && session.realtimeClient.sessionConfig && typeof session.realtimeClient.sessionConfig === "object" ? String(session.realtimeClient.sessionConfig.model || "").trim() : ""; const resolvedResponseModel = isRealtimeMode(session.mode) ? parseResponseDoneModel(event) || realtimeClientSessionModel || String(voiceRuntime.openaiRealtime?.model || "gpt-realtime").trim() || "gpt-realtime" : parseResponseDoneModel(event); const responseUsdCost = isRealtimeMode(session.mode) && responseUsage ? estimateUsdCost({ provider: realtimeProvider || "openai", model: resolvedResponseModel || "gpt-realtime", inputTokens: Number(responseUsage.inputTokens || 0), outputTokens: Number(responseUsage.outputTokens || 0), cacheReadTokens: Number(responseUsage.cacheReadTokens || 0), cacheWriteTokens: 0, customPricing: replyGeneration.pricing }) : 0; const hadAudio = pending ? 
this.pendingResponseHasAudio(session, pending) : false; const hasInFlightToolCalls = session.awaitingToolOutputs || (session.realtimeToolCallExecutions instanceof Map && session.realtimeToolCallExecutions.size > 0);

// Cost/usage telemetry for the completed response.
this.host.store.logAction({
  kind: "voice_runtime",
  guildId: session.guildId,
  channelId: session.textChannelId,
  userId: this.botUserId,
  content: `${runtimeLabel}_response_done`,
  usdCost: responseUsdCost,
  metadata: {
    sessionId: session.id,
    requestId: pending?.requestId || null,
    responseId,
    responseStatus,
    responseModel: resolvedResponseModel || null,
    responseUsage,
    hadAudio,
    retryCount: pending ? Number(pending.retryCount || 0) : null,
    hardRecoveryAttempted:
      pending && Object.hasOwn(pending, "hardRecoveryAttempted")
        ? Boolean(pending.hardRecoveryAttempted)
        : null
  }
});

// --- Bot utterance completion telemetry ---
// Closes the observability gap between "text sent to TTS" and
// "audio actually played to Discord".  Pairs with the per-utterance
// audio byte counters incremented in the onAudioDelta handler.
if (pending) {
  const utteranceTextLength = pending.utteranceText ? pending.utteranceText.length : 0;
  const audioDeliveredBytes = Math.max(0, Number(pending.audioDeliveredBytes || 0));
  const audioDeliveredChunks = Math.max(0, Number(pending.audioDeliveredChunks || 0));
  const audioSuppressedBytes = Math.max(0, Number(pending.audioSuppressedBytes || 0));
  const audioSuppressedChunks = Math.max(0, Number(pending.audioSuppressedChunks || 0));
  const firstAudioAt = Math.max(0, Number(pending.firstAudioAt || 0));
  const requestedAt = Math.max(0, Number(pending.requestedAt || 0));
  const outputSampleRate = Number(session.realtimeOutputSampleRateHz) || 24000;
  // PCM 16-bit mono: 2 bytes per sample
  const estimatedPlaybackMs = outputSampleRate > 0
    ? Math.round((audioDeliveredBytes / 2 / outputSampleRate) * 1000)
    : 0;
  const firstAudioLatencyMs = firstAudioAt && requestedAt
    ? Math.max(0, firstAudioAt - requestedAt)
    : null;
  const totalAudioBytes = audioDeliveredBytes + audioSuppressedBytes;
  // Percentage of produced audio actually delivered (vs suppressed).
  const deliveryRatio = totalAudioBytes > 0
    ? Math.round((audioDeliveredBytes / totalAudioBytes) * 100)
    : hadAudio ? 100 : 0;
  // Buffered TTS samples still in the clankvox pipeline at response_done time
  const ttsBufferedSamplesAtDone = this.getBufferedTtsSamples(session);

  this.host.store.logAction({
    kind: "voice_runtime",
    guildId: session.guildId,
    channelId: session.textChannelId,
    userId: this.botUserId,
    content: "bot_utterance_completed",
    metadata: {
      sessionId: session.id,
      requestId: pending.requestId,
      source: pending.source,
      utteranceTextLength,
      utteranceTextPreview: pending.utteranceText
        ? pending.utteranceText.slice(0, 120)
        : null,
      hadAudio,
      hadBargeSuppression,
      audioDeliveredBytes,
      audioDeliveredChunks,
      audioSuppressedBytes,
      audioSuppressedChunks,
      estimatedPlaybackMs,
      firstAudioLatencyMs,
      deliveryRatio,
      ttsBufferedSamplesAtDone,
      ttsProviderEndReason: responseStatus || "completed",
      mode: session.mode || null
    }
  });
}

// No tracked request for this done event — just resync and drain.
if (!pending) {
  this.syncAssistantOutputState(session, "response_done_without_pending");
  this.host.drainPendingRealtimeAssistantUtterances(session, "response_done_without_pending");
  return;
}

if (hadAudio) {
  this.host.scheduleBotSpeechMusicUnduck(session, resolvedSettings, BOT_TURN_SILENCE_RESET_MS);
  const preserveActiveReplies = this.shouldPreserveActiveRepliesForCompletedPendingResponse(session, pending);
  const preserveInFlightToolWork = hasInFlightToolCalls;

  const musicPhase = this.host.getMusicPhase(session);
  if (musicPhase === "paused_wake_word") {
    this.schedulePausedReplyMusicResume(session, BOT_TURN_SILENCE_RESET_MS);
  }

  // Settle (keep reply/tool state) rather than clear when follow-up work
  // is still in flight for this response.
  if (preserveInFlightToolWork || preserveActiveReplies) {
    const settleTrigger = preserveInFlightToolWork
      ? "response_done_had_audio_tool_calls_in_flight"
      : "response_done_busy_utterance_completed";
    this.settlePendingResponse(session, settleTrigger, {
      clearActiveReplyInterruptionPolicy: false
    });
  } else {
    this.clearPendingResponse(session);
  }
  this.syncAssistantOutputState(session, "response_done_had_audio");
  this.host.drainPendingRealtimeAssistantUtterances(session, "response_done_had_audio");
  return;
}

// Silent but tool calls are running: the tool outputs will trigger a
// follow-up response, so do not treat this as a silent failure.
if (hasInFlightToolCalls) {
  this.settlePendingResponse(session, "response_done_tool_calls_in_flight");
  return;
}

if (session.responseDoneGraceTimer) {
  clearTimeout(session.responseDoneGraceTimer);
}

// Silent response: give trailing audio a short grace window before
// spawning recovery for this exact request.
const requestId = Number(pending.requestId || 0);
const responseUserId = pending.userId || null;
session.responseDoneGraceTimer = setTimeout(() => {
  session.responseDoneGraceTimer = null;
  if (!session || session.ending) return;
  const current = session.pendingResponse;
  if (!current || Number(current.requestId || 0) !== requestId) return;
  if (this.pendingResponseHasAudio(session, current)) {
    this.clearPendingResponse(session);
    this.syncAssistantOutputState(session, "response_done_grace_audio_detected");
    return;
  }
  this.spawnSilentResponseRecovery({
    session,
    userId: responseUserId,
    trigger: "response_done",
    responseId,
    responseStatus
  });
}, RESPONSE_DONE_SILENCE_GRACE_MS);

}

/** The bot's own Discord user id, or null before the client is ready. */
private get botUserId() { return this.host.client.user?.id || null; } }