src/voice/voiceSessionWarmMemory.ts

/**

  • Warm memory system for voice sessions.
  • Maintains a hot working set of memory context between turns so that
  • same-topic turns skip durable retrieval entirely. Drift detection uses
  • cosine similarity between each turn's embedding and a running topic
  • fingerprint (exponential moving average of recent turn embeddings).
  • The embedding used for drift detection is the same embedding that the
  • voice memory ingest pipeline already computes — no additional API calls.
  • Architecture:
  • Turn 1 (cold): full retrieval, stores warm snapshot + topic fingerprint
  • Turn 2+: cosine(turnEmbedding, fingerprint) → drift check
  •              no drift  → reuse warm snapshot (zero retrieval latency)
    
  •              drift     → full retrieval, update warm snapshot + fingerprint
    

*/

import type { MemoryFactRow } from "../store/storeMemory.ts";

// ── Types ────────────────────────────────────────────────────────────────

export interface TopicFingerprint { /** Running centroid embedding vector (exponential moving average). / embedding: number[]; /* Embedding model used (must match for comparison). / model: string; /* Number of turns that have contributed to this fingerprint. / turnCount: number; /* Timestamp of last update. */ updatedAt: number; }

export interface WarmMemorySnapshot { /** The continuity result from loadConversationContinuityContext. / continuity: { memorySlice: { participantProfiles?: unknown[]; userFacts?: MemoryFactRow[]; relevantFacts?: MemoryFactRow[]; guidanceFacts?: MemoryFactRow[]; selfFacts?: MemoryFactRow[]; loreFacts?: MemoryFactRow[]; }; recentConversationHistory: unknown[]; }; /* Behavioral facts array. / behavioralFacts: MemoryFactRow[]; /* Whether behavioral facts came from session cache. / usedCachedBehavioralFacts: boolean; /* When this snapshot was captured. / capturedAt: number; /* The transcript that produced this snapshot (for logging). */ sourceTranscript: string; }

export interface IngestEmbeddingEntry { /** The transcript text this embedding was computed for. / transcript: string; /* The embedding vector and model. */ embedding: number[]; model: string; }

export interface WarmMemoryState { /** Current topic fingerprint. Null until first turn completes. / topicFingerprint: TopicFingerprint | null; /* Cached memory snapshot from the last completed turn. / snapshot: WarmMemorySnapshot | null; /* The last resolved ingest embedding, keyed by transcript. / lastIngestEmbedding: IngestEmbeddingEntry | null; /* Promise for the in-flight ingest embedding (resolves with keyed entry). */ pendingIngestEmbedding: Promise<IngestEmbeddingEntry | null> | null; }

// ── Constants ────────────────────────────────────────────────────────────

/** Cosine similarity above this → same topic, use warm memory. */ const SAME_TOPIC_THRESHOLD = 0.85;

/** Cosine similarity below this → definite drift, full retrieval. */ const DRIFT_THRESHOLD = 0.65;

/** Weight for user turns when updating the topic fingerprint. */ const USER_TURN_WEIGHT = 0.3;

/** Weight for bot turns when updating the topic fingerprint. */ const BOT_TURN_WEIGHT = 0.1;

/** Maximum age (ms) for a warm snapshot before forced refresh. */ const WARM_SNAPSHOT_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes

// ── Core Functions ───────────────────────────────────────────────────────

/**

  • Initialize a fresh warm memory state for a new session. */ export function createWarmMemoryState(): WarmMemoryState { return { topicFingerprint: null, snapshot: null, lastIngestEmbedding: null, pendingIngestEmbedding: null }; }

/**

  • Compute cosine similarity between two vectors.
  • Returns 0 if either vector is empty or they differ in length. */ export function cosineSimilarity(a: number[], b: number[]): number { if (!a.length || !b.length || a.length !== b.length) return 0;

let dotProduct = 0; let normA = 0; let normB = 0;

for (let i = 0; i < a.length; i++) { dotProduct += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; }

const denominator = Math.sqrt(normA) * Math.sqrt(normB); if (denominator === 0) return 0;

return dotProduct / denominator; }

/**

  • Determine whether the current turn represents a topic drift from the
  • warm memory state.
  • Returns:
  • "cold" — no fingerprint yet (first turn), must do full retrieval
  • "same" — same topic, use warm snapshot
  • "ambiguous" — could go either way (conservative: treat as same)
  • "drift" — topic changed, must do full retrieval
  • "stale" — warm snapshot too old, refresh regardless */ export type DriftVerdict = "cold" | "same" | "ambiguous" | "drift" | "stale";

export function detectTopicDrift( state: WarmMemoryState, turnEmbedding: { embedding: number[]; model: string } ): { verdict: DriftVerdict; similarity: number } { // No fingerprint yet → cold start if (!state.topicFingerprint || !state.topicFingerprint.embedding.length) { return { verdict: "cold", similarity: 0 }; }

// Model mismatch → can't compare, treat as drift if (state.topicFingerprint.model !== turnEmbedding.model) { return { verdict: "drift", similarity: 0 }; }

// Warm snapshot too old → stale if ( state.snapshot && Date.now() - state.snapshot.capturedAt > WARM_SNAPSHOT_MAX_AGE_MS ) { return { verdict: "stale", similarity: 0 }; }

// No snapshot to reuse → need full retrieval anyway if (!state.snapshot) { return { verdict: "cold", similarity: 0 }; }

const similarity = cosineSimilarity( turnEmbedding.embedding, state.topicFingerprint.embedding );

if (similarity >= SAME_TOPIC_THRESHOLD) { return { verdict: "same", similarity }; } if (similarity < DRIFT_THRESHOLD) { return { verdict: "drift", similarity }; }

// Ambiguous zone — conservative: treat as same topic return { verdict: "ambiguous", similarity }; }

/**

  • Update the topic fingerprint with a new turn's embedding.
  • Uses exponential moving average with different weights for user vs bot turns.
  • User turns get higher weight (0.3) because they drive topic direction.
  • Bot turns get lower weight (0.1) to avoid self-reinforcing recall. */ export function updateTopicFingerprint( state: WarmMemoryState, turnEmbedding: { embedding: number[]; model: string }, source: "user" | "bot" = "user" ): void { const weight = source === "user" ? USER_TURN_WEIGHT : BOT_TURN_WEIGHT; const complementWeight = 1 - weight;

if ( !state.topicFingerprint || !state.topicFingerprint.embedding.length || state.topicFingerprint.model !== turnEmbedding.model ) { // First turn or model changed — set the fingerprint directly state.topicFingerprint = { embedding: [...turnEmbedding.embedding], model: turnEmbedding.model, turnCount: 1, updatedAt: Date.now() }; return; }

// Exponential moving average const prev = state.topicFingerprint.embedding; const curr = turnEmbedding.embedding; const updated = new Array(prev.length);

for (let i = 0; i < prev.length; i++) { updated[i] = complementWeight * prev[i] + weight * (curr[i] ?? 0); }

state.topicFingerprint.embedding = updated; state.topicFingerprint.turnCount += 1; state.topicFingerprint.updatedAt = Date.now(); }

/**

  • Store a warm memory snapshot on the session state after a successful
  • memory load + generation turn. */ export function captureWarmSnapshot( state: WarmMemoryState, snapshot: WarmMemorySnapshot ): void { state.snapshot = snapshot; }

/**

  • Invalidate the warm memory snapshot.
  • Called when memory is written (memory_write tool), participants change, etc. */ export function invalidateWarmSnapshot(state: WarmMemoryState): void { state.snapshot = null; }

/**

  • Check whether warm memory should be used for this turn.
  • Returns the warm snapshot if drift detection says we can reuse it,
  • or null if a full retrieval is needed. */ export function resolveWarmMemory( state: WarmMemoryState, turnEmbedding: { embedding: number[]; model: string } | null ): { snapshot: WarmMemorySnapshot | null; drift: DriftVerdict; similarity: number; reason: string; } { if (!turnEmbedding || !turnEmbedding.embedding.length) { return { snapshot: null, drift: "cold", similarity: 0, reason: "no_turn_embedding" }; }

const { verdict, similarity } = detectTopicDrift(state, turnEmbedding);

if (verdict === "same" || verdict === "ambiguous") { return { snapshot: state.snapshot, drift: verdict, similarity, reason: verdict === "same" ? "same_topic" : "ambiguous_conservative_reuse" }; }

return { snapshot: null, drift: verdict, similarity, reason: verdict }; }

// ── Export constants for testing ──────────────────────────────────────────

export const WARM_MEMORY_CONSTANTS = { SAME_TOPIC_THRESHOLD, DRIFT_THRESHOLD, USER_TURN_WEIGHT, BOT_TURN_WEIGHT, WARM_SNAPSHOT_MAX_AGE_MS } as const;