// src/bot/imageAnalysis.ts

import { MAX_IMAGE_LOOKUP_QUERY_LEN, extractUrlsFromText, normalizeDirectiveText } from "./botHelpers.ts"; import type { BotContext } from "./botContext.ts"; import { isLikelyImageUrl, parseHistoryImageReference } from "./messageHistory.ts"; import { MAX_MODEL_IMAGE_INPUTS } from "./replyPipelineShared.ts";

/** Upper bound on how many image candidates are collected from recent history. */
const MAX_HISTORY_IMAGE_CANDIDATES = 24;

/** Upper bound on how many ranked images a single model-requested lookup returns. */
const MAX_HISTORY_IMAGE_LOOKUP_RESULTS = 6;

/** Upper bound on how many distinct query tokens are scored during ranking. */
const MAX_IMAGE_LOOKUP_QUERY_TOKENS = 7;

/**
 * Structural contract for the image caption cache used by this module.
 *
 * Every member is optional; the code in this file probes for each one
 * (optional call or `typeof` check) before invoking it.
 */
export interface ImageCaptionCacheLike {
  /** Synchronous lookup of an already cached caption entry for `url`. */
  get?: (url: string) => { caption?: string | null } | null;

  /** Reports whether `url` is already cached or currently being captioned. */
  hasOrInflight?: (url: string) => boolean;

  /**
   * Returns the cached caption for `url` or starts captioning it.
   *
   * The resolved value is never read in this module (only rejection is
   * handled), so it is typed `unknown`.
   * NOTE(review): tighten the resolved type if other callers consume it.
   */
  getOrCaption?: (payload: {
    url: string;
    llm: BotContext["llm"];
    settings?: Record<string, unknown> | null;
    mimeType?: string;
    trace?: Record<string, unknown> | null;
  }) => Promise<unknown>;
}

/**
 * One image URL discovered in recent message history, together with the
 * metadata used to rank it and hand it to the model.
 */
type HistoryImageCandidate = {
  /** Id of the message the image was found in, or null when the row had none. */
  messageId?: string | null;
  /** Author display name of the source message. */
  authorName?: string;
  /** Creation timestamp string of the source message. */
  createdAt?: string;
  /** Short reference of the form "IMG <n>" assigned at extraction time. */
  imageRef?: string;
  /** The image URL itself. */
  url?: string;
  /** File name parsed from the URL. */
  filename?: string;
  /** Content type parsed from the URL. */
  contentType?: string;
  /** Message text around the URL, optionally enriched with a cached caption. */
  context?: string;
  /** Zero-based extraction order; used as the tie-breaker when ranking. */
  recencyRank?: number;
  /** Whether the caption cache returned an entry for this URL. */
  hasCachedCaption?: boolean;
  /** Relevance score assigned during ranking. */
  score?: number;
  /** Human-readable explanation of why the candidate matched. */
  matchReason?: string;
};

/** Options accepted by `captionRecentHistoryImages`. */
type CaptionRecentHistoryImagesOptions = {
  /** Cache used to schedule captioning; null disables the call entirely. */
  imageCaptionCache: null | ImageCaptionCacheLike;
  /** Caller-owned list of scheduling times (ms since epoch); mutated in place. */
  captionTimestamps: Array<number>;
  /** Images eligible for background captioning. */
  candidates?: Array<HistoryImageCandidate>;
  /** Bot settings; `vision.maxCaptionsPerHour` is read from here. */
  settings?: null | Record<string, unknown>;
  /** Trace payload forwarded to the cache; a default is used when null. */
  trace?: null | Record<string, unknown>;
};

/** Options accepted by `extractHistoryImageCandidates`. */
type ExtractHistoryImageCandidatesOptions = {
  /** Message rows to scan; `content`, `message_id`, `author_name`, `created_at` are read. */
  recentMessages?: Array<Record<string, unknown>>;
  /** URLs to skip; the set is copied, never mutated, by the extractor. */
  excluded?: Set<string>;
  /** Optional cache consulted for already known captions. */
  imageCaptionCache?: ImageCaptionCacheLike | null;
};

/** Options accepted by `rankImageLookupCandidates`. */
type RankImageLookupCandidatesOptions = {
  /** Candidates to score, in recency order (array index feeds the base score). */
  candidates?: Array<HistoryImageCandidate>;
  /** Free-text lookup query; lower-cased and whitespace-collapsed before matching. */
  query?: string;
};

/** Options accepted by `runModelRequestedImageLookup`. */
type RunModelRequestedImageLookupOptions = {
  /**
   * Current lookup state. Unknown extra keys are allowed and carried through
   * to the result; null or undefined is treated as an empty, disabled state.
   */
  imageLookup?:
    | null
    | ({
        /** Lookup only runs when this is truthy. */
        enabled?: boolean;
        /** Images available for lookup. */
        candidates?: Array<HistoryImageCandidate>;
        requested?: boolean;
        used?: boolean;
        query?: string;
        results?: Array<HistoryImageCandidate>;
        selectedImageInputs?: Record<string, unknown>[];
        error?: string | null;
      } & Record<string, unknown>);
  /** Query from the model: an "IMG <n>" reference or free text. */
  query?: string;
};

/** Options accepted by `mergeImageInputs`. */
type MergeImageInputsOptions = {
  /** Inputs that take priority; merged first. */
  baseInputs?: Record<string, unknown>[];
  /** Additional inputs appended after the base inputs. */
  extraInputs?: Record<string, unknown>[];
  /** Maximum number of inputs in the merged result. */
  maxInputs?: number;
};

/**
 * Reads `settings.vision.maxCaptionsPerHour` as a number.
 *
 * Falls back to 60 when `settings.vision` is missing, not a plain (non-array)
 * object, lacks the key, or the value does not convert to a finite number.
 *
 * @param settings Bot settings object, or null.
 * @returns The configured hourly caption cap, or 60.
 */
function getVisionMaxCaptionsPerHour(settings: Record<string, unknown> | null) {
  const fallback = 60;

  const vision = settings?.vision;
  if (!vision || typeof vision !== "object" || Array.isArray(vision)) {
    return fallback;
  }
  if (!("maxCaptionsPerHour" in vision)) {
    return fallback;
  }

  const configured = Number(vision.maxCaptionsPerHour);
  if (Number.isFinite(configured)) {
    return configured;
  }
  return fallback;
}

/**
 * Schedules background captioning for recent history images.
 *
 * Does nothing unless a cache with a `getOrCaption` function and an array of
 * `captionTimestamps` are supplied. Timestamps at least one hour old are
 * removed from `captionTimestamps` in place; the remaining entries count
 * against the hourly cap from `settings.vision.maxCaptionsPerHour`.
 * At most five candidates are scheduled per call, and never more than the
 * remaining hourly budget. Each scheduled caption pushes the current time
 * onto `captionTimestamps`.
 *
 * The captioning promises are not awaited; a rejection is recorded through
 * `ctx.store.logAction` as a `bot_error`.
 *
 * @param ctx Bot context; `ctx.llm` is forwarded to the cache and
 *   `ctx.store.logAction` receives failures.
 * @param options Cache, timestamp ledger, candidates, settings and trace.
 */
export function captionRecentHistoryImages(
  ctx: BotContext,
  {
    imageCaptionCache,
    captionTimestamps,
    candidates = [],
    settings = null,
    trace = null
  }: CaptionRecentHistoryImagesOptions
) {
  if (!imageCaptionCache) return;
  if (typeof imageCaptionCache.getOrCaption !== "function") return;
  if (!Array.isArray(captionTimestamps)) return;

  const pending = Array.isArray(candidates) ? candidates : [];
  const hourlyCap = getVisionMaxCaptionsPerHour(settings);
  const startedAt = Date.now();
  const cutoff = startedAt - 60 * 60 * 1000;

  // Walk backwards so removing an entry never shifts an index still to be visited.
  let cursor = captionTimestamps.length;
  while (cursor-- > 0) {
    if (captionTimestamps[cursor] <= cutoff) {
      captionTimestamps.splice(cursor, 1);
    }
  }

  const budgetLeft = Math.max(0, hourlyCap - captionTimestamps.length);
  if (budgetLeft === 0) return;

  // Per-call limit: five images, the number of candidates, or the hourly budget.
  const quota = Math.min(pending.length, 5, budgetLeft);
  let started = 0;

  for (const candidate of pending) {
    if (started >= quota) break;

    const url = candidate?.url;
    if (!url) continue;
    if (imageCaptionCache.hasOrInflight?.(url)) continue;

    started += 1;
    // Budget is charged at scheduling time, not on completion.
    captionTimestamps.push(startedAt);

    imageCaptionCache
      .getOrCaption({
        url,
        llm: ctx.llm,
        settings,
        mimeType: candidate.contentType || "",
        trace: trace || {
          guildId: null,
          channelId: null,
          userId: null,
          source: "history_image_caption"
        }
      })
      .catch((error) => {
        ctx.store.logAction({
          kind: "bot_error",
          content: `history_image_caption: ${String(error?.message || error)}`.slice(0, 2000),
          metadata: {
            url: candidate.url,
            contentType: candidate.contentType || null,
            source: String(trace?.source || "history_image_caption")
          }
        });
      });
  }
}

/**
 * Collects image URLs from recent message rows as lookup candidates.
 *
 * Each row's `content` is scanned for URLs. A URL becomes a candidate when
 * `isLikelyImageUrl` accepts it and it has not been seen before (either in
 * `excluded` or earlier in this scan). Collection stops once
 * `MAX_HISTORY_IMAGE_CANDIDATES` candidates exist.
 *
 * A candidate's `context` is the message text with the first occurrence of
 * the URL removed (max 180 chars); when the cache holds a caption it is
 * appended as `[caption: ...]` and the result is capped at 360 chars.
 *
 * @param options Rows to scan, URLs to skip, and an optional caption cache.
 * @returns Candidates in discovery order, with `imageRef` numbered from 1.
 */
export function extractHistoryImageCandidates({
  recentMessages = [],
  excluded = new Set(),
  imageCaptionCache = null
}: ExtractHistoryImageCandidatesOptions = {}) {
  const messages = Array.isArray(recentMessages) ? recentMessages : [];
  // Copy so the caller's exclusion set is never mutated.
  const visited = excluded instanceof Set ? new Set(excluded) : new Set();
  const collected = [];

  scan: for (const message of messages) {
    if (collected.length >= MAX_HISTORY_IMAGE_CANDIDATES) break;

    const text = String(message?.content || "");
    if (!text) continue;

    const links = extractUrlsFromText(text);
    if (!links.length) continue;

    for (const link of links) {
      if (collected.length >= MAX_HISTORY_IMAGE_CANDIDATES) break scan;

      const url = String(link || "").trim();
      if (!url || !isLikelyImageUrl(url) || visited.has(url)) continue;
      visited.add(url);

      const reference = parseHistoryImageReference(url);
      const surroundingText = text
        .replace(url, " ")
        .replace(/\s+/g, " ")
        .trim()
        .slice(0, 180);

      const cached = imageCaptionCache?.get?.(url);
      const caption = String(cached?.caption || "");

      let context = surroundingText;
      if (caption) {
        const captionTag = `[caption: ${caption}]`;
        context = (surroundingText ? `${surroundingText} ${captionTag}` : captionTag).slice(0, 360);
      }

      const position = collected.length;
      collected.push({
        messageId: String(message?.message_id || "").trim() || null,
        authorName: String(message?.author_name || "unknown").trim() || "unknown",
        createdAt: String(message?.created_at || "").trim(),
        imageRef: `IMG ${position + 1}`,
        url,
        filename: reference.filename || "(unnamed)",
        contentType: reference.contentType || "",
        context,
        recencyRank: position,
        hasCachedCaption: Boolean(cached)
      });
    }
  }

  return collected;
}

/**
 * Scores and orders history image candidates against a free-text query.
 *
 * Scoring per candidate (searchable text = context + filename + author name,
 * lower-cased):
 *  - base score `max(0, 4 - index * 0.3)` from the candidate's array position
 *  - +9 when the whole normalized query appears in the searchable text
 *  - +2 for each distinct query token (3+ chars of a-z/0-9, first
 *    `MAX_IMAGE_LOOKUP_QUERY_TOKENS` only) found in the searchable text
 *  - +1 when the query contains a visual-recall word (image, photo, ...)
 *
 * Results are sorted by score descending, then `recencyRank` ascending.
 *
 * @param options Candidates and query.
 * @returns Candidates scoring at least 4, or every candidate when none do.
 *   Each returned item carries `score` and `matchReason`.
 */
export function rankImageLookupCandidates({ candidates = [], query = "" }: RankImageLookupCandidatesOptions = {}) {
  const needle = String(query || "")
    .toLowerCase()
    .replace(/\s+/g, " ")
    .trim();
  const distinctTokens = Array.from(new Set(needle.match(/[a-z0-9]{3,}/g) || []));
  const tokens = distinctTokens.slice(0, MAX_IMAGE_LOOKUP_QUERY_TOKENS);
  const wantsVisualRecall = /\b(?:image|photo|picture|pic|screenshot|meme|earlier|previous|that)\b/i.test(needle);

  const pool = Array.isArray(candidates) ? candidates : [];

  const scored = pool.map((candidate, position) => {
    const searchable = [candidate?.context, candidate?.filename, candidate?.authorName]
      .map((part) => String(part || "").toLowerCase())
      .join(" ");

    let score = Math.max(0, 4 - position * 0.3);

    const phraseMatched = Boolean(needle) && searchable.includes(needle);
    if (phraseMatched) {
      score += 9;
    }

    const matchedTokens = tokens.filter((token) => Boolean(token) && searchable.includes(token));
    matchedTokens.forEach(() => {
      score += 2;
    });
    const hitCount = matchedTokens.length;

    if (wantsVisualRecall) {
      score += 1;
    }

    const reasons = [
      phraseMatched ? "phrase match" : "",
      hitCount > 0 ? `${hitCount} token hit${hitCount === 1 ? "" : "s"}` : ""
    ].filter(Boolean);

    return {
      ...candidate,
      score,
      matchReason: reasons.join(", ") || "recency fallback"
    };
  });

  // Higher score first; equal scores fall back to extraction order.
  scored.sort(
    (left, right) =>
      (right.score || 0) - (left.score || 0) || (left.recencyRank || 0) - (right.recencyRank || 0)
  );

  const strong = scored.filter((entry) => (entry.score || 0) >= 4);
  return strong.length > 0 ? strong : scored;
}

/**
 * Resolves a model-issued image lookup against the recent history candidates.
 *
 * The query is normalized with `normalizeDirectiveText`. When it has the form
 * "IMG <n>" it is treated as a direct reference and matched against each
 * candidate's `imageRef`; otherwise candidates are ranked with
 * `rankImageLookupCandidates` and the top
 * `min(MAX_HISTORY_IMAGE_LOOKUP_RESULTS, MAX_MODEL_IMAGE_INPUTS)` are selected.
 *
 * The function never throws for "not found" conditions; they are reported in
 * the returned state's `error` field with `used` left false.
 *
 * @param options Current lookup state (`imageLookup`) and the raw `query`.
 * @returns A new state object: the incoming state's keys plus `requested: true`,
 *   the normalized `query`, and `used`, `results`, `selectedImageInputs`,
 *   `error` describing the outcome. When lookup is disabled the state is
 *   returned with empty results and no error.
 */
export async function runModelRequestedImageLookup({ imageLookup, query }: RunModelRequestedImageLookupOptions) {
  const normalizedQuery = normalizeDirectiveText(query, MAX_IMAGE_LOOKUP_QUERY_LEN);
  const directRef = normalizeHistoryImageRef(normalizedQuery);
  const baseState = imageLookup || {};

  // Start from the caller's state but reset every per-request output field.
  const state = {
    ...baseState,
    enabled: Boolean(baseState.enabled),
    candidates: Array.isArray(baseState.candidates) ? baseState.candidates : [],
    requested: true,
    used: false,
    query: normalizedQuery,
    results: [],
    selectedImageInputs: [],
    error: null
  };

  if (!state.enabled) {
    return state;
  }
  if (!normalizedQuery) {
    return { ...state, error: "Missing image lookup query." };
  }

  const candidates = state.candidates;
  if (!candidates.length) {
    return { ...state, error: "No recent history images are available for lookup." };
  }

  // Direct "IMG <n>" reference: exact match only, no ranking fallback.
  if (directRef) {
    const directMatch = candidates.find(
      (candidate) => normalizeHistoryImageRef(candidate?.imageRef) === directRef
    );
    if (!directMatch) {
      return { ...state, error: `No history image matched ${directRef}.` };
    }

    return {
      ...state,
      used: true,
      results: [
        {
          ...directMatch,
          matchReason: "direct image ref"
        }
      ],
      selectedImageInputs: [
        {
          url: directMatch.url,
          filename: directMatch.filename,
          contentType: directMatch.contentType
        }
      ]
    };
  }

  const ranked = rankImageLookupCandidates({ candidates, query: normalizedQuery });
  const selected = ranked.slice(0, Math.min(MAX_HISTORY_IMAGE_LOOKUP_RESULTS, MAX_MODEL_IMAGE_INPUTS));
  if (!selected.length) {
    return { ...state, error: "No matching history images were found." };
  }

  return {
    ...state,
    used: true,
    results: selected,
    selectedImageInputs: selected.map((item) => ({
      url: item.url,
      filename: item.filename,
      contentType: item.contentType
    }))
  };
}

/**
 * Merges two lists of model image inputs, dropping duplicates and capping the
 * result at `maxInputs`.
 *
 * Base inputs are considered first, then extra inputs. An input is identified
 * by its trimmed `url` when present; otherwise by its lower-cased media type
 * (`mediaType` or `contentType`) plus the first 80 characters of `dataBase64`.
 * Inputs with neither a URL nor inline data, and non-object entries, are
 * skipped. The original input objects are returned unmodified.
 *
 * NOTE(review): inline inputs are compared on an 80-character prefix only, so
 * two different payloads sharing that prefix are treated as duplicates.
 *
 * @param options Base inputs, extra inputs, and the maximum result length
 *   (defaults to `MAX_MODEL_IMAGE_INPUTS`).
 * @returns A new array containing the first `maxInputs` unique inputs.
 */
export function mergeImageInputs({
  baseInputs = [],
  extraInputs = [],
  maxInputs = MAX_MODEL_IMAGE_INPUTS
}: MergeImageInputsOptions = {}): Array<Record<string, unknown>> {
  const merged: Array<Record<string, unknown>> = [];
  const seen = new Set<string>();

  const pushUnique = (input: Record<string, unknown>): void => {
    if (!input || typeof input !== "object") return;

    const url = String(input.url || "").trim();
    const mediaType = String(input.mediaType || input.contentType || "")
      .trim()
      .toLowerCase();
    const inlineData = String(input.dataBase64 || "").trim();

    const key = url
      ? `url:${url}`
      : inlineData
        ? `inline:${mediaType}:${inlineData.slice(0, 80)}`
        : "";
    if (!key || seen.has(key)) return;

    seen.add(key);
    merged.push(input);
  };

  const ordered = [
    ...(Array.isArray(baseInputs) ? baseInputs : []),
    ...(Array.isArray(extraInputs) ? extraInputs : [])
  ];
  for (const input of ordered) {
    if (merged.length >= maxInputs) break;
    pushUnique(input);
  }

  return merged.slice(0, maxInputs);
}

/**
 * Canonicalizes a history image reference such as "img 03" to "IMG 3".
 *
 * The value is stringified, trimmed and upper-cased; it must then consist of
 * exactly "IMG", optional whitespace, and one or more digits. Leading zeros
 * are dropped by converting the digits to a number.
 *
 * @param value Any value; falsy values are treated as an empty string.
 * @returns The canonical "IMG <n>" reference, or "" when the value is not one.
 */
function normalizeHistoryImageRef(value: unknown): string {
  const match = String(value || "")
    .trim()
    .toUpperCase()
    .match(/^IMG\s*(\d+)$/);
  if (!match) return "";
  return `IMG ${Number(match[1])}`;
}