src/video/videoContextService.ts

import { spawn } from "node:child_process"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { normalizeDiscoveryUrl } from "../services/discovery.ts"; import { assertPublicUrl } from "../services/urlSafety.ts"; import { clamp } from "../utils.ts"; import { sleep } from "../normalization/time.ts"; import { dedupeTargets, extractTikTokIdFromUrl, extractUrls, isLikelyDirectVideoUrl, parseAttachmentTarget, parseEmbedTargets, parseVideoTarget, type VideoTarget } from "./videoTargets.ts"; import { type ErrorWithAttempts, getRetryDelayMs, isRetryableFetchError, isRedirectStatus, shouldRetryHttpStatus, withAttemptCount } from "../retry.ts";

// HTTP fetch and redirect retry limits for metadata/context requests. const REQUEST_TIMEOUT_MS = 5_500; const MAX_FETCH_ATTEMPTS = 3; const MAX_FETCH_REDIRECTS = 5; const CACHE_TTL_MS = 30 * 60 * 1000;

// External tool execution timeouts and log-capture bounds. const YT_DLP_TIMEOUT_MS = 50_000; const FFMPEG_TIMEOUT_MS = 45_000; const FFPROBE_TIMEOUT_MS = 8_000; const MAX_COMMAND_OUTPUT_BYTES = 8 * 1024 * 1024; const MAX_LOG_CONTENT_CHARS = 2000;

// Public API clamps for transcript/keyframe/ASR request parameters. const DEFAULT_MAX_TRANSCRIPT_CHARS = 1200; const MIN_MAX_TRANSCRIPT_CHARS = 200; const MAX_MAX_TRANSCRIPT_CHARS = 4000; const DEFAULT_KEYFRAME_INTERVAL_SECONDS = 0; const MAX_KEYFRAME_INTERVAL_SECONDS = 120; // Floor for adaptive sampling on very short clips: ~15 fps is dense enough to // catch any meaningful motion in a sub-second loop without flooding the model. const MIN_EFFECTIVE_KEYFRAME_INTERVAL_SECONDS = 1 / 15; const DEFAULT_MAX_ASR_SECONDS = 120; const MIN_MAX_ASR_SECONDS = 15; const MAX_MAX_ASR_SECONDS = 600;

// Availability probing cache for yt-dlp/ffmpeg presence checks. const COMMAND_AVAILABILITY_CACHE_TTL_MS = 5 * 60 * 1000; const COMMAND_PROBE_TIMEOUT_MS = 10_000;

// ASR/transcript formatting limits used in extracted context payloads. const ASR_AUDIO_SAMPLE_RATE_HZ = "16000"; const TEXT_SANITIZE_VIDEO_ID_MAX_CHARS = 80; const TEXT_SANITIZE_TITLE_MAX_CHARS = 180; const TEXT_SANITIZE_CHANNEL_MAX_CHARS = 120; const TEXT_SANITIZE_DESCRIPTION_MAX_CHARS = 360; const COMMAND_ERROR_MESSAGE_MAX_CHARS = 400;

// Explicit UA so upstream providers can identify this integration. const VIDEO_USER_AGENT = "clanky/0.2 (+video-context; https://github.com/Volpestyle/clanky)";

type VideoTrace = { guildId?: string | null; channelId?: string | null; userId?: string | null; source?: string; };

type VideoContextDependencyName = "ffmpeg" | "yt-dlp"; type VideoContextDependencyCode = "missing_ffmpeg" | "missing_yt_dlp"; type VideoContextDependencyFailure = { dependency: VideoContextDependencyName; code: VideoContextDependencyCode; };

class VideoContextDependencyError extends Error { readonly dependency: VideoContextDependencyName; readonly code: VideoContextDependencyCode;

constructor({ dependency, detail }: { dependency: VideoContextDependencyName; detail: string }) { super( Local runtime dependency missing: ${dependency} is required to ${detail}. + Install ${dependency} and restart the bot. ); this.name = "VideoContextDependencyError"; this.dependency = dependency; this.code = dependency === "ffmpeg" ? "missing_ffmpeg" : "missing_yt_dlp"; } }

export class VideoContextService { store; llm; cache; toolAvailabilityPromise; toolAvailabilityCheckedAt;

constructor({ store, llm }) { this.store = store; this.llm = llm; this.cache = new Map(); this.toolAvailabilityPromise = null; this.toolAvailabilityCheckedAt = 0; }

logCleanupError(scope: string, error: unknown, metadata: Record<string, unknown> | null = null) { const detail = error instanceof Error ? error.message : String(error); try { this.store.logAction({ kind: "video_context_error", content: ${scope}: ${detail}.slice(0, MAX_LOG_CONTENT_CHARS), metadata }); } catch { console.warn([VideoContextService] ${scope}:, error); } }

extractVideoTargets(text, limit = 2) { const urls = extractUrls(String(text || "")); const maxTargets = clamp(Number(limit) || 2, 0, 8); const targets = []; const seen = new Set();

for (const rawUrl of urls) {
  if (targets.length >= maxTargets) break;
  const target = parseVideoTarget(rawUrl, { source: "message_url" });
  if (!target || seen.has(target.key)) continue;
  seen.add(target.key);
  targets.push(target);
}

return targets;

}

extractMessageTargets(message, limit = 2) { const maxTargets = clamp(Number(limit) || 2, 0, 8); const candidates = []; const text = String(message?.content || ""); if (text) { candidates.push(...this.extractVideoTargets(text, maxTargets)); }

if (message?.attachments?.size) {
  for (const attachment of message.attachments.values()) {
    if (candidates.length >= maxTargets) break;
    const target = parseAttachmentTarget(attachment);
    if (!target) continue;
    candidates.push(target);
  }
}

if (Array.isArray(message?.embeds) && message.embeds.length) {
  for (const embed of message.embeds) {
    if (candidates.length >= maxTargets) break;
    const embedTargets = parseEmbedTargets(embed);
    for (const target of embedTargets) {
      if (candidates.length >= maxTargets) break;
      candidates.push(target);
    }
  }
}

return dedupeTargets(candidates, maxTargets);

}

async fetchContexts({ targets, maxTranscriptChars = DEFAULT_MAX_TRANSCRIPT_CHARS, keyframeIntervalSeconds = DEFAULT_KEYFRAME_INTERVAL_SECONDS, maxKeyframesPerVideo = 0, allowAsrFallback = false, maxAsrSeconds = DEFAULT_MAX_ASR_SECONDS, trace = {} }: { targets: VideoTarget[]; maxTranscriptChars?: number; keyframeIntervalSeconds?: number; maxKeyframesPerVideo?: number; allowAsrFallback?: boolean; maxAsrSeconds?: number; trace?: VideoTrace; }) { const list = Array.isArray(targets) ? targets : []; const transcriptLimit = clamp( Number(maxTranscriptChars) || DEFAULT_MAX_TRANSCRIPT_CHARS, MIN_MAX_TRANSCRIPT_CHARS, MAX_MAX_TRANSCRIPT_CHARS ); const keyframeInterval = clamp( Number(keyframeIntervalSeconds) || DEFAULT_KEYFRAME_INTERVAL_SECONDS, DEFAULT_KEYFRAME_INTERVAL_SECONDS, MAX_KEYFRAME_INTERVAL_SECONDS ); const keyframeCount = clamp(Number(maxKeyframesPerVideo) || 0, 0, 8); const asrSeconds = clamp( Number(maxAsrSeconds) || DEFAULT_MAX_ASR_SECONDS, MIN_MAX_ASR_SECONDS, MAX_MAX_ASR_SECONDS ); const asrEnabled = Boolean(allowAsrFallback); const videos = []; const errors = [];

for (const target of list) {
  try {
    const context = await this.fetchVideoContext({
      target,
      maxTranscriptChars: transcriptLimit,
      keyframeIntervalSeconds: keyframeInterval,
      maxKeyframesPerVideo: keyframeCount,
      allowAsrFallback: asrEnabled,
      maxAsrSeconds: asrSeconds,
      trace
    });
    videos.push(context);
    this.store.logAction({
      kind: "video_context_call",
      guildId: trace.guildId,
      channelId: trace.channelId,
      userId: trace.userId,
      content: String(context.videoId || context.url || target.key || "").slice(0, MAX_LOG_CONTENT_CHARS),
      metadata: {
        source: trace.source || "unknown",
        provider: context.provider,
        kind: context.kind,
        videoId: context.videoId,
        url: context.url,
        title: context.title,
        channel: context.channel,
        hasTranscript: Boolean(context.transcript),
        transcriptSource: context.transcriptSource || null,
        transcriptChars: context.transcript ? context.transcript.length : 0,
        transcriptError: context.transcriptError || null,
        keyframeCount: Number(context.keyframeCount || 0),
        keyframeError: context.keyframeError || null,
        keyframeErrorCode: context.keyframeErrorCode || null,
        transcriptErrorCode: context.transcriptErrorCode || null,
        missingDependencies: Array.isArray(context.missingDependencies) ? context.missingDependencies : [],
        cacheHit: Boolean(context.cacheHit)
      }
    });
  } catch (error) {
    const message = String(error?.message || error);
    const dependencyFailure = getDependencyFailure(error);
    errors.push({
      key: target.key,
      url: target.url,
      error: message,
      errorCode: dependencyFailure?.code || null,
      missingDependency: dependencyFailure?.dependency || null
    });
    this.store.logAction({
      kind: "video_context_error",
      guildId: trace.guildId,
      channelId: trace.channelId,
      userId: trace.userId,
      content: `${target.key}: ${message}`.slice(0, MAX_LOG_CONTENT_CHARS),
      metadata: {
        source: trace.source || "unknown",
        kind: target.kind,
        key: target.key,
        url: target.url,
        errorCode: dependencyFailure?.code || null,
        missingDependency: dependencyFailure?.dependency || null,
        attempts: Number(error?.attempts || 1)
      }
    });
  }
}

return {
  videos,
  errors
};

}

async fetchVideoContext({ target, maxTranscriptChars, keyframeIntervalSeconds, maxKeyframesPerVideo, allowAsrFallback, maxAsrSeconds, trace = {} }: { target: VideoTarget; maxTranscriptChars: number; keyframeIntervalSeconds: number; maxKeyframesPerVideo: number; allowAsrFallback: boolean; maxAsrSeconds: number; trace?: VideoTrace; }) { this.pruneCache(); const cached = this.cache.get(target.key); const hasFreshCache = cached && Date.now() - cached.cachedAt < CACHE_TTL_MS; let base = null; if (hasFreshCache) { base = { ...cached.value, cacheHit: true }; } else { const fetched = await this.fetchBaseSummary({ target, maxTranscriptChars }); base = { ...fetched, cacheHit: false }; this.cache.set(target.key, { cachedAt: Date.now(), value: { ...fetched, cacheHit: false } }); }

const needKeyframes = Number(keyframeIntervalSeconds) > 0 && Number(maxKeyframesPerVideo) > 0;
const shouldAsr = Boolean(allowAsrFallback) && !String(base.transcript || "").trim();
const context = {
  ...base,
  keyframeCount: 0,
  keyframeError: null,
  keyframeErrorCode: null,
  transcriptErrorCode: null,
  missingDependencies: [] as VideoContextDependencyName[],
  frameImages: []
};
if (!needKeyframes && !shouldAsr) return context;

let media = null;
let mediaError = null;
let mediaFailure: unknown = null;
try {
  media = await this.resolveMediaInput(target.url, target.forceDirect);
} catch (error) {
  mediaFailure = error;
  mediaError = String(error?.message || error);
}

if (mediaError) {
  const dependencyFailure = getDependencyFailure(mediaFailure || mediaError);
  if (needKeyframes) {
    context.keyframeError = mediaError;
    if (dependencyFailure) {
      context.keyframeErrorCode = dependencyFailure.code;
      addMissingDependency(context, dependencyFailure.dependency);
    }
  }
  if (shouldAsr && !context.transcriptError) {
    context.transcriptError = mediaError;
    if (dependencyFailure) {
      context.transcriptErrorCode = dependencyFailure.code;
      addMissingDependency(context, dependencyFailure.dependency);
    }
  }
  return context;
}

try {
  if (needKeyframes && media) {
    try {
      const { frames, durationSeconds: probedDurationSeconds } = await this.extractKeyframesFromInput({
        input: media.input,
        keyframeIntervalSeconds,
        maxKeyframesPerVideo
      });
      context.frameImages = frames;
      context.keyframeCount = frames.length;
      // Direct/Tenor sources don't surface duration through their summary
      // path; fill it in from the ffprobe sidecar so downstream logs and
      // the model prompt stop reporting `durationSeconds: null`.
      if (
        (context.durationSeconds == null || !Number.isFinite(Number(context.durationSeconds)) || Number(context.durationSeconds) <= 0) &&
        probedDurationSeconds != null
      ) {
        context.durationSeconds = probedDurationSeconds;
      }
    } catch (error) {
      context.keyframeError = String(error?.message || error);
      const dependencyFailure = getDependencyFailure(error);
      if (dependencyFailure) {
        context.keyframeErrorCode = dependencyFailure.code;
        addMissingDependency(context, dependencyFailure.dependency);
      }
    }
  }

  if (shouldAsr && media) {
    try {
      const transcript = await this.transcribeFromInput({
        input: media.input,
        maxAsrSeconds,
        maxTranscriptChars,
        trace
      });
      if (transcript) {
        context.transcript = transcript;
        context.transcriptSource = "asr";
        context.transcriptError = null;
      }
    } catch (error) {
      if (!context.transcriptError) {
        context.transcriptError = String(error?.message || error);
      }
      const dependencyFailure = getDependencyFailure(error);
      if (dependencyFailure) {
        context.transcriptErrorCode = dependencyFailure.code;
        addMissingDependency(context, dependencyFailure.dependency);
      }
    }
  }
} finally {
  if (media?.cleanup) {
    try {
      await media.cleanup();
    } catch (error) {
      this.logCleanupError("video_media_cleanup_failed", error, {
        source: trace.source || "unknown",
        key: target.key,
        url: target.url
      });
    }
  }
}

return context;

}

pruneCache() { const now = Date.now(); for (const [key, entry] of this.cache.entries()) { if (!entry || now - entry.cachedAt >= CACHE_TTL_MS) { this.cache.delete(key); } } }

async fetchBaseSummary({ target, maxTranscriptChars }) { if (target.kind === "youtube" && target.videoId) { return this.fetchYouTubeSummary({ videoId: target.videoId, sourceUrl: target.url, maxTranscriptChars }); }

if (target.kind !== "direct" && (await this.hasYtDlp())) {
  try {
    return await this.fetchYtDlpSummary({ target, maxTranscriptChars });
  } catch {
    // Fall through to provider-specific fallback.
  }
}

if (target.kind === "tiktok") {
  return this.fetchTikTokSummary(target.url);
}

return this.fetchGenericSummary(target);

}

async fetchYouTubeSummary({ videoId, sourceUrl, maxTranscriptChars }) { const watchUrl = https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}; const html = await fetchTextWithRetry({ url: ${watchUrl}&hl=en, accept: "text/html,application/xhtml+xml,text/plain;q=0.9,/;q=0.2" }); const playerResponse = extractPlayerResponse(html); if (!playerResponse) { throw new Error("YouTube page did not expose playable metadata."); }

const summary = summarizeYouTubeVideo({
  videoId,
  url: sourceUrl || watchUrl,
  playerResponse
});
let transcript = "";
let transcriptError = null;
try {
  transcript = await fetchYouTubeTranscriptText({
    playerResponse,
    maxTranscriptChars
  });
} catch (error) {
  transcriptError = String(error?.message || error);
}

return {
  ...summary,
  provider: "youtube",
  kind: "youtube",
  transcript,
  transcriptSource: transcript ? "captions" : "",
  transcriptError
};

}

async fetchYtDlpSummary({ target, maxTranscriptChars }) { const info = await this.fetchYtDlpInfo(target.url); const transcriptResult = await this.fetchTranscriptFromYtDlpInfo(info, maxTranscriptChars).catch((error) => ({ text: "", source: "", error: String(error?.message || error) })); const provider = target.kind === "tiktok" ? "tiktok" : "generic"; const fallbackHost = safeHostFromUrl(target.url);

return {
  provider,
  kind: target.kind,
  videoId: sanitizeText(String(info?.id || target.videoId || ""), TEXT_SANITIZE_VIDEO_ID_MAX_CHARS) || null,
  url: normalizeDiscoveryUrl(info?.webpage_url || info?.original_url || target.url) || target.url,
  title: sanitizeText(info?.title || "", TEXT_SANITIZE_TITLE_MAX_CHARS) || "untitled video",
  channel:
    sanitizeText(
      info?.uploader || info?.channel || info?.creator || info?.channel_url || fallbackHost || "",
      TEXT_SANITIZE_CHANNEL_MAX_CHARS
    ) || "unknown channel",
  publishedAt: normalizeYtDlpDate(info?.upload_date) || normalizeDateIso(info?.release_timestamp),
  durationSeconds: safeNumber(info?.duration),
  viewCount: safeNumber(info?.view_count),
  description: sanitizeText(info?.description || "", TEXT_SANITIZE_DESCRIPTION_MAX_CHARS),
  transcript: transcriptResult.text || "",
  transcriptSource: transcriptResult.source || "",
  transcriptError: transcriptResult.error || null
};

}

async fetchYtDlpInfo(url) { if (!(await this.hasYtDlp())) { throw new VideoContextDependencyError({ dependency: "yt-dlp", detail: "extract metadata from yt-dlp-supported video pages" }); } const { stdout } = await runCommand({ command: "yt-dlp", args: [ "--no-warnings", "--quiet", "--skip-download", "--no-playlist", "--dump-single-json", String(url) ], timeoutMs: YT_DLP_TIMEOUT_MS });

const output = String(stdout || "").trim();
if (!output) {
  throw new Error("yt-dlp returned empty metadata.");
}

try {
  return JSON.parse(output);
} catch {
  const lastLine = output.split(/\r?

/).filter(Boolean).at(-1) || ""; try { return JSON.parse(lastLine); } catch { throw new Error("yt-dlp metadata JSON parse failed."); } } }

async fetchTranscriptFromYtDlpInfo(info, maxTranscriptChars) { const subtitles = info?.subtitles && typeof info.subtitles === "object" ? info.subtitles : {}; const autoCaptions = info?.automatic_captions && typeof info.automatic_captions === "object" ? info.automatic_captions : {}; const preferred = pickSubtitleTrack(subtitles, { preferManual: true }) || pickSubtitleTrack(autoCaptions, { preferManual: false }); if (!preferred?.url) { return { text: "", source: "", error: null }; }

const raw = await fetchTextWithRetry({
  url: preferred.url,
  accept: "application/xml,text/xml,text/vtt,text/plain;q=0.9,*/*;q=0.2"
});
const text = parseSubtitleText(raw, maxTranscriptChars);
return {
  text,
  source: text ? "captions" : "",
  error: null
};

}

async fetchTikTokSummary(url) { const oembedUrl = https://www.tiktok.com/oembed?url=${encodeURIComponent(url)}; let data = null; try { const raw = await fetchTextWithRetry({ url: oembedUrl, accept: "application/json,text/plain;q=0.9,/;q=0.2" }); data = JSON.parse(raw); } catch { // Fall through to generic summary. }

const host = safeHostFromUrl(url);
return {
  provider: "tiktok",
  kind: "tiktok",
  videoId: sanitizeText(extractTikTokIdFromUrl(url) || "", TEXT_SANITIZE_VIDEO_ID_MAX_CHARS) || null,
  url,
  title: sanitizeText(data?.title || "", TEXT_SANITIZE_TITLE_MAX_CHARS) || "TikTok video",
  channel: sanitizeText(data?.author_name || host || "", TEXT_SANITIZE_CHANNEL_MAX_CHARS) || "unknown channel",
  publishedAt: null,
  durationSeconds: null,
  viewCount: null,
  description: sanitizeText(data?.title || "", TEXT_SANITIZE_DESCRIPTION_MAX_CHARS),
  transcript: "",
  transcriptSource: "",
  transcriptError: null
};

}

async fetchGenericSummary(target) { const host = safeHostFromUrl(target.url); let title = ""; let description = ""; let publishedAt = null; if (target.kind === "generic") { try { const html = await fetchTextWithRetry({ url: target.url, accept: "text/html,application/xhtml+xml,text/plain;q=0.9,/;q=0.2" }); title = sanitizeText( readMetaTag(html, "og:title") || readMetaTag(html, "twitter:title") || readHtmlTitle(html) || "", TEXT_SANITIZE_TITLE_MAX_CHARS ) || ""; description = sanitizeText( readMetaTag(html, "og:description") || readMetaTag(html, "twitter:description") || "", TEXT_SANITIZE_DESCRIPTION_MAX_CHARS ); publishedAt = normalizeDateIso(readMetaTag(html, "article:published_time")) || normalizeDateIso(readMetaTag(html, "og:pubdate")); } catch { // Keep host fallback. } }

return {
  provider: target.kind === "direct" ? "direct" : "generic",
  kind: target.kind,
  videoId: null,
  url: target.url,
  title: title || `${host || "linked"} video`,
  channel: host || "unknown source",
  publishedAt,
  durationSeconds: null,
  viewCount: null,
  description,
  transcript: "",
  transcriptSource: "",
  transcriptError: null
};

}

async resolveMediaInput(url, forceDirect = false) { if (forceDirect || isLikelyDirectVideoUrl(url)) { return { input: url, cleanup: null }; }

if (!(await this.hasYtDlp())) {
  throw new VideoContextDependencyError({
    dependency: "yt-dlp",
    detail: "download hosted video/GIF pages before frame extraction"
  });
}

return this.downloadMediaWithYtDlp(url);

}

async downloadMediaWithYtDlp(url) { const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clanker-video-")); const outputPattern = path.join(tempDir, "source.%(ext)s"); try { await runCommand({ command: "yt-dlp", args: [ "--no-warnings", "--quiet", "--no-playlist", "--socket-timeout", "8", "--retries", "2", "--max-filesize", "80M", "-f", "b", "-o", outputPattern, String(url) ], timeoutMs: YT_DLP_TIMEOUT_MS });

  const rows = await fs.readdir(tempDir);
  const files = [];
  for (const entry of rows) {
    const full = path.join(tempDir, entry);
    const stat = await fs.stat(full).catch(() => null);
    if (!stat || !stat.isFile()) continue;
    files.push({ full, size: stat.size });
  }
  if (!files.length) {
    throw new Error("yt-dlp produced no downloadable media file.");
  }

  files.sort((a, b) => b.size - a.size);
  const mediaPath = files[0].full;
  return {
    input: mediaPath,
    cleanup: async () => {
      await fs.rm(tempDir, { recursive: true, force: true });
    }
  };
} catch (error) {
  try {
    await fs.rm(tempDir, { recursive: true, force: true });
  } catch (cleanupError) {
    this.logCleanupError("video_download_tempdir_cleanup_failed", cleanupError, {
      url,
      tempDir
    });
  }
  throw error;
}

}

async probeMediaDuration(input: string): Promise<number | null> { try { const { stdout } = await runCommand({ command: "ffprobe", args: [ "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", String(input) ], timeoutMs: FFPROBE_TIMEOUT_MS }); const value = Number(String(stdout || "").trim()); return Number.isFinite(value) && value > 0 ? value : null; } catch (error) { console.warn( [VideoContextService] ffprobe_duration_failed input=${String(input).slice(0, 120)} error=${error instanceof Error ? error.message : String(error)} ); return null; } }

async extractKeyframesFromInput({ input, keyframeIntervalSeconds, maxKeyframesPerVideo }) { if (!(await this.hasFfmpeg())) { throw new VideoContextDependencyError({ dependency: "ffmpeg", detail: "sample frames from GIF/video media" }); }

const configuredInterval = clamp(
  Number(keyframeIntervalSeconds) || DEFAULT_KEYFRAME_INTERVAL_SECONDS,
  1,
  MAX_KEYFRAME_INTERVAL_SECONDS
);
const maxFrames = clamp(Number(maxKeyframesPerVideo) || 0, 1, 8);
// Looping GIFs and other short clips are sub-second; fixed `fps=1/1`
// sampling collapses to a single frame and the maxFrames cap never bites.
// Probe duration first so we can compress the interval into the clip's
// actual length when needed.
const probedDurationSeconds = await this.probeMediaDuration(String(input));
const effectiveInterval = computeEffectiveKeyframeInterval(
  configuredInterval,
  maxFrames,
  probedDurationSeconds
);
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clanker-frames-"));
const outputPattern = path.join(tempDir, "frame-%03d.jpg");

try {
  const ffmpegStartedAt = Date.now();
  const ffmpegArgs = [
    "-hide_banner",
    "-loglevel",
    "error",
    "-y",
    "-i",
    String(input),
    "-vf",
    `fps=1/${effectiveInterval}`,
    "-frames:v",
    String(maxFrames),
    "-q:v",
    "5",
    outputPattern
  ];
  await runCommand({
    command: "ffmpeg",
    args: ffmpegArgs,
    timeoutMs: FFMPEG_TIMEOUT_MS
  });
  const ffmpegDurationMs = Date.now() - ffmpegStartedAt;

  const rows = await fs.readdir(tempDir);
  const frameFiles = rows.filter((name) => name.toLowerCase().endsWith(".jpg")).sort();
  const images = [];
  const frameSizes: number[] = [];
  for (const frame of frameFiles) {
    const fullPath = path.join(tempDir, frame);
    const stat = await fs.stat(fullPath);
    const dataBase64 = await fs.readFile(fullPath, { encoding: "base64" });
    if (!dataBase64) continue;
    frameSizes.push(stat.size);
    images.push({
      filename: frame,
      contentType: "image/jpeg",
      mediaType: "image/jpeg",
      dataBase64,
      source: "video_keyframe"
    });
  }
  const totalBytes = frameSizes.reduce((a, b) => a + b, 0);
  console.log(
    `[VideoContextService] keyframe_extraction_complete` +
    `  input=${String(input).slice(0, 120)}` +
    `  configuredIntervalSeconds=${configuredInterval}` +
    `  effectiveIntervalSeconds=${effectiveInterval}` +
    `  probedDurationSeconds=${probedDurationSeconds ?? "null"}` +
    `  maxFrames=${maxFrames}` +
    `  extractedFrames=${images.length}` +
    `  frameSizesBytes=[${frameSizes.join(",")}]` +
    `  totalBytes=${totalBytes}` +
    `  ffmpegDurationMs=${ffmpegDurationMs}`
  );
  return { frames: images, durationSeconds: probedDurationSeconds };
} finally {
  try {
    await fs.rm(tempDir, { recursive: true, force: true });
  } catch (error) {
    this.logCleanupError("video_keyframe_tempdir_cleanup_failed", error, {
      input,
      tempDir
    });
  }
}

}

async transcribeFromInput({ input, maxAsrSeconds, maxTranscriptChars, trace = {} }: { input: string; maxAsrSeconds: number; maxTranscriptChars: number; trace?: VideoTrace; }) { if (!(await this.hasFfmpeg())) { throw new VideoContextDependencyError({ dependency: "ffmpeg", detail: "extract audio for ASR fallback" }); } if (!this.llm?.isAsrReady?.()) { throw new Error("ASR fallback requires OPENAI_API_KEY."); }

const segmentSeconds = clamp(
  Number(maxAsrSeconds) || DEFAULT_MAX_ASR_SECONDS,
  MIN_MAX_ASR_SECONDS,
  MAX_MAX_ASR_SECONDS
);
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clanker-audio-"));
const audioPath = path.join(tempDir, "audio.wav");

try {
  await runCommand({
    command: "ffmpeg",
    args: [
      "-hide_banner",
      "-loglevel",
      "error",
      "-y",
      "-i",
      String(input),
      "-vn",
      "-ac",
      "1",
      "-ar",
      ASR_AUDIO_SAMPLE_RATE_HZ,
      "-t",
      String(segmentSeconds),
      audioPath
    ],
    timeoutMs: FFMPEG_TIMEOUT_MS
  });

  const transcript = await this.llm.transcribeAudio({
    filePath: audioPath,
    trace: {
      ...trace,
      source: trace.source || "video_context_asr"
    }
  });
  return sanitizeText(transcript, maxTranscriptChars);
} finally {
  try {
    await fs.rm(tempDir, { recursive: true, force: true });
  } catch (error) {
    this.logCleanupError("video_asr_tempdir_cleanup_failed", error, {
      input,
      tempDir,
      source: trace.source || "video_context_asr"
    });
  }
}

}

async getToolAvailability() { const now = Date.now(); if (this.toolAvailabilityPromise && now - this.toolAvailabilityCheckedAt < COMMAND_AVAILABILITY_CACHE_TTL_MS) { return this.toolAvailabilityPromise; } this.toolAvailabilityCheckedAt = now; this.toolAvailabilityPromise = Promise.all([ this.commandAvailable("ffmpeg", ["-version"]), this.commandAvailable("yt-dlp", ["--version"]) ]) .then(([ffmpeg, ytDlp]) => { console.log([VideoContextService] tool_availability ffmpeg=${ffmpeg} ytDlp=${ytDlp}); const missingDependencies: VideoContextDependencyName[] = []; if (!ffmpeg) missingDependencies.push("ffmpeg"); if (!ytDlp) missingDependencies.push("yt-dlp"); if (missingDependencies.length) { try { this.store.logAction({ kind: "runtime", content: "video_context_dependency_status", metadata: { ffmpegAvailable: ffmpeg, ytDlpAvailable: ytDlp, missingDependencies } }); } catch { // Console availability already logged above; never fail media handling on diagnostics. } } return { ffmpeg, ytDlp }; }) .catch((error) => { this.toolAvailabilityPromise = null; this.toolAvailabilityCheckedAt = 0; this.logCleanupError("video_context_tool_availability_failed", error); return { ffmpeg: false, ytDlp: false }; }); return this.toolAvailabilityPromise; }

async hasFfmpeg() { const tools = await this.getToolAvailability(); return Boolean(tools.ffmpeg); }

async hasYtDlp() { const tools = await this.getToolAvailability(); return Boolean(tools.ytDlp); }

async commandAvailable(command, args = ["--version"]) { try { await runCommand({ command, args, timeoutMs: COMMAND_PROBE_TIMEOUT_MS, useShell: true }); return true; } catch (error) { console.warn([VideoContextService] command_not_available command=${command} error=${error?.message || error}); return false; } } }

// Pick a sampling interval that still yields up to maxFrames evenly-spaced // keyframes when the clip is shorter than configuredInterval × maxFrames. // Returns the configured interval unchanged when duration is unknown or long // enough that fixed-interval sampling already produces the requested count. export function computeEffectiveKeyframeInterval( configuredInterval: number, maxFrames: number, durationSeconds: number | null | undefined ): number { const interval = Number(configuredInterval); const frames = Number(maxFrames); if (!Number.isFinite(interval) || interval <= 0 || !Number.isFinite(frames) || frames <= 0) { return interval; } if ( durationSeconds == null || !Number.isFinite(Number(durationSeconds)) || Number(durationSeconds) <= 0 || Number(durationSeconds) >= interval * frames ) { return interval; } return Math.max(Number(durationSeconds) / frames, MIN_EFFECTIVE_KEYFRAME_INTERVAL_SECONDS); }

function getDependencyFailure(error: unknown): VideoContextDependencyFailure | null { if (error instanceof VideoContextDependencyError) { return { dependency: error.dependency, code: error.code }; }

const message = String(error instanceof Error ? error.message : error || ""); if (!message) return null; const lower = message.toLowerCase(); const isDependencyMessage = lower.includes("local runtime dependency missing") || lower.includes("not installed") || lower.includes("is required"); if (!isDependencyMessage) return null; if (lower.includes("ffmpeg")) { return { dependency: "ffmpeg", code: "missing_ffmpeg" }; } if (lower.includes("yt-dlp")) { return { dependency: "yt-dlp", code: "missing_yt_dlp" }; } return null; }

function addMissingDependency( context: { missingDependencies?: VideoContextDependencyName[] }, dependency: VideoContextDependencyName ) { const dependencies = Array.isArray(context.missingDependencies) ? context.missingDependencies : []; if (!dependencies.includes(dependency)) dependencies.push(dependency); context.missingDependencies = dependencies; }

function summarizeYouTubeVideo({ videoId, url, playerResponse }) { const details = playerResponse?.videoDetails || {}; const micro = playerResponse?.microformat?.playerMicroformatRenderer || {};

const title = sanitizeText(details?.title || micro?.title?.simpleText || micro?.title || "", TEXT_SANITIZE_TITLE_MAX_CHARS) || "untitled video"; const channel = sanitizeText(details?.author || micro?.ownerChannelName || micro?.ownerChannel || "", TEXT_SANITIZE_CHANNEL_MAX_CHARS) || "unknown channel"; const description = sanitizeText(details?.shortDescription || micro?.description?.simpleText || "", TEXT_SANITIZE_DESCRIPTION_MAX_CHARS); const publishedAt = normalizeDateIso(micro?.publishDate || micro?.uploadDate || ""); const durationSeconds = safeNumber(details?.lengthSeconds); const viewCount = safeNumber(details?.viewCount);

return { videoId, url: String(url || https://www.youtube.com/watch?v=${videoId}), title, channel, publishedAt, durationSeconds, viewCount, description }; }

async function fetchYouTubeTranscriptText({ playerResponse, maxTranscriptChars }) { const tracks = playerResponse?.captions?.playerCaptionsTracklistRenderer?.captionTracks; if (!Array.isArray(tracks) || !tracks.length) return "";

const preferred = tracks.find((track) => /^en(?:-|$)/i.test(String(track?.languageCode || "")) && track?.kind !== "asr") || tracks.find((track) => /^en(?:-|$)/i.test(String(track?.languageCode || ""))) || tracks.find((track) => track?.kind !== "asr") || tracks[0]; const baseUrl = String(preferred?.baseUrl || "").trim(); if (!baseUrl) return "";

const transcriptUrl = new URL(baseUrl); transcriptUrl.searchParams.set("fmt", "srv3"); transcriptUrl.searchParams.set("xorb", "2"); transcriptUrl.searchParams.set("hl", "en");

const raw = await fetchTextWithRetry({ url: transcriptUrl.toString(), accept: "application/xml,text/xml,text/plain;q=0.9,/;q=0.2" }); return parseSubtitleText(raw, maxTranscriptChars); }

function extractPlayerResponse(html) { const source = String(html || ""); const markers = [ "var ytInitialPlayerResponse = ", 'window["ytInitialPlayerResponse"] = ', "window['ytInitialPlayerResponse'] = ", '"ytInitialPlayerResponse":' ];

for (const marker of markers) { const markerIndex = source.indexOf(marker); if (markerIndex < 0) continue; const startIndex = source.indexOf("{", markerIndex + marker.length); if (startIndex < 0) continue; const json = extractBalancedJsonObject(source, startIndex); if (!json) continue; try { return JSON.parse(json); } catch { // Try next marker. } }

return null; }

function extractBalancedJsonObject(text, startIndex) { if (!text || startIndex < 0 || text[startIndex] !== "{") return null;

let depth = 0; let inString = false; let escaping = false; for (let index = startIndex; index < text.length; index += 1) { const char = text[index]; if (inString) { if (escaping) { escaping = false; } else if (char === "\") { escaping = true; } else if (char === '"') { inString = false; } continue; }

if (char === '"') {
  inString = true;
  continue;
}

if (char === "{") {
  depth += 1;
  continue;
}

if (char === "}") {
  depth -= 1;
  if (depth === 0) {
    return text.slice(startIndex, index + 1);
  }
}

}

return null; }

function pickSubtitleTrack(tracksByLang, { preferManual } = { preferManual: true }) { if (!tracksByLang || typeof tracksByLang !== "object") return null; const keys = Object.keys(tracksByLang); if (!keys.length) return null;

const orderedKeys = [ ...keys.filter((key) => /^en(?:[-]|$)/i.test(key)), ...keys.filter((key) => /english/i.test(key)), ...keys.filter((key) => !/^en(?:[-]|$)/i.test(key) && !/english/i.test(key)) ]; const uniqueKeys = [...new Set(orderedKeys)]; const extPriority = ["vtt", "srv3", "ttml", "json3", "srt"];

for (const lang of uniqueKeys) { const rows = Array.isArray(tracksByLang[lang]) ? tracksByLang[lang] : []; if (!rows.length) continue; const orderedTracks = rows .slice() .sort((a, b) => extPriority.indexOf(String(a?.ext || "").toLowerCase()) - extPriority.indexOf(String(b?.ext || "").toLowerCase())); for (const row of orderedTracks) { const url = String(row?.url || row?.data || "").trim(); if (!url) continue; const ext = String(row?.ext || "").toLowerCase(); const appearsAuto = /\bauto(?:matic)?\b/i.test(String(row?.name || "")); if (preferManual && appearsAuto) continue; return { url, ext }; } }

for (const lang of uniqueKeys) { const rows = Array.isArray(tracksByLang[lang]) ? tracksByLang[lang] : []; for (const row of rows) { const url = String(row?.url || row?.data || "").trim(); if (!url) continue; return { url, ext: String(row?.ext || "").toLowerCase() }; } }

return null; }

function parseSubtitleText(raw, maxTranscriptChars) { const source = String(raw || ""); if (!source) return ""; const xmlBlocks = [...source.matchAll(/<(?:text|p)\b[^>]>([\s\S]?)</(?:text|p)>/gi)]; if (xmlBlocks.length) { const joined = xmlBlocks .map((match) => decodeHtmlEntities( String(match?.[1] || "") .replace(/<[^>]+>/g, " ") .replace(/\s+/g, " ") .trim() ) ) .filter(Boolean) .join(" "); return sanitizeText(joined, maxTranscriptChars); }

const lines = source .split(/\r? /g) .map((line) => decodeHtmlEntities(String(line || "").replace(/<[^>]+>/g, " ").trim())) .filter(Boolean) .filter((line) => !/^WEBVTT$/i.test(line)) .filter((line) => !/^\d+$/.test(line)) .filter((line) => !/^(NOTE|STYLE|REGION)\b/i.test(line)) .filter((line) => !/^\d{1,2}:\d{2}(?::\d{2})?[.,]\d{2,3}\s*-->\s*\d{1,2}:\d{2}(?::\d{2})?[.,]\d{2,3}/.test(line)) .filter((line) => !/^\d{1,2}:\d{2}(?::\d{2})?[.,]\d{2,3}$/.test(line)) .map((line) => line.replace(/\s+/g, " ").trim()); return sanitizeText(lines.join(" "), maxTranscriptChars); }

function readMetaTag(html, propertyOrName) { const escaped = escapeRegExp(propertyOrName); const pattern = new RegExp( <meta[^>]+(?:property|name)=["']${escaped}["'][^>]+content=["']([^"']+)["'][^>]*>, "i" ); const match = String(html || "").match(pattern); return decodeHtmlEntities(String(match?.[1] || "").trim()); }

function readHtmlTitle(html) { const match = String(html || "").match(/<title[^>]>([\s\S]?)</title>/i); return decodeHtmlEntities(String(match?.[1] || "").replace(/\s+/g, " ").trim()); }

async function fetchTextWithRetry({ url, accept = "/", maxAttempts = MAX_FETCH_ATTEMPTS }) { const safeUrl = normalizeDiscoveryUrl(url); if (!safeUrl) { throw new Error(blocked or invalid video URL: ${url}); }

const attemptLimit = Math.max(1, Number(maxAttempts) || MAX_FETCH_ATTEMPTS); let attempt = 0; while (attempt < attemptLimit) { attempt += 1; try { const { response, finalUrl } = await fetchPublicResponseWithRedirects({ url: safeUrl, accept });

  if (!response.ok) {
    if (shouldRetryHttpStatus(response.status) && attempt < attemptLimit) {
      await sleep(getRetryDelayMs(attempt));
      continue;
    }
    const error: ErrorWithAttempts = new Error(`Video HTTP ${response.status} for ${finalUrl}`);
    error.attempts = attempt;
    throw error;
  }

  let text = "";
  try {
    text = await response.text();
  } catch (error) {
    throw withAttemptCount(error, attempt);
  }
  if (!text) {
    const error: ErrorWithAttempts = new Error("Video source returned empty response.");
    error.attempts = attempt;
    throw error;
  }

  return text;
} catch (error) {
  if (isRetryableFetchError(error) && attempt < attemptLimit) {
    await sleep(getRetryDelayMs(attempt));
    continue;
  }
  throw withAttemptCount(error, attempt);
}

}

throw withAttemptCount(new Error("Video fetch failed after retries."), attemptLimit); }

async function fetchPublicResponseWithRedirects({ url, accept, maxRedirects = MAX_FETCH_REDIRECTS }) { let currentUrl = String(url || ""); for (let redirects = 0; redirects <= maxRedirects; redirects += 1) { await assertPublicUrl(currentUrl); const response = await fetch(currentUrl, { method: "GET", redirect: "manual", headers: { "user-agent": VIDEO_USER_AGENT, accept }, signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS) });

if (isRedirectStatus(response.status)) {
  const location = String(response.headers.get("location") || "").trim();
  if (!location) {
    throw new Error(`video redirect missing location for ${currentUrl}`);
  }
  const nextUrl = normalizeDiscoveryUrl(new URL(location, currentUrl).toString());
  if (!nextUrl) {
    throw new Error(`blocked or invalid video redirect URL: ${location}`);
  }
  currentUrl = nextUrl;
  continue;
}

const finalUrl = normalizeDiscoveryUrl(response.url || currentUrl);
if (!finalUrl) {
  throw new Error(`blocked or invalid video URL: ${response.url || currentUrl}`);
}
await assertPublicUrl(finalUrl);
return {
  response,
  finalUrl
};

}

throw new Error(too many redirects for video URL: ${url}); }

async function runCommand({ command, args, timeoutMs = 30_000, useShell = false }: { command: string; args: string[]; timeoutMs?: number; useShell?: boolean }) { return new Promise<{ stdout: string; stderr: string }>((resolve, reject) => { const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"], ...(useShell ? { shell: true } : {}) });

let stdout = "";
let stderr = "";
let stdoutBytes = 0;
let stderrBytes = 0;
let timedOut = false;
const timer = setTimeout(() => {
  timedOut = true;
  child.kill("SIGKILL");
}, Math.max(1, timeoutMs));

child.stdout.on("data", (chunk) => {
  const text = String(chunk || "");
  const nextBytes = Buffer.byteLength(text);
  if (stdoutBytes < MAX_COMMAND_OUTPUT_BYTES) {
    stdout += text;
    if (Buffer.byteLength(stdout) > MAX_COMMAND_OUTPUT_BYTES) {
      stdout = stdout.slice(0, MAX_COMMAND_OUTPUT_BYTES);
    }
  }
  stdoutBytes += nextBytes;
});

child.stderr.on("data", (chunk) => {
  const text = String(chunk || "");
  const nextBytes = Buffer.byteLength(text);
  if (stderrBytes < MAX_COMMAND_OUTPUT_BYTES) {
    stderr += text;
    if (Buffer.byteLength(stderr) > MAX_COMMAND_OUTPUT_BYTES) {
      stderr = stderr.slice(0, MAX_COMMAND_OUTPUT_BYTES);
    }
  }
  stderrBytes += nextBytes;
});

child.on("error", (error) => {
  clearTimeout(timer);
  reject(error);
});

child.on("close", (code) => {
  clearTimeout(timer);
  if (timedOut) {
    reject(new Error(`${command} timed out after ${timeoutMs}ms.`));
    return;
  }
  if (code !== 0) {
    const message = String(stderr || stdout || "").replace(/\s+/g, " ").trim();
    reject(new Error(`${command} exited with code ${code}${message ? `: ${message.slice(0, COMMAND_ERROR_MESSAGE_MAX_CHARS)}` : ""}`));
    return;
  }
  resolve({
    stdout: String(stdout || "").trim(),
    stderr: String(stderr || "").trim()
  });
});

}); }

function safeNumber(value) { const number = Number(value); return Number.isFinite(number) ? number : null; }

function normalizeDateIso(value) { if (value === null || value === undefined || value === "") return null; const timestamp = Number(value); if (Number.isFinite(timestamp) && timestamp > 0) { const ms = timestamp > 9_999_999_999 ? timestamp : timestamp * 1000; const parsed = new Date(ms); if (!Number.isNaN(parsed.getTime())) return parsed.toISOString(); }

const text = String(value || "").trim(); if (!text) return null; const parsed = Date.parse(text); if (!Number.isFinite(parsed)) return null; return new Date(parsed).toISOString(); }

function normalizeYtDlpDate(value) { const text = String(value || "").trim(); if (!/^\d{8}$/.test(text)) return null; const year = text.slice(0, 4); const month = text.slice(4, 6); const day = text.slice(6, 8); const iso = ${year}-${month}-${day}T00:00:00.000Z; const parsed = Date.parse(iso); return Number.isFinite(parsed) ? new Date(parsed).toISOString() : null; }

function sanitizeText(value, maxLen = 240) { const text = String(value || "") .replace(/\s+/g, " ") .trim();

if (!text) return ""; if (text.length <= maxLen) return text; return ${text.slice(0, Math.max(0, maxLen - 3)).trimEnd()}...; }

function safeHostFromUrl(url) { try { return String(new URL(String(url)).hostname || "") .toLowerCase() .replace(/^www./, ""); } catch { return ""; } }

function decodeHtmlEntities(value) { return String(value || "") .replace(/'|'/gi, "'") .replace(/"/gi, '"') .replace(/&/gi, "&") .replace(/</gi, "<") .replace(/>/gi, ">") .replace(/ /gi, " ") .replace(/&#(\d+);/g, (_match, numberText) => { const number = Number(numberText); return Number.isFinite(number) ? String.fromCharCode(number) : ""; }) .replace(/&#x([0-9a-f]+);/gi, (_match, hexText) => { const number = Number.parseInt(hexText, 16); return Number.isFinite(number) ? String.fromCharCode(number) : ""; }); }

function escapeRegExp(value) { return String(value || "").replace(/[.*+?^${}()|[]\]/g, "\$&"); }