use openh264::decoder::Decoder; use openh264_sys2::{ dsDataErrorConcealed, dsErrorFree, dsNoParamSets, dsRefLost, SBufferInfo, DECODER_OPTION_ERROR_CON_IDC, DECODING_STATE, }; use std::os::raw::c_int; use std::ptr::{addr_of_mut, from_mut, null_mut}; use tokio::time; use tracing::{info, warn};
/// Decoded video frame ready for IPC transmission as JPEG.
pub(crate) struct DecodedFrame {
    /// JPEG-encoded image bytes (the full compressed stream, ready to send).
    // FIX: the element type was missing (`Vec` alone does not compile);
    // the data comes from turbojpeg's owned output buffer, i.e. bytes.
    pub(crate) jpeg_data: Vec<u8>,
    /// Frame width in pixels.
    pub(crate) width: u32,
    /// Frame height in pixels.
    pub(crate) height: u32,
    /// Instantaneous normalized luma diff (0.0–1.0) between this frame and the
    /// previous one, computed on a coarse 64×36 grid. 0.0 = identical,
    /// 1.0 = every sampled pixel changed by maximum amount.
    pub(crate) change_score: f32,
    /// EMA-smoothed change score. Rises when sustained visual change occurs
    /// across multiple consecutive frames; filters out single-frame noise
    /// (compression shimmer, cursor blink, HUD animation).
    pub(crate) ema_change_score: f32,
    /// True when the instantaneous change score is high enough to indicate a
    /// hard cut (scene change, app switch, new page load, etc.).
    pub(crate) is_scene_cut: bool,
}
// ── Frame-diff constants ────────────────────────────────────────────────

/// Coarse-grid dimensions used for luma diff sampling.
/// 64×36 = 2 304 samples — cheap enough to run every frame while capturing
/// meaningful spatial structure.
const DIFF_GRID_W: usize = 64;
const DIFF_GRID_H: usize = 36;
/// Total sample count of the coarse grid (64 × 36 = 2 304).
const DIFF_GRID_SIZE: usize = DIFF_GRID_W * DIFF_GRID_H;

/// EMA decay factor. 0.35 means each new frame contributes 35 % and history
/// contributes 65 %. At 2 fps this gives a ~1.5 s smoothing window.
const EMA_ALPHA: f32 = 0.35;

/// Instantaneous diff above this is classified as a scene cut.
const SCENE_CUT_THRESHOLD: f32 = 0.40;

/// DECODING_STATE bitmask for states where the decoder may have produced a
/// usable frame (even if concealed or with lost references). We accept
/// these and check iBufferStatus to see if YUV data is actually present.
const ACCEPTABLE_DECODE_STATES: DECODING_STATE = dsErrorFree | dsDataErrorConcealed | dsRefLost;
/// Persistent H264 decoder that accumulates codec state across frames.
///
/// Uses the OpenH264 raw API directly to bypass the Rust wrapper's strict
/// error handling, which treats dsDataErrorConcealed (error concealment
/// applied) as a fatal error even though the decoder produced valid output.
///
/// With error concealment enabled, the decoder can produce frames from
/// P-frames (NAL type 1) even without a prior IDR keyframe — the missing
/// reference data is concealed (green/grey initially, converging to correct
/// output as more P-frames arrive).
pub(crate) struct PersistentVideoDecoder {
decoder: Decoder,
jpeg_quality: i32,
frames_decoded: u64,
decode_errors: u64,
consecutive_errors: u32,
last_reset_at: Optiontime::Instant,
/// Frames decoded since the last reset (or since creation if never
/// reset). Used to determine whether the decoder needs a fresh
/// keyframe via PLI after a reset event.
frames_decoded_since_reset: u64,
/// Set to true after try_reset succeeds. Cleared once the caller
/// has had a chance to act on it (send PLI).
reset_pending_pli: bool,
/// Reusable RGB buffer to avoid per-frame allocation.
rgb_buf: Vec,
// ── Frame-diff state ────────────────────────────────────────────
/// Coarse luma grid from the previously decoded frame.
prev_luma_grid: Vec<u8>,
/// EMA-smoothed change score, persists across frames.
ema_change: f32,
}
/// Configure error concealment on an OpenH264 decoder. /// /// ERROR_CON_SLICE_COPY_CROSS_IDR (4): copies slices across IDR /// boundaries, allowing the decoder to produce (potentially concealed) /// output instead of refusing with error 18 when reference frames are /// missing. #[allow(unsafe_code)] fn configure_error_concealment(decoder: &mut Decoder) { let mut ec_idc: i32 = 4; // ERROR_CON_SLICE_COPY_CROSS_IDR unsafe { let _ = decoder .raw_api() .set_option(DECODER_OPTION_ERROR_CON_IDC, addr_of_mut!(ec_idc).cast()); } }
#[allow(unsafe_code)]
impl PersistentVideoDecoder {
    /// Construct a decoder with OpenH264 error concealment enabled.
    ///
    /// # Errors
    /// Propagates `openh264::Error` when the underlying decoder cannot be
    /// created.
    pub(crate) fn new() -> Result<Self, openh264::Error> {
        let mut decoder = Decoder::new()?;
        configure_error_concealment(&mut decoder);
        Ok(Self {
            decoder,
            // Default quality; callers may override via set_jpeg_quality.
            jpeg_quality: 75,
            frames_decoded: 0,
            decode_errors: 0,
            consecutive_errors: 0,
            last_reset_at: None,
            frames_decoded_since_reset: 0,
            reset_pending_pli: false,
            rgb_buf: Vec::new(),
            prev_luma_grid: Vec::new(),
            ema_change: 0.0,
        })
    }

    /// Update the JPEG compression quality used when encoding decoded frames.
    /// Clamped to 10..=100.
    pub(crate) fn set_jpeg_quality(&mut self, quality: i32) {
        self.jpeg_quality = quality.clamp(10, 100);
    }

    /// Feed a raw Annex-B access unit to the persistent decoder.
    ///
    /// Returns `Some(DecodedFrame)` when a picture is produced.
    ///
    /// Uses the OpenH264 raw C API directly because the Rust wrapper
    /// (`openh264` crate) treats `dsDataErrorConcealed` as a fatal error,
    /// but it actually means "decoded with concealment applied" — the
    /// decoder produced valid YUV output that we should use.
    pub(crate) fn decode_frame(&mut self, annexb_frame: &[u8]) -> Option<DecodedFrame> {
        if annexb_frame.is_empty() {
            return None;
        }
        // Y/U/V plane pointers filled in by the decoder on success.
        let mut dst = [null_mut::<u8>(); 3];
        let mut buffer_info = SBufferInfo::default();
        // Call the raw C API directly, bypassing the Rust wrapper's
        // `.ok()` which rejects any non-zero DECODING_STATE.
        // SAFETY: pointers are valid for the duration of the call;
        // `dst` and `buffer_info` live on this stack frame.
        let state: DECODING_STATE = unsafe {
            self.decoder.raw_api().decode_frame_no_delay(
                annexb_frame.as_ptr(),
                annexb_frame.len() as c_int,
                from_mut(&mut dst).cast(),
                &raw mut buffer_info,
            )
        };
        // Check if the state indicates a usable frame was produced.
        // dsErrorFree (0), dsDataErrorConcealed (32), dsRefLost (2),
        // and combinations thereof are acceptable — the decoder may
        // have produced YUV output. Other states (dsBitstreamError,
        // dsNoParamSets, etc.) are genuine failures.
        let state_acceptable =
            (state & !ACCEPTABLE_DECODE_STATES) == 0 && state != dsNoParamSets as DECODING_STATE;
        if !state_acceptable {
            self.decode_errors += 1;
            self.consecutive_errors += 1;
            // Log the first few errors, then sample every 100th to avoid
            // flooding the log during a sustained failure streak.
            if self.consecutive_errors <= 5 || self.consecutive_errors % 100 == 0 {
                warn!(
                    consecutive_errors = self.consecutive_errors,
                    total_errors = self.decode_errors,
                    frames_decoded = self.frames_decoded,
                    decode_state = state,
                    "clankvox_openh264_decode_error"
                );
            }
            // Sustained failure: tear down and rebuild the codec state.
            if self.consecutive_errors >= 50 {
                self.try_reset();
            }
            return None;
        }
        // Even with an acceptable state, the decoder might not have
        // produced a frame yet (e.g., SPS/PPS only). Check buffer status.
        if buffer_info.iBufferStatus == 0 {
            // No frame produced — parameter sets ingested or buffering.
            self.consecutive_errors = 0;
            return None;
        }
        // Validate YUV plane pointers before dereferencing them below.
        if dst[0].is_null() || dst[1].is_null() || dst[2].is_null() {
            warn!("clankvox_openh264_null_yuv_pointers");
            return None;
        }
        // Extract frame dimensions from the buffer info.
        // SAFETY: iBufferStatus != 0 indicates the system-buffer variant of
        // the UsrData union is populated — TODO confirm against the
        // OpenH264 headers for the linked version.
        let sys_buf = unsafe { buffer_info.UsrData.sSystemBuffer };
        let width = sys_buf.iWidth as usize;
        let height = sys_buf.iHeight as usize;
        let y_stride = sys_buf.iStride[0] as usize;
        let uv_stride = sys_buf.iStride[1] as usize;
        // Guard against degenerate dimensions before computing slice sizes.
        if width == 0 || height == 0 || y_stride == 0 || uv_stride == 0 {
            warn!(
                width,
                height, y_stride, uv_stride, "clankvox_openh264_zero_dimension_frame"
            );
            return None;
        }
        self.frames_decoded += 1;
        self.frames_decoded_since_reset += 1;
        self.consecutive_errors = 0;
        // Convert YUV420 to RGB directly from the raw plane pointers.
        let rgb_len = width * height * 3;
        self.rgb_buf.resize(rgb_len, 0);
        // SAFETY: pointers were null-checked above; plane extents follow the
        // YUV420 layout (full-res luma, half-res chroma) with the strides
        // reported by the decoder.
        unsafe {
            let y_plane = std::slice::from_raw_parts(dst[0], height * y_stride);
            let u_plane = std::slice::from_raw_parts(dst[1], (height / 2) * uv_stride);
            let v_plane = std::slice::from_raw_parts(dst[2], (height / 2) * uv_stride);
            yuv420_to_rgb(
                y_plane,
                u_plane,
                v_plane,
                width,
                height,
                y_stride,
                uv_stride,
                &mut self.rgb_buf,
            );
        }
        // ── Coarse luma diff ─────────────────────────────────────
        let (change_score, ema_change_score, is_scene_cut) = self.compute_change(width, height);
        // Encode RGB to JPEG via turbojpeg.
        if let Some(jpeg) = self.encode_jpeg(width as u32, height as u32) {
            Some(DecodedFrame {
                jpeg_data: jpeg,
                width: width as u32,
                height: height as u32,
                change_score,
                ema_change_score,
                is_scene_cut,
            })
        } else {
            warn!(width, height, rgb_len, "clankvox_turbojpeg_encode_failed");
            None
        }
    }

    /// Compute a coarse luma diff between the current `rgb_buf` and the
    /// previous frame's luma grid. Updates internal EMA and prev grid.
    ///
    /// Returns `(instantaneous_score, ema_score, is_scene_cut)`.
    /// First frame always returns (0.0, 0.0, false) because there is no
    /// previous frame to compare against.
    fn compute_change(&mut self, width: usize, height: usize) -> (f32, f32, bool) {
        // Build a coarse luma grid by sampling the RGB buffer at evenly
        // spaced grid points. Luma ≈ 0.299 R + 0.587 G + 0.114 B, but
        // integer approximation (77 R + 150 G + 29 B) >> 8 is plenty.
        let mut grid = vec![0u8; DIFF_GRID_SIZE];
        for gy in 0..DIFF_GRID_H {
            let src_y = (gy * height) / DIFF_GRID_H;
            for gx in 0..DIFF_GRID_W {
                let src_x = (gx * width) / DIFF_GRID_W;
                let idx = (src_y * width + src_x) * 3;
                // Bounds guard: out-of-range samples are left at 0.
                if idx + 2 < self.rgb_buf.len() {
                    let r = self.rgb_buf[idx] as u32;
                    let g = self.rgb_buf[idx + 1] as u32;
                    let b = self.rgb_buf[idx + 2] as u32;
                    grid[gy * DIFF_GRID_W + gx] = ((77 * r + 150 * g + 29 * b) >> 8) as u8;
                }
            }
        }
        // Compare against previous grid.
        let change_score = if self.prev_luma_grid.len() == DIFF_GRID_SIZE {
            let mut total_diff: u64 = 0;
            for i in 0..DIFF_GRID_SIZE {
                let diff = (grid[i] as i16 - self.prev_luma_grid[i] as i16).unsigned_abs() as u64;
                total_diff += diff;
            }
            // Normalize: max possible diff per sample is 255.
            (total_diff as f64 / (DIFF_GRID_SIZE as f64 * 255.0)) as f32
        } else {
            // First frame — no previous data to compare.
            0.0
        };
        // Update EMA.
        self.ema_change = EMA_ALPHA * change_score + (1.0 - EMA_ALPHA) * self.ema_change;
        let is_scene_cut = change_score >= SCENE_CUT_THRESHOLD;
        // Store current grid as previous for next frame.
        self.prev_luma_grid = grid;
        (change_score, self.ema_change, is_scene_cut)
    }

    /// Compress the current `rgb_buf` (packed RGB8, `width`×`height`) to
    /// JPEG at `self.jpeg_quality` with 4:2:0 chroma subsampling.
    /// Returns `None` (after logging) if turbojpeg fails.
    fn encode_jpeg(&self, width: u32, height: u32) -> Option<Vec<u8>> {
        let image = turbojpeg::Image {
            pixels: self.rgb_buf.as_slice(),
            width: width as usize,
            // Rows are tightly packed: 3 bytes per pixel, no padding.
            pitch: width as usize * 3,
            height: height as usize,
            format: turbojpeg::PixelFormat::RGB,
        };
        turbojpeg::compress(image, self.jpeg_quality, turbojpeg::Subsamp::Sub2x2)
            .map_err(|e| {
                warn!("turbojpeg compress error: {e}");
                e
            })
            .ok()
            .map(|output| output.to_vec())
    }

    /// Replace the decoder instance after sustained decode failures.
    ///
    /// Rate-limited to at most one reset per 5 seconds; on success, clears
    /// all frame-diff state and sets `reset_pending_pli` so the caller can
    /// request a fresh keyframe.
    fn try_reset(&mut self) {
        let now = time::Instant::now();
        // Rate limit: skip if the last reset was under 5 s ago.
        if let Some(last) = self.last_reset_at {
            if now.duration_since(last) < std::time::Duration::from_secs(5) {
                return;
            }
        }
        info!(
            consecutive_errors = self.consecutive_errors,
            total_errors = self.decode_errors,
            frames_decoded = self.frames_decoded,
            frames_decoded_since_reset = self.frames_decoded_since_reset,
            "clankvox_openh264_decoder_reset"
        );
        match Decoder::new() {
            Ok(mut decoder) => {
                configure_error_concealment(&mut decoder);
                self.decoder = decoder;
                self.consecutive_errors = 0;
                self.frames_decoded_since_reset = 0;
                self.reset_pending_pli = true;
                // Frame-diff history is stale across a reset: drop it so the
                // first post-reset frame is not flagged as a scene cut.
                self.prev_luma_grid.clear();
                self.ema_change = 0.0;
                self.last_reset_at = Some(now);
            }
            // On failure the old (broken) decoder stays in place; a later
            // call will retry since last_reset_at was not updated.
            Err(e) => warn!("clankvox_openh264_decoder_reset_failed: {e}"),
        }
    }

    /// Total number of frames successfully decoded since creation.
    pub(crate) fn frames_decoded(&self) -> u64 {
        self.frames_decoded
    }

    /// Consume and return the pending PLI flag. Returns `true` exactly
    /// once after each decoder reset so the caller can send a PLI/FIR to
    /// request a fresh keyframe from the sender.
    pub(crate) fn take_pending_pli(&mut self) -> bool {
        let pending = self.reset_pending_pli;
        self.reset_pending_pli = false;
        pending
    }
}
/// Convert YUV420 planar data to packed RGB8.
///
/// `rgb` must hold at least `width * height * 3` bytes; the Y plane must
/// cover `height * y_stride` bytes and each chroma plane
/// `(height / 2) * uv_stride` bytes.
///
/// This is a simple scalar conversion — adequate for the 2 fps screen
/// capture rate. For higher throughput, SIMD or GPU conversion would
/// be needed.
fn yuv420_to_rgb(
    y_plane: &[u8],
    u_plane: &[u8],
    v_plane: &[u8],
    width: usize,
    height: usize,
    y_stride: usize,
    uv_stride: usize,
    rgb: &mut [u8],
) {
    for row in 0..height {
        // Hoist the per-row bases; chroma rows are shared by two luma rows.
        let y_base = row * y_stride;
        let uv_base = (row / 2) * uv_stride;
        let out_base = row * width * 3;
        for col in 0..width {
            let luma = y_plane[y_base + col] as f32;
            // Each chroma sample covers a 2×2 luma block.
            let uv_idx = uv_base + col / 2;
            let cb = u_plane[uv_idx] as f32 - 128.0;
            let cr = v_plane[uv_idx] as f32 - 128.0;
            // BT.601 full-range conversion, clamped to the u8 range.
            let out = out_base + col * 3;
            rgb[out] = (luma + 1.402 * cr).clamp(0.0, 255.0) as u8;
            rgb[out + 1] = (luma - 0.344136 * cb - 0.714136 * cr).clamp(0.0, 255.0) as u8;
            rgb[out + 2] = (luma + 1.772 * cb).clamp(0.0, 255.0) as u8;
        }
    }
}
#[cfg(test)]
mod tests {
    use super::PersistentVideoDecoder;

    /// A fresh decoder constructs cleanly with zeroed counters and no
    /// pending PLI request.
    #[test]
    fn decoder_initializes_successfully() {
        let result = PersistentVideoDecoder::new();
        assert!(result.is_ok());
        let fresh = result.unwrap();
        assert_eq!(fresh.frames_decoded(), 0);
        assert!(!fresh.reset_pending_pli);
    }

    /// An empty access unit is rejected up front without touching counters.
    #[test]
    fn decoder_handles_empty_input_gracefully() {
        let mut dec = PersistentVideoDecoder::new().unwrap();
        assert!(dec.decode_frame(&[]).is_none());
        assert_eq!(dec.frames_decoded(), 0);
    }

    /// A start code followed by junk must be swallowed as a decode error,
    /// never a panic.
    #[test]
    fn decoder_handles_garbage_input_without_panic() {
        let mut dec = PersistentVideoDecoder::new().unwrap();
        let garbage: Vec<u8> = vec![0x00, 0x00, 0x00, 0x01, 0xFF, 0xAB, 0xCD, 0xEF];
        assert!(dec.decode_frame(&garbage).is_none());
    }
}
