diff --git a/tools/daedalus_decode_h264.c b/tools/daedalus_decode_h264.c index e1c3099..01be2fc 100644 --- a/tools/daedalus_decode_h264.c +++ b/tools/daedalus_decode_h264.c @@ -105,12 +105,15 @@ static int max_frames = -1; * no PCM). Other MBs stay on identity-passthrough. */ #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB struct mb_capture { - int valid; /* 1 = real-coeffs path, 0 = identity passthrough */ - int16_t coeffs[256]; /* luma, column-major within 4x4, raster block order */ + int valid; /* 1 = real-coeffs IDCT path, 0 = identity (predicted = pre_deblock_snap_y) */ + int16_t coeffs[256]; /* luma, raster block order, raw sl->mb layout */ uint8_t predicted[256]; /* luma P recovered = pre_deblock - clipped IDCT(C) */ - uint8_t pre_deblock_snap[256]; /* DIAGNOSTIC: pre_deblock at callback time; - * compared against AVFrame post-receive_frame - * to detect h->cur_pic.f vs AVFrame divergence */ + uint8_t pre_deblock_snap_y[256]; /* luma 16×16 pre-deblock at callback time */ + uint8_t pre_deblock_snap_cb[64]; /* Cb 8×8 pre-deblock */ + uint8_t pre_deblock_snap_cr[64]; /* Cr 8×8 pre-deblock */ + int qp_y; /* QP_Y for this MB (h->cur_pic.qscale_table[mb_xy] at callback time) */ + int mb_type_intra; /* 1 if MB is intra (any flavour), 0 otherwise */ + int transform_8x8; /* 1 if 8×8 DCT (affects which internal edges fire) */ }; struct inspect_state { @@ -121,14 +124,98 @@ struct inspect_state { int out_of_bounds; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS struct mb_capture *captures; /* mb_w * mb_h entries */ - int real_coeffs_mbs; /* count of MBs in real-coeffs path this frame */ + int real_coeffs_mbs; /* count of MBs in real-coeffs IDCT path this frame */ int skipped_intra16x16; int skipped_8x8dct; int skipped_other; + /* Slice-level deblock params (overwritten on every callback from + * slice_ctx[0]). Per H.264 spec these are constant per slice; we + * assume single-slice frames in our test stream. 
*/ + int slice_alpha_c0_offset; + int slice_beta_offset; + int slice_deblock_disable; /* raw copy of sl->deblocking_filter. NOTE(review): FFmpeg appears to store disable_deblocking_filter_idc with 0<->1 swapped (0 == filter off, 1 == on) — confirm against h264_slice.c */ #endif }; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS +/* H.264 §8.7.2.2/8.7.2.3 deblock filter tables — transcribed verbatim + * from FFmpeg libavcodec/h264_loopfilter.c (LGPL-2.1+; algorithm + table + * values come from the H.264 spec which is normative and unpatented). + * Tables are size 52*3 — FFmpeg's trick to absorb slice_alpha_c0_offset + + * slice_beta_offset (in -12..+12) into the index without bounds-clamping. + * Usage: alpha = alpha_table[qp + a] where a = 52 + slice_alpha_c0_offset + * (8-bit only; high-bit-depth subtracts qp_bd_offset). */ +static const uint8_t alpha_table[52*3] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, + 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, + 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, + 80, 90,101,113,127,144,162,182,203,226, + 255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255, +}; +static const uint8_t beta_table[52*3] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, + 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, + 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, +}; +static const int8_t tc0_table[52*3][4] = { + {-1, 0, 0, 0 }, {-1, 0, 
0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, + {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 }, + {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, + {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, + {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, + {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, + {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 
}, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, +}; + +/* H.264 §8.5.11 / Table 8-11: qP_y → qP_chroma mapping for chroma_qp_index_offset == 0. + * For qP_y < 30, qP_c = qP_y. Above that, the spec table compresses. */ +static const uint8_t chroma_qp_table[52] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, + 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38, + 39, 39, 39, 39, +}; + /* libavcodec's sl->mb stores coefficients in RASTER (row-major) order, * not zig-zag scan order — h264_cavlc.c does * block[*scantable] = (level * qmul[*scantable] + 32) >> 6 @@ -212,6 +299,16 @@ static void inspect_cb(void *opaque, st->n_cbs_this_frame++; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS + /* Capture slice-level deblock params (re-read from slice_ctx[0] on every + * callback). Per spec they're constant per slice; for our single-slice + * test streams we just keep the latest values seen. */ + { + const H264SliceContext *sl = &h->slice_ctx[0]; + st->slice_alpha_c0_offset = sl->slice_alpha_c0_offset; + st->slice_beta_offset = sl->slice_beta_offset; + st->slice_deblock_disable = sl->deblocking_filter; /* NOTE(review): FFmpeg's field looks to be 0 when the filter is OFF — confirm before treating 1 as "disabled" */ + } + /* Real-coeffs path: extract per-MB state for daedalus-decoder * IDCT validation on this MB. 
Gate: only Intra_4x4 + 4x4 transform * + non-PCM is supported in PR-A3b — other MB flavours fall back @@ -222,6 +319,30 @@ static void inspect_cb(void *opaque, const int mb_xy = mb_y * h->mb_stride + mb_x; const uint32_t mb_type = h->cur_pic.mb_type[mb_xy]; + /* Capture state needed for deblock edge derivation, regardless + * of whether this MB takes the real-coeffs IDCT path. */ + cap->qp_y = h->cur_pic.qscale_table[mb_xy]; + cap->mb_type_intra = IS_INTRA(mb_type) ? 1 : 0; + cap->transform_8x8 = IS_8x8DCT(mb_type) ? 1 : 0; + + /* Snapshot pre-deblock pixels for all 3 planes at this MB's position. */ + { + const int y_stride = h->cur_pic.f->linesize[0]; + const int uv_stride = h->cur_pic.f->linesize[1]; + const uint8_t *mb_y_px = h->cur_pic.f->data[0] + + (ptrdiff_t) mb_y * 16 * y_stride + mb_x * 16; + const uint8_t *mb_cb_px = h->cur_pic.f->data[1] + + (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8; + const uint8_t *mb_cr_px = h->cur_pic.f->data[2] + + (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8; + for (int r = 0; r < 16; r++) + memcpy(&cap->pre_deblock_snap_y[r * 16], &mb_y_px[r * y_stride], 16); + for (int r = 0; r < 8; r++) { + memcpy(&cap->pre_deblock_snap_cb[r * 8], &mb_cb_px[r * uv_stride], 8); + memcpy(&cap->pre_deblock_snap_cr[r * 8], &mb_cr_px[r * uv_stride], 8); + } + } + if (!IS_INTRA4x4(mb_type)) { if (IS_INTRA16x16(mb_type)) st->skipped_intra16x16++; else st->skipped_other++; @@ -236,11 +357,8 @@ static void inspect_cb(void *opaque, const uint8_t *mb_pixels = luma_plane + (ptrdiff_t) mb_y * 16 * luma_stride + mb_x * 16; - /* Diagnostic snapshot: capture the 16x16 luma block as we see it in - * cur_pic at callback time. Compared against AVFrame contents after - * receive_frame returns; mismatch points at a buffer-divergence bug. 
*/ - for (int r = 0; r < 16; r++) - memcpy(&cap->pre_deblock_snap[r * 16], &mb_pixels[r * luma_stride], 16); + /* (pre_deblock_snap_y is already populated above for all 3 planes; + * we use it later in the main loop as the daedalus predicted input.) */ /* Coefficients are in sl->mb at end of entropy decode but zeroed by * the time the callback fires (IDCT-add consumed them). Patch 0017 @@ -485,12 +603,13 @@ int main(int argc, char **argv) #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS /* Patch 0017's coefficient side buffer lives in H264Context (single - * per-stream); multi-threaded slice decode would race on it. Force - * single-thread. Also disable libavcodec's deblock so AVFrame is - * pre-deblock and the P-recovery math is exact. */ + * per-stream); multi-threaded slice decode would race on it. */ avctx->thread_count = 1; avctx->thread_type = 0; - avctx->skip_loop_filter = AVDISCARD_ALL; + /* PR-A6: keep libavcodec's deblock ON so AVFrame is the post-deblock + * reference we validate daedalus against. Per-MB pre_deblock + * snapshots taken in the inspection callback (before deblock crosses + * into this MB's region) provide daedalus with pre-deblock input. */ #endif if (avcodec_open2(avctx, codec, NULL) < 0) { @@ -611,26 +730,117 @@ int main(int argc, char **argv) const int mb_h = coded_h / 16; uint8_t mb_pred[384]; int16_t mb_coeffs[384] = {0}; + struct daedalus_decoder_edge mb_edges[16]; struct daedalus_decoder_mb_input mb = {0}; +#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS + /* PR-A6 edge derivation: a = 52 + slice_alpha_c0_offset, + * b = 52 + slice_beta_offset (per FFmpeg loopfilter.c + * convention; absorbs the offset into the tripled tables). 
*/ + const int slice_a = 52 + inspect_st.slice_alpha_c0_offset; + const int slice_b = 52 + inspect_st.slice_beta_offset; + const int deblock_off = inspect_st.slice_deblock_disable == 1; /* 1 == disabled */ +#endif for (int my = 0; my < mb_h; my++) { for (int mx = 0; mx < mb_w; mx++) { /* Default: identity-passthrough — luma from AVFrame, - * chroma from AVFrame, coeffs all zero. */ + * chroma from AVFrame, coeffs all zero, no edges. */ pack_mb_predicted(fr, mx, my, mb_pred); memset(mb_coeffs, 0, sizeof(mb_coeffs)); + int n_edges = 0; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS - /* Real-coeffs path: if the callback captured this MB - * as Intra_4x4 / 4x4-DCT, override luma predicted - * with the recovered P and use the real luma coeffs. - * Chroma stays identity-passthrough (PR-A3b scope — - * chroma DC Hadamard + 8x8 transform follow-ups). */ + /* PR-A6: feed daedalus pre-deblock pixels from the + * per-MB snapshots taken in the callback (AVFrame is + * now post-deblock — used as reference, not as input). */ const int mb_idx = my * mb_w + mx; const struct mb_capture *cap = &inspect_st.captures[mb_idx]; + + /* Luma: P_rec for real-coeffs MBs, raw pre-deblock snap + * otherwise (with zero coeffs). Both produce the same + * pre-deblock state after daedalus IDCT-add. */ if (cap->valid) { memcpy(mb_pred, cap->predicted, 256); for (int i = 0; i < 256; i++) mb_coeffs[i] = cap->coeffs[i]; + } else { + memcpy(mb_pred, cap->pre_deblock_snap_y, 256); + } + /* Chroma: always identity-passthrough from snap. + * Chroma DC Hadamard + chroma residual extraction is + * a follow-up (PR-A4). */ + memcpy(mb_pred + 256, cap->pre_deblock_snap_cb, 64); + memcpy(mb_pred + 256 + 64, cap->pre_deblock_snap_cr, 64); + + /* Derive deblock edges for this MB. Spec §8.7.2: + * - Frame-boundary edges: skip (bS=0 — kernel reads p3 at -4). + * - MB-boundary edges with intra neighbour: bS=4. + * - Internal MB edges within intra MB: bS=3. 
+ * - 8x8 DCT MBs: internal edges only at col/row 8 (the + * single 8x8-block boundary inside the MB). + * For non-intra MB types in mixed streams the bS rules + * differ; we'd need cbp/MV/ref info from sl context for + * those. Our test stream is all-intra, so simplified. */ + if (!deblock_off && cap->mb_type_intra && !getenv("DAEDALUS_SKIP_EDGES")) { + const int qp_self = cap->qp_y; + const int qp_left = (mx > 0) + ? inspect_st.captures[mb_idx - 1].qp_y : qp_self; + const int qp_top = (my > 0) + ? inspect_st.captures[mb_idx - mb_w].qp_y : qp_self; + const int qpc_self = chroma_qp_table[qp_self]; + const int qpc_left = chroma_qp_table[qp_left]; + const int qpc_top = chroma_qp_table[qp_top]; + const int qp_avg_left = (qp_self + qp_left + 1) >> 1; + const int qp_avg_top = (qp_self + qp_top + 1) >> 1; + const int qpc_avg_left = (qpc_self + qpc_left + 1) >> 1; + const int qpc_avg_top = (qpc_self + qpc_top + 1) >> 1; + + /* Helper macro to emit one edge. bS=0 (skip) + * edges are still emitted with bS=0 — daedalus's + * partitioner filters them out. */ + #define EMIT_EDGE(orient_, plane_, edge_idx_, bS_, qp_) do { \ + if (n_edges >= 16) break; \ + struct daedalus_decoder_edge *e = &mb_edges[n_edges++]; \ + e->mb_x = (uint16_t) mx; \ + e->mb_y = (uint16_t) my; \ + e->edge_idx = (uint8_t) (edge_idx_); \ + e->orient = (uint8_t) (orient_); \ + e->plane = (uint8_t) (plane_); \ + e->bS = (uint8_t) (bS_); \ + e->alpha = alpha_table[(qp_) + slice_a]; \ + e->beta = beta_table [(qp_) + slice_b]; \ + const int8_t *tc = tc0_table[(qp_) + slice_a]; \ + e->tc0[0] = tc[(bS_) <= 3 ? (bS_) : 0]; \ + e->tc0[1] = tc[(bS_) <= 3 ? (bS_) : 0]; \ + e->tc0[2] = tc[(bS_) <= 3 ? (bS_) : 0]; \ + e->tc0[3] = tc[(bS_) <= 3 ? (bS_) : 0]; \ + } while (0) + + /* Luma V edges: 4 at col 0, 4, 8, 12. Internal + * edges at 4/12 are skipped for 8x8 DCT MBs. */ + EMIT_EDGE(0, 0, 0, (mx > 0) ? 
4 : 0, qp_avg_left); + if (!cap->transform_8x8) EMIT_EDGE(0, 0, 1, 3, qp_self); + EMIT_EDGE(0, 0, 2, 3, qp_self); + if (!cap->transform_8x8) EMIT_EDGE(0, 0, 3, 3, qp_self); + + /* Luma H edges: 4 at row 0, 4, 8, 12. */ + EMIT_EDGE(1, 0, 0, (my > 0) ? 4 : 0, qp_avg_top); + if (!cap->transform_8x8) EMIT_EDGE(1, 0, 1, 3, qp_self); + EMIT_EDGE(1, 0, 2, 3, qp_self); + if (!cap->transform_8x8) EMIT_EDGE(1, 0, 3, 3, qp_self); + + /* Chroma V edges: 2 per plane (Cb=1, Cr=2). */ + EMIT_EDGE(0, 1, 0, (mx > 0) ? 4 : 0, qpc_avg_left); + if (!cap->transform_8x8) EMIT_EDGE(0, 1, 1, 3, qpc_self); + EMIT_EDGE(0, 2, 0, (mx > 0) ? 4 : 0, qpc_avg_left); + if (!cap->transform_8x8) EMIT_EDGE(0, 2, 1, 3, qpc_self); + + /* Chroma H edges. */ + EMIT_EDGE(1, 1, 0, (my > 0) ? 4 : 0, qpc_avg_top); + if (!cap->transform_8x8) EMIT_EDGE(1, 1, 1, 3, qpc_self); + EMIT_EDGE(1, 2, 0, (my > 0) ? 4 : 0, qpc_avg_top); + if (!cap->transform_8x8) EMIT_EDGE(1, 2, 1, 3, qpc_self); + + #undef EMIT_EDGE } #endif @@ -639,8 +849,20 @@ int main(int argc, char **argv) mb.transform_8x8 = 0; mb.coeffs = mb_coeffs; mb.predicted = mb_pred; - mb.edges = NULL; - mb.n_edges = 0; + mb.edges = (n_edges > 0) ? 
mb_edges : NULL; + mb.n_edges = (uint8_t) n_edges; + if (mx == 0 && my == 0 && getenv("DAEDALUS_DUMP_EDGES")) { + fprintf(stderr, "MB(0,0): n_edges=%d, deblock_off=%d, intra=%d\n", + n_edges, deblock_off, cap->mb_type_intra); + for (int e = 0; e < n_edges; e++) { + fprintf(stderr, " edge[%d]: orient=%d plane=%d edge_idx=%d bS=%d alpha=%d beta=%d tc0=%d,%d,%d,%d\n", + e, mb_edges[e].orient, mb_edges[e].plane, + mb_edges[e].edge_idx, mb_edges[e].bS, + mb_edges[e].alpha, mb_edges[e].beta, + mb_edges[e].tc0[0], mb_edges[e].tc0[1], + mb_edges[e].tc0[2], mb_edges[e].tc0[3]); + } + } if (daedalus_decoder_append_mb(dec, &mb) != 0) { fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my); rc = 3; goto cleanup; @@ -661,46 +883,10 @@ int main(int argc, char **argv) out_uv_ref, (size_t) coded_w, coded_w, coded_h); -#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS - /* Diagnostic: for each real-coeffs MB, compare the callback's - * pre_deblock snapshot against AVFrame at the same position. - * If they differ, h->cur_pic.f at callback time isn't the - * eventual AVFrame buffer (or deblock ran despite - * skip_loop_filter=AVDISCARD_ALL). 
*/ - int snap_mismatches = 0; - int first_snap_mismatch_mb = -1; - for (int my2 = 0; my2 < mb_h; my2++) { - for (int mx2 = 0; mx2 < mb_w; mx2++) { - const int idx2 = my2 * mb_w + mx2; - if (!inspect_st.captures[idx2].valid) continue; - const uint8_t *avf_mb = fr->data[0] - + (ptrdiff_t) my2 * 16 * fr->linesize[0] - + mx2 * 16; - for (int r = 0; r < 16; r++) { - for (int c = 0; c < 16; c++) { - if (avf_mb[r * fr->linesize[0] + c] != - inspect_st.captures[idx2].pre_deblock_snap[r * 16 + c]) { - if (first_snap_mismatch_mb < 0) - first_snap_mismatch_mb = idx2; - snap_mismatches++; - } - } - } - } - } - if (snap_mismatches > 0) { - const int mmb_x = first_snap_mismatch_mb % mb_w; - const int mmb_y = first_snap_mismatch_mb / mb_w; - fprintf(stderr, - " DIAG: callback's pre_deblock differs from AVFrame in " - "%d bytes across real-coeffs MBs; first mismatch at MB(%d, %d)\n", - snap_mismatches, mmb_x, mmb_y); - rc = 4; - } - /* Silent on match — the invariant must hold for the - * P-recovery math to be valid; we'd want to know if it - * ever broke, but no need to confirm it every frame. */ -#endif + /* (PR-A3b's pre_deblock vs AVFrame DIAG check is removed in + * PR-A6: with libavcodec's deblock now ENABLED, AVFrame is + * post-deblock and intentionally differs from the per-MB + * pre_deblock snapshots taken in the callback.) */ /* Byte-exact compare + first-diff diagnostic. */ size_t y_diffs = 0, uv_diffs = 0;