/*
 * Standalone bit-exact C reference for VP9 4-tap inner loop filter
 * (horizontal, 8-pixel edge), transcribed from FFmpeg's
 * libavcodec/vp9dsp_template.c loop_filter() function (vendored at
 * external/ffmpeg-snapshot/, commit f46e514). 8-bit pixels only.
 *
 * Provided as a self-contained translation unit so the harness
 * doesn't need to wrestle FFmpeg's BIT_DEPTH-templated macro
 * expansion. Cross-checked against the vendored reference at
 * runtime (see bench_neon_lpf.c::correctness_check()).
 *
 * License: LGPL-2.1-or-later (matches upstream reference).
 *
 * Spec source: VP9 specification, section 8.8.1 - Loop filter process.
 */

#include <stddef.h> /* ptrdiff_t */
#include <stdint.h> /* uint8_t */

/* Absolute value of an int (callers only pass pixel differences). */
static inline int abs_i(int x)
{
    return x < 0 ? -x : x;
}

/*
 * Clamp to the signed 8-bit range [-2^7, 2^7 - 1] = [-128, 127].
 * The "_7" suffix is the power-of-two exponent, mirroring FFmpeg's
 * av_clip_intp2(x, 7).
 */
static inline int clip_intp2_7(int x)
{
    if (x > 127) {
        return 127;
    }
    if (x < -128) {
        return -128;
    }
    return x;
}

/* Clamp to the 8-bit pixel range [0, 255]. */
static inline uint8_t clip_u8(int x)
{
    if (x > 255) {
        return 255;
    }
    if (x < 0) {
        return 0;
    }
    return (uint8_t)x;
}

/* Smaller of two ints. */
static inline int min_i(int a, int b)
{
    return a < b ? a : b;
}

/*
 * Horizontal-direction 4-tap inner loop filter, 8-pixel edge.
 *
 *   stridea = stride  (move down rows between iterations)
 *   strideb = 1       (neighborhood spans columns -4..+3)
 *
 * Parameters:
 *   dst    - points at q0 of the first row; each row reads dst[-4..+3]
 *            and may write dst[-2..+1].
 *   stride - distance, in pixels (bytes), between consecutive rows.
 *   E      - edge limit, tested against 2*|p0-q0| + (|p1-q1| >> 1).
 *   I      - interior limit, tested against every neighbouring-pixel
 *            difference on each side of the edge.
 *   H      - high-edge-variance threshold, tested against |p1-p0| and
 *            |q1-q0|.
 *
 * Each of the 8 iterations:
 *   - reads neighborhood [p3 p2 p1 p0 | q0 q1 q2 q3]
 *   - tests filter mask `fm` - skip iteration if false
 *   - tests high-edge-variance `hev` - selects 2-pixel vs 4-pixel
 *     update path
 *
 * Matches ff_vp9_loop_filter_h_4_8_neon byte-for-byte on 8-bit input.
 *
 * Note: `f` may be negative, so the `>> 3` / `>> 1` shifts rely on
 * arithmetic right shift of negative ints (implementation-defined in
 * ISO C).
 */
void daedalus_vp9_loop_filter_h_4_8_ref(uint8_t *dst, ptrdiff_t stride,
                                        int E, int I, int H)
{
    for (int i = 0; i < 8; i++, dst += stride) {
        const int p3 = dst[-4], p2 = dst[-3], p1 = dst[-2], p0 = dst[-1];
        const int q0 = dst[0],  q1 = dst[1],  q2 = dst[2],  q3 = dst[3];

        /* Filter mask: all interior steps within I, edge step within E. */
        const int fm = abs_i(p3 - p2) <= I && abs_i(p2 - p1) <= I &&
                       abs_i(p1 - p0) <= I && abs_i(q1 - q0) <= I &&
                       abs_i(q2 - q1) <= I && abs_i(q3 - q2) <= I &&
                       abs_i(p0 - q0) * 2 + (abs_i(p1 - q1) >> 1) <= E;
        if (!fm) {
            continue;
        }

        const int hev = abs_i(p1 - p0) > H || abs_i(q1 - q0) > H;

        /*
         * With high edge variance the outer-tap term clip(p1 - q1) feeds
         * into the filter value; otherwise that term is zero. Both cases
         * then share the same p0/q0 adjustment.
         */
        int f = hev ? clip_intp2_7(p1 - q1) : 0;
        f = clip_intp2_7(3 * (q0 - p0) + f);

        const int f1 = min_i(f + 4, 127) >> 3;
        const int f2 = min_i(f + 3, 127) >> 3;

        dst[-1] = clip_u8(p0 + f2);
        dst[0]  = clip_u8(q0 - f1);

        /* Low variance: also nudge p1/q1 by half of f1, rounded. */
        if (!hev) {
            const int fp = (f1 + 1) >> 1;

            dst[-2] = clip_u8(p1 + fp);
            dst[1]  = clip_u8(q1 - fp);
        }
    }
}