/*
 * Standalone bit-exact C reference for H.264 luma qpel 8x8 mc02
 * (vertical half-pel, "put" variant). Mirror of mc20 with rows
 * and columns transposed. 6-tap filter applied vertically:
 *
 *   dst[r,c] = clip255( (s[r-2,c] - 5*s[r-1,c] + 20*s[r,c]
 *                        + 20*s[r+1,c] - 5*s[r+2,c] + s[r+3,c]
 *                        + 16) >> 5 )
 *
 * Mirrors FFmpeg `ff_put_h264_qpel8_mc02_neon` (in
 * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
 * line 678, which tail-calls put_h264_qpel8_v_lowpass_neon).
 *
 * Signature:
 *   void(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
 *
 * Both dst and src use the SAME stride. src points at row 0 col 0
 * of the output block; the filter reads rows -2..+3 (2 rows of top
 * context, 3 rows of bottom context). Caller must guarantee the
 * source buffer has those rows available (FFmpeg's edge-emulated
 * buffer handles this at the frame boundary; matches the contract
 * documented for mc20).
 *
 * License: LGPL-2.1-or-later.
 */

#include <stddef.h>
#include <stdint.h>

/* Clamp an int to the unsigned 8-bit range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/*
 * Vertical half-pel interpolation of one 8x8 luma block ("put": dst is
 * overwritten, not averaged).
 *
 * dst    - top-left of the 8x8 output block; rows are `stride` bytes apart.
 * src    - top-left of the co-located source block; rows -2..+10 relative
 *          to this pointer are read, so the caller must make them valid.
 * stride - byte distance between consecutive rows, shared by dst and src.
 */
void daedalus_put_h264_qpel8_mc02_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            /* Column c at output row r; taps are addressed relative to it. */
            const uint8_t *p = src + r * stride + c;

            int s_m2 = p[-2 * stride];
            int s_m1 = p[-1 * stride];
            int s_0  = p[0];
            int s_p1 = p[1 * stride];
            int s_p2 = p[2 * stride];
            int s_p3 = p[3 * stride];

            /* 6-tap (1, -5, 20, 20, -5, 1) kernel plus rounding bias. */
            int v = s_m2 - 5 * s_m1 + 20 * s_0 + 20 * s_p1
                  - 5 * s_p2 + s_p3 + 16;

            /*
             * The sum can be negative (e.g. -5*255 + 16). Right-shifting a
             * negative int is implementation-defined, so clamp those to 0
             * before shifting; with an arithmetic shift the result would
             * have been clipped to 0 anyway, so output is unchanged.
             */
            dst[r * stride + c] = (uint8_t) (v < 0 ? 0 : clip_u8(v >> 5));
        }
    }
}