/*
 * Standalone bit-exact C references for the four single-axis quarter-
 * pel luma qpel positions (H.264 §8.4.2.2.1, "put" variants).  Each
 * is a half-pel lowpass clipped to u8 followed by an L2 rounded-average
 * with an integer-position source pixel.
 *
 *   mc10 ("a" pos, ¼ horiz): a = clip255(mc20(s)); dst = (a + s[r,c]   + 1) >> 1
 *   mc30 ("c" pos, ¾ horiz): a = clip255(mc20(s)); dst = (a + s[r,c+1] + 1) >> 1
 *   mc01 ("d" pos, ¼ vert ): a = clip255(mc02(s)); dst = (a + s[r,c]   + 1) >> 1
 *   mc03 ("n" pos, ¾ vert ): a = clip255(mc02(s)); dst = (a + s[r+1,c] + 1) >> 1
 *
 * Mirror FFmpeg's `ff_put_h264_qpel8_mc{10,30,01,03}_neon` (in
 * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
 * lines 587, 603, 611, 729 — each tail-calls the corresponding
 * lowpass_l2 helper).
 *
 * Same single-stride convention as mc20/mc02 — dst and src share the
 * same stride; src (the caller's base pointer with any offset already
 * applied) points at row 0 col 0 of the output block, with appropriate
 * edge context already in-buffer.
 *
 * License: LGPL-2.1-or-later.
 */

#include <stddef.h>
#include <stdint.h>

/* Clamp v to the unsigned 8-bit range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/*
 * 6-tap (1, -5, 20, 20, -5, 1) half-pel lowpass, rounded (+16), scaled
 * (>> 5) and clipped to u8.
 *
 * p    points at the integer sample carrying the first "20" tap.
 * step is the distance in bytes between consecutive taps: 1 filters
 *      along a row, the row stride filters along a column.
 *
 * Reads p[-2*step] .. p[3*step].
 */
static inline uint8_t lowpass6_u8(const uint8_t *p, ptrdiff_t step)
{
    int v =        (int) p[-2 * step]
            -  5 * (int) p[-1 * step]
            + 20 * (int) p[0]
            + 20 * (int) p[step]
            -  5 * (int) p[2 * step]
            +      (int) p[3 * step]
            + 16;

    /*
     * A negative sum always clips to 0.  Deciding that before the shift
     * keeps the result independent of how the implementation right-shifts
     * negative ints (implementation-defined in C), while staying bit-exact
     * with the arithmetic-shift behaviour.
     */
    return (uint8_t) (v < 0 ? 0 : clip_u8(v >> 5));
}

/* Compute one horizontal half-pel pixel at (r, c) — same as mc20. */
static inline uint8_t hpel_h(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    return lowpass6_u8(s + r * stride + c, 1);
}

/* Compute one vertical half-pel pixel at (r, c) — same as mc02. */
static inline uint8_t hpel_v(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    return lowpass6_u8(s + r * stride + c, stride);
}

/* L2 step: rounded average of a half-pel value and an integer sample. */
static inline uint8_t rnd_avg_u8(int a, int b)
{
    return (uint8_t) ((a + b + 1) >> 1);
}

/**
 * mc10 ("a", ¼ horizontal): average of the horizontal half-pel sample and
 * the integer sample to its left, s[r, c].
 *
 * @param dst    top-left of the 8x8 output block
 * @param src    top-left of the matching 8x8 source block; columns -2..+10
 *               of each of the 8 rows are read
 * @param stride row pitch in bytes, shared by dst and src
 *
 * dst must not overlap the samples read from src: neighbouring outputs
 * re-read the same source samples.
 */
void daedalus_put_h264_qpel8_mc10_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            uint8_t a = hpel_h(src, r, c, stride);
            dst[r * stride + c] = rnd_avg_u8(a, src[r * stride + c]);
        }
    }
}

/**
 * mc30 ("c", ¾ horizontal): average of the horizontal half-pel sample and
 * the integer sample to its right, s[r, c+1].
 *
 * @param dst    top-left of the 8x8 output block
 * @param src    top-left of the matching 8x8 source block; columns -2..+10
 *               of each of the 8 rows are read
 * @param stride row pitch in bytes, shared by dst and src
 *
 * dst must not overlap the samples read from src.
 */
void daedalus_put_h264_qpel8_mc30_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            uint8_t a = hpel_h(src, r, c, stride);
            dst[r * stride + c] = rnd_avg_u8(a, src[r * stride + c + 1]);
        }
    }
}

/**
 * mc01 ("d", ¼ vertical): average of the vertical half-pel sample and the
 * integer sample above it, s[r, c].
 *
 * @param dst    top-left of the 8x8 output block
 * @param src    top-left of the matching 8x8 source block; rows -2..+10
 *               of each of the 8 columns are read
 * @param stride row pitch in bytes, shared by dst and src
 *
 * dst must not overlap the samples read from src.
 */
void daedalus_put_h264_qpel8_mc01_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            uint8_t a = hpel_v(src, r, c, stride);
            dst[r * stride + c] = rnd_avg_u8(a, src[r * stride + c]);
        }
    }
}

/**
 * mc03 ("n", ¾ vertical): average of the vertical half-pel sample and the
 * integer sample below it, s[r+1, c].
 *
 * @param dst    top-left of the 8x8 output block
 * @param src    top-left of the matching 8x8 source block; rows -2..+10
 *               of each of the 8 columns are read
 * @param stride row pitch in bytes, shared by dst and src
 *
 * dst must not overlap the samples read from src.
 */
void daedalus_put_h264_qpel8_mc03_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            uint8_t a = hpel_v(src, r, c, stride);
            dst[r * stride + c] = rnd_avg_u8(a, src[(r + 1) * stride + c]);
        }
    }
}