/*
 * Standalone bit-exact C references for the avg_ qpel anchors --
 * the biprediction "average against existing dst" form of mc20,
 * mc02, mc22.  Used in B-slices where two qpel-interpolated samples
 * (one from list0, one from list1) are averaged per H.264 §8.4.2.3.
 *
 * Each kernel computes the same half-pel formula as the put_ form,
 * then averages with dst[r,c] via L2 ((dst + put_val + 1) >> 1).
 * The dst buffer carries the list0 prediction on entry; the avg_
 * call adds the list1 contribution.
 *
 * Mirrors FFmpeg's `ff_avg_h264_qpel8_{mc20,mc02,mc22}_neon` in
 * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
 * (same `\type=avg` expansion as the put_ functions).
 *
 * License: LGPL-2.1-or-later.
 */

#include <stddef.h> /* ptrdiff_t */
#include <stdint.h> /* uint8_t, int16_t */

/* Clamp v to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/* Rounding average of two 8-bit samples: (a + b + 1) >> 1. */
static inline uint8_t avg2(uint8_t a, uint8_t b)
{
    return (uint8_t)((a + b + 1) >> 1);
}

/*
 * Same per-cell helpers as the diag/quarter-axis refs.  Duplicated
 * here (rather than extern'd) so this TU compiles standalone.
 */

/*
 * Horizontal half-pel sample at (r, c): 6-tap filter (1, -5, 20, 20, -5, 1)
 * over s[r][c-2 .. c+3], rounded with +16, shifted right by 5 and clipped
 * to 8 bits.
 *
 * The tap sum can be negative.  `>>` on a negative int is
 * implementation-defined in C; with the usual arithmetic shift the result
 * stays negative and clip_u8() turns it into 0.
 */
static inline uint8_t hpel_h(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    const uint8_t *row = s + r * stride;
    int v = (int)row[c - 2]
          - 5 * (int)row[c - 1]
          + 20 * (int)row[c]
          + 20 * (int)row[c + 1]
          - 5 * (int)row[c + 2]
          + (int)row[c + 3]
          + 16;

    return (uint8_t)clip_u8(v >> 5);
}

/*
 * Vertical half-pel sample at (r, c): the same 6-tap filter as hpel_h(),
 * applied down the column over s[r-2 .. r+3][c].
 */
static inline uint8_t hpel_v(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    int v = (int)s[(r - 2) * stride + c]
          - 5 * (int)s[(r - 1) * stride + c]
          + 20 * (int)s[r * stride + c]
          + 20 * (int)s[(r + 1) * stride + c]
          - 5 * (int)s[(r + 2) * stride + c]
          + (int)s[(r + 3) * stride + c]
          + 16;

    return (uint8_t)clip_u8(v >> 5);
}

/*
 * avg_ mc20: average the 8x8 block at dst, in place, with the horizontal
 * half-pel prediction taken from src.
 *
 * dst and src are addressed with the same stride.  Reads src rows 0..7,
 * columns -2..10; writes dst rows 0..7, columns 0..7.
 */
void daedalus_avg_h264_qpel8_mc20_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            dst[r * stride + c] = avg2(dst[r * stride + c],
                                       hpel_h(src, r, c, stride));
        }
    }
}

/*
 * avg_ mc02: average the 8x8 block at dst, in place, with the vertical
 * half-pel prediction taken from src.
 *
 * dst and src are addressed with the same stride.  Reads src rows -2..10,
 * columns 0..7; writes dst rows 0..7, columns 0..7.
 */
void daedalus_avg_h264_qpel8_mc02_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            dst[r * stride + c] = avg2(dst[r * stride + c],
                                       hpel_v(src, r, c, stride));
        }
    }
}

/*
 * avg_ mc22: average the 8x8 block at dst, in place, with the centre
 * (horizontal + vertical) half-pel prediction taken from src.
 *
 * dst and src are addressed with the same stride.  Reads src rows -2..10,
 * columns -2..10; writes dst rows 0..7, columns 0..7.
 */
void daedalus_avg_h264_qpel8_mc22_ref(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    /* Per-cell mc22: same 13-row int16 tmp[] computation as the
     * put_ reference, then L2 with dst. */
    int16_t tmp[13][8];

    /*
     * Pass 1: unrounded horizontal 6-tap sums for source rows -2..10.
     * For 8-bit input the sum lies in [-2550, 10710], so it fits int16_t.
     */
    for (int rr = 0; rr < 13; rr++) {
        int src_row = rr - 2;
        const uint8_t *s = src + src_row * stride;

        for (int c = 0; c < 8; c++) {
            int v = (int)s[c - 2]
                  - 5 * (int)s[c - 1]
                  + 20 * (int)s[c]
                  + 20 * (int)s[c + 1]
                  - 5 * (int)s[c + 2]
                  + (int)s[c + 3];

            tmp[rr][c] = (int16_t)v;
        }
    }

    /*
     * Pass 2: vertical 6-tap over the intermediates.  Both passes carry a
     * gain of 32, hence the single +512 rounding term and >> 10.
     */
    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            int v = tmp[r + 0][c]
                  - 5 * tmp[r + 1][c]
                  + 20 * tmp[r + 2][c]
                  + 20 * tmp[r + 3][c]
                  - 5 * tmp[r + 4][c]
                  + tmp[r + 5][c]
                  + 512;
            uint8_t p = (uint8_t)clip_u8(v >> 10);

            dst[r * stride + c] = avg2(dst[r * stride + c], p);
        }
    }
}