/*
 * Standalone bit-exact C references for the 12 remaining avg_
 * biprediction qpel positions (B-slice list0 + list1 averaging):
 *   4 quarter-axis: avg_mc{10,30,01,03}
 *   8 diagonals   : avg_mc{11,12,13,21,23,31,32,33}
 *
 * Each is the put_ formula (per H.264 §8.4.2.2.1 / Table 8-4) with
 * a final L2 average against the existing dst contents per §8.4.2.3.1.
 * Caller pre-loads dst with the list0 prediction; the avg_ call
 * folds in list1.
 *
 * Mirrors FFmpeg's `ff_avg_h264_qpel8_mc{XY}_neon` (in
 * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
 * — same `\type=avg` expansion as the put_ functions).
 *
 * Memory contract shared by every entry point below: the 8x8 block is
 * addressed as src[r*stride + c] / dst[r*stride + c] with r, c in 0..7.
 * The 6-tap filters read up to 2 samples before and 3 samples after the
 * filtered position, and some positions filter at r+1 or c+1, so src
 * must be readable for rows -2..10 and columns -2..10.
 *
 * License: LGPL-2.1-or-later.
 */
#include <stddef.h>
#include <stdint.h>

/* Saturate v to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/*
 * Scale a biased filter sum down by `shift` bits and saturate to 8 bits.
 *
 * Negative sums are clamped to 0 before shifting: right-shifting a
 * negative int is implementation-defined in C, and every negative sum
 * clips to 0 anyway, so this keeps the result identical on
 * arithmetic-shift targets while staying portable.
 */
static inline uint8_t shift_clip_u8(int v, int shift)
{
    if (v < 0) {
        return 0;
    }
    return (uint8_t) clip_u8(v >> shift);
}

/* Rounding-up average of two samples: (a + b + 1) >> 1. */
static inline uint8_t avg2(uint8_t a, uint8_t b)
{
    return (uint8_t)((a + b + 1) >> 1);
}

/*
 * Horizontal half-sample between columns c and c+1 of row r.
 * Taps (1, -5, 20, 20, -5, 1) over columns c-2..c+3, rounded with
 * +16 and >> 5, then clipped to 8 bits.
 */
static inline uint8_t hpel_h(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    const uint8_t *row = s + r * stride;
    int v =       (int) row[c - 2]
          -  5 * (int) row[c - 1]
          + 20 * (int) row[c]
          + 20 * (int) row[c + 1]
          -  5 * (int) row[c + 2]
          +      (int) row[c + 3]
          + 16;
    return shift_clip_u8(v, 5);
}

/*
 * Vertical half-sample between rows r and r+1 of column c.
 * Same taps as hpel_h, applied over rows r-2..r+3.
 */
static inline uint8_t hpel_v(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    int v =       (int) s[(r - 2) * stride + c]
          -  5 * (int) s[(r - 1) * stride + c]
          + 20 * (int) s[r * stride + c]
          + 20 * (int) s[(r + 1) * stride + c]
          -  5 * (int) s[(r + 2) * stride + c]
          +      (int) s[(r + 3) * stride + c]
          + 16;
    return shift_clip_u8(v, 5);
}

/*
 * Centre half-sample (half a sample right of and below (r, c)).
 * The horizontal filter is applied, without rounding or clipping, to
 * the six rows r-2..r+3; the vertical filter then runs over those six
 * intermediates and the result is rounded once with +512 and >> 10.
 */
static inline uint8_t hpel_hv(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    int t[6];
    for (int i = 0; i < 6; i++) {
        const uint8_t *row = s + (r - 2 + i) * stride;
        t[i] =       (int) row[c - 2]
             -  5 * (int) row[c - 1]
             + 20 * (int) row[c]
             + 20 * (int) row[c + 1]
             -  5 * (int) row[c + 2]
             +      (int) row[c + 3];
    }
    int v = t[0] - 5 * t[1] + 20 * t[2] + 20 * t[3] - 5 * t[4] + t[5] + 512;
    return shift_clip_u8(v, 10);
}

/*
 * Quarter-axis variants: half-pel + L2 with integer source, then
 * L2 again with dst.
 *
 * A_EXPR and INT_EXPR are evaluated once per output sample inside the
 * generated function and may refer to its `src`, `stride`, `r` (row)
 * and `c` (column) identifiers.
 */
#define DEFINE_AVG_QUARTER(NAME, A_EXPR, INT_EXPR)                        \
void daedalus_avg_h264_qpel8_ ## NAME ## _ref(uint8_t *dst,               \
        const uint8_t *src, ptrdiff_t stride)                             \
{                                                                         \
    for (int r = 0; r < 8; r++) {                                         \
        for (int c = 0; c < 8; c++) {                                     \
            uint8_t a = (A_EXPR);                                         \
            uint8_t p = avg2(a, (INT_EXPR));                              \
            dst[r * stride + c] = avg2(dst[r * stride + c], p);           \
        }                                                                 \
    }                                                                     \
}

DEFINE_AVG_QUARTER(mc10, hpel_h(src, r, c, stride), src[r * stride + c])
DEFINE_AVG_QUARTER(mc30, hpel_h(src, r, c, stride), src[r * stride + c + 1])
DEFINE_AVG_QUARTER(mc01, hpel_v(src, r, c, stride), src[r * stride + c])
DEFINE_AVG_QUARTER(mc03, hpel_v(src, r, c, stride), src[(r + 1) * stride + c])

#undef DEFINE_AVG_QUARTER

/*
 * Diagonal variants: avg of two half-pels, then L2 with dst.
 *
 * A_EXPR and B_EXPR follow the same rules as in the quarter-axis macro:
 * they are evaluated once per sample and may use `src`, `stride`, `r`
 * and `c` from the generated function.
 */
#define DEFINE_AVG_DIAG(NAME, A_EXPR, B_EXPR)                             \
void daedalus_avg_h264_qpel8_ ## NAME ## _ref(uint8_t *dst,               \
        const uint8_t *src, ptrdiff_t stride)                             \
{                                                                         \
    for (int r = 0; r < 8; r++) {                                         \
        for (int c = 0; c < 8; c++) {                                     \
            uint8_t a = (A_EXPR);                                         \
            uint8_t b = (B_EXPR);                                         \
            uint8_t p = avg2(a, b);                                       \
            dst[r * stride + c] = avg2(dst[r * stride + c], p);           \
        }                                                                 \
    }                                                                     \
}

DEFINE_AVG_DIAG(mc11, hpel_h(src, r, c, stride),     hpel_v(src, r, c, stride))
DEFINE_AVG_DIAG(mc12, hpel_hv(src, r, c, stride),    hpel_v(src, r, c, stride))
DEFINE_AVG_DIAG(mc13, hpel_h(src, r + 1, c, stride), hpel_v(src, r, c, stride))
DEFINE_AVG_DIAG(mc21, hpel_hv(src, r, c, stride),    hpel_h(src, r, c, stride))
DEFINE_AVG_DIAG(mc23, hpel_hv(src, r, c, stride),    hpel_h(src, r + 1, c, stride))
DEFINE_AVG_DIAG(mc31, hpel_h(src, r, c, stride),     hpel_v(src, r, c + 1, stride))
DEFINE_AVG_DIAG(mc32, hpel_hv(src, r, c, stride),    hpel_v(src, r, c + 1, stride))
DEFINE_AVG_DIAG(mc33, hpel_h(src, r + 1, c, stride), hpel_v(src, r, c + 1, stride))

#undef DEFINE_AVG_DIAG