/* daedalus-fourier/tests/h264_qpel8_avg_rest_ref.c */

/*
 * Standalone bit-exact C references for the 12 remaining avg_
 * biprediction qpel positions (B-slice list0 + list1 averaging):
 *   4 quarter-axis: avg_mc{10,30,01,03}
 *   8 diagonals  : avg_mc{11,12,13,21,23,31,32,33}
 *
 * Each is the put_ formula (per H.264 §8.4.2.2.1 / Table 8-4) followed
 * by a final "L2" step, i.e. the rounded two-sample average
 * (a + b + 1) >> 1, against the existing dst contents per §8.4.2.3.1.
 * The caller pre-loads dst with the list0 prediction; the avg_ call
 * folds in list1.
 *
 * Mirrors FFmpeg's `ff_avg_h264_qpel8_mc{XY}_neon` in
 * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
 * (the same macro expansion as the put_ functions, with `\type=avg`).
 *
 * License: LGPL-2.1-or-later.
 */
#include <stdint.h>
#include <stddef.h>

/* Saturate a signed intermediate to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    if (v > 255) {
        return 255;
    }
    return v;
}
/* Rounded average of two 8-bit samples: (a + b + 1) >> 1, so ties round up. */
static inline uint8_t avg2(uint8_t a, uint8_t b)
{
    const unsigned sum = (unsigned) a + (unsigned) b + 1u;

    return (uint8_t) (sum >> 1);
}

/*
 * Horizontal half-sample on row r, midway between columns c and c+1.
 *
 * Applies the 6-tap kernel (1, -5, 20, 20, -5, 1) to columns c-2 .. c+3,
 * adds the rounding term 16, shifts right by 5 and saturates to 8 bits.
 * The caller must therefore provide two readable samples to the left of
 * column c and three to the right.
 *
 * The weighted sum can be negative; the result then relies on `>>`
 * keeping the sign (arithmetic shift) so that clip_u8() maps it to 0.
 */
static inline uint8_t hpel_h(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    const uint8_t *p = s + (r*stride + c);
    const int outer  = (int) p[-2] + (int) p[3];
    const int inner  = (int) p[-1] + (int) p[2];
    const int centre = (int) p[0]  + (int) p[1];
    const int sum    = outer - 5 * inner + 20 * centre + 16;

    return (uint8_t) clip_u8(sum >> 5);
}
/*
 * Vertical half-sample in column c, midway between rows r and r+1.
 *
 * Applies the 6-tap kernel (1, -5, 20, 20, -5, 1) to rows r-2 .. r+3,
 * adds the rounding term 16, shifts right by 5 and saturates to 8 bits.
 * The caller must therefore provide two readable rows above row r and
 * three below.
 *
 * The weighted sum can be negative; the result then relies on `>>`
 * keeping the sign (arithmetic shift) so that clip_u8() maps it to 0.
 */
static inline uint8_t hpel_v(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    const uint8_t *p = s + (r*stride + c);
    const int outer  = (int) p[-2*stride] + (int) p[3*stride];
    const int inner  = (int) p[-1*stride] + (int) p[2*stride];
    const int centre = (int) p[0]         + (int) p[stride];
    const int sum    = outer - 5 * inner + 20 * centre + 16;

    return (uint8_t) clip_u8(sum >> 5);
}
/*
 * Centre half-sample: half a sample right of column c and half a sample
 * below row r.
 *
 * The 6-tap kernel (1, -5, 20, 20, -5, 1) is first run horizontally over
 * columns c-2 .. c+3 on each of rows r-2 .. r+3 without rounding or
 * clipping, then the same kernel is run vertically over those six
 * unrounded sums. A single rounding term of 512 and a shift by 10 are
 * applied at the end, followed by saturation to 8 bits.
 *
 * The weighted sum can be negative; the result then relies on `>>`
 * keeping the sign (arithmetic shift) so that clip_u8() maps it to 0.
 */
static inline uint8_t hpel_hv(const uint8_t *s, int r, int c, ptrdiff_t stride)
{
    static const int taps[6] = { 1, -5, 20, 20, -5, 1 };
    int acc = 512;

    for (int k = 0; k < 6; k++) {
        const uint8_t *row = s + ((r - 2 + k)*stride + c);
        int h = 0;

        for (int j = 0; j < 6; j++) {
            h += taps[j] * (int) row[j - 2];
        }
        acc += taps[k] * h;
    }
    return (uint8_t) clip_u8(acc >> 10);
}

/*
 * Quarter-sample positions that lie on a single axis.
 *
 * For every pixel of the 8x8 block the prediction is the rounded average
 * of one half-sample (HPEL_FN evaluated at the current row/column) and
 * the integer sample at offset (DROW, DCOL) from the current pixel. That
 * prediction is then rounded-averaged into the value already stored in
 * dst. dst and src are both addressed with the same stride.
 *
 *   NAME   half-sample   integer sample
 *   mc10   horizontal    current pixel
 *   mc30   horizontal    one column to the right
 *   mc01   vertical      current pixel
 *   mc03   vertical      one row down
 */
#define DEFINE_AVG_QUARTER(NAME, HPEL_FN, DROW, DCOL)                          \
void daedalus_avg_h264_qpel8_ ## NAME ## _ref(uint8_t *dst,                    \
    const uint8_t *src, ptrdiff_t stride)                                      \
{                                                                              \
    for (int y = 0; y < 8; y++) {                                              \
        for (int x = 0; x < 8; x++) {                                          \
            const uint8_t half = HPEL_FN(src, y, x, stride);                   \
            const uint8_t full = src[(y + (DROW))*stride + x + (DCOL)];        \
            uint8_t *out = &dst[y*stride + x];                                 \
            *out = avg2(*out, avg2(half, full));                               \
        }                                                                      \
    }                                                                          \
}

DEFINE_AVG_QUARTER(mc10, hpel_h, 0, 0)
DEFINE_AVG_QUARTER(mc30, hpel_h, 0, 1)
DEFINE_AVG_QUARTER(mc01, hpel_v, 0, 0)
DEFINE_AVG_QUARTER(mc03, hpel_v, 1, 0)

#undef DEFINE_AVG_QUARTER

/*
 * Diagonal quarter-sample positions.
 *
 * For every pixel of the 8x8 block the prediction is the rounded average
 * of two half-samples: FN_A evaluated at (row + AR, col + AC) and FN_B
 * evaluated at (row + BR, col + BC). That prediction is then
 * rounded-averaged into the value already stored in dst. dst and src are
 * both addressed with the same stride.
 *
 * Offsets of 1 select the half-sample belonging to the next row
 * (horizontal filter) or the next column (vertical filter), which is how
 * the "3" quarter positions reach the far side of the sample cell.
 */
#define DEFINE_AVG_DIAG(NAME, FN_A, AR, AC, FN_B, BR, BC)                      \
void daedalus_avg_h264_qpel8_ ## NAME ## _ref(uint8_t *dst,                    \
    const uint8_t *src, ptrdiff_t stride)                                      \
{                                                                              \
    for (int y = 0; y < 8; y++) {                                              \
        for (int x = 0; x < 8; x++) {                                          \
            const uint8_t first  = FN_A(src, y + (AR), x + (AC), stride);      \
            const uint8_t second = FN_B(src, y + (BR), x + (BC), stride);      \
            uint8_t *out = &dst[y*stride + x];                                 \
            *out = avg2(*out, avg2(first, second));                            \
        }                                                                      \
    }                                                                          \
}

/*              NAME  first half-sample   second half-sample */
DEFINE_AVG_DIAG(mc11, hpel_h,  0, 0,      hpel_v, 0, 0)
DEFINE_AVG_DIAG(mc12, hpel_hv, 0, 0,      hpel_v, 0, 0)
DEFINE_AVG_DIAG(mc13, hpel_h,  1, 0,      hpel_v, 0, 0)
DEFINE_AVG_DIAG(mc21, hpel_hv, 0, 0,      hpel_h, 0, 0)
DEFINE_AVG_DIAG(mc23, hpel_hv, 0, 0,      hpel_h, 1, 0)
DEFINE_AVG_DIAG(mc31, hpel_h,  0, 0,      hpel_v, 0, 1)
DEFINE_AVG_DIAG(mc32, hpel_hv, 0, 0,      hpel_v, 0, 1)
DEFINE_AVG_DIAG(mc33, hpel_h,  1, 0,      hpel_v, 0, 1)

#undef DEFINE_AVG_DIAG