/*
 * Standalone bit-exact C reference for VP9 8-tap "regular" subpel
 * filter, horizontal direction, 8-pixel-wide output. Transcribed
 * from FFmpeg's libavcodec/vp9dsp_template.c FILTER_8TAP macro
 * (vendored at external/ffmpeg-snapshot/). 8-bit pixels only.
 *
 * Filter coefficients embedded inline (REGULAR filter only, all 16
 * subpel phases). Same values as ff_vp9_subpel_filters[1][mx] in
 * external/ffmpeg-snapshot/libavcodec/vp9_subpel_filters_table.c.
 *
 * License: LGPL-2.1-or-later.
 *
 * Spec source: VP9 specification §8.5.1 — subpel motion compensation.
 */

#include <stddef.h>
#include <stdint.h>

/*
 * REGULAR 8-tap kernels, indexed [subpel phase 0..15][tap 0..7].
 * Tap k is applied to the source pixel at offset (k - 3) from the
 * output column. Every row sums to 128, which is why the filter
 * output is normalised with a rounding shift by 7.
 */
static const int16_t vp9_8tap_regular_filters[16][8] = {
    {  0,  0,   0, 128,   0,   0,  0,  0 },
    {  0,  1,  -5, 126,   8,  -3,  1,  0 },
    { -1,  3, -10, 122,  18,  -6,  2,  0 },
    { -1,  4, -13, 118,  27,  -9,  3, -1 },
    { -1,  4, -16, 112,  37, -11,  4, -1 },
    { -1,  5, -18, 105,  48, -14,  4, -1 },
    { -1,  5, -19,  97,  58, -16,  5, -1 },
    { -1,  6, -19,  88,  68, -18,  5, -1 },
    { -1,  6, -19,  78,  78, -19,  6, -1 },
    { -1,  5, -18,  68,  88, -19,  6, -1 },
    { -1,  5, -16,  58,  97, -19,  5, -1 },
    { -1,  4, -14,  48, 105, -18,  5, -1 },
    { -1,  4, -11,  37, 112, -16,  4, -1 },
    { -1,  3,  -9,  27, 118, -13,  4, -1 },
    {  0,  2,  -6,  18, 122, -10,  3, -1 },
    {  0,  1,  -3,   8, 126,  -5,  1,  0 },
};

/* Saturate an int to the 8-bit pixel range [0, 255]. */
static inline uint8_t clip_u8(int x)
{
    return (uint8_t)(x > 255 ? 255 : x < 0 ? 0 : x);
}

/*
 * 8-wide horizontal 8-tap "put" (non-averaging). Width is hard-coded
 * to 8; the number of rows is given by `h`.
 *
 *   dst        - output; dst[r*dst_stride + 0..7] is written for
 *                r = 0..h-1. Nothing else is written.
 *   dst_stride - distance in bytes between consecutive output rows.
 *   src        - must point at the row-0, output-column-0 source
 *                pixel; valid source memory must extend over
 *                src[r*src_stride + (-3..+11)] for r = 0..h-1.
 *   src_stride - distance in bytes between consecutive source rows.
 *   h          - number of rows to filter; h <= 0 writes nothing.
 *   mx         - horizontal subpel phase; only the low 4 bits are used.
 *   my         - vertical subpel phase; ignored by this horizontal-only
 *                filter.
 *
 * Intended to match ff_vp9_put_regular8_h_neon byte-for-byte on
 * 8-bit input.
 */
void daedalus_vp9_put_regular_8h_ref(uint8_t *dst, ptrdiff_t dst_stride,
                                     const uint8_t *src, ptrdiff_t src_stride,
                                     int h, int mx, int my)
{
    (void) my; /* horizontal-only filter ignores y phase */

    const int16_t *F = vp9_8tap_regular_filters[mx & 15];

    for (int r = 0; r < h; r++) {
        for (int x = 0; x < 8; x++) {
            /*
             * Kept fully unrolled so each term can be compared
             * one-to-one with the FILTER_8TAP macro this was
             * transcribed from.
             */
            int sum = F[0] * (int) src[x - 3]
                    + F[1] * (int) src[x - 2]
                    + F[2] * (int) src[x - 1]
                    + F[3] * (int) src[x + 0]
                    + F[4] * (int) src[x + 1]
                    + F[5] * (int) src[x + 2]
                    + F[6] * (int) src[x + 3]
                    + F[7] * (int) src[x + 4];

            /*
             * Round to nearest and drop the 7 fractional bits. `sum`
             * can be negative, so this relies on >> being an
             * arithmetic shift for negative values
             * (implementation-defined in C).
             */
            dst[x] = clip_u8((sum + 64) >> 7);
        }
        dst += dst_stride;
        src += src_stride;
    }
}