diff --git a/CMakeLists.txt b/CMakeLists.txt
index a69ac98..43ebdac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -548,6 +548,15 @@ add_executable(test_intra_pred_16x16
 )
 target_compile_options(test_intra_pred_16x16 PRIVATE -O2)
 
+# H.264 Intra_8x8 chroma prediction (4 modes: DC, H, V, Plane) —
+# reference + tests. DC is per-quadrant (asymmetric); Plane uses
+# slope coefficient 34 instead of luma's 5.
+add_executable(test_intra_pred_chroma8x8
+    tests/test_intra_pred_chroma8x8.c
+    tests/h264_intra_pred_chroma8x8_ref.c
+)
+target_compile_options(test_intra_pred_chroma8x8 PRIVATE -O2)
+
 add_executable(bench_pool_overhead tests/bench_pool_overhead.c)
 target_link_libraries(bench_pool_overhead PRIVATE daedalus_core)
 target_compile_options(bench_pool_overhead PRIVATE -O2)
diff --git a/tests/h264_intra_pred_chroma8x8_ref.c b/tests/h264_intra_pred_chroma8x8_ref.c
new file mode 100644
index 0000000..da82024
--- /dev/null
+++ b/tests/h264_intra_pred_chroma8x8_ref.c
@@ -0,0 +1,123 @@
+/*
+ * Standalone bit-exact C reference for H.264 chroma Intra_8x8
+ * prediction modes (per H.264 §8.3.3), used for both Cb and Cr
+ * planes at 4:2:0. All 4 modes.
+ *
+ * Mode index → name (per H.264 Table 7-16):
+ *   0 = DC (per-quadrant — asymmetric, see §8.3.3.2)
+ *   1 = Horizontal
+ *   2 = Vertical
+ *   3 = Plane (slope coefficient 34, distinct from luma's 5)
+ *
+ * Calling convention (same shape as luma intra refs):
+ *   daedalus_h264_pred_chroma8x8_{dc,horizontal,vertical,plane}_ref(uint8_t *dst, ptrdiff_t stride)
+ *
+ * `dst` points at row 0, col 0 of the 8x8 output block (single
+ * component plane — Cb or Cr, dispatched independently). Neighbours:
+ *   top[0..7]  = dst[-stride + 0 .. -stride + 7]
+ *   top-left   = dst[-stride - 1]
+ *   left[0..7] = dst[ 0*stride - 1 .. 7*stride - 1]
+ *
+ * AVAILABILITY: assumes all neighbours valid (interior-MB case).
+ * The H.264 spec defines per-quadrant fallback for the DC mode at
+ * MB boundaries; that's caller-side via the libavcodec intercept.
+ *
+ * License: BSD-2-Clause.
+ */
+#include   /* NOTE(review): header name missing in this patch; presumably stdint.h (uint8_t) — confirm */
+#include   /* NOTE(review): header name missing in this patch; presumably stddef.h (ptrdiff_t) — confirm */
+
+static inline int clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }   /* clamp v to 0..255 */
+
+/* Mode 0 — DC (per-quadrant, 4:2:0 layout per §8.3.3.2).
+ *
+ * The 8×8 block is split into four 4×4 quadrants. For interior
+ * MBs (all neighbours available), the DC value per quadrant uses:
+ *   (0,0) top-left  : (sum_top[0..3] + sum_left[0..3] + 4) >> 3
+ *   (0,1) top-right : (sum_top[4..7]                  + 2) >> 2
+ *   (1,0) bot-left  : (                sum_left[4..7] + 2) >> 2
+ *   (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3
+ *
+ * The asymmetry follows the spec's per-quadrant rule: each quadrant
+ * reads only the neighbour runs in its own columns/rows. Top-right
+ * uses top[4..7] alone (not left[0..3]), bottom-left uses left[4..7]
+ * alone, and bottom-right combines top[4..7] with left[4..7].
+ */
+void daedalus_h264_pred_chroma8x8_dc_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    const uint8_t *top = dst - stride;   /* row directly above the block */
+    int top_lo = 0, top_hi = 0, left_lo = 0, left_hi = 0;
+    for (int i = 0; i < 4; i++) {
+        top_lo  += top[i];
+        top_hi  += top[4 + i];
+        left_lo += dst[i * stride - 1];
+        left_hi += dst[(4 + i) * stride - 1];
+    }
+    uint8_t dc00 = (uint8_t)((top_lo + left_lo + 4) >> 3);   /* top-left */
+    uint8_t dc01 = (uint8_t)((top_hi           + 2) >> 2);   /* top-right */
+    uint8_t dc10 = (uint8_t)((         left_hi + 2) >> 2);   /* bot-left */
+    uint8_t dc11 = (uint8_t)((top_hi + left_hi + 4) >> 3);   /* bot-right */
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            dst[(    r) * stride +     c ] = dc00;
+            dst[(    r) * stride + 4 + c ] = dc01;
+            dst[(4 + r) * stride +     c ] = dc10;
+            dst[(4 + r) * stride + 4 + c ] = dc11;
+        }
+    }
+}
+
+/* Mode 1 — Horizontal: each row = left[row], replicated across all 8 columns. */
+void daedalus_h264_pred_chroma8x8_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    for (int r = 0; r < 8; r++) {
+        uint8_t l = dst[r * stride - 1];
+        for (int c = 0; c < 8; c++) dst[r * stride + c] = l;
+    }
+}
+
+/* Mode 2 — Vertical: each col = top[col], replicated down all 8 rows. */
+void daedalus_h264_pred_chroma8x8_vertical_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    const uint8_t *top = dst - stride;
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) dst[r * stride + c] = top[c];
+}
+
+/* Mode 3 — Plane (per H.264 §8.3.3.4):
+ *   H = sum_{i=0..3} (i+1) * (p[4+i, -1] - p[2-i, -1])   ; i=3 uses p[-1,-1]
+ *   V = sum_{j=0..3} (j+1) * (p[-1, 4+j] - p[-1, 2-j])   ; j=3 uses p[-1,-1]
+ *   b = (34 * H + 32) >> 6
+ *   c = (34 * V + 32) >> 6
+ *   a = 16 * (p[-1, 7] + p[7, -1])
+ *   pred[y][x] = Clip1((a + b*(x - 3) + c*(y - 3) + 16) >> 5)
+ * NOTE: H/V (hence b, c and the pre-clip sum) can be negative; `>>` then relies on an arithmetic right shift (implementation-defined in C).
+ * Distinct from the Intra_16x16 luma Plane:
+ *   - Slope coefficient is 34 (not 5).
+ *   - Centre is (x-3, y-3) (not x-7, y-7).
+ *   - Spans 4 differences per sum (not 8).
+ */
+void daedalus_h264_pred_chroma8x8_plane_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    const uint8_t *top = dst - stride;
+    int H = 0, V = 0;
+    for (int i = 0; i < 4; i++) {
+        int t_right = top[4 + i];
+        int t_left  = (i == 3) ? top[-1] : top[2 - i];   /* 2 - 3 == -1: the general index names the same sample */
+        H += (i + 1) * (t_right - t_left);
+    }
+    for (int j = 0; j < 4; j++) {
+        int l_bot = dst[(4 + j) * stride - 1];
+        int l_top = (j == 3) ? top[-1] : dst[(2 - j) * stride - 1];   /* (2 - 3) * stride - 1 is top[-1] as well */
+        V += (j + 1) * (l_bot - l_top);
+    }
+    int b = (34 * H + 32) >> 6;
+    int c = (34 * V + 32) >> 6;
+    int a = 16 * (dst[7 * stride - 1] + top[7]);   /* left[7] + top[7] */
+    for (int y = 0; y < 8; y++) {
+        for (int x = 0; x < 8; x++) {
+            int v = (a + b * (x - 3) + c * (y - 3) + 16) >> 5;
+            dst[y * stride + x] = (uint8_t) clip_u8(v);
+        }
+    }
+}
diff --git a/tests/test_intra_pred_chroma8x8.c b/tests/test_intra_pred_chroma8x8.c
new file mode 100644
index 0000000..acb9d08
--- /dev/null
+++ b/tests/test_intra_pred_chroma8x8.c
@@ -0,0 +1,170 @@
+/*
+ * Tests the 4 H.264 Intra_8x8 chroma prediction modes against
+ * spec-derived expected patterns. Same buffer layout idea as the
+ * other intra tests: a buffer that holds the 8x8 output + 1-pixel
+ * top/left context + 1-pixel top-left corner.
+ *
+ *   row 0: [tl][t0..t7]
+ *   row 1: [l0][output row 0]
+ *   ...
+ *   row 8: [l7][output row 7]
+ *
+ * Dimensions: 9 rows × 9 cols. dst (passed to pred fns) = &buf[1][1].
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+extern void daedalus_h264_pred_chroma8x8_dc_ref(uint8_t *dst, ptrdiff_t stride);
+extern void daedalus_h264_pred_chroma8x8_horizontal_ref(uint8_t *dst, ptrdiff_t stride);
+extern void daedalus_h264_pred_chroma8x8_vertical_ref(uint8_t *dst, ptrdiff_t stride);
+extern void daedalus_h264_pred_chroma8x8_plane_ref(uint8_t *dst, ptrdiff_t stride);
+
+#define STRIDE 9
+#define ROWS   9
+/* Pre-fill buf with 0xff, then store the corner (tl), top row t[0..7] and left column l[0..7]. */
+static void set_ctx(uint8_t buf[ROWS][STRIDE], int tl,
+                    const int t[8], const int l[8])
+{
+    for (int r = 0; r < ROWS; r++)
+        for (int c = 0; c < STRIDE; c++) buf[r][c] = 0xff;   /* an output cell left unwritten stays 0xff */
+    buf[0][0] = (uint8_t) tl;
+    for (int c = 0; c < 8; c++) buf[0][1 + c] = (uint8_t) t[c];
+    for (int r = 0; r < 8; r++) buf[1 + r][0] = (uint8_t) l[r];
+}
+/* Compare the 8x8 output area buf[1..8][1..8] with expect, print PASS/FAIL for `name`; returns 0 on match, 1 otherwise. */
+static int check_per_cell(const uint8_t buf[ROWS][STRIDE], const char *name,
+                          const uint8_t expect[8][8])
+{
+    int diff = 0;   /* number of mismatching cells */
+    int first_r = 0, first_c = 0, first_got = 0, first_exp = 0;
+    for (int r = 0; r < 8; r++) {
+        for (int c = 0; c < 8; c++) {
+            uint8_t got = buf[1 + r][1 + c];
+            uint8_t exp = expect[r][c];
+            if (got != exp) {
+                if (diff == 0) {   /* remember only the first mismatch for the report */
+                    first_r = r; first_c = c;
+                    first_got = got; first_exp = exp;
+                }
+                diff++;
+            }
+        }
+    }
+    if (diff == 0)
+        printf(" %-30s PASS\n", name);
+    else
+        printf(" %-30s FAIL (%d/64 wrong, first r=%d c=%d got=%u exp=%u)\n",
+               name, diff, first_r, first_c, (unsigned) first_got, (unsigned) first_exp);   /* %u needs unsigned */
+    return diff == 0 ? 0 : 1;
+}
+/* Runs one block per test case; `fail` counts failing cases and the exit status is 0 only when it stays 0. */
+int main(void)
+{
+    int fail = 0;
+
+    /* --- Mode 1 Horizontal --- */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[8] = {0}, l[8] = {10, 20, 30, 40, 50, 60, 70, 80};
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_chroma8x8_horizontal_ref(&buf[1][1], STRIDE);
+        uint8_t exp[8][8];
+        for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = (uint8_t) l[r];
+        fail += check_per_cell(buf, "Horizontal (mode 1)", exp);
+    }
+
+    /* --- Mode 2 Vertical --- */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[8] = {15, 25, 35, 45, 55, 65, 75, 85}, l[8] = {0};
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_chroma8x8_vertical_ref(&buf[1][1], STRIDE);
+        uint8_t exp[8][8];
+        for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = (uint8_t) t[c];
+        fail += check_per_cell(buf, "Vertical (mode 2)", exp);
+    }
+
+    /* --- Mode 0 DC: per-quadrant. Neighbour values are chosen so that all
+     * four quadrant DCs differ; any quadrant mix-up then fails the check.
+     *
+     *   top[0..3]  = 4 ×  8  → sum_top_lo  =  32
+     *   top[4..7]  = 4 × 16  → sum_top_hi  =  64
+     *   left[0..3] = 4 × 56  → sum_left_lo = 224
+     *   left[4..7] = 4 × 40  → sum_left_hi = 160
+     *
+     *   dc00 = (32 + 224 + 4) >> 3 = 260 >> 3 = 32
+     *   dc01 = (64       + 2) >> 2 =  66 >> 2 = 16
+     *   dc10 = (     160 + 2) >> 2 = 162 >> 2 = 40
+     *   dc11 = (64 + 160 + 4) >> 3 = 228 >> 3 = 28
+     */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[8] = {  8,  8,  8,  8, 16, 16, 16, 16 };
+        int l[8] = { 56, 56, 56, 56, 40, 40, 40, 40 };
+        set_ctx(buf, 99, t, l);
+        daedalus_h264_pred_chroma8x8_dc_ref(&buf[1][1], STRIDE);
+        uint8_t exp[8][8] = {
+            {32,32,32,32, 16,16,16,16},
+            {32,32,32,32, 16,16,16,16},
+            {32,32,32,32, 16,16,16,16},
+            {32,32,32,32, 16,16,16,16},
+            {40,40,40,40, 28,28,28,28},
+            {40,40,40,40, 28,28,28,28},
+            {40,40,40,40, 28,28,28,28},
+            {40,40,40,40, 28,28,28,28},
+        };
+        fail += check_per_cell(buf, "DC quadrants (mode 0)", exp);
+    }
+
+    /* --- Mode 3 Plane (uniform): H = V = 0; a = 16 * (100 + 100) = 3200.
+     * pred[y][x] = (3200 + 0 + 0 + 16) >> 5 = 3216 >> 5 = 100. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[8], l[8];
+        for (int i = 0; i < 8; i++) { t[i] = 100; l[i] = 100; }
+        set_ctx(buf, 100, t, l);
+        daedalus_h264_pred_chroma8x8_plane_ref(&buf[1][1], STRIDE);
+        uint8_t exp[8][8];
+        for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = 100;
+        fail += check_per_cell(buf, "Plane uniform (mode 3)", exp);
+    }
+
+    /* --- Mode 3 Plane gradient sanity ---
+     * t = 0..7, l = 0..7, tl = 0.
+     * H = 1*(t[4]-t[2]) + 2*(t[5]-t[1]) + 3*(t[6]-t[0]) + 4*(t[7]-tl)
+     *   = 1*(4-2) + 2*(5-1) + 3*(6-0) + 4*(7-0)
+     *   = 2 + 8 + 18 + 28 = 56
+     * V = same shape on left = 56
+     * b = (34*56 + 32) >> 6 = 1936 >> 6 = 30
+     * c = 30
+     * a = 16 * (l[7] + t[7]) = 16 * (7 + 7) = 224
+     *
+     * pred[0][0] = (224 + 30*(-3) + 30*(-3) + 16) >> 5
+     *            = (224 - 90 - 90 + 16) >> 5
+     *            = 60 >> 5 = 1
+     * pred[7][7] = (224 + 30*4 + 30*4 + 16) >> 5
+     *            = (224 + 120 + 120 + 16) >> 5
+     *            = 480 >> 5 = 15
+     * Spot-check those two corners. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[8], l[8];
+        for (int i = 0; i < 8; i++) { t[i] = i; l[i] = i; }
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_chroma8x8_plane_ref(&buf[1][1], STRIDE);
+        uint8_t tl_actual = buf[1 + 0][1 + 0];
+        uint8_t br_actual = buf[1 + 7][1 + 7];
+        int spot_fail = 0;
+        if (tl_actual != 1)  { fprintf(stderr, "Plane gradient pred[0][0] = %u, expected 1\n", (unsigned) tl_actual); spot_fail = 1; }
+        if (br_actual != 15) { fprintf(stderr, "Plane gradient pred[7][7] = %u, expected 15\n", (unsigned) br_actual); spot_fail = 1; }
+        if (!spot_fail) printf(" %-30s PASS (corners 1, 15)\n", "Plane gradient (mode 3)");
+        else printf(" %-30s FAIL\n", "Plane gradient (mode 3)");
+        fail += spot_fail;
+    }
+
+    if (fail == 0) printf("\nALL Intra_8x8 chroma mode references PASS\n");
+    else fprintf(stderr, "\n%d test(s) FAILED\n", fail);   /* fail is a true count: cases are added, not OR-ed */
+    return fail ? 1 : 0;
+}