diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c53e92..4f37451 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -563,6 +563,15 @@ add_executable(test_intra_pred_chroma8x8
 )
 target_compile_options(test_intra_pred_chroma8x8 PRIVATE -O2)
 
+# H.264 Intra_8x8 luma prediction (High profile, 9 modes + 1-2-1
+# reference-sample pre-filter). This PR ships the pre-filter + the
+# 3 simple modes (V, H, DC); the 6 directional modes follow.
+add_executable(test_intra_pred_8x8_luma
+    tests/test_intra_pred_8x8_luma.c
+    tests/h264_intra_pred_8x8_luma_ref.c
+)
+target_compile_options(test_intra_pred_8x8_luma PRIVATE -O2)
+
 add_executable(bench_pool_overhead tests/bench_pool_overhead.c)
 target_link_libraries(bench_pool_overhead PRIVATE daedalus_core)
 target_compile_options(bench_pool_overhead PRIVATE -O2)
diff --git a/tests/h264_intra_pred_8x8_luma_ref.c b/tests/h264_intra_pred_8x8_luma_ref.c
new file mode 100644
index 0000000..3bf0524
--- /dev/null
+++ b/tests/h264_intra_pred_8x8_luma_ref.c
@@ -0,0 +1,123 @@
+/*
+ * Standalone bit-exact C reference for H.264 luma Intra_8x8
+ * prediction modes (per H.264 spec §8.3.2.2). High-profile-only
+ * MB type — Baseline/Main/Extended profiles don't see Intra_8x8.
+ *
+ * Distinct from Intra_4x4 in two ways:
+ *
+ *   1. REFERENCE SAMPLE FILTERING (§8.3.2.2.1). The 25 raw
+ *      neighbour samples are pre-filtered with a 1-2-1 smoothing
+ *      filter BEFORE prediction. The filtering has spec-defined
+ *      boundary handling at the corners and the right-edge of the
+ *      top-row extension.
+ *
+ *   2. SCALE. All 9 prediction modes operate at 8x8 with the
+ *      filtered samples (Intra_4x4 operates at 4x4 with the raw
+ *      samples).
+ *
+ * This PR implements the filter + the 3 simple modes (Vertical,
+ * Horizontal, DC). The 6 directional modes (DDL, DDR, VR, HD, VL,
+ * HU at 8x8) follow in a separate PR — same template, different
+ * formulas per spec sections §8.3.2.2.5..§8.3.2.2.10.
+ *
+ * Calling convention (FFmpeg-style):
+ *   daedalus_h264_pred_8x8l_<mode>_ref(uint8_t *dst, ptrdiff_t stride)
+ *
+ * `dst` points at row 0 col 0 of the 8x8 output block. Reads from
+ *   top[0..15]  = dst[-stride + 0..15]
+ *   top-left    = dst[-stride - 1]
+ *   left[0..7]  = dst[ 0*stride - 1 .. 7*stride - 1]
+ *
+ * AVAILABILITY: assumes all neighbours valid (interior-MB case).
+ *
+ * License: BSD-2-Clause.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+/* H.264 §8.3.2.2.1 reference sample filtering. Filters the 25 raw
+ * samples around the 8x8 block into a `filt` array with the same
+ * indices. When called against an "all neighbours available" tile,
+ * the filtered output uses these spec-defined formulas:
+ *
+ *   filt[top -1] (= filtered top-left) = (top[0] + 2*tl + left[0] + 2) >> 2
+ *
+ *   filt[top 0]               = (tl + 2*top[0] + top[1] + 2) >> 2
+ *   filt[top i] for 1<=i<=14  = (top[i-1] + 2*top[i] + top[i+1] + 2) >> 2
+ *   filt[top 15]              = (top[14] + 3*top[15] + 2) >> 2   (boundary)
+ *
+ *   filt[left 0]              = (tl + 2*left[0] + left[1] + 2) >> 2
+ *   filt[left j] for 1<=j<=6  = (left[j-1] + 2*left[j] + left[j+1] + 2) >> 2
+ *   filt[left 7]              = (left[6] + 3*left[7] + 2) >> 2   (boundary)
+ *
+ * Reads neighbours from the dst buffer; writes filtered values to
+ * a caller-provided 25-element array indexed as:
+ *   filt[0]      = filtered top-left
+ *   filt[1..16]  = filtered top[0..15]
+ *   filt[17..24] = filtered left[0..7]
+ *
+ * Every weighted sum is at most 4*255 + 2, so each result fits in
+ * uint8_t and the casts below never truncate.
+ */
+static void filter_refs(const uint8_t *dst, ptrdiff_t stride,
+                        uint8_t filt[25])
+{
+    int tl = dst[-stride - 1];
+    int t[16];
+    for (int i = 0; i < 16; i++) t[i] = dst[-stride + i];
+    int l[8];
+    for (int j = 0; j < 8; j++) l[j] = dst[j * stride - 1];
+
+    /* Filtered top-left. */
+    filt[0] = (uint8_t)((t[0] + 2*tl + l[0] + 2) >> 2);
+
+    /* Filtered top. */
+    filt[1] = (uint8_t)((tl + 2*t[0] + t[1] + 2) >> 2);
+    for (int i = 1; i <= 14; i++)
+        filt[1 + i] = (uint8_t)((t[i-1] + 2*t[i] + t[i+1] + 2) >> 2);
+    filt[1 + 15] = (uint8_t)((t[14] + 3*t[15] + 2) >> 2);
+
+    /* Filtered left. */
+    filt[17 + 0] = (uint8_t)((tl + 2*l[0] + l[1] + 2) >> 2);
+    for (int j = 1; j <= 6; j++)
+        filt[17 + j] = (uint8_t)((l[j-1] + 2*l[j] + l[j+1] + 2) >> 2);
+    filt[17 + 7] = (uint8_t)((l[6] + 3*l[7] + 2) >> 2);
+}
+
+/* Convenience macros for accessing the filt[] array by spec-style index. */
+#define FT(i)  filt[1 + (i)]      /* filtered top[i],  i in 0..15 */
+#define FL(j)  filt[17 + (j)]     /* filtered left[j], j in 0..7  */
+#define FTL    filt[0]            /* filtered top-left */
+
+/* Mode 0 Vertical (§8.3.2.2.2): pred[r,c] = filt_top[c]. */
+void daedalus_h264_pred_8x8l_vertical_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    uint8_t filt[25];
+    filter_refs(dst, stride, filt);
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) dst[r * stride + c] = FT(c);
+}
+
+/* Mode 1 Horizontal (§8.3.2.2.3): pred[r,c] = filt_left[r]. */
+void daedalus_h264_pred_8x8l_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    uint8_t filt[25];
+    filter_refs(dst, stride, filt);
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) dst[r * stride + c] = FL(r);
+}
+
+/* Mode 2 DC (§8.3.2.2.4): ((sum_filt_top[0..7] + sum_filt_left[0..7]
+ * + 8) >> 4) broadcast. Note the +8 (not +4 like 4x4): there are
+ * 16 samples summed total, so >> 4 with half-step rounding +8. */
+void daedalus_h264_pred_8x8l_dc_ref(uint8_t *dst, ptrdiff_t stride)
+{
+    uint8_t filt[25];
+    filter_refs(dst, stride, filt);
+    int sum = 8;
+    for (int i = 0; i < 8; i++) sum += FT(i);
+    for (int j = 0; j < 8; j++) sum += FL(j);
+    uint8_t v = (uint8_t)(sum >> 4);
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) dst[r * stride + c] = v;
+}
diff --git a/tests/test_intra_pred_8x8_luma.c b/tests/test_intra_pred_8x8_luma.c
new file mode 100644
index 0000000..ac8b03c
--- /dev/null
+++ b/tests/test_intra_pred_8x8_luma.c
@@ -0,0 +1,156 @@
+/*
+ * Tests the H.264 Intra_8x8 luma prediction modes against spec-derived
+ * expectations. Buffer layout is 9 rows × 17 cols (extra cols for the
+ * top-right extension that DDL/VL need; not exercised by V/H/DC but
+ * already in-place for the eventual directional-modes follow-up):
+ *
+ *   row 0:  [tl][t0..t15]               — 17 bytes
+ *   row 1:  [l0][output row 0 ..]       — 17 bytes
+ *   ...
+ *   row 8:  [l7][output row 7 ..]
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+extern void daedalus_h264_pred_8x8l_vertical_ref(uint8_t *dst, ptrdiff_t stride);
+extern void daedalus_h264_pred_8x8l_horizontal_ref(uint8_t *dst, ptrdiff_t stride);
+extern void daedalus_h264_pred_8x8l_dc_ref(uint8_t *dst, ptrdiff_t stride);
+
+#define STRIDE 17
+#define ROWS   9
+
+/* Fills the buffer with 0xff, then stores the neighbour samples: tl at
+ * [0][0], t[0..15] along row 0 and l[0..7] down column 0. */
+static void set_ctx(uint8_t buf[ROWS][STRIDE], int tl,
+                    const int t[16], const int l[8])
+{
+    for (int r = 0; r < ROWS; r++)
+        for (int c = 0; c < STRIDE; c++) buf[r][c] = 0xff;
+    buf[0][0] = (uint8_t) tl;
+    for (int c = 0; c < 16; c++) buf[0][1 + c] = (uint8_t) t[c];
+    for (int r = 0; r < 8; r++) buf[1 + r][0] = (uint8_t) l[r];
+}
+
+/* Checks that all 64 output samples equal expect_val. Prints PASS/FAIL
+ * and returns 0 on success, 1 on failure. */
+static int check_uniform(const uint8_t buf[ROWS][STRIDE], const char *name,
+                         uint8_t expect_val)
+{
+    int diff = 0;
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++)
+            if (buf[1+r][1+c] != expect_val) diff++;
+    if (diff == 0) printf(" %-30s PASS\n", name);
+    else printf(" %-30s FAIL (%d/64 wrong, expected %u)\n", name, diff,
+                (unsigned) expect_val);
+    return diff == 0 ? 0 : 1;
+}
+
+int main(void)
+{
+    int fail = 0;   /* number of failed cases, reported at the end */
+
+    /* Mode 0 Vertical with uniform top → uniform output.
+     * Filtered top[c] = (a + 2*a + a + 2) >> 2 = a for uniform a. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16], l[8];
+        for (int i = 0; i < 16; i++) t[i] = 50;
+        for (int j = 0; j < 8; j++) l[j] = 0;
+        set_ctx(buf, 50, t, l);
+        daedalus_h264_pred_8x8l_vertical_ref(&buf[1][1], STRIDE);
+        fail += check_uniform(buf, "Vertical (mode 0, uniform top)", 50);
+    }
+
+    /* Mode 1 Horizontal with uniform left → uniform output. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16] = {0}, l[8];
+        for (int j = 0; j < 8; j++) l[j] = 70;
+        set_ctx(buf, 70, t, l);
+        daedalus_h264_pred_8x8l_horizontal_ref(&buf[1][1], STRIDE);
+        fail += check_uniform(buf, "Horizontal (mode 1, uniform left)", 70);
+    }
+
+    /* Mode 2 DC with all-uniform neighbours → uniform output.
+     * Filtered top[c] = top for uniform; filtered left[j] = left.
+     * sum = 8*a + 8*a + 8 = 16a + 8, and (16a + 8) >> 4 = a. This case
+     * cannot detect a missing +8 (16a >> 4 is also a); the gradient DC
+     * case at the end of main() covers the rounding term. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16], l[8];
+        for (int i = 0; i < 16; i++) t[i] = 33;
+        for (int j = 0; j < 8; j++) l[j] = 33;
+        set_ctx(buf, 33, t, l);
+        daedalus_h264_pred_8x8l_dc_ref(&buf[1][1], STRIDE);
+        fail += check_uniform(buf, "DC (mode 2, uniform)", 33);
+    }
+
+    /* Mode 0 Vertical with NON-uniform top: gradient 0..15, tl = 0.
+     *   filt[top 0]  = (tl + 2*t[0] + t[1] + 2) >> 2 = (0 + 0 + 1 + 2) >> 2 = 0
+     *   filt[top c] for c in 1..14
+     *                = (t[c-1] + 2*t[c] + t[c+1] + 2) >> 2
+     *                = (4c + 2) >> 2 = c   (the +2 never carries past 4c)
+     *   filt[top 15] = (t[14] + 3*t[15] + 2) >> 2 = (14 + 45 + 2) >> 2 = 15
+     * The filtered top-left is not used by Vertical mode.
+     *
+     * So Vertical output col c = filt[top c] = c for c = 0..7, in all
+     * 8 rows. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16], l[8] = {0};
+        for (int i = 0; i < 16; i++) t[i] = i;
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_8x8l_vertical_ref(&buf[1][1], STRIDE);
+        int diff = 0;
+        for (int r = 0; r < 8; r++)
+            for (int c = 0; c < 8; c++)
+                if (buf[1+r][1+c] != c) diff++;
+        if (diff == 0) printf(" %-30s PASS (filtered gradient)\n", "Vertical (mode 0, gradient)");
+        else printf(" %-30s FAIL (%d/64 wrong)\n", "Vertical (mode 0, gradient)", diff);
+        fail += (diff != 0);
+    }
+
+    /* Mode 1 Horizontal gradient: left = 0..7, tl = 0. Filtered left:
+     *   filt[left 0] = (tl + 2*l[0] + l[1] + 2) >> 2 = (0 + 0 + 1 + 2) >> 2 = 0
+     *   filt[left j] for j=1..6 = (l[j-1] + 2*l[j] + l[j+1] + 2) >> 2 = j
+     *                (same arithmetic as top)
+     *   filt[left 7] = (l[6] + 3*l[7] + 2) >> 2 = (6 + 21 + 2) >> 2 = 7
+     * So Horizontal output row r = r for r = 0..7. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16] = {0}, l[8];
+        for (int j = 0; j < 8; j++) l[j] = j;
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_8x8l_horizontal_ref(&buf[1][1], STRIDE);
+        int diff = 0;
+        for (int r = 0; r < 8; r++)
+            for (int c = 0; c < 8; c++)
+                if (buf[1+r][1+c] != r) diff++;
+        if (diff == 0) printf(" %-30s PASS (filtered gradient)\n", "Horizontal (mode 1, gradient)");
+        else printf(" %-30s FAIL (%d/64 wrong)\n", "Horizontal (mode 1, gradient)", diff);
+        fail += (diff != 0);
+    }
+
+    /* Mode 2 DC with gradient neighbours: top = 0..15, left = 0..7,
+     * tl = 0. From the two gradient cases above, filtered top[0..7]
+     * and filtered left[0..7] are both 0..7, each summing to 28.
+     *   with rounding:    (28 + 28 + 8) >> 4 = 4
+     *   without rounding: (28 + 28)     >> 4 = 3
+     * so this case fails if the +8 term is dropped. */
+    {
+        uint8_t buf[ROWS][STRIDE];
+        int t[16], l[8];
+        for (int i = 0; i < 16; i++) t[i] = i;
+        for (int j = 0; j < 8; j++) l[j] = j;
+        set_ctx(buf, 0, t, l);
+        daedalus_h264_pred_8x8l_dc_ref(&buf[1][1], STRIDE);
+        fail += check_uniform(buf, "DC (mode 2, gradient)", 4);
+    }
+
+    if (fail == 0) printf("\nALL Intra_8x8 luma PASS (3 modes — V, H, DC)\n");
+    else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
+    return fail ? 1 : 0;
+}