h264: Intra_16x16 luma prediction — 4-mode C reference + spec gates #13
@@ -540,6 +540,14 @@ add_executable(test_intra_pred_4x4
|
||||
)
|
||||
target_compile_options(test_intra_pred_4x4 PRIVATE -O2)
|
||||
|
||||
# H.264 Intra_16x16 luma prediction (4 modes: V, H, DC, Plane) —
|
||||
# reference + tests. Same spec-gate role as the 4x4 sibling.
|
||||
add_executable(test_intra_pred_16x16
|
||||
tests/test_intra_pred_16x16.c
|
||||
tests/h264_intra_pred_16x16_ref.c
|
||||
)
|
||||
target_compile_options(test_intra_pred_16x16 PRIVATE -O2)
|
||||
|
||||
add_executable(bench_pool_overhead tests/bench_pool_overhead.c)
|
||||
target_link_libraries(bench_pool_overhead PRIVATE daedalus_core)
|
||||
target_compile_options(bench_pool_overhead PRIVATE -O2)
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Standalone bit-exact C reference for H.264 luma Intra_16x16
|
||||
* prediction modes (per H.264 spec §8.3.2). All 4 modes.
|
||||
*
|
||||
* Mode index → name (per H.264 Table 7-15):
|
||||
* 0 = Vertical
|
||||
* 1 = Horizontal
|
||||
* 2 = DC
|
||||
* 3 = Plane
|
||||
*
|
||||
* Calling convention (FFmpeg-style, matches the Intra_4x4 ref):
|
||||
* pred_16x16_<mode>(uint8_t *dst, ptrdiff_t stride)
|
||||
*
|
||||
* `dst` points at row 0, col 0 of the 16x16 output block. Neighbours:
|
||||
* top[0..15] = dst[-stride + 0 .. -stride + 15]
|
||||
* top-left = dst[-stride - 1]
|
||||
* left[0..15] = dst[ 0*stride - 1 .. 15*stride - 1]
|
||||
*
|
||||
* AVAILABILITY: assumes all neighbours valid (interior-MB case). The
|
||||
* H.264 spec defines fallback for boundary cases (DC averages just
|
||||
* the available side, etc.); the eventual libavcodec intercept
|
||||
* handles availability before calling.
|
||||
*
|
||||
* License: BSD-2-Clause.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Saturate an int to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    if (v < 0)
        return 0;
    if (v > 255)
        return 255;
    return v;
}
|
||||
|
||||
/*
 * Mode 0 — Vertical.
 *
 * Replicates the 16 samples directly above the block (dst[-stride + x])
 * down every one of the 16 output rows.
 *
 *   dst    - row 0, col 0 of the 16x16 output block
 *   stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_16x16_vertical_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;

    for (int y = 0; y < 16; y++) {
        uint8_t *row = dst + y * stride;

        for (int x = 0; x < 16; x++)
            row[x] = above[x];
    }
}
|
||||
|
||||
/*
 * Mode 1 — Horizontal.
 *
 * Fills each output row y with the single sample immediately to its
 * left, dst[y*stride - 1].
 *
 *   dst    - row 0, col 0 of the 16x16 output block
 *   stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_16x16_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
{
    for (int y = 0; y < 16; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t fill = row[-1];

        for (int x = 0; x < 16; x++)
            row[x] = fill;
    }
}
|
||||
|
||||
/*
 * Mode 2 — DC.
 *
 * Averages the 16 samples above and the 16 samples to the left of the
 * block, rounding to nearest, and writes that one value to all 256
 * output samples:
 *
 *   dc = (sum(top[0..15]) + sum(left[0..15]) + 16) >> 5
 *
 *   dst    - row 0, col 0 of the 16x16 output block
 *   stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_16x16_dc_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    int above_sum = 0;
    int left_sum = 0;

    for (int k = 0; k < 16; k++) {
        above_sum += above[k];
        left_sum += dst[k * stride - 1];
    }

    /* 32 samples in total: +16 rounds to nearest before dividing by 32. */
    const uint8_t dc = (uint8_t)((above_sum + left_sum + 16) >> 5);

    for (int y = 0; y < 16; y++) {
        uint8_t *row = dst + y * stride;

        for (int x = 0; x < 16; x++)
            row[x] = dc;
    }
}
|
||||
|
||||
/*
 * Mode 3 — Plane.
 *
 * With top[k] = dst[-stride + k] and left[k] = dst[k*stride - 1]
 * (so top[-1] and left[-1] both name the top-left corner sample):
 *
 *   H = sum_{i=0..7} (i+1) * (top[8+i]  - top[6-i])
 *   V = sum_{j=0..7} (j+1) * (left[8+j] - left[6-j])
 *   b = (5*H + 32) >> 6
 *   c = (5*V + 32) >> 6
 *   a = 16 * (left[15] + top[15])
 *   pred[y][x] = Clip1((a + b*(x-7) + c*(y-7) + 16) >> 5)
 *
 * pred[y][x] is pred[row][col]. H is the horizontal slope, measured on
 * the top row (right half minus left half); V is the vertical slope,
 * measured on the left column (bottom half minus top half).
 *
 * The top-left corner DOES take part in both sums: at i = 7 (j = 7) the
 * subtrahend index 6-i is -1, so the last term is
 * 8 * (top[15] - corner) and 8 * (left[15] - corner) respectively.
 * top[7] and left[7] are the only neighbours that are not read.
 *
 * ">>" in the formulas is an arithmetic (floor) shift. C leaves ">>" on
 * a negative int implementation-defined, and H, V and the per-sample
 * value can all be negative, so the shift goes through a helper that
 * computes the floor explicitly.
 *
 * AVAILABILITY: assumes the top row, the left column and the top-left
 * corner are all valid; nothing here handles missing neighbours.
 *
 *   dst    - row 0, col 0 of the 16x16 output block
 *   stride - distance in bytes between vertically adjacent samples
 */

/* floor(v / 2^s) for any sign of v, without shifting a negative int. */
static int plane16_floor_shr(int v, int s)
{
    return v >= 0 ? v >> s : -1 - ((-1 - v) >> s);
}

void daedalus_h264_pred_16x16_plane_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    const uint8_t *left = dst - 1;   /* left[k * stride] is row k's neighbour */
    int H = 0, V = 0;

    /* All neighbour reads happen before any output sample is written.
     * At k == 7 both subtrahends resolve to the top-left corner
     * (top[-1] == left[-stride]). */
    for (int k = 0; k < 8; k++) {
        H += (k + 1) * (top[8 + k] - top[6 - k]);
        V += (k + 1) * (left[(8 + k) * stride] - left[(6 - k) * stride]);
    }

    const int b = plane16_floor_shr(5 * H + 32, 6);
    const int c = plane16_floor_shr(5 * V + 32, 6);
    const int a = 16 * (left[15 * stride] + top[15]);

    for (int y = 0; y < 16; y++) {
        uint8_t *row = dst + y * stride;

        for (int x = 0; x < 16; x++) {
            const int v = plane16_floor_shr(a + b * (x - 7) + c * (y - 7) + 16, 5);

            /* Clip1: saturate to the 8-bit sample range. */
            row[x] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
        }
    }
}
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Tests the 4 H.264 Intra_16x16 luma prediction modes against
|
||||
* spec-derived expected patterns. Same layout as the 4x4 test:
|
||||
* a buffer that holds the 16x16 output plus 1-pixel top/left
|
||||
* context and 1-pixel top-left corner.
|
||||
*
|
||||
* row 0: [tl][t0..t15]
|
||||
* row 1: [l0][output row 0]
|
||||
* row 2: [l1][output row 1]
|
||||
* ...
|
||||
* row 16: [l15][output row 15]
|
||||
*
|
||||
* Buffer dimensions: 17 rows × 17 cols, total 289 bytes.
|
||||
* dst (passed to the pred fns) points at row 1 col 1.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
extern void daedalus_h264_pred_16x16_vertical_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_16x16_horizontal_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_16x16_dc_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_16x16_plane_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
|
||||
#define STRIDE 17
|
||||
#define ROWS 17
|
||||
|
||||
static void set_ctx(uint8_t buf[ROWS][STRIDE], int tl,
|
||||
const int t[16], const int l[16])
|
||||
{
|
||||
for (int r = 0; r < ROWS; r++)
|
||||
for (int c = 0; c < STRIDE; c++) buf[r][c] = 0xff;
|
||||
buf[0][0] = (uint8_t) tl;
|
||||
for (int c = 0; c < 16; c++) buf[0][1 + c] = (uint8_t) t[c];
|
||||
for (int r = 0; r < 16; r++) buf[1 + r][0] = (uint8_t) l[r];
|
||||
}
|
||||
|
||||
/*
 * Compare the 16x16 output area of buf (starting at buf[1][1]) against
 * a per-sample expectation and print a one-line PASS/FAIL report.
 *
 *   buf       - buffer after the predictor under test has run
 *   name      - label printed in the report line
 *   expect_at - returns the expected sample for output (row r, col c)
 *   cookie    - opaque pointer forwarded to expect_at
 *
 * On failure the report carries the mismatch count and the first
 * mismatching position in row-major order. Returns 0 on pass, 1 on fail.
 */
static int check(const uint8_t buf[ROWS][STRIDE], const char *name,
                 uint8_t (*expect_at)(int r, int c, void *), void *cookie)
{
    int mismatches = 0;
    int bad_row = 0, bad_col = 0;
    unsigned bad_got = 0, bad_exp = 0;

    for (int idx = 0; idx < 16 * 16; idx++) {
        const int row = idx / 16;
        const int col = idx % 16;
        const uint8_t actual = buf[1 + row][1 + col];
        const uint8_t wanted = expect_at(row, col, cookie);

        if (actual == wanted)
            continue;

        /* Remember only the first mismatch; count all of them. */
        if (mismatches == 0) {
            bad_row = row;
            bad_col = col;
            bad_got = actual;
            bad_exp = wanted;
        }
        mismatches++;
    }

    if (mismatches != 0) {
        printf(" %-30s FAIL (%d/256 wrong, first r=%d c=%d got=%u exp=%u)\n",
               name, mismatches, bad_row, bad_col, bad_got, bad_exp);
        return 1;
    }

    printf(" %-30s PASS\n", name);
    return 0;
}
|
||||
|
||||
/* Expectation helpers for each mode. */
|
||||
/*
 * Expectation callback for modes whose output is one constant value.
 * cookie points at a uint8_t holding that value; the position is unused.
 */
static uint8_t expect_uniform(int r, int c, void *cookie)
{
    const uint8_t *value = cookie;

    (void)r;
    (void)c;
    return *value;
}
|
||||
|
||||
/* Cookie for expect_vertical: t points at the 16 top-neighbour values. */
struct vertical_ctx { const int *t; };

/*
 * Expectation callback for a column-replicating pattern: the expected
 * sample at any row of column c is t[c], truncated to uint8_t.
 */
static uint8_t expect_vertical(int r, int c, void *cookie)
{
    const struct vertical_ctx *ctx = cookie;

    (void)r;
    return (uint8_t) ctx->t[c];
}
|
||||
|
||||
/* Cookie for expect_horizontal: l points at the 16 left-neighbour values. */
struct horizontal_ctx { const int *l; };

/*
 * Expectation callback for a row-replicating pattern: the expected
 * sample at any column of row r is l[r], truncated to uint8_t.
 */
static uint8_t expect_horizontal(int r, int c, void *cookie)
{
    const struct horizontal_ctx *ctx = cookie;

    (void)c;
    return (uint8_t) ctx->l[r];
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int fail = 0;
|
||||
|
||||
/* --- Mode 0 Vertical: each col = top[col] --- */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[16], l[16];
|
||||
for (int i = 0; i < 16; i++) { t[i] = 10 + i; l[i] = 0; }
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_16x16_vertical_ref(&buf[1][1], STRIDE);
|
||||
struct vertical_ctx vc = { t };
|
||||
fail |= check(buf, "Vertical (mode 0)", expect_vertical, &vc);
|
||||
}
|
||||
|
||||
/* --- Mode 1 Horizontal: each row = left[row] --- */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[16] = {0}, l[16];
|
||||
for (int i = 0; i < 16; i++) l[i] = 50 + i;
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_16x16_horizontal_ref(&buf[1][1], STRIDE);
|
||||
struct horizontal_ctx hc = { l };
|
||||
fail |= check(buf, "Horizontal (mode 1)", expect_horizontal, &hc);
|
||||
}
|
||||
|
||||
/* --- Mode 2 DC: ((sum + 16) >> 5) --- */
|
||||
/* All top = 2, all left = 6: sum = 32 + 96 = 128, +16 = 144,
|
||||
* >>5 = 144/32 = 4. */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[16], l[16];
|
||||
for (int i = 0; i < 16; i++) { t[i] = 2; l[i] = 6; }
|
||||
set_ctx(buf, 99, t, l);
|
||||
daedalus_h264_pred_16x16_dc_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp_val = 4;
|
||||
fail |= check(buf, "DC (mode 2)", expect_uniform, &exp_val);
|
||||
}
|
||||
|
||||
/* --- Mode 3 Plane: uniform neighbours → uniform output --- */
|
||||
/* H=V=0 when neighbours are uniform. a = 16*(p+p) = 32p.
|
||||
* pred[y][x] = (32p + 0 + 0 + 16) >> 5 = (32p + 16) >> 5 = p
|
||||
* (exact integer for any p, since 32p/32 = p and +16/32 = 0).
|
||||
* Verifies the orientation-free portion of the formula. */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[16], l[16];
|
||||
for (int i = 0; i < 16; i++) { t[i] = 100; l[i] = 100; }
|
||||
set_ctx(buf, 100, t, l); /* uniform tl too — H/V sums actually zero */
|
||||
daedalus_h264_pred_16x16_plane_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp_val = 100;
|
||||
fail |= check(buf, "Plane (mode 3, uniform)", expect_uniform, &exp_val);
|
||||
}
|
||||
|
||||
/* --- Mode 3 Plane: gradient sanity ---
|
||||
* Top row = 0..15 (gradient), left col = 0..15, tl = 0.
|
||||
* H = sum_{i=0..7} (i+1) * (t[8+i] - t[6-i] for i<7; or t[15]-tl=15 for i=7)
|
||||
* = 1*(8-6) + 2*(9-5) + 3*(10-4) + 4*(11-3) + 5*(12-2) + 6*(13-1)
|
||||
* + 7*(14-0) + 8*(15-0)
|
||||
* = 2 + 8 + 18 + 32 + 50 + 72 + 98 + 120 = 400
|
||||
* V = same shape on left col = 400
|
||||
* b = (5*400 + 32) >> 6 = 2032 >> 6 = 31
|
||||
* c = (5*400 + 32) >> 6 = 31
|
||||
* a = 16 * (l[15] + t[15]) = 16 * (15 + 15) = 480
|
||||
* pred[0][0] = (480 + 31*(-7) + 31*(-7) + 16) >> 5
|
||||
* = (480 - 217 - 217 + 16) >> 5
|
||||
* = 62 >> 5 = 1
|
||||
* pred[15][15] = (480 + 31*8 + 31*8 + 16) >> 5
|
||||
* = (480 + 248 + 248 + 16) >> 5
|
||||
* = 992 >> 5 = 31
|
||||
* Just spot-check those two corners. */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[16], l[16];
|
||||
for (int i = 0; i < 16; i++) { t[i] = i; l[i] = i; }
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_16x16_plane_ref(&buf[1][1], STRIDE);
|
||||
uint8_t tl_actual = buf[1 + 0][1 + 0];
|
||||
uint8_t br_actual = buf[1 + 15][1 + 15];
|
||||
int spot_fail = 0;
|
||||
if (tl_actual != 1) { fprintf(stderr, "Plane gradient pred[0][0] = %u, expected 1\n", tl_actual); spot_fail = 1; }
|
||||
if (br_actual != 31) { fprintf(stderr, "Plane gradient pred[15][15] = %u, expected 31\n", br_actual); spot_fail = 1; }
|
||||
if (!spot_fail) printf(" %-30s PASS (corners 1, 31)\n", "Plane (mode 3, gradient)");
|
||||
else printf(" %-30s FAIL\n", "Plane (mode 3, gradient)");
|
||||
fail |= spot_fail;
|
||||
}
|
||||
|
||||
if (fail == 0) printf("\nALL Intra_16x16 mode references PASS\n");
|
||||
else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
|
||||
return fail ? 1 : 0;
|
||||
}
|
||||
Reference in New Issue
Block a user