h264: Intra_8x8 chroma prediction — 4-mode C reference + spec gates
Third intra-prediction primitive after PR #12 (Intra_4x4 luma) and PR #13 (Intra_16x16 luma). Covers Intra_8x8 chroma per H.264 §8.3.3: 4 modes used for BOTH Cb and Cr planes at 4:2:0. Mode quirks worth flagging in code review: - Mode 0 DC is asymmetric per quadrant. The 8x8 chroma block splits into four 4x4 quadrants with different DC formulas: (0,0) top-left : (sum_top[0..3] + sum_left[0..3] + 4) >> 3 (0,1) top-right : (sum_top[4..7] + 2) >> 2 (1,0) bot-left : (sum_left[4..7] + 2) >> 2 (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3 The top-right quadrant deliberately IGNORES the top-left half even though it's available — that's per spec §8.3.3.2. - Mode 3 Plane uses slope coefficient 34 (not 5 like Intra_16x16 luma). Centre is (x-3, y-3) instead of (x-7, y-7). Sums span 4 differences instead of 8. Easy to copy-paste-bug from the luma Plane if you don't notice the constants change. Test highlights: - DC quadrants: distinct expected values per quadrant (16, 16, 40, 28 from asymmetric top/left halves) — any quadrant mix-up would surface immediately. Hand-derived from the formulas in the test comment. - Plane uniform: all-100 context → all-100 output (a = 3200, H = V = 0, (3200+16) >> 5 = 100 exactly). - Plane gradient: top + left = 0..7, hand-derives pred[0][0] = 1 and pred[7][7] = 15 via the full arithmetic chain (H = V = 56, b = c = 30, a = 224). Same hand-traced spec-walkthrough as the Intra_16x16 Plane gradient test. Verified on hertz: $ ./build/test_intra_pred_chroma8x8 Horizontal (mode 1) PASS Vertical (mode 2) PASS DC quadrants (mode 0) PASS Plane uniform (mode 3) PASS Plane gradient (mode 3) PASS (corners 1, 15) ALL Intra_8x8 chroma mode references PASS All 5 tests PASS first try. The DC quadrant correctness is meaningful (4 different formulas in one kernel) and the Plane gradient corners validate the slope=34 + centre=(x-3,y-3) constants vs the luma equivalents. 
Combined coverage after this PR: - Intra_4x4 luma: 9 modes ✓ (PR #12, all 9 PASS) - Intra_16x16 luma: 4 modes ✓ (PR #13, all 5 tests PASS) - Intra_8x8 chroma: 4 modes ✓ (this PR, all 5 tests PASS) - Intra_8x8 luma (High profile): 9 modes + smoothing — pending. Remaining backlog: Intra_8x8 luma (High profile, 9 modes + 1-2-1 smoothing pre-filter — distinct algorithm from Intra_4x4 because of the pre-filter), neighbour-availability fallback, dispatch wrappers.
This commit is contained in:
@@ -548,6 +548,15 @@ add_executable(test_intra_pred_16x16
|
||||
)
|
||||
target_compile_options(test_intra_pred_16x16 PRIVATE -O2)
|
||||
|
||||
# H.264 Intra_8x8 chroma prediction (4 modes: DC, H, V, Plane) —
# reference + tests. DC is per-quadrant (asymmetric); Plane uses
# slope coefficient 34 instead of luma's 5.
#
# The test executable is built from exactly two translation units: the
# test driver and the standalone C reference it exercises.
add_executable(test_intra_pred_chroma8x8
    tests/test_intra_pred_chroma8x8.c
    tests/h264_intra_pred_chroma8x8_ref.c
)
target_compile_options(test_intra_pred_chroma8x8 PRIVATE -O2)
|
||||
|
||||
add_executable(bench_pool_overhead tests/bench_pool_overhead.c)
|
||||
target_link_libraries(bench_pool_overhead PRIVATE daedalus_core)
|
||||
target_compile_options(bench_pool_overhead PRIVATE -O2)
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Standalone bit-exact C reference for H.264 chroma Intra_8x8
|
||||
* prediction modes (per H.264 §8.3.3), used for both Cb and Cr
|
||||
* planes at 4:2:0. All 4 modes.
|
||||
*
|
||||
* Mode index → name (per H.264 Table 7-16):
|
||||
* 0 = DC (per-quadrant — asymmetric, see §8.3.3.2)
|
||||
* 1 = Horizontal
|
||||
* 2 = Vertical
|
||||
* 3 = Plane (slope coefficient 34, distinct from luma's 5)
|
||||
*
|
||||
* Calling convention (same shape as luma intra refs):
|
||||
* pred_chroma8x8_<mode>(uint8_t *dst, ptrdiff_t stride)
|
||||
*
|
||||
* `dst` points at row 0, col 0 of the 8x8 output block (single
|
||||
* component plane — Cb or Cr, dispatched independently). Neighbours:
|
||||
* top[0..7] = dst[-stride + 0 .. -stride + 7]
|
||||
* top-left = dst[-stride - 1]
|
||||
* left[0..7] = dst[ 0*stride - 1 .. 7*stride - 1]
|
||||
*
|
||||
* AVAILABILITY: assumes all neighbours valid (interior-MB case).
|
||||
* The H.264 spec defines per-quadrant fallback for the DC mode at
|
||||
* MB boundaries; that's caller-side via the libavcodec intercept.
|
||||
*
|
||||
* License: BSD-2-Clause.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Clamp `v` to the unsigned 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    if (v > 255) {
        return 255;
    }
    return v;
}
|
||||
|
||||
/* Fill one 4x4 quadrant whose top-left sample is `blk` with the value `v`. */
static void chroma8x8_dc_fill_4x4(uint8_t *blk, ptrdiff_t stride, uint8_t v)
{
    for (int r = 0; r < 4; r++) {
        for (int c = 0; c < 4; c++) {
            blk[r * stride + c] = v;
        }
    }
}

/* Mode 0 — DC (per-quadrant, 4:2:0 layout per §8.3.3.2).
 *
 * The 8×8 block is split into four 4×4 quadrants.  For interior
 * MBs (all neighbours available), the DC value per quadrant uses:
 *   (0,0) top-left  : (sum_top[0..3] + sum_left[0..3] + 4) >> 3
 *   (0,1) top-right : (sum_top[4..7]                  + 2) >> 2
 *   (1,0) bot-left  : (                 sum_left[4..7] + 2) >> 2
 *   (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3
 *
 * The asymmetry is intentional: the top-right quadrant averages only
 * the four samples directly above it (top[4..7]) and the bottom-left
 * quadrant only the four samples directly to its left (left[4..7]),
 * even though the other neighbours are present.
 *
 * dst    : row 0, col 0 of the 8×8 output block; the row above
 *          (dst[-stride + 0..7]) and the column to the left
 *          (dst[r*stride - 1], r = 0..7) must be readable.
 * stride : distance in bytes between vertically adjacent samples.
 *
 * All four neighbour sums are taken before any output sample is
 * written; the neighbours themselves are never modified.
 */
void daedalus_h264_pred_chroma8x8_dc_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int top_lo = 0, top_hi = 0, left_lo = 0, left_hi = 0;

    for (int i = 0; i < 4; i++) {
        top_lo  += top[i];
        top_hi  += top[4 + i];
        left_lo += dst[i * stride - 1];
        left_hi += dst[(4 + i) * stride - 1];
    }

    /* Each sum covers at most 8 samples of <= 255, so every DC value
     * fits in a uint8_t after the rounding shift. */
    const uint8_t dc00 = (uint8_t)((top_lo + left_lo + 4) >> 3); /* top-left  */
    const uint8_t dc01 = (uint8_t)((top_hi           + 2) >> 2); /* top-right */
    const uint8_t dc10 = (uint8_t)((left_hi          + 2) >> 2); /* bot-left  */
    const uint8_t dc11 = (uint8_t)((top_hi + left_hi + 4) >> 3); /* bot-right */

    chroma8x8_dc_fill_4x4(dst,                  stride, dc00);
    chroma8x8_dc_fill_4x4(dst + 4,              stride, dc01);
    chroma8x8_dc_fill_4x4(dst + 4 * stride,     stride, dc10);
    chroma8x8_dc_fill_4x4(dst + 4 * stride + 4, stride, dc11);
}
|
||||
|
||||
/* Mode 1 — Horizontal: each row = left[row].
 *
 * Every one of the 8 output rows is filled with the sample immediately
 * to its left (dst[row*stride - 1]).  The top row and top-left corner
 * are not read.
 *
 * dst    : row 0, col 0 of the 8×8 output block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        uint8_t *row = dst + r * stride;
        const uint8_t left = row[-1];

        for (int c = 0; c < 8; c++) {
            row[c] = left;
        }
    }
}
|
||||
|
||||
/* Mode 2 — Vertical: each col = top[col].
 *
 * Every one of the 8 output rows is a copy of the row directly above
 * the block (dst[-stride + 0..7]).  The left column and top-left corner
 * are not read.
 *
 * dst    : row 0, col 0 of the 8×8 output block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_vertical_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;

    for (int r = 0; r < 8; r++) {
        uint8_t *row = dst + r * stride;

        for (int c = 0; c < 8; c++) {
            row[c] = top[c];
        }
    }
}
|
||||
|
||||
/* Arithmetic (floor) right shift: returns floor(v / 2^s).
 *
 * The plane formulas need a sign-preserving shift, but C leaves `>>` on
 * a negative signed value implementation-defined.  Negative inputs are
 * therefore handled with non-negative arithmetic only, which keeps the
 * reference bit-exact on any conforming compiler.
 */
static int chroma8x8_plane_floor_shr(int v, int s)
{
    if (v >= 0) {
        return v >> s;
    }
    /* floor(v / 2^s) == -ceil(-v / 2^s) for v < 0. */
    return -((-v + (1 << s) - 1) >> s);
}

/* Mode 3 — Plane (per H.264 §8.3.3.4):
 *   H = sum_{i=0..3} (i+1) * (p[4+i, -1] - p[2-i, -1])   ; i=3 uses p[-1,-1]
 *   V = sum_{j=0..3} (j+1) * (p[-1, 4+j] - p[-1, 2-j])   ; j=3 uses p[-1,-1]
 *   b = (34 * H + 32) >> 6
 *   c = (34 * V + 32) >> 6
 *   a = 16 * (p[-1, 7] + p[7, -1])
 *   pred[y][x] = Clip1((a + b*(x - 3) + c*(y - 3) + 16) >> 5)
 *
 * Distinct from the Intra_16x16 luma Plane:
 *   - Slope coefficient is 34 (not 5).
 *   - Centre is (x-3, y-3) (not x-7, y-7).
 *   - Spans 4 differences per sum (not 8).
 *
 * dst    : row 0, col 0 of the 8×8 output block; the row above, the
 *          column to the left and the top-left corner must be readable.
 * stride : distance in bytes between vertically adjacent samples.
 *
 * H, V and a are fully computed from the neighbours before any output
 * sample is written.  b, c and the per-pixel sum can be negative, so
 * every `>>` of the formulas goes through the floor-shift helper.
 */
void daedalus_h264_pred_chroma8x8_plane_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    const int corner = top[-1];          /* p[-1,-1] */
    int H = 0, V = 0;

    for (int i = 0; i < 4; i++) {
        const int t_right = top[4 + i];
        const int t_left  = (i == 3) ? corner : top[2 - i];
        H += (i + 1) * (t_right - t_left);
    }
    for (int j = 0; j < 4; j++) {
        const int l_bot = dst[(4 + j) * stride - 1];
        const int l_top = (j == 3) ? corner : dst[(2 - j) * stride - 1];
        V += (j + 1) * (l_bot - l_top);
    }

    const int b = chroma8x8_plane_floor_shr(34 * H + 32, 6);
    const int c = chroma8x8_plane_floor_shr(34 * V + 32, 6);
    const int a = 16 * (dst[7 * stride - 1] + top[7]);

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int v = chroma8x8_plane_floor_shr(
                a + b * (x - 3) + c * (y - 3) + 16, 5);

            /* Clip1: clamp to the 8-bit sample range. */
            if (v < 0) {
                v = 0;
            } else if (v > 255) {
                v = 255;
            }
            dst[y * stride + x] = (uint8_t) v;
        }
    }
}
|
||||
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Tests the 4 H.264 Intra_8x8 chroma prediction modes against
|
||||
* spec-derived expected patterns. Same buffer layout idea as the
|
||||
* other intra tests: a buffer that holds the 8x8 output + 1-pixel
|
||||
* top/left context + 1-pixel top-left corner.
|
||||
*
|
||||
* row 0: [tl][t0..t7]
|
||||
* row 1: [l0][output row 0]
|
||||
* ...
|
||||
* row 8: [l7][output row 7]
|
||||
*
|
||||
* Dimensions: 9 rows × 9 cols. dst (passed to pred fns) = &buf[1][1].
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
extern void daedalus_h264_pred_chroma8x8_dc_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_chroma8x8_horizontal_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_chroma8x8_vertical_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
extern void daedalus_h264_pred_chroma8x8_plane_ref(uint8_t *dst, ptrdiff_t stride);
|
||||
|
||||
/* Test buffer geometry: 8 output rows/cols plus one row/col of context. */
#define STRIDE 9
#define ROWS   9

/* Initialise a test buffer with prediction context.
 *
 * Every byte is first set to the sentinel 0xff, then the neighbours are
 * written in:
 *   buf[0][0]       = tl       (top-left corner)
 *   buf[0][1..8]    = t[0..7]  (top row)
 *   buf[1..8][0]    = l[0..7]  (left column)
 * The 8×8 output area buf[1..8][1..8] is left at 0xff.  Values are
 * narrowed to uint8_t on store.
 */
static void set_ctx(uint8_t buf[ROWS][STRIDE], int tl,
                    const int t[8], const int l[8])
{
    for (int r = 0; r < ROWS; r++) {
        for (int c = 0; c < STRIDE; c++) {
            buf[r][c] = 0xff;
        }
    }

    buf[0][0] = (uint8_t) tl;
    for (int c = 0; c < 8; c++) {
        buf[0][1 + c] = (uint8_t) t[c];
    }
    for (int r = 0; r < 8; r++) {
        buf[1 + r][0] = (uint8_t) l[r];
    }
}
|
||||
|
||||
/* Compare the 8×8 output area of a test buffer against `expect`.
 *
 * buf    : 9×9 test buffer; output sample (r, c) lives at buf[1+r][1+c].
 * name   : label printed in the left-aligned result column.
 * expect : expected value for each of the 64 output samples.
 *
 * Prints one PASS line, or one FAIL line giving the number of
 * mismatching samples and the position/values of the first mismatch in
 * row-major order.  Returns 0 when all samples match, 1 otherwise.
 */
static int check_per_cell(const uint8_t buf[ROWS][STRIDE], const char *name,
                          const uint8_t expect[8][8])
{
    int diff = 0;
    int first_r = 0, first_c = 0;
    /* unsigned so the arguments match the %u conversions below */
    unsigned first_got = 0, first_exp = 0;

    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            const uint8_t got  = buf[1 + r][1 + c];
            const uint8_t want = expect[r][c];

            if (got == want) {
                continue;
            }
            if (diff == 0) {
                first_r = r;
                first_c = c;
                first_got = got;
                first_exp = want;
            }
            diff++;
        }
    }

    if (diff == 0) {
        printf(" %-30s PASS\n", name);
        return 0;
    }

    printf(" %-30s FAIL (%d/64 wrong, first r=%d c=%d got=%u exp=%u)\n",
           name, diff, first_r, first_c, first_got, first_exp);
    return 1;
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int fail = 0;
|
||||
|
||||
/* --- Mode 1 Horizontal --- */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[8] = {0}, l[8] = {10, 20, 30, 40, 50, 60, 70, 80};
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_chroma8x8_horizontal_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp[8][8];
|
||||
for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = (uint8_t) l[r];
|
||||
fail |= check_per_cell(buf, "Horizontal (mode 1)", exp);
|
||||
}
|
||||
|
||||
/* --- Mode 2 Vertical --- */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[8] = {15, 25, 35, 45, 55, 65, 75, 85}, l[8] = {0};
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_chroma8x8_vertical_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp[8][8];
|
||||
for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = (uint8_t) t[c];
|
||||
fail |= check_per_cell(buf, "Vertical (mode 2)", exp);
|
||||
}
|
||||
|
||||
/* --- Mode 0 DC: per-quadrant. Test with distinct halves so any
|
||||
* quadrant mix-up surfaces immediately.
|
||||
*
|
||||
* top[0..3] = 4 × 8 → sum_top_lo = 32
|
||||
* top[4..7] = 4 × 16 → sum_top_hi = 64
|
||||
* left[0..3] = 4 × 24 → sum_left_lo = 96
|
||||
* left[4..7] = 4 × 40 → sum_left_hi = 160
|
||||
*
|
||||
* dc00 = (32 + 96 + 4) >> 3 = 132/8 = 16
|
||||
* dc01 = (64 + 2) >> 2 = 66/4 = 16
|
||||
* dc10 = ( 160 + 2) >> 2 = 162/4 = 40
|
||||
* dc11 = (64 + 160 + 4) >> 3 = 228/8 = 28
|
||||
*/
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[8] = { 8, 8, 8, 8, 16, 16, 16, 16 };
|
||||
int l[8] = { 24, 24, 24, 24, 40, 40, 40, 40 };
|
||||
set_ctx(buf, 99, t, l);
|
||||
daedalus_h264_pred_chroma8x8_dc_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp[8][8] = {
|
||||
{16,16,16,16, 16,16,16,16},
|
||||
{16,16,16,16, 16,16,16,16},
|
||||
{16,16,16,16, 16,16,16,16},
|
||||
{16,16,16,16, 16,16,16,16},
|
||||
{40,40,40,40, 28,28,28,28},
|
||||
{40,40,40,40, 28,28,28,28},
|
||||
{40,40,40,40, 28,28,28,28},
|
||||
{40,40,40,40, 28,28,28,28},
|
||||
};
|
||||
fail |= check_per_cell(buf, "DC quadrants (mode 0)", exp);
|
||||
}
|
||||
|
||||
/* --- Mode 3 Plane (uniform): H = V = 0; a = 16 * (100 + 100) = 3200.
|
||||
* pred[y][x] = (3200 + 0 + 0 + 16) >> 5 = 3216 >> 5 = 100. */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[8], l[8];
|
||||
for (int i = 0; i < 8; i++) { t[i] = 100; l[i] = 100; }
|
||||
set_ctx(buf, 100, t, l);
|
||||
daedalus_h264_pred_chroma8x8_plane_ref(&buf[1][1], STRIDE);
|
||||
uint8_t exp[8][8];
|
||||
for (int r = 0; r < 8; r++) for (int c = 0; c < 8; c++) exp[r][c] = 100;
|
||||
fail |= check_per_cell(buf, "Plane uniform (mode 3)", exp);
|
||||
}
|
||||
|
||||
/* --- Mode 3 Plane gradient sanity ---
|
||||
* t = 0..7, l = 0..7, tl = 0.
|
||||
* H = 1*(t[4]-t[2]) + 2*(t[5]-t[1]) + 3*(t[6]-t[0]) + 4*(t[7]-tl)
|
||||
* = 1*(4-2) + 2*(5-1) + 3*(6-0) + 4*(7-0)
|
||||
* = 2 + 8 + 18 + 28 = 56
|
||||
* V = same shape on left = 56
|
||||
* b = (34*56 + 32) >> 6 = 1936 >> 6 = 30
|
||||
* c = 30
|
||||
* a = 16 * (l[7] + t[7]) = 16 * (7 + 7) = 224
|
||||
*
|
||||
* pred[0][0] = (224 + 30*(-3) + 30*(-3) + 16) >> 5
|
||||
* = (224 - 90 - 90 + 16) >> 5
|
||||
* = 60 >> 5 = 1
|
||||
* pred[7][7] = (224 + 30*4 + 30*4 + 16) >> 5
|
||||
* = (224 + 120 + 120 + 16) >> 5
|
||||
* = 480 >> 5 = 15
|
||||
* Spot-check those two corners. */
|
||||
{
|
||||
uint8_t buf[ROWS][STRIDE];
|
||||
int t[8], l[8];
|
||||
for (int i = 0; i < 8; i++) { t[i] = i; l[i] = i; }
|
||||
set_ctx(buf, 0, t, l);
|
||||
daedalus_h264_pred_chroma8x8_plane_ref(&buf[1][1], STRIDE);
|
||||
uint8_t tl_actual = buf[1 + 0][1 + 0];
|
||||
uint8_t br_actual = buf[1 + 7][1 + 7];
|
||||
int spot_fail = 0;
|
||||
if (tl_actual != 1) { fprintf(stderr, "Plane gradient pred[0][0] = %u, expected 1\n", tl_actual); spot_fail = 1; }
|
||||
if (br_actual != 15) { fprintf(stderr, "Plane gradient pred[7][7] = %u, expected 15\n", br_actual); spot_fail = 1; }
|
||||
if (!spot_fail) printf(" %-30s PASS (corners 1, 15)\n", "Plane gradient (mode 3)");
|
||||
else printf(" %-30s FAIL\n", "Plane gradient (mode 3)");
|
||||
fail |= spot_fail;
|
||||
}
|
||||
|
||||
if (fail == 0) printf("\nALL Intra_8x8 chroma mode references PASS\n");
|
||||
else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
|
||||
return fail ? 1 : 0;
|
||||
}
|
||||
Reference in New Issue
Block a user