Files
daedalus-fourier/src/h264_intra_pred_4x4.c
T
claude-noether df9e1c9d78 h264: promote Intra_4x4 luma prediction (9 modes) to public API
PR #12 added the 9 Intra_4x4 luma intra prediction modes as test-only
spec references in tests/.  This PR promotes them to public src/
symbols so consumers (the eventual marfrit-packages substitution-arc
patch 0014) can link against them.

  Moved: tests/h264_intra_pred_4x4_ref.c → src/h264_intra_pred_4x4.c
  Renamed: daedalus_h264_pred_4x4_<mode>_ref → daedalus_h264_pred_4x4_<mode>
           (9 functions: vertical/horizontal/dc/ddl/ddr/vr/hd/vl/hu)

The src/ implementation is byte-for-byte the same code as the
test-only ref; this PR is plain plumbing.  The test binary now
links against daedalus_core to pull in the public symbols (instead
of compiling the ref file directly), exercising the path that real
consumers will use.

Same promotion shape as PR #25 (chroma DC Hadamard).

Verified on hertz:

  $ ./build/test_intra_pred_4x4
    Vertical (mode 0)          PASS
    Horizontal (mode 1)        PASS
    DC (mode 2)                PASS
    DiagDownLeft (mode 3)      PASS
    DiagDownRight (mode 4)     PASS
    VerticalRight (mode 5)     PASS
    HorizontalDown (mode 6)    PASS
    VerticalLeft (mode 7)      PASS
    HorizontalUp (mode 8)      PASS
    VR asym (sanity)           PASS

  ALL 10 intra-4x4 mode references PASS

  $ nm -g build/libdaedalus_core.a | grep "T daedalus_h264_pred_4x4"
  (9 symbols exported)

Follow-ups (same promotion pattern, can land in parallel):
  - Intra_16x16 luma (4 modes, PR #13)
  - Intra_8x8 chroma (4 modes, PR #14)
  - Intra_8x8 luma (9 modes, PRs #21 + #22)

Once all 26 intra modes are in the public API, the marfrit-packages
substitution arc can route H264PredContext's pred function pointer
tables through daedalus alongside the IDCT / deblock / qpel / DC
Hadamard substitutions already in place.
2026-05-25 14:53:37 +02:00

239 lines
8.1 KiB
C

/*
* Standalone bit-exact C reference for H.264 luma Intra_4x4
* prediction modes (per H.264 spec §8.3.1.2). All 9 modes.
*
* Mode index → name (per H.264 Table 8-2):
* 0 = Vertical
* 1 = Horizontal
* 2 = DC
* 3 = Diagonal_Down_Left
* 4 = Diagonal_Down_Right
* 5 = Vertical_Right
* 6 = Horizontal_Down
* 7 = Vertical_Left
* 8 = Horizontal_Up
*
* Calling convention matches FFmpeg's h264pred:
* pred_4x4_<mode>(uint8_t *dst, ptrdiff_t stride)
*
* `dst` points at row 0, col 0 of the 4x4 output block. Neighbour
* pixels come from the already-decoded surrounding pixels in the same
* buffer:
* top-left = dst[-stride - 1]
* top[0..3] = dst[-stride + 0 .. -stride + 3]
* top-right = dst[-stride + 4 .. -stride + 7] (DDL / VL only)
* left[0..3] = dst[ 0*stride - 1 .. 3*stride - 1]
*
* AVAILABILITY: this reference assumes ALL neighbours are available
* (the "interior MB" case). The H.264 spec defines fallback behaviour
* for unavailable neighbours (e.g. DC averages only the available
* side, top-right substitution from top[3] for DDL/VL near the right
* frame edge); those branches are NOT modelled here. Tests must
* exercise the kernel with all 13 neighbour bytes valid. The eventual
* libavcodec intercept handles availability before calling.
*
* License: BSD-2-Clause for the reference + tests; the underlying
* algorithm is from H.264/ITU-T H.264 (2003) and AVC standards, free
* to implement.
*/
#include <stdint.h>
#include <stddef.h>
/*
 * avg3 - three-tap smoothing filter with 1:2:1 weights.
 *
 * Evaluates (a + 2*b + c + 2) >> 2: the middle sample b is counted twice
 * and a bias of 2 is added before the divide-by-four shift. The result is
 * narrowed to uint8_t on return.
 */
static inline uint8_t avg3(int a, int b, int c)
{
    const int weighted_sum = a + 2 * b + c;
    const int biased = weighted_sum + 2;

    return (uint8_t)(biased >> 2);
}
/*
 * avg2 - two-tap mean of a pair of samples.
 *
 * Evaluates (a + b + 1) >> 1; the bias of 1 is added before the halving
 * shift. The result is narrowed to uint8_t on return.
 */
static inline uint8_t avg2(int a, int b)
{
    const int biased = a + b + 1;

    return (uint8_t)(biased >> 1);
}
/*
 * Mode 0 — Vertical.
 *
 * Copies the four samples directly above the block (dst[-stride + 0..3])
 * into each of the four output rows, so every column is constant.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_vertical(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;

    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;

        for (int x = 0; x < 4; x++) {
            row[x] = above[x];
        }
    }
}
/*
 * Mode 1 — Horizontal.
 *
 * For each of the four rows, reads the sample immediately to the left of
 * the block (dst[r*stride - 1]) and replicates it across the row.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_horizontal(uint8_t *dst, ptrdiff_t stride)
{
    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t left = row[-1];

        row[0] = left;
        row[1] = left;
        row[2] = left;
        row[3] = left;
    }
}
/*
 * Mode 2 — DC.
 *
 * Sums the four samples above the block and the four samples to its left,
 * computes (sum + 4) >> 3, and fills all sixteen output samples with that
 * single value. Both neighbour groups are always read; no availability
 * fallback is applied here.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_dc(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    const int top_sum = above[0] + above[1] + above[2] + above[3];
    const int left_sum = dst[0 * stride - 1] + dst[1 * stride - 1]
                       + dst[2 * stride - 1] + dst[3 * stride - 1];
    /* +4 is the rounding bias for the divide-by-eight shift. */
    const uint8_t dc = (uint8_t)((top_sum + left_sum + 4) >> 3);

    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;

        for (int x = 0; x < 4; x++) {
            row[x] = dc;
        }
    }
}
/*
 * Mode 3 — Diagonal_Down_Left.
 *
 * Reads the eight samples top[0..7] from the row above the block (the last
 * four are the top-right neighbours) and smooths them with avg3() into
 * seven diagonal values. Output sample (r, c) takes diagonal value r + c,
 * so each anti-diagonal of the block is constant.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_ddl(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    int edge[9];
    uint8_t diag[7];

    /* Snapshot the neighbours before any output sample is written. */
    for (int i = 0; i < 8; i++) {
        edge[i] = above[i];
    }
    /* Repeat the last sample so the final filter window is (t6, t7, t7). */
    edge[8] = edge[7];

    for (int k = 0; k < 7; k++) {
        diag[k] = avg3(edge[k], edge[k + 1], edge[k + 2]);
    }

    /* Row y is the four-entry window of diag[] starting at index y. */
    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t *src = diag + y;

        for (int x = 0; x < 4; x++) {
            row[x] = src[x];
        }
    }
}
/*
 * Mode 4 — Diagonal_Down_Right.
 *
 * Neighbours used: left[0..3], the top-left corner, and top[0..3]. They are
 * laid out as one contiguous edge running from the bottom-left sample up
 * to the corner and then right along the top row; a 3-tap avg3() pass over
 * that edge yields seven diagonal values. Output sample (r, c) takes the
 * value for diagonal (c - r), so each down-right diagonal is constant.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_ddr(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    /* edge[] = { l3, l2, l1, l0, tl, t0, t1, t2, t3 } */
    const int edge[9] = {
        dst[3 * stride - 1],
        dst[2 * stride - 1],
        dst[1 * stride - 1],
        dst[0 * stride - 1],
        above[-1],
        above[0],
        above[1],
        above[2],
        above[3],
    };
    uint8_t diag[7];

    /* diag[k] corresponds to (col - row) == k - 3. */
    for (int k = 0; k < 7; k++) {
        diag[k] = avg3(edge[k], edge[k + 1], edge[k + 2]);
    }

    /* Row y is the four-entry window of diag[] starting at index 3 - y. */
    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t *src = diag + (3 - y);

        for (int x = 0; x < 4; x++) {
            row[x] = src[x];
        }
    }
}
/*
 * Mode 5 — Vertical_Right.
 *
 * Neighbours used: the top-left corner, top[0..3] and left[0..2].
 * Rows 0 and 1 are built from the top edge: row 0 with the 2-tap avg2()
 * filter, row 1 with the 3-tap avg3() filter. Rows 2 and 3 are rows 0 and
 * 1 shifted one column to the right, with the vacated first column filled
 * from a 3-tap filter running down the left edge.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_vr(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    const int tl = above[-1];
    const int t0 = above[0];
    const int t1 = above[1];
    const int t2 = above[2];
    const int t3 = above[3];
    const int l0 = dst[0 * stride - 1];
    const int l1 = dst[1 * stride - 1];
    const int l2 = dst[2 * stride - 1];

    /* Half-sample (2-tap) and full-sample (3-tap) rows along the top edge. */
    const uint8_t half[4] = {
        avg2(tl, t0), avg2(t0, t1), avg2(t1, t2), avg2(t2, t3),
    };
    const uint8_t full[4] = {
        avg3(l0, tl, t0), avg3(tl, t0, t1), avg3(t0, t1, t2), avg3(t1, t2, t3),
    };
    /* First-column samples of rows 2 and 3, taken from the left edge. */
    const uint8_t lead2 = avg3(tl, l0, l1);
    const uint8_t lead3 = avg3(l0, l1, l2);

    uint8_t *row0 = dst;
    uint8_t *row1 = dst + 1 * stride;
    uint8_t *row2 = dst + 2 * stride;
    uint8_t *row3 = dst + 3 * stride;

    for (int x = 0; x < 4; x++) {
        row0[x] = half[x];
        row1[x] = full[x];
    }

    row2[0] = lead2;
    row3[0] = lead3;
    for (int x = 1; x < 4; x++) {
        row2[x] = half[x - 1];
        row3[x] = full[x - 1];
    }
}
/*
 * Mode 6 — Horizontal_Down.
 *
 * Neighbours used: the top-left corner, top[0..2] and left[0..3].
 * The ten distinct predicted values are computed once into a sequence that
 * runs from the bottom-left of the block towards the top-right; each
 * output row is a four-entry window of that sequence, and moving down one
 * row slides the window back by two entries.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_hd(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    const int tl = above[-1];
    const int t0 = above[0];
    const int t1 = above[1];
    const int t2 = above[2];
    const int l0 = dst[0 * stride - 1];
    const int l1 = dst[1 * stride - 1];
    const int l2 = dst[2 * stride - 1];
    const int l3 = dst[3 * stride - 1];

    const uint8_t seq[10] = {
        avg2(l2, l3),     avg3(l1, l2, l3),   /* row 3, cols 0-1 */
        avg2(l1, l2),     avg3(l0, l1, l2),   /* row 2, cols 0-1 */
        avg2(l0, l1),     avg3(tl, l0, l1),   /* row 1, cols 0-1 */
        avg2(tl, l0),     avg3(l0, tl, t0),   /* row 0, cols 0-1 */
        avg3(tl, t0, t1), avg3(t0, t1, t2),   /* row 0, cols 2-3 */
    };

    /* Row y starts at seq[6 - 2*y]. */
    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t *src = seq + (6 - 2 * y);

        for (int x = 0; x < 4; x++) {
            row[x] = src[x];
        }
    }
}
/*
 * Mode 7 — Vertical_Left.
 *
 * Neighbours used: top[0..6] from the row above the block (top[4..6] are
 * top-right samples). top[7] does not contribute to any of the sixteen
 * outputs, so it is deliberately not read.
 *
 * Even rows use the 2-tap avg2() filter and odd rows the 3-tap avg3()
 * filter over the top edge; rows 2 and 3 are the same filters started one
 * sample further to the right than rows 0 and 1.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_vl(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int t[7];
    uint8_t two_tap[5];
    uint8_t three_tap[5];

    /* Snapshot the neighbours before any output sample is written. */
    for (int i = 0; i < 7; i++) {
        t[i] = top[i];
    }

    /* The last 3-tap window is (t4, t5, t6), hence only seven inputs. */
    for (int i = 0; i < 5; i++) {
        two_tap[i]   = avg2(t[i], t[i + 1]);
        three_tap[i] = avg3(t[i], t[i + 1], t[i + 2]);
    }

    for (int r = 0; r < 4; r++) {
        /* Row parity picks the filter; r >> 1 is the rightward shift. */
        const uint8_t *src = ((r & 1) ? three_tap : two_tap) + (r >> 1);
        uint8_t *row = dst + r * stride;

        for (int c = 0; c < 4; c++) {
            row[c] = src[c];
        }
    }
}
/*
 * Mode 8 — Horizontal_Up.
 *
 * Neighbours used: left[0..3] only.
 * The predicted values form a sequence that alternates 2-tap and 3-tap
 * filters down the left edge and then saturates at left[3]. Each output
 * row is a four-entry window of that sequence, advancing by two entries
 * per row.
 *
 * dst:    row 0, col 0 of the 4x4 output block.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_4x4_hu(uint8_t *dst, ptrdiff_t stride)
{
    const int l0 = dst[0 * stride - 1];
    const int l1 = dst[1 * stride - 1];
    const int l2 = dst[2 * stride - 1];
    const int l3 = dst[3 * stride - 1];
    const uint8_t last = (uint8_t)l3;

    const uint8_t seq[10] = {
        avg2(l0, l1), avg3(l0, l1, l2),
        avg2(l1, l2), avg3(l1, l2, l3),
        avg2(l2, l3), avg3(l2, l3, l3),   /* l3 doubled at the edge */
        last, last, last, last,           /* past the edge: plain l3 */
    };

    /* Row y starts at seq[2*y]. */
    for (int y = 0; y < 4; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t *src = seq + 2 * y;

        for (int x = 0; x < 4; x++) {
            row[x] = src[x];
        }
    }
}