From df9e1c9d78d2189609223d7554208ff02947f244 Mon Sep 17 00:00:00 2001 From: claude-noether Date: Mon, 25 May 2026 14:53:37 +0200 Subject: [PATCH] h264: promote Intra_4x4 luma prediction (9 modes) to public API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #12 added the 9 Intra_4x4 luma intra prediction modes as test-only spec references in tests/. This PR promotes them to public src/ symbols so consumers (the eventual marfrit-packages substitution-arc patch 0014) can link against them. Moved: tests/h264_intra_pred_4x4_ref.c → src/h264_intra_pred_4x4.c Renamed: daedalus_h264_pred_4x4_<mode>_ref → daedalus_h264_pred_4x4_<mode> (9 functions: vertical/horizontal/dc/ddl/ddr/vr/hd/vl/hu) The src/ implementation is byte-for-byte the same code as the test-only ref; this PR is plain plumbing. The test binary now links against daedalus_core to pull in the public symbols (instead of compiling the ref file directly), exercising the path that real consumers will use. Same promotion shape as PR #25 (chroma DC Hadamard). Verified on hertz: $ ./build/test_intra_pred_4x4 Vertical (mode 0) PASS Horizontal (mode 1) PASS DC (mode 2) PASS DiagDownLeft (mode 3) PASS DiagDownRight (mode 4) PASS VerticalRight (mode 5) PASS HorizontalDown (mode 6) PASS VerticalLeft (mode 7) PASS HorizontalUp (mode 8) PASS VR asym (sanity) PASS ALL 10 intra-4x4 mode references PASS $ nm -g build/libdaedalus_core.a | grep "T daedalus_h264_pred_4x4" (9 symbols exported) Follow-ups (same promotion pattern, can land in parallel): - Intra_16x16 luma (4 modes, PR #13) - Intra_8x8 chroma (4 modes, PR #14) - Intra_8x8 luma (9 modes, PRs #21 + #22) Once all 26 intra modes are in the public API, the marfrit-packages substitution arc can route H264PredContext's pred function pointer tables through daedalus alongside the IDCT / deblock / qpel / DC Hadamard substitutions already in place. 
--- CMakeLists.txt | 14 +++---- include/daedalus.h | 27 +++++++++++++ .../h264_intra_pred_4x4.c | 18 ++++----- tests/test_intra_pred_4x4.c | 38 +++++++++---------- 4 files changed, 62 insertions(+), 35 deletions(-) rename tests/h264_intra_pred_4x4_ref.c => src/h264_intra_pred_4x4.c (92%) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb07a13..1f10f1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -392,6 +392,7 @@ endif() add_library(daedalus_core STATIC src/daedalus_core.c src/h264_chroma_dc.c + src/h264_intra_pred_4x4.c src/v3d_runner.c ${FFASM_SOURCES} ${FFASM_LPF_SOURCES} @@ -538,13 +539,12 @@ add_executable(test_api_opportunistic_qpu tests/test_api_opportunistic_qpu.c) target_link_libraries(test_api_opportunistic_qpu PRIVATE daedalus_core) target_compile_options(test_api_opportunistic_qpu PRIVATE -O2) -# H.264 Intra_4x4 luma prediction (9 modes) — reference + tests. -# Pure CPU + spec-derived; no daedalus_core dependency yet (this is -# the bit-exact gate for the eventual shader / dispatch wiring). -add_executable(test_intra_pred_4x4 - tests/test_intra_pred_4x4.c - tests/h264_intra_pred_4x4_ref.c -) +# H.264 Intra_4x4 luma prediction (9 modes) — public src primitives. +# The bodies now live in src/h264_intra_pred_4x4.c (linked into +# daedalus_core for use by libavcodec.so substitution-arc consumers). +# This test exercises the public symbols. 
+add_executable(test_intra_pred_4x4 tests/test_intra_pred_4x4.c) +target_link_libraries(test_intra_pred_4x4 PRIVATE daedalus_core) target_compile_options(test_intra_pred_4x4 PRIVATE -O2) # H.264 Intra_16x16 luma prediction (4 modes: V, H, DC, Plane) — diff --git a/include/daedalus.h b/include/daedalus.h index 0a565d2..cd76602 100644 --- a/include/daedalus.h +++ b/include/daedalus.h @@ -559,6 +559,33 @@ DECLARE_QPEL_AVG(avg_mc33) * ----------------------------------------------------------------- */ void daedalus_h264_chroma_dc_hadamard_2x2(int16_t c[4]); +/* ------------------------------------------------------------------- + * H.264 Intra_4x4 luma prediction (per H.264 §8.3.1.4). 9 modes. + * + * Pure CPU primitives — each is a small straightforward fill of a + * 4x4 output block from neighbour pixels in the same buffer. No + * substrate-dispatch wrapper (the work is too small to amortise). + * + * FFmpeg-style interface: `dst` at row 0 col 0 of the 4x4 output. + * Reads top-left at dst[-stride-1], top at dst[-stride..-stride+7] + * (top-right for DDL/VL), and left at dst[r*stride - 1] for r=0..3. + * Caller must ensure all 13 neighbour bytes are valid (interior-MB + * assumption — H.264 availability fallback handled at caller). + * + * Bit-exact validated against tests/test_intra_pred_4x4.c (10-case + * spec-derived test suite including the asymmetric Vertical_Right + * 16-cell hand-derived case; see fourier PR #12). 
+ * ----------------------------------------------------------------- */ +void daedalus_h264_pred_4x4_vertical (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_horizontal(uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_dc (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_ddl (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_ddr (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_vr (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_hd (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_vl (uint8_t *dst, ptrdiff_t stride); +void daedalus_h264_pred_4x4_hu (uint8_t *dst, ptrdiff_t stride); + /* ------------------------------------------------------------------- * Recipe query — what does the API recommend for each kernel? * ----------------------------------------------------------------- */ diff --git a/tests/h264_intra_pred_4x4_ref.c b/src/h264_intra_pred_4x4.c similarity index 92% rename from tests/h264_intra_pred_4x4_ref.c rename to src/h264_intra_pred_4x4.c index 6cec9ba..bb1db7d 100644 --- a/tests/h264_intra_pred_4x4_ref.c +++ b/src/h264_intra_pred_4x4.c @@ -52,7 +52,7 @@ static inline uint8_t avg2(int a, int b) } /* Mode 0 — Vertical: each col = top[col]. */ -void daedalus_h264_pred_4x4_vertical_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_vertical(uint8_t *dst, ptrdiff_t stride) { const uint8_t *top = dst - stride; for (int r = 0; r < 4; r++) { @@ -61,7 +61,7 @@ void daedalus_h264_pred_4x4_vertical_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 1 — Horizontal: each row = left[row]. */ -void daedalus_h264_pred_4x4_horizontal_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_horizontal(uint8_t *dst, ptrdiff_t stride) { for (int r = 0; r < 4; r++) { uint8_t l = dst[r * stride - 1]; @@ -70,7 +70,7 @@ void daedalus_h264_pred_4x4_horizontal_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 2 — DC: mean of top 4 + left 4, broadcast. 
*/ -void daedalus_h264_pred_4x4_dc_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_dc(uint8_t *dst, ptrdiff_t stride) { const uint8_t *top = dst - stride; int sum = 4; /* rounding for ((sum + 4) >> 3) */ @@ -82,7 +82,7 @@ void daedalus_h264_pred_4x4_dc_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 3 — Diagonal_Down_Left. Uses top[0..7] (incl. top-right). */ -void daedalus_h264_pred_4x4_ddl_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_ddl(uint8_t *dst, ptrdiff_t stride) { const uint8_t *top = dst - stride; int t0 = top[0], t1 = top[1], t2 = top[2], t3 = top[3]; @@ -102,7 +102,7 @@ void daedalus_h264_pred_4x4_ddl_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 4 — Diagonal_Down_Right. Uses top-left + top[0..3] + left[0..3]. */ -void daedalus_h264_pred_4x4_ddr_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_ddr(uint8_t *dst, ptrdiff_t stride) { int tl = dst[-stride - 1]; int t0 = dst[-stride + 0], t1 = dst[-stride + 1]; @@ -123,7 +123,7 @@ void daedalus_h264_pred_4x4_ddr_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 5 — Vertical_Right. */ -void daedalus_h264_pred_4x4_vr_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_vr(uint8_t *dst, ptrdiff_t stride) { int tl = dst[-stride - 1]; int t0 = dst[-stride + 0], t1 = dst[-stride + 1]; @@ -153,7 +153,7 @@ void daedalus_h264_pred_4x4_vr_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 6 — Horizontal_Down. */ -void daedalus_h264_pred_4x4_hd_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_hd(uint8_t *dst, ptrdiff_t stride) { int tl = dst[-stride - 1]; int t0 = dst[-stride + 0], t1 = dst[-stride + 1], t2 = dst[-stride + 2]; @@ -182,7 +182,7 @@ void daedalus_h264_pred_4x4_hd_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 7 — Vertical_Left. Uses top[0..7]. 
*/ -void daedalus_h264_pred_4x4_vl_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_vl(uint8_t *dst, ptrdiff_t stride) { const uint8_t *top = dst - stride; int t0=top[0], t1=top[1], t2=top[2], t3=top[3]; @@ -211,7 +211,7 @@ void daedalus_h264_pred_4x4_vl_ref(uint8_t *dst, ptrdiff_t stride) } /* Mode 8 — Horizontal_Up. Uses left[0..3] only. */ -void daedalus_h264_pred_4x4_hu_ref(uint8_t *dst, ptrdiff_t stride) +void daedalus_h264_pred_4x4_hu(uint8_t *dst, ptrdiff_t stride) { int l0 = dst[ 0*stride - 1], l1 = dst[ 1*stride - 1]; int l2 = dst[ 2*stride - 1], l3 = dst[ 3*stride - 1]; diff --git a/tests/test_intra_pred_4x4.c b/tests/test_intra_pred_4x4.c index 2a44c1d..07afe10 100644 --- a/tests/test_intra_pred_4x4.c +++ b/tests/test_intra_pred_4x4.c @@ -22,15 +22,15 @@ #include #include -extern void daedalus_h264_pred_4x4_vertical_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_horizontal_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_dc_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_ddl_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_ddr_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_vr_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_hd_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_vl_ref(uint8_t *dst, ptrdiff_t stride); -extern void daedalus_h264_pred_4x4_hu_ref(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_vertical(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_horizontal(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_dc(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_ddl(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_ddr(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_vr(uint8_t *dst, ptrdiff_t stride); +extern void 
daedalus_h264_pred_4x4_hd(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_vl(uint8_t *dst, ptrdiff_t stride); +extern void daedalus_h264_pred_4x4_hu(uint8_t *dst, ptrdiff_t stride); #define STRIDE 9 typedef void (*pred_fn)(uint8_t *dst, ptrdiff_t stride); @@ -82,7 +82,7 @@ int main(void) int t[8] = { 10, 20, 30, 40, 0, 0, 0, 0 }; int l[4] = { 0, 0, 0, 0 }; set_ctx(buf, tl, t, l); - daedalus_h264_pred_4x4_vertical_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_vertical(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {10,20,30,40}, {10,20,30,40}, {10,20,30,40}, {10,20,30,40} }; @@ -95,7 +95,7 @@ int main(void) int t[8] = { 0,0,0,0, 0,0,0,0 }; int l[4] = { 50, 60, 70, 80 }; set_ctx(buf, 0, t, l); - daedalus_h264_pred_4x4_horizontal_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_horizontal(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {50,50,50,50}, {60,60,60,60}, {70,70,70,70}, {80,80,80,80} }; @@ -110,7 +110,7 @@ int main(void) int t[8] = { 1,1,1,1, 0,0,0,0 }; int l[4] = { 3,3,3,3 }; set_ctx(buf, 99, t, l); /* tl unused for DC */ - daedalus_h264_pred_4x4_dc_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_dc(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {2,2,2,2}, {2,2,2,2}, {2,2,2,2}, {2,2,2,2} }; @@ -125,7 +125,7 @@ int main(void) int t[8] = { 100,100,100,100, 100,100,100,100 }; int l[4] = { 0,0,0,0 }; set_ctx(buf, 0, t, l); - daedalus_h264_pred_4x4_ddl_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_ddl(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {100,100,100,100}, {100,100,100,100}, {100,100,100,100}, {100,100,100,100} @@ -140,7 +140,7 @@ int main(void) int t[8] = { 200,200,200,200, 0,0,0,0 }; int l[4] = { 200,200,200,200 }; set_ctx(buf, 200, t, l); - daedalus_h264_pred_4x4_ddr_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_ddr(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {200,200,200,200}, {200,200,200,200}, {200,200,200,200}, {200,200,200,200} @@ -155,7 +155,7 @@ int main(void) int t[8] = { 80,80,80,80, 0,0,0,0 }; int l[4] = { 80,80,80,80 }; 
set_ctx(buf, 80, t, l); - daedalus_h264_pred_4x4_vr_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_vr(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {80,80,80,80}, {80,80,80,80}, {80,80,80,80}, {80,80,80,80} }; @@ -168,7 +168,7 @@ int main(void) int t[8] = { 120,120,120,120, 0,0,0,0 }; int l[4] = { 120,120,120,120 }; set_ctx(buf, 120, t, l); - daedalus_h264_pred_4x4_hd_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_hd(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {120,120,120,120}, {120,120,120,120}, {120,120,120,120}, {120,120,120,120} @@ -182,7 +182,7 @@ int main(void) int t[8] = { 64,64,64,64, 64,64,64,64 }; int l[4] = { 0,0,0,0 }; set_ctx(buf, 0, t, l); - daedalus_h264_pred_4x4_vl_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_vl(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {64,64,64,64}, {64,64,64,64}, {64,64,64,64}, {64,64,64,64} }; @@ -195,7 +195,7 @@ int main(void) int t[8] = { 0,0,0,0, 0,0,0,0 }; int l[4] = { 200,200,200,200 }; set_ctx(buf, 0, t, l); - daedalus_h264_pred_4x4_hu_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_hu(&buf[1][1], STRIDE); uint8_t exp[4][4] = { {200,200,200,200}, {200,200,200,200}, {200,200,200,200}, {200,200,200,200} @@ -230,7 +230,7 @@ int main(void) int t[8] = { 10,20,30,40, 0,0,0,0 }; int l[4] = { 50,60,70,0 }; set_ctx(buf, 5, t, l); - daedalus_h264_pred_4x4_vr_ref(&buf[1][1], STRIDE); + daedalus_h264_pred_4x4_vr(&buf[1][1], STRIDE); uint8_t exp[4][4] = { { 8,15,25,35}, {18,11,20,30},