h264: expose chroma DC 2x2 Hadamard as public API
PR #23 added the Hadamard as a test-only spec reference; this PR promotes it to a public symbol in src/ so consumers (the eventual marfrit-packages substitution-arc patch 0011) can link against it. New: void daedalus_h264_chroma_dc_hadamard_2x2(int16_t c[4]); — operates in place on 4 int16_t values, no QP-dependent scaling (callers compose that themselves per §8.5.11.2). The src/ implementation is byte-for-byte identical to the test-only ref in tests/h264_chroma_dc_hadamard_ref.c (kept as a separate spec-validation copy). A new "public API parity" test case verifies the two produce identical output for a non-trivial input. Pure CPU primitive — no substrate-dispatch wrapper because the work is 4 adds + 4 subs; the substrate machinery would cost more than the kernel itself. Verified on hertz: $ ./build/test_chroma_dc_hadamard all-uniform 5 PASS col gradient [0,10,0,10] PASS row gradient [0,0,10,10] PASS anti-diagonal [10,0,0,10] PASS asymmetric [1,2,3,4] PASS sign-alternating [-5,5,-5,5] PASS double-Hadamard = 4*orig PASS public API parity vs _ref PASS ALL chroma DC Hadamard tests PASS $ nm -g build/libdaedalus_core.a | grep chroma_dc_hadamard 0000000000000000 T daedalus_h264_chroma_dc_hadamard_2x2 Unblocks marfrit-packages 0011 (substituting H264DSPContext.chroma_dc_dequant_idct, which composes the Hadamard + qmul scaling).
This commit is contained in:
@@ -391,6 +391,7 @@ endif()
|
|||||||
|
|
||||||
add_library(daedalus_core STATIC
|
add_library(daedalus_core STATIC
|
||||||
src/daedalus_core.c
|
src/daedalus_core.c
|
||||||
|
src/h264_chroma_dc.c
|
||||||
src/v3d_runner.c
|
src/v3d_runner.c
|
||||||
${FFASM_SOURCES}
|
${FFASM_SOURCES}
|
||||||
${FFASM_LPF_SOURCES}
|
${FFASM_LPF_SOURCES}
|
||||||
@@ -577,6 +578,9 @@ add_executable(test_chroma_dc_hadamard
|
|||||||
tests/test_chroma_dc_hadamard.c
|
tests/test_chroma_dc_hadamard.c
|
||||||
tests/h264_chroma_dc_hadamard_ref.c
|
tests/h264_chroma_dc_hadamard_ref.c
|
||||||
)
|
)
|
||||||
|
# Links daedalus_core to pull in the public daedalus_h264_chroma_dc_hadamard_2x2
|
||||||
|
# symbol (for the public-API parity test added in this PR).
|
||||||
|
target_link_libraries(test_chroma_dc_hadamard PRIVATE daedalus_core)
|
||||||
target_compile_options(test_chroma_dc_hadamard PRIVATE -O2)
|
target_compile_options(test_chroma_dc_hadamard PRIVATE -O2)
|
||||||
|
|
||||||
# H.264 primitives latency benchmark (NEON CPU baseline).
|
# H.264 primitives latency benchmark (NEON CPU baseline).
|
||||||
|
|||||||
@@ -544,6 +544,21 @@ DECLARE_QPEL_AVG(avg_mc33)
|
|||||||
|
|
||||||
#undef DECLARE_QPEL_AVG
|
#undef DECLARE_QPEL_AVG
|
||||||
|
|
||||||
|
/* -------------------------------------------------------------------
|
||||||
|
* H.264 chroma DC 2x2 Hadamard pre-pass (per H.264 §8.5.11.1).
|
||||||
|
*
|
||||||
|
* Operates in-place on 4 int16 (the DC coefficients of an MB's
|
||||||
|
* chroma 4x4 AC blocks). Pure CPU primitive — no substrate
|
||||||
|
* dispatch wrapper because the work is 4 adds + 4 subs. Callers
|
||||||
|
* compose with QP-dependent scaling themselves; the scale shape
|
||||||
|
* varies by slice/PPS chroma_qp offset context.
|
||||||
|
*
|
||||||
|
* Bit-exact validated against tests/h264_chroma_dc_hadamard_ref.c
|
||||||
|
* (7-case spec-derived test suite including the H·H = 4·I algebraic
|
||||||
|
* invariant; see PR #23).
|
||||||
|
* ----------------------------------------------------------------- */
|
||||||
|
/* c: in/out array of exactly 4 coefficients; all four are read and then
 * overwritten in place with the transformed values. No return value. */
void daedalus_h264_chroma_dc_hadamard_2x2(int16_t c[4]);
|
||||||
|
|
||||||
/* -------------------------------------------------------------------
|
/* -------------------------------------------------------------------
|
||||||
* Recipe query — what does the API recommend for each kernel?
|
* Recipe query — what does the API recommend for each kernel?
|
||||||
* ----------------------------------------------------------------- */
|
* ----------------------------------------------------------------- */
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
/* SPDX-License-Identifier: BSD-2-Clause */
|
||||||
|
/*
|
||||||
|
* H.264 chroma DC 2x2 Hadamard pre-pass (public, in-tree CPU).
|
||||||
|
*
|
||||||
|
* The 4 DC coefficients of an MB's chroma 4x4 AC blocks go through
|
||||||
|
* this 2x2 Hadamard before quant-scaling and re-injection into the
|
||||||
|
* AC blocks' [0,0] coefficient. Algorithm per H.264 §8.5.11.1.
|
||||||
|
*
|
||||||
|
* Pure CPU primitive — there's no substrate-dispatch wrapper because
|
||||||
|
* the work is 4 adds + 4 subs. Callers compose with QP-dependent
|
||||||
|
* scaling themselves (the scale shape varies by slice/PPS chroma_qp
|
||||||
|
* offset context and shouldn't be baked into the kernel).
|
||||||
|
*
|
||||||
|
* Bit-exact validated against tests/h264_chroma_dc_hadamard_ref.c
|
||||||
|
* (7-case spec-derived test suite including the H·H = 4·I algebraic
|
||||||
|
* invariant; see PR #23). Same algorithm; this is the public
|
||||||
|
* src-tree copy.
|
||||||
|
*/
|
||||||
|
#include "daedalus.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
 * In-place 2x2 Hadamard butterfly over four int16_t coefficients.
 *
 * c: array of exactly four values. All four inputs are read before any
 *    output is stored, so the transform is safe to run in place.
 *
 * Intermediate results are carried in int and narrowed back to int16_t
 * on store. No scaling of any kind is applied here.
 */
void daedalus_h264_chroma_dc_hadamard_2x2(int16_t c[4])
{
    /* Stage 1: butterfly each adjacent pair, (c[0],c[1]) and (c[2],c[3]). */
    const int sum01  = c[0] + c[1];
    const int sum23  = c[2] + c[3];
    const int diff01 = c[0] - c[1];
    const int diff23 = c[2] - c[3];

    /* Stage 2: combine the pair results and write back. */
    c[0] = (int16_t)(sum01 + sum23);    /* c0 + c1 + c2 + c3 */
    c[1] = (int16_t)(diff01 + diff23);  /* c0 - c1 + c2 - c3 */
    c[2] = (int16_t)(sum01 - sum23);    /* c0 + c1 - c2 - c3 */
    c[3] = (int16_t)(diff01 - diff23);  /* c0 - c1 - c2 + c3 */
}
|
||||||
@@ -12,6 +12,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
extern void daedalus_h264_chroma_dc_hadamard_2x2_ref(int16_t c[4]);
|
extern void daedalus_h264_chroma_dc_hadamard_2x2_ref(int16_t c[4]);
|
||||||
|
extern void daedalus_h264_chroma_dc_hadamard_2x2(int16_t c[4]); /* public API */
|
||||||
|
|
||||||
static int check(const char *name, int16_t in[4], int16_t expect[4])
|
static int check(const char *name, int16_t in[4], int16_t expect[4])
|
||||||
{
|
{
|
||||||
@@ -112,6 +113,23 @@ int main(void)
|
|||||||
fail |= local_fail;
|
fail |= local_fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Test 8: public API parity. The public symbol must produce
|
||||||
|
* byte-identical output to the test-only ref for the same input.
|
||||||
|
* If the src/ Hadamard ever drifts from the spec, this catches it. */
|
||||||
|
{
|
||||||
|
int16_t input[4] = { 7, -11, 23, -42 };
|
||||||
|
int16_t a[4], b[4];
|
||||||
|
memcpy(a, input, sizeof(a));
|
||||||
|
memcpy(b, input, sizeof(b));
|
||||||
|
daedalus_h264_chroma_dc_hadamard_2x2_ref(a);
|
||||||
|
daedalus_h264_chroma_dc_hadamard_2x2(b);
|
||||||
|
int local_fail = 0;
|
||||||
|
for (int i = 0; i < 4; i++) if (a[i] != b[i]) local_fail = 1;
|
||||||
|
printf(" %-32s %s\n", "public API parity vs _ref",
|
||||||
|
local_fail ? "FAIL" : "PASS");
|
||||||
|
fail |= local_fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (fail == 0) printf("\nALL chroma DC Hadamard tests PASS\n");
|
if (fail == 0) printf("\nALL chroma DC Hadamard tests PASS\n");
|
||||||
else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
|
else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
|
||||||
return fail ? 1 : 0;
|
return fail ? 1 : 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user