h264: qpel avg anchors (avg_mc20/02/22, biprediction support)
Begins the avg_ qpel buildout for B-slice biprediction. Each avg_
form computes the same half-pel formula as its put_ sibling, then
L2-averages the result with the existing dst contents, i.e. stores the
rounded mean (dst + pred + 1) >> 1. The caller pre-loads dst with the
list0 prediction; the avg_ call then blends in the list1 prediction
per H.264 §8.4.2.3.1 (default weighted sample prediction).
Scope (3 anchors, sets the pattern for the remaining 13 avg_
variants):
- 3 new kernel enums (AVG_MC20=31, AVG_MC02=32, AVG_MC22=33) → CPU.
- 3 NEON externs for the vendored ff_avg_h264_qpel8_{mc20,mc02,mc22}_neon.
- 3 CPU dispatches via the existing DEFINE_QPEL_CPU_DISPATCH macro
(the macro takes the name suffix and the NEON entry point as
arguments, so it needed no changes for avg_).
- 3 public dispatches via DEFINE_QPEL_DISPATCH macro.
- 3 recipe wrappers via DEFINE_QPEL_RECIPE macro.
- tests/h264_qpel8_avg_anchors_ref.c — per-cell helpers + L2 avg.
- Test harness: run_avg_qpel() seeds dst with random content so
the L2 averaging is actually exercised (not just put_-style
overwrite that would silently pass).
Verified on hertz:
$ ./build/test_api_h264 | tail -3
H.264 qpel avg_mc20: 2048/2048 bytes bit-exact (100.0000%)
H.264 qpel avg_mc02: 2048/2048 bytes bit-exact (100.0000%)
H.264 qpel avg_mc22: 2048/2048 bytes bit-exact (100.0000%)
All 3 anchors passed bit-exact on the first try.
Why anchors only in this PR: the avg_ pattern is uniform across all
16 positions (each is just "put_ result + L2 with dst"). Landing
the anchors first confirms the macro pattern works for both put_
and avg_; the remaining 13 (avg_mc00, avg_mc10/30/01/03, and the eight
not-yet-covered positions in avg_mc11..33) follow the same template
in a follow-up PR.
State of the qpel matrix after this PR:
put_ : 15 of 16 positions ✓ (mc00 is integer copy, no wrapper)
avg_ : 3 of 16 positions ✓ (mc20, mc02, mc22 anchors);
       the other 13 positions are left for the follow-up PR
This commit is contained in:
@@ -152,6 +152,9 @@ daedalus_substrate daedalus_recipe_substrate_for(daedalus_kernel k)
|
||||
case DAEDALUS_KERNEL_H264_QPEL_MC31: return DAEDALUS_SUBSTRATE_CPU; /* diagonal ¾¼ */
|
||||
case DAEDALUS_KERNEL_H264_QPEL_MC32: return DAEDALUS_SUBSTRATE_CPU; /* diagonal ¾½ */
|
||||
case DAEDALUS_KERNEL_H264_QPEL_MC33: return DAEDALUS_SUBSTRATE_CPU; /* diagonal ¾¾ */
|
||||
case DAEDALUS_KERNEL_H264_QPEL_AVG_MC20: return DAEDALUS_SUBSTRATE_CPU; /* biprediction anchors */
|
||||
case DAEDALUS_KERNEL_H264_QPEL_AVG_MC02: return DAEDALUS_SUBSTRATE_CPU;
|
||||
case DAEDALUS_KERNEL_H264_QPEL_AVG_MC22: return DAEDALUS_SUBSTRATE_CPU;
|
||||
}
|
||||
return DAEDALUS_SUBSTRATE_CPU;
|
||||
}
|
||||
@@ -212,6 +215,9 @@ extern void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdif
|
||||
extern void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
extern void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
extern void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
extern void ff_avg_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
extern void ff_avg_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
extern void ff_avg_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
|
||||
/* -------------------- CPU dispatch implementations -------------- */
|
||||
|
||||
@@ -493,6 +499,12 @@ DEFINE_QPEL_CPU_DISPATCH(mc31, ff_put_h264_qpel8_mc31_neon)
|
||||
DEFINE_QPEL_CPU_DISPATCH(mc32, ff_put_h264_qpel8_mc32_neon)
|
||||
DEFINE_QPEL_CPU_DISPATCH(mc33, ff_put_h264_qpel8_mc33_neon)
|
||||
|
||||
/* avg_ biprediction variants — same dispatch shape as put_, just
|
||||
* different NEON entry that L2-averages with the existing dst. */
|
||||
DEFINE_QPEL_CPU_DISPATCH(avg_mc20, ff_avg_h264_qpel8_mc20_neon)
|
||||
DEFINE_QPEL_CPU_DISPATCH(avg_mc02, ff_avg_h264_qpel8_mc02_neon)
|
||||
DEFINE_QPEL_CPU_DISPATCH(avg_mc22, ff_avg_h264_qpel8_mc22_neon)
|
||||
|
||||
#undef DEFINE_QPEL_CPU_DISPATCH
|
||||
|
||||
/* -------------------- IDCT QPU dispatch (cycle 1 v4 shader) ---- */
|
||||
@@ -1521,6 +1533,9 @@ DEFINE_QPEL_DISPATCH(mc23, DAEDALUS_KERNEL_H264_QPEL_MC23)
|
||||
DEFINE_QPEL_DISPATCH(mc31, DAEDALUS_KERNEL_H264_QPEL_MC31)
|
||||
DEFINE_QPEL_DISPATCH(mc32, DAEDALUS_KERNEL_H264_QPEL_MC32)
|
||||
DEFINE_QPEL_DISPATCH(mc33, DAEDALUS_KERNEL_H264_QPEL_MC33)
|
||||
DEFINE_QPEL_DISPATCH(avg_mc20, DAEDALUS_KERNEL_H264_QPEL_AVG_MC20)
|
||||
DEFINE_QPEL_DISPATCH(avg_mc02, DAEDALUS_KERNEL_H264_QPEL_AVG_MC02)
|
||||
DEFINE_QPEL_DISPATCH(avg_mc22, DAEDALUS_KERNEL_H264_QPEL_AVG_MC22)
|
||||
|
||||
#undef DEFINE_QPEL_DISPATCH
|
||||
|
||||
@@ -1680,5 +1695,8 @@ DEFINE_QPEL_RECIPE(mc23)
|
||||
DEFINE_QPEL_RECIPE(mc31)
|
||||
DEFINE_QPEL_RECIPE(mc32)
|
||||
DEFINE_QPEL_RECIPE(mc33)
|
||||
DEFINE_QPEL_RECIPE(avg_mc20)
|
||||
DEFINE_QPEL_RECIPE(avg_mc02)
|
||||
DEFINE_QPEL_RECIPE(avg_mc22)
|
||||
|
||||
#undef DEFINE_QPEL_RECIPE
|
||||
|
||||
Reference in New Issue
Block a user