h264: promote remaining intra prediction modes (17) to public API
Follows PR #26 (Intra_4x4 luma) with the same promotion pattern for the rest of the intra prediction primitive set:

  - Intra_16x16 luma (4 modes, PR #13) — V/H/DC/Plane
  - Intra_8x8 chroma (4 modes, PR #14) — DC/H/V/Plane (4:2:0)
  - Intra_8x8 luma (9 modes, PRs #21 + #22) — High profile, with 1-2-1 pre-filter

3 file moves via `git mv`, ~17 function renames stripping the `_ref` suffix. Test binaries rewired to link daedalus_core instead of compiling the (now moved) ref files directly. No code change — pure plumbing for substitution-arc consumers.

26 intra prediction modes total now in the public API after this PR.

Verified on hertz:

  - test_intra_pred_16x16: 5/5 PASS
  - test_intra_pred_chroma8x8: 5/5 PASS
  - test_intra_pred_8x8_luma: 11/11 PASS

All via public symbols (test binaries linked against daedalus_core).

Unblocks marfrit-packages substitution arc patch 0014 — wires H264PredContext.pred4x4[], pred16x16[], pred8x8[], pred8x8l[] through daedalus alongside the existing IDCT / deblock / qpel / DC Hadamard substitutions. After 0014 lands, the libavcodec.so built by marfrit-packages will have EVERY hot-path pixel-math kernel of an H.264 8-bit 4:2:0 decode routing through daedalus — the substitution arc is feature-complete for the campaign target (Pi 5 Firefox YouTube playback).
This commit is contained in:
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Standalone bit-exact C reference for H.264 chroma Intra_8x8
|
||||
* prediction modes (per H.264 §8.3.3), used for both Cb and Cr
|
||||
* planes at 4:2:0. All 4 modes.
|
||||
*
|
||||
* Mode index → name (per H.264 Table 7-16):
|
||||
* 0 = DC (per-quadrant — asymmetric, see §8.3.3.2)
|
||||
* 1 = Horizontal
|
||||
* 2 = Vertical
|
||||
* 3 = Plane (slope coefficient 34, distinct from luma's 5)
|
||||
*
|
||||
* Calling convention (same shape as luma intra refs):
|
||||
* daedalus_h264_pred_chroma8x8_<mode>(uint8_t *dst, ptrdiff_t stride)
|
||||
*
|
||||
* `dst` points at row 0, col 0 of the 8x8 output block (single
|
||||
* component plane — Cb or Cr, dispatched independently). Neighbours:
|
||||
* top[0..7] = dst[-stride + 0 .. -stride + 7]
|
||||
* top-left = dst[-stride - 1]
|
||||
* left[0..7] = dst[ 0*stride - 1 .. 7*stride - 1]
|
||||
*
|
||||
* AVAILABILITY: assumes all neighbours valid (interior-MB case).
|
||||
* The H.264 spec defines per-quadrant fallback for the DC mode at
|
||||
* MB boundaries; that's caller-side via the libavcodec intercept.
|
||||
*
|
||||
* License: BSD-2-Clause.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Clamp `v` to the 8-bit sample range [0, 255]; in-range values pass through. */
static inline int clip_u8(int v)
{
    if (v < 0)
        return 0;
    if (v > 255)
        return 255;
    return v;
}
|
||||
|
||||
/* Mode 0 — DC (per-quadrant, 4:2:0 layout per §8.3.3.2).
 *
 * The 8x8 block is split into four 4x4 quadrants, each filled with its
 * own DC value.  For interior MBs (all neighbours available) the value
 * per quadrant is:
 *   (0,0) top-left  : (sum_top[0..3] + sum_left[0..3] + 4) >> 3
 *   (0,1) top-right : (sum_top[4..7]                  + 2) >> 2
 *   (1,0) bot-left  : (sum_left[4..7]                 + 2) >> 2
 *   (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3
 *
 * The asymmetry is deliberate: the top-right quadrant averages only the
 * four samples directly above it (top[4..7]) and the bottom-left quadrant
 * only the four samples directly to its left (left[4..7]); the two
 * diagonal quadrants average both of their adjacent edges.
 *
 * dst    : row 0, col 0 of the 8x8 output block.  Reads the row above
 *          (dst[-stride + 0..7]) and the column to the left
 *          (dst[r*stride - 1], r = 0..7); writes the 8x8 block only.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_dc(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int top_lo = 0, top_hi = 0, left_lo = 0, left_hi = 0;

    /* Gather all four half-edge sums before anything is written. */
    for (int i = 0; i < 4; i++) {
        top_lo  += top[i];
        top_hi  += top[4 + i];
        left_lo += dst[i * stride - 1];
        left_hi += dst[(4 + i) * stride - 1];
    }

    const uint8_t dc00 = (uint8_t)((top_lo + left_lo + 4) >> 3); /* top-left  */
    const uint8_t dc01 = (uint8_t)((top_hi           + 2) >> 2); /* top-right */
    const uint8_t dc10 = (uint8_t)((left_hi          + 2) >> 2); /* bot-left  */
    const uint8_t dc11 = (uint8_t)((top_hi + left_hi + 4) >> 3); /* bot-right */

    /* Fill row by row: rows 0..3 use the upper pair, rows 4..7 the lower. */
    for (int r = 0; r < 8; r++) {
        uint8_t *row = dst + r * stride;
        const uint8_t lhs = (r < 4) ? dc00 : dc10;
        const uint8_t rhs = (r < 4) ? dc01 : dc11;
        for (int c = 0; c < 4; c++) {
            row[c]     = lhs;
            row[4 + c] = rhs;
        }
    }
}
|
||||
|
||||
/* Mode 1 — Horizontal: every sample in row r is a copy of left[r].
 *
 * dst    : row 0, col 0 of the 8x8 output block.  Reads the column to
 *          the left (dst[r*stride - 1], r = 0..7); writes the 8x8 block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_horizontal(uint8_t *dst, ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        uint8_t *row = dst + r * stride;
        /* Latch the neighbour before the row is overwritten. */
        const uint8_t left = row[-1];
        for (int c = 0; c < 8; c++)
            row[c] = left;
    }
}
|
||||
|
||||
/* Mode 2 — Vertical: every sample in column c is a copy of top[c].
 *
 * dst    : row 0, col 0 of the 8x8 output block.  Reads the row above
 *          (dst[-stride + 0..7]); writes the 8x8 block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_vertical(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;

    for (int r = 0; r < 8; r++) {
        uint8_t *row = dst + r * stride;
        for (int c = 0; c < 8; c++)
            row[c] = top[c];
    }
}
|
||||
|
||||
/* Mode 3 — Plane (per H.264 §8.3.3.4):
 *   H = sum_{i=0..3} (i+1) * (p[4+i, -1] - p[2-i, -1])   ; i=3 uses p[-1,-1]
 *   V = sum_{j=0..3} (j+1) * (p[-1, 4+j] - p[-1, 2-j])   ; j=3 uses p[-1,-1]
 *   b = (34 * H + 32) >> 6
 *   c = (34 * V + 32) >> 6
 *   a = 16 * (p[-1, 7] + p[7, -1])
 *   pred[y][x] = Clip1((a + b*(x - 3) + c*(y - 3) + 16) >> 5)
 *
 * Distinct from the Intra_16x16 luma Plane:
 *   - Slope coefficient is 34 (not 5).
 *   - Centre is (x-3, y-3) (not x-7, y-7).
 *   - Spans 4 differences per sum (not 8).
 *
 * dst    : row 0, col 0 of the 8x8 output block.  Reads the row above
 *          (dst[-stride - 1 .. -stride + 7], corner included) and the
 *          column to the left (dst[r*stride - 1], r = 0..7); writes the
 *          8x8 block.
 * stride : distance in bytes between vertically adjacent samples.
 *
 * NOTE: H, V and the per-sample sum can be negative.  `>>` on a negative
 * int is implementation-defined in C; this code relies on the arithmetic
 * (sign-propagating) shift.
 */
void daedalus_h264_pred_chroma8x8_plane(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int H = 0, V = 0;

    /* Weighted edge gradients.  At k == 3 the subtracted index is -1, so
     * top[2 - k] and dst[(2 - k) * stride - 1] both address the top-left
     * corner sample p[-1,-1] without a special case. */
    for (int k = 0; k < 4; k++) {
        const int weight = k + 1;
        H += weight * (top[4 + k] - top[2 - k]);
        V += weight * (dst[(4 + k) * stride - 1] - dst[(2 - k) * stride - 1]);
    }

    const int b = (34 * H + 32) >> 6;                  /* horizontal slope */
    const int c = (34 * V + 32) >> 6;                  /* vertical slope   */
    const int a = 16 * (dst[7 * stride - 1] + top[7]); /* plane anchor     */

    /* All neighbour reads are complete; the block can now be overwritten. */
    for (int y = 0; y < 8; y++) {
        uint8_t *row = dst + y * stride;
        /* Row-constant part of the plane equation, rounding term included. */
        const int row_base = a + c * (y - 3) + 16;
        for (int x = 0; x < 8; x++) {
            const int v = (row_base + b * (x - 3)) >> 5;
            row[x] = (uint8_t)clip_u8(v);
        }
    }
}
|
||||
Reference in New Issue
Block a user