cb3aef3dac
Follows PR #26 (Intra_4x4 luma) with the same promotion pattern for the rest of the intra prediction primitive set: Intra_16x16 luma (4 modes, PR #13) — V/H/DC/Plane Intra_8x8 chroma (4 modes, PR #14) — DC/H/V/Plane (4:2:0) Intra_8x8 luma (9 modes, PRs #21 + #22) — High profile, with 1-2-1 pre-filter 3 file moves via `git mv`, ~17 function renames stripping the `_ref` suffix. Test binaries rewired to link daedalus_core instead of compiling the (now moved) ref files directly. No code change — pure plumbing for substitution-arc consumers. 26 intra prediction modes total now in the public API after this PR. Verified on hertz: test_intra_pred_16x16: 5/5 PASS test_intra_pred_chroma8x8: 5/5 PASS test_intra_pred_8x8_luma: 11/11 PASS All via public symbols (test binaries linked against daedalus_core). Unblocks marfrit-packages substitution arc patch 0014 — wires H264PredContext.pred4x4[], pred16x16[], pred8x8[], pred8x8l[] through daedalus alongside the existing IDCT / deblock / qpel / DC Hadamard substitutions. After 0014 lands, the libavcodec.so built by marfrit-packages will have EVERY hot-path pixel-math kernel of an H.264 8-bit 4:2:0 decode routing through daedalus — the substitution arc is feature- complete for the campaign target (Pi 5 Firefox YouTube playback).
124 lines
4.6 KiB
C
124 lines
4.6 KiB
C
/*
 * Standalone bit-exact C reference for H.264 chroma Intra_8x8
 * prediction modes (per H.264 §8.3.3), used for both Cb and Cr
 * planes at 4:2:0. All 4 modes.
 *
 * Mode index → name (per H.264 Table 7-16):
 *   0 = DC          (per-quadrant — asymmetric, see §8.3.3.2)
 *   1 = Horizontal
 *   2 = Vertical
 *   3 = Plane       (slope coefficient 34, distinct from luma's 5)
 *
 * Calling convention (same shape as luma intra refs):
 *   daedalus_h264_pred_chroma8x8_<mode>(uint8_t *dst, ptrdiff_t stride)
 *
 * `dst` points at row 0, col 0 of the 8x8 output block (single
 * component plane — Cb or Cr, dispatched independently). Neighbours:
 *   top[0..7]   = dst[-stride + 0 .. -stride + 7]
 *   top-left    = dst[-stride - 1]
 *   left[0..7]  = dst[ 0*stride - 1 .. 7*stride - 1]
 *
 * AVAILABILITY: assumes all neighbours valid (interior-MB case).
 * The H.264 spec defines per-quadrant fallback for the DC mode at
 * MB boundaries; that's caller-side via the libavcodec intercept.
 *
 * License: BSD-2-Clause.
 */
|
||
#include <stdint.h>
|
||
#include <stddef.h>
|
||
|
||
/* Saturate a signed intermediate to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    if (v > 255) {
        return 255;
    }
    return v;
}
|
||
|
||
/* Mode 0 — DC (per-quadrant, 4:2:0 layout per §8.3.3.2).
 *
 * The 8×8 block is handled as four 4×4 quadrants, each flood-filled
 * with its own DC value.  With every neighbour available (interior
 * MB) the four values are:
 *   (0,0) top-left  : (sum_top[0..3] + sum_left[0..3] + 4) >> 3
 *   (0,1) top-right : (sum_top[4..7]                  + 2) >> 2
 *   (1,0) bot-left  : (sum_left[4..7]                 + 2) >> 2
 *   (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3
 *
 * The two off-diagonal quadrants are intentionally asymmetric: the
 * top-right one averages only the four top samples directly above it
 * (top[4..7]) and the bottom-left one only the four left samples
 * directly beside it (left[4..7]).
 *
 * dst    : row 0, col 0 of the 8×8 output block; the row above and
 *          the column to the left are read as neighbours.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_dc(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;
    int sum_top[2]  = { 0, 0 };   /* [0] = columns 0..3, [1] = columns 4..7 */
    int sum_left[2] = { 0, 0 };   /* [0] = rows 0..3,    [1] = rows 4..7    */

    /* All neighbour reads happen before any sample of the block is written. */
    for (int k = 0; k < 8; k++) {
        sum_top[k >> 2]  += above[k];
        sum_left[k >> 2] += dst[k * stride - 1];
    }

    /* fill[qy][qx] is the DC value of the quadrant at row-half qy, col-half qx.
     * Each sum is at most 4 * 255, so every rounded average fits in 8 bits. */
    const uint8_t fill[2][2] = {
        {
            (uint8_t)((sum_top[0] + sum_left[0] + 4) >> 3),
            (uint8_t)((sum_top[1] + 2) >> 2),
        },
        {
            (uint8_t)((sum_left[1] + 2) >> 2),
            (uint8_t)((sum_top[1] + sum_left[1] + 4) >> 3),
        },
    };

    for (int y = 0; y < 8; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t *halves = fill[y >> 2];
        for (int x = 0; x < 8; x++) {
            row[x] = halves[x >> 2];
        }
    }
}
|
||
|
||
/* Mode 1 — Horizontal: every sample in a row takes the value of that
 * row's left neighbour, dst[row * stride - 1].
 *
 * dst    : row 0, col 0 of the 8×8 output block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_horizontal(uint8_t *dst, ptrdiff_t stride)
{
    for (int y = 0; y < 8; y++) {
        uint8_t *row = dst + y * stride;
        const uint8_t left = row[-1];   /* read once, then smear across the row */
        int x = 0;
        while (x < 8) {
            row[x++] = left;
        }
    }
}
|
||
|
||
/* Mode 2 — Vertical: every sample in a column takes the value of the
 * neighbour directly above the block, dst[-stride + col].
 *
 * dst    : row 0, col 0 of the 8×8 output block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_vertical(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;

    for (int y = 0; y < 8; y++) {
        uint8_t *row = dst + y * stride;
        for (int x = 0; x < 8; x++) {
            row[x] = above[x];
        }
    }
}
|
||
|
||
/* Mode 3 — Plane (per H.264 §8.3.3.4).
 *
 * Fits a plane through the top row and left column and evaluates it
 * at every sample of the 8×8 block:
 *   H = sum_{k=1..4} k * (top[3 + k]  - top[3 - k])
 *   V = sum_{k=1..4} k * (left[3 + k] - left[3 - k])
 *       (for k = 4 the subtrahend index is -1, i.e. the top-left corner)
 *   b = (34 * H + 32) >> 6
 *   c = (34 * V + 32) >> 6
 *   a = 16 * (left[7] + top[7])
 *   pred[y][x] = Clip1((a + b*(x - 3) + c*(y - 3) + 16) >> 5)
 *
 * Differences from the Intra_16x16 luma Plane mode:
 *   - slope coefficient is 34 (not 5);
 *   - the plane is centred on (x - 3, y - 3) (not x - 7, y - 7);
 *   - each gradient sums 4 differences (not 8).
 *
 * dst    : row 0, col 0 of the 8×8 output block.
 * stride : distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_plane(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *above = dst - stride;   /* above[-1] is the top-left corner  */
    const uint8_t *left  = dst - 1;        /* left[y * stride] is row y's left  */

    /* Weighted sums of differences mirrored around index 3 on each edge.
     * For k == 4 the mirrored index is -1, which on both edges lands on
     * the shared corner sample dst[-stride - 1]. */
    int grad_h = 0;
    int grad_v = 0;
    for (int k = 1; k <= 4; k++) {
        grad_h += k * (above[3 + k] - above[3 - k]);
        grad_v += k * (left[(3 + k) * stride] - left[(3 - k) * stride]);
    }

    /* The gradients may be negative; the rounding shifts below (and the
     * final >> 5) rely on >> being an arithmetic shift for negative ints,
     * which C leaves implementation-defined. */
    const int slope_x = (34 * grad_h + 32) >> 6;
    const int slope_y = (34 * grad_v + 32) >> 6;
    const int base    = 16 * (left[7 * stride] + above[7]);

    for (int y = 0; y < 8; y++) {
        uint8_t *row = dst + y * stride;
        /* Everything that does not depend on x, including the rounding term. */
        const int row_term = base + slope_y * (y - 3) + 16;
        for (int x = 0; x < 8; x++) {
            row[x] = (uint8_t)clip_u8((row_term + slope_x * (x - 3)) >> 5);
        }
    }
}
|