/*
 * Standalone bit-exact C reference for H.264 chroma Intra_8x8
 * prediction modes (per H.264 §8.3.3), used for both Cb and Cr
 * planes at 4:2:0. All 4 modes.
 *
 * Mode index → name (per H.264 Table 7-16):
 *   0 = DC          (per-quadrant — asymmetric, see §8.3.3.2)
 *   1 = Horizontal
 *   2 = Vertical
 *   3 = Plane       (slope coefficient 34, distinct from luma's 5)
 *
 * Calling convention (same shape as luma intra refs):
 *   daedalus_h264_pred_chroma8x8_<mode>_ref(uint8_t *dst, ptrdiff_t stride)
 *
 * `dst` points at row 0, col 0 of the 8x8 output block (single
 * component plane — Cb or Cr, dispatched independently). Neighbours:
 *   top[0..7]  = dst[-stride + 0 .. -stride + 7]
 *   top-left   = dst[-stride - 1]
 *   left[0..7] = dst[ 0*stride - 1 .. 7*stride - 1]
 *
 * Every function reads its neighbours before writing, and writes only
 * the 8x8 block itself (rows 0..7, cols 0..7 relative to `dst`).
 *
 * AVAILABILITY: assumes all neighbours valid (interior-MB case).
 * The H.264 spec defines per-quadrant fallback for the DC mode at
 * MB boundaries; that's caller-side via the libavcodec intercept.
 *
 * License: BSD-2-Clause.
 */

#include <stddef.h>
#include <stdint.h>

/* Clamp `v` to the 8-bit sample range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/*
 * Mode 0 — DC (per-quadrant, 4:2:0 layout per §8.3.3.2).
 *
 * The 8×8 block is split into four 4×4 quadrants. For interior
 * MBs (all neighbours available), the DC value per quadrant uses:
 *   (0,0) top-left  : (sum_top[0..3] + sum_left[0..3] + 4) >> 3
 *   (0,1) top-right : (sum_top[4..7] + 2) >> 2
 *   (1,0) bot-left  : (sum_left[4..7] + 2) >> 2
 *   (1,1) bot-right : (sum_top[4..7] + sum_left[4..7] + 4) >> 3
 *
 * The asymmetry mirrors what neighbours are "logically available"
 * for each quadrant in the spec's availability model. Top-right
 * quadrant ignores the top-left-half because that half is "vertically
 * above" the top-left quadrant; the spec uses top[4..7] only.
 *
 * dst:    row 0, col 0 of the 8x8 block to fill.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_dc_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int top_lo = 0, top_hi = 0, left_lo = 0, left_hi = 0;

    /* Four partial sums: each half of the top row and of the left column. */
    for (int i = 0; i < 4; i++) {
        top_lo  += top[i];
        top_hi  += top[4 + i];
        left_lo += dst[i * stride - 1];
        left_hi += dst[(4 + i) * stride - 1];
    }

    /* Sums of 8-bit samples are non-negative, so the casts never truncate. */
    uint8_t dc00 = (uint8_t)((top_lo + left_lo + 4) >> 3); /* top-left  */
    uint8_t dc01 = (uint8_t)((top_hi + 2) >> 2);           /* top-right */
    uint8_t dc10 = (uint8_t)((left_hi + 2) >> 2);          /* bot-left  */
    uint8_t dc11 = (uint8_t)((top_hi + left_hi + 4) >> 3); /* bot-right */

    for (int r = 0; r < 4; r++) {
        for (int c = 0; c < 4; c++) {
            dst[r * stride + c]           = dc00;
            dst[r * stride + 4 + c]       = dc01;
            dst[(4 + r) * stride + c]     = dc10;
            dst[(4 + r) * stride + 4 + c] = dc11;
        }
    }
}

/*
 * Mode 1 — Horizontal: each row = left[row].
 *
 * dst:    row 0, col 0 of the 8x8 block to fill.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
{
    for (int r = 0; r < 8; r++) {
        uint8_t l = dst[r * stride - 1];

        for (int c = 0; c < 8; c++) {
            dst[r * stride + c] = l;
        }
    }
}

/*
 * Mode 2 — Vertical: each col = top[col].
 *
 * dst:    row 0, col 0 of the 8x8 block to fill.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_vertical_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;

    for (int r = 0; r < 8; r++) {
        for (int c = 0; c < 8; c++) {
            dst[r * stride + c] = top[c];
        }
    }
}

/*
 * Mode 3 — Plane (per H.264 §8.3.3.4):
 *   H = sum_{i=0..3} (i+1) * (p[4+i, -1] - p[2-i, -1])   ; i=3 uses p[-1,-1]
 *   V = sum_{j=0..3} (j+1) * (p[-1, 4+j] - p[-1, 2-j])   ; j=3 uses p[-1,-1]
 *   b = (34 * H + 32) >> 6
 *   c = (34 * V + 32) >> 6
 *   a = 16 * (p[-1, 7] + p[7, -1])
 *   pred[y][x] = Clip1((a + b*(x - 3) + c*(y - 3) + 16) >> 5)
 *
 * Distinct from the Intra_16x16 luma Plane:
 *   - Slope coefficient is 34 (not 5).
 *   - Centre is (x-3, y-3) (not x-7, y-7).
 *   - Spans 4 differences per sum (not 8).
 *
 * NOTE: H, V, b, c and the per-sample sum can be negative. In C, `>>` on
 * a negative signed value is implementation-defined; this reference
 * relies on it being an arithmetic (sign-propagating) shift.
 *
 * dst:    row 0, col 0 of the 8x8 block to fill.
 * stride: distance in bytes between vertically adjacent samples.
 */
void daedalus_h264_pred_chroma8x8_plane_ref(uint8_t *dst, ptrdiff_t stride)
{
    const uint8_t *top = dst - stride;
    int H = 0, V = 0;

    /* Horizontal gradient from the top row; top[-1] is the top-left corner. */
    for (int i = 0; i < 4; i++) {
        int t_right = top[4 + i];
        int t_left  = (i == 3) ? top[-1] : top[2 - i];

        H += (i + 1) * (t_right - t_left);
    }

    /* Vertical gradient from the left column; same corner sample at j == 3. */
    for (int j = 0; j < 4; j++) {
        int l_bot = dst[(4 + j) * stride - 1];
        int l_top = (j == 3) ? top[-1] : dst[(2 - j) * stride - 1];

        V += (j + 1) * (l_bot - l_top);
    }

    int b = (34 * H + 32) >> 6;
    int c = (34 * V + 32) >> 6;
    /* left[7] + top[7]: the two neighbour samples farthest from the corner. */
    int a = 16 * (dst[7 * stride - 1] + top[7]);

    /* All neighbour reads are complete; only the block is written below. */
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int v = (a + b * (x - 3) + c * (y - 3) + 16) >> 5;

            dst[y * stride + x] = (uint8_t)clip_u8(v);
        }
    }
}