/*
 * Standalone bit-exact C reference for H.264 luma Intra_8x8
 * prediction modes (per H.264 spec §8.3.2.1). High-profile-only
 * MB type — Baseline/Main/Extended profiles don't see Intra_8x8.
 *
 * Distinct from Intra_4x4 in two ways:
 *
 *   1. REFERENCE SAMPLE FILTERING (§8.3.2.1.1). The 25 raw
 *      neighbour samples are pre-filtered with a 1-2-1 smoothing
 *      filter BEFORE prediction. The filtering has spec-defined
 *      boundary handling at the corners and the right-edge of the
 *      top-row extension.
 *
 *   2. SCALE. All 9 prediction modes operate at 8x8 with the
 *      filtered samples (Intra_4x4 operates at 4x4 with the raw
 *      samples).
 *
 * This PR implements the filter + the 3 simple modes (Vertical,
 * Horizontal, DC). The 6 directional modes (DDL, DDR, VR, HD, VL,
 * HU at 8x8) follow in a separate PR — same template, different
 * formulas per spec sections §8.3.2.1.5..§8.3.2.1.10.
 *
 * NOTE(review): the published ITU-T H.264 text appears to number the
 * Intra_8x8 sample-prediction clauses as 8.3.2.2.x (8.3.2.1 being the
 * Intra8x8PredMode derivation) — confirm the section references used
 * throughout this file against the spec edition in use.
 *
 * Calling convention (FFmpeg-style):
 *   daedalus_h264_pred_8x8l_<mode>_ref(uint8_t *dst, ptrdiff_t stride)
 *
 * `dst` points at row 0 col 0 of the 8x8 output block. Reads from
 *   top[0..15]  = dst[-stride + 0..15]
 *   top-left    = dst[-stride - 1]
 *   left[0..7]  = dst[ 0*stride - 1 .. 7*stride - 1]
 *
 * AVAILABILITY: assumes all neighbours valid (interior-MB case).
 *
 * License: BSD-2-Clause.
 */

/*
 * NOTE(review): the header names of the three #include directives were
 * lost in the collapsed source. <stddef.h> (ptrdiff_t) and <stdint.h>
 * (uint8_t) are required by the code below; <string.h> is a guess for
 * the third — confirm.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Geometry and layout of the filtered-reference array. */
enum {
    I8X8_BLK       = 8,   /* block edge length; also the left-column length */
    I8X8_TOP_LEN   = 16,  /* top row plus its right-hand extension          */
    I8X8_REF_TL    = 0,   /* index of the filtered top-left sample          */
    I8X8_REF_TOP   = 1,   /* first index of filtered top[0..15]             */
    I8X8_REF_LEFT  = 17,  /* first index of filtered left[0..7]             */
    I8X8_REF_COUNT = 25   /* 1 + 16 + 8 filtered samples                    */
};

/*
 * Clamp `v` to the 8-bit sample range [0, 255].
 *
 * Not called by any function visible in this section of the file.
 */
static inline int clip_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    return v > 255 ? 255 : v;
}

/*
 * H.264 §8.3.2.1.1 reference sample filtering. Filters the 25 raw
 * samples around the 8x8 block into a `filt` array with the same
 * indices. When called against an "all neighbours available" tile,
 * the filtered output uses these spec-defined formulas:
 *
 *   filt[top -1] (= filtered top-left) = (top[0] + 2*tl + left[0] + 2) >> 2
 *
 *   filt[top 0]                 = (tl + 2*top[0] + top[1] + 2) >> 2
 *   filt[top i] for 1<=i<=14    = (top[i-1] + 2*top[i] + top[i+1] + 2) >> 2
 *   filt[top 15]                = (top[14] + 3*top[15] + 2) >> 2   (boundary)
 *
 *   filt[left 0]                = (tl + 2*left[0] + left[1] + 2) >> 2
 *   filt[left j] for 1<=j<=6    = (left[j-1] + 2*left[j] + left[j+1] + 2) >> 2
 *   filt[left 7]                = (left[6] + 3*left[7] + 2) >> 2   (boundary)
 *
 * Reads neighbours from the dst buffer; writes filtered values to
 * a caller-provided 25-element array indexed as:
 *   filt[0]       = filtered top-left
 *   filt[1..16]   = filtered top[0..15]
 *   filt[17..24]  = filtered left[0..7]
 *
 * All raw neighbours are copied into locals before any output is
 * computed, and `dst` itself is never written here.
 *
 * Parameters:
 *   dst    - row 0 / col 0 of the 8x8 block; only the samples at
 *            dst[-stride - 1], dst[-stride + 0..15] and
 *            dst[j*stride - 1] (j in 0..7) are read.
 *   stride - distance in bytes between vertically adjacent samples.
 *   filt   - output array of 25 filtered samples (layout above).
 */
static void filter_refs(const uint8_t *dst, ptrdiff_t stride, uint8_t filt[25])
{
    const int tl = dst[-stride - 1];
    int t[I8X8_TOP_LEN];
    int l[I8X8_BLK];

    for (int i = 0; i < I8X8_TOP_LEN; i++) {
        t[i] = dst[-stride + i];
    }
    for (int j = 0; j < I8X8_BLK; j++) {
        l[j] = dst[j * stride - 1];
    }

    /* Filtered top-left: corner tap uses one neighbour from each side. */
    filt[I8X8_REF_TL] = (uint8_t)((t[0] + 2 * tl + l[0] + 2) >> 2);

    /* Filtered top: top-left acts as the sample to the left of top[0]. */
    filt[I8X8_REF_TOP] = (uint8_t)((tl + 2 * t[0] + t[1] + 2) >> 2);
    for (int i = 1; i <= I8X8_TOP_LEN - 2; i++) {
        filt[I8X8_REF_TOP + i] =
            (uint8_t)((t[i - 1] + 2 * t[i] + t[i + 1] + 2) >> 2);
    }
    /* Right edge: no top[16], so top[15] is weighted 3 instead of 2. */
    filt[I8X8_REF_TOP + 15] = (uint8_t)((t[14] + 3 * t[15] + 2) >> 2);

    /* Filtered left: top-left acts as the sample above left[0]. */
    filt[I8X8_REF_LEFT] = (uint8_t)((tl + 2 * l[0] + l[1] + 2) >> 2);
    for (int j = 1; j <= I8X8_BLK - 2; j++) {
        filt[I8X8_REF_LEFT + j] =
            (uint8_t)((l[j - 1] + 2 * l[j] + l[j + 1] + 2) >> 2);
    }
    /* Bottom edge: no left[8], so left[7] is weighted 3 instead of 2. */
    filt[I8X8_REF_LEFT + 7] = (uint8_t)((l[6] + 3 * l[7] + 2) >> 2);
}

/*
 * Convenience macros for accessing the filt[] array by spec-style index.
 * They expand to an element of a variable literally named `filt`, so they
 * are only usable where such an array is in scope.
 */
#define FT(i) (filt[I8X8_REF_TOP + (i)])   /* filtered top[i],  i in 0..15 */
#define FL(j) (filt[I8X8_REF_LEFT + (j)])  /* filtered left[j], j in 0..7  */
#define FTL   (filt[I8X8_REF_TL])          /* filtered top-left            */

/*
 * Mode 0 Vertical (§8.3.2.1.2): pred[r,c] = filt_top[c].
 *
 * The neighbours are filtered into a local array first; every row of the
 * 8x8 block at `dst` is then overwritten with filtered top[0..7].
 */
void daedalus_h264_pred_8x8l_vertical_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];

    filter_refs(dst, stride, filt);
    for (int r = 0; r < I8X8_BLK; r++) {
        uint8_t *row = dst + r * stride;
        for (int c = 0; c < I8X8_BLK; c++) {
            row[c] = FT(c);
        }
    }
}

/*
 * Mode 1 Horizontal (§8.3.2.1.3): pred[r,c] = filt_left[r].
 *
 * The neighbours are filtered into a local array first; row r of the
 * 8x8 block at `dst` is then filled with filtered left[r].
 */
void daedalus_h264_pred_8x8l_horizontal_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];

    filter_refs(dst, stride, filt);
    for (int r = 0; r < I8X8_BLK; r++) {
        uint8_t *row = dst + r * stride;
        for (int c = 0; c < I8X8_BLK; c++) {
            row[c] = FL(r);
        }
    }
}

/*
 * Mode 2 DC (§8.3.2.1.4): ((sum_filt_top[0..7] + sum_filt_left[0..7]
 * + 8) >> 4) broadcast. Note the +8 (not +4 like 4x4): there are
 * 16 samples summed total, so >> 4 with half-step rounding +8.
 *
 * The sum is at most 16*255 + 8, so the shifted value always fits in
 * a uint8_t.
 */
void daedalus_h264_pred_8x8l_dc_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    int sum = 8; /* rounding term for the >> 4 below */

    filter_refs(dst, stride, filt);
    for (int i = 0; i < I8X8_BLK; i++) {
        sum += FT(i);
    }
    for (int j = 0; j < I8X8_BLK; j++) {
        sum += FL(j);
    }

    const uint8_t v = (uint8_t)(sum >> 4);
    for (int r = 0; r < I8X8_BLK; r++) {
        uint8_t *row = dst + r * stride;
        for (int c = 0; c < I8X8_BLK; c++) {
            row[c] = v;
        }
    }
}