/*
 * Standalone bit-exact C reference for H.264 8x8 inverse integer
 * transform + add. Algorithm per H.264 spec §8.5.13.2 (8x8 IT).
 *
 * Mirrors FFmpeg `ff_h264_idct8_add_neon` in
 *   external/ffmpeg-snapshot/libavcodec/aarch64/h264idct_neon.S
 * line 267. Block is COLUMN-MAJOR (per cycle 6 Phase 9 lesson):
 *   block[c*8 + r] = coefficient at (row=r, col=c).
 *
 * Signature:
 *   void(uint8_t *dst, int16_t *block, ptrdiff_t stride);
 *
 * Zeroes block after transform (per FFmpeg convention).
 *
 * License: LGPL-2.1-or-later.
 */

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Edge length of the transform block (8x8 coefficients / pixels). */
enum { H264_IDCT8_DIM = 8 };

/* Saturate v to the unsigned 8-bit pixel range [0, 255]. */
static inline int clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/*
 * 1D 8-element H.264 IT butterfly per H.264 §8.5.13.2.
 *
 * d: the 8 input coefficients (read only).
 * g: receives the 8 transformed values; must not alias d.
 *
 * The >> operators are applied to possibly negative values and rely on
 * the compiler implementing them as arithmetic (sign-propagating) shifts.
 */
static inline void h264_idct8_butterfly(const int d[8], int g[8])
{
    int e[8];
    int f[8];

    /* Stage 1: even indices combine d[0], d[2], d[4], d[6];
     * odd indices combine d[1], d[3], d[5], d[7]. */
    e[0] = d[0] + d[4];
    e[1] = -d[3] + d[5] - d[7] - (d[7] >> 1);
    e[2] = d[0] - d[4];
    e[3] = d[1] + d[7] - d[3] - (d[3] >> 1);
    e[4] = (d[2] >> 1) - d[6];
    e[5] = -d[1] + d[7] + d[5] + (d[5] >> 1);
    e[6] = d[2] + (d[6] >> 1);
    e[7] = d[3] + d[5] + d[1] + (d[1] >> 1);

    /* Stage 2: mix within the even group and within the odd group. */
    f[0] = e[0] + e[6];
    f[1] = e[1] + (e[7] >> 2);
    f[2] = e[2] + e[4];
    f[3] = e[3] + (e[5] >> 2);
    f[4] = e[2] - e[4];
    f[5] = (e[3] >> 2) - e[5];
    f[6] = e[0] - e[6];
    f[7] = e[7] - (e[1] >> 2);

    /* Stage 3: final sum/difference pairs; g[i] and g[7 - i] share terms. */
    g[0] = f[0] + f[7];
    g[1] = f[2] + f[5];
    g[2] = f[4] + f[3];
    g[3] = f[6] + f[1];
    g[4] = f[6] - f[1];
    g[5] = f[4] - f[3];
    g[6] = f[2] - f[5];
    g[7] = f[0] - f[7];
}

/*
 * Inverse-transform an 8x8 coefficient block and add the residual to dst.
 *
 * dst:    top-left pixel of the 8x8 destination area; row r starts at
 *         dst + r * stride. Updated in place, each result clipped to 0..255.
 * block:  64 coefficients, column-major (block[c*8 + r] is row r, col c).
 *         All 64 entries are set to zero before returning.
 * stride: distance in bytes between the starts of consecutive dst rows.
 *
 * Intermediates between the two passes are kept in int; no narrowing to
 * 16 bits is performed.
 * NOTE(review): the NEON routine this mirrors presumably computes in 16-bit
 * lanes, so results would only agree while every intermediate fits in
 * int16_t - confirm against the assembly.
 */
void daedalus_h264_idct8_add_ref(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    int tmp[H264_IDCT8_DIM][H264_IDCT8_DIM];
    int col_out[H264_IDCT8_DIM][H264_IDCT8_DIM];

    /* Row pass FIRST. Read block as column-major (block[c*8 + r]).
     * d[c] for row r = block[c*8 + r] = (row=r, col=c) per the
     * H.264/FFmpeg column-major convention from cycle 6 phase 9. */
    for (int r = 0; r < H264_IDCT8_DIM; r++) {
        int d[H264_IDCT8_DIM];
        int g[H264_IDCT8_DIM];

        for (int c = 0; c < H264_IDCT8_DIM; c++) {
            d[c] = block[c * H264_IDCT8_DIM + r];
        }
        h264_idct8_butterfly(d, g);
        for (int c = 0; c < H264_IDCT8_DIM; c++) {
            tmp[r][c] = g[c];
        }
    }

    /* Column pass NEXT (on row-major tmp). */
    for (int c = 0; c < H264_IDCT8_DIM; c++) {
        int d[H264_IDCT8_DIM];
        int g[H264_IDCT8_DIM];

        for (int r = 0; r < H264_IDCT8_DIM; r++) {
            d[r] = tmp[r][c];
        }
        h264_idct8_butterfly(d, g);
        for (int r = 0; r < H264_IDCT8_DIM; r++) {
            col_out[r][c] = g[r];
        }
    }

    /* Round (+32) >> 6, add to dst, clip to u8. */
    for (int r = 0; r < H264_IDCT8_DIM; r++) {
        for (int c = 0; c < H264_IDCT8_DIM; c++) {
            int rounded = (col_out[r][c] + 32) >> 6;

            dst[r * stride + c] =
                (uint8_t) clip_u8(dst[r * stride + c] + rounded);
        }
    }

    /* FFmpeg convention: zero the block after transform. */
    memset(block, 0, 64 * sizeof(int16_t));
}