Merge pull request 'h264: Intra_8x8 luma — 6 directional modes (DDL/DDR/VR/HD/VL/HU)' (#22) from noether/h264-intra-pred-8x8-directional into main

Reviewed-on: #22
This commit was merged in pull request #22.
This commit is contained in:
2026-05-25 09:16:19 +00:00
2 changed files with 214 additions and 1 deletions
+182
View File
@@ -121,3 +121,185 @@ void daedalus_h264_pred_8x8l_dc_ref(uint8_t *dst, ptrdiff_t stride)
for (int r = 0; r < 8; r++)
for (int c = 0; c < 8; c++) dst[r * stride + c] = v;
}
/* --- 6 directional modes for Intra_8x8 (H.264 §8.3.2.2.5..§8.3.2.2.10).
* Transcribed from FFmpeg libavcodec/h264pred_template.c
* pred8x8l_{down_left, down_right, vertical_right, horizontal_down,
* vertical_left, horizontal_up} (LGPL-2.1+ in the original; algorithm
* reproduced here for test purposes).
*
* All 6 use the same FILTERED reference samples produced by
* filter_refs() above. Mapping from FFmpeg's t0..t15 / l0..l7 / lt
* notation:
* tN = FT(N) for N in 0..15
* lN = FL(N) for N in 0..7
* lt = FTL
*
* SRC(x,y) maps to dst[y*stride + x] (col x, row y).
*/
/* Shorthands used by the six directional predictors that follow.
 *
 * SRC(x, y) names the sample at column x, row y of the block being
 * predicted; it expands in terms of `dst` and `stride`, so both must be
 * in scope at the point of use.
 *
 * T, L and LT are one-letter aliases for FT, FL and FTL (defined outside
 * this excerpt; presumably they read the local `filt` array filled by
 * filter_refs() — confirm against their definitions). */
#define SRC(x, y) dst[(y) * stride + (x)]
#define T(i) FT(i)
#define L(j) FL(j)
#define LT FTL
/* Mode 3, Diagonal_Down_Left.
 *
 * All samples on one anti-diagonal (x + y constant) share a value: the
 * 1-2-1 smoothing of three consecutive top / top-right references
 * starting at T(x + y).  The bottom-right sample (x + y == 14) would
 * need a 17th top reference, so it uses T(14) + 3*T(15) instead.
 * Reads T(0)..T(15) only; the left column and corner are not used.
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_ddl_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    int top[16];
    int diag[15];   /* one predicted value per anti-diagonal, indexed by x + y */

    filter_refs(dst, stride, filt);

    for (int i = 0; i < 16; i++) {
        top[i] = T(i);
    }

    for (int k = 0; k < 14; k++) {
        diag[k] = (top[k] + 2 * top[k + 1] + top[k + 2] + 2) >> 2;
    }
    diag[14] = (top[14] + 3 * top[15] + 2) >> 2;

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            SRC(x, y) = (uint8_t)diag[x + y];
        }
    }
}
/* Mode 4, Diagonal_Down_Right.
 *
 * All samples on one main diagonal (x - y constant) share a value: the
 * 1-2-1 smoothing of three neighbouring references taken from the
 * L-shaped edge that runs up the left column, through the corner and
 * along the top row.  Reads L(0)..L(7), LT and T(0)..T(7).
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_ddr_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    int edge[17];   /* L(7)..L(0), LT, T(0)..T(7) laid out as one line */
    int diag[15];   /* one predicted value per diagonal, indexed by x - y + 7 */

    filter_refs(dst, stride, filt);

    for (int j = 0; j < 8; j++) {
        edge[7 - j] = L(j);
    }
    edge[8] = LT;
    for (int i = 0; i < 8; i++) {
        edge[9 + i] = T(i);
    }

    /* The diagonal through the corner (x == y) is centred on edge[8]. */
    for (int n = 0; n < 15; n++) {
        diag[n] = (edge[n] + 2 * edge[n + 1] + edge[n + 2] + 2) >> 2;
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            SRC(x, y) = (uint8_t)diag[x - y + 7];
        }
    }
}
/* Mode 5, Vertical_Right.
 *
 * Samples are classified by z = 2*x - y:
 *   z even, z >= 0 : rounded average of two adjacent top references
 *   z odd,  z >  0 : 1-2-1 smoothing of three adjacent top references
 *   z < 0          : 1-2-1 smoothing running down the left column
 *                    (z == -1 straddles the corner: L(0), LT, T(0))
 * Reads L(0)..L(6), LT and T(0)..T(7); L(7) is never needed.
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_vr_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    /* edge[7 - j] = L(j), edge[8] = LT, edge[9 + i] = T(i).
     * Slot 0 (L(7)) stays zero and is never read. */
    int edge[17] = {0};

    filter_refs(dst, stride, filt);

    for (int j = 0; j < 7; j++) {
        edge[7 - j] = L(j);
    }
    edge[8] = LT;
    for (int i = 0; i < 8; i++) {
        edge[9 + i] = T(i);
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int z = 2 * x - y;
            int v;

            if (z >= 0 && (z & 1) == 0) {
                /* Average of T(i - 1) and T(i), i = x - (y >> 1); T(-1) is LT. */
                int c = 9 + x - (y >> 1);
                v = (edge[c - 1] + edge[c] + 1) >> 1;
            } else {
                /* Three-tap centre: T(i - 1) when z > 0, otherwise the
                 * reference -z - 1 steps down the left column from LT. */
                int c = (z > 0) ? 8 + x - (y >> 1) : 9 + z;
                v = (edge[c - 1] + 2 * edge[c] + edge[c + 1] + 2) >> 2;
            }
            SRC(x, y) = (uint8_t)v;
        }
    }
}
/* Mode 6, Horizontal_Down.
 *
 * Samples are classified by z = 2*y - x:
 *   z even, z >= 0 : rounded average of two adjacent left references
 *   z odd,  z >  0 : 1-2-1 smoothing of three adjacent left references
 *   z < 0          : 1-2-1 smoothing running along the top row
 *                    (z == -1 straddles the corner: L(0), LT, T(0))
 * Reads L(0)..L(7), LT and T(0)..T(6); T(7) is never needed.
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_hd_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    /* edge[7 - j] = L(j), edge[8] = LT, edge[9 + i] = T(i).
     * Slot 16 (T(7)) stays zero and is never read. */
    int edge[17] = {0};

    filter_refs(dst, stride, filt);

    for (int j = 0; j < 8; j++) {
        edge[7 - j] = L(j);
    }
    edge[8] = LT;
    for (int i = 0; i < 7; i++) {
        edge[9 + i] = T(i);
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int z = 2 * y - x;
            int v;

            if (z >= 0 && (z & 1) == 0) {
                /* Average of L(j) and L(j - 1), j = y - (x >> 1); L(-1) is LT. */
                int c = 7 - (y - (x >> 1));
                v = (edge[c] + edge[c + 1] + 1) >> 1;
            } else {
                /* Three-tap centre: L(j - 1) when z > 0, otherwise the
                 * reference -z - 1 steps along the top row from LT. */
                int c = (z > 0) ? 8 - (y - (x >> 1)) : 7 - z;
                v = (edge[c - 1] + 2 * edge[c] + edge[c + 1] + 2) >> 2;
            }
            SRC(x, y) = (uint8_t)v;
        }
    }
}
/* Mode 7, Vertical_Left.
 *
 * With k = x + (y >> 1):
 *   even rows : rounded average of T(k) and T(k + 1)
 *   odd rows  : 1-2-1 smoothing of T(k), T(k + 1), T(k + 2)
 * Reads T(0)..T(12) only; the left column and corner are not used.
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_vl_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    int top[13];

    filter_refs(dst, stride, filt);

    for (int i = 0; i < 13; i++) {
        top[i] = T(i);
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int k = x + (y >> 1);
            int v;

            if (y & 1) {
                v = (top[k] + 2 * top[k + 1] + top[k + 2] + 2) >> 2;
            } else {
                v = (top[k] + top[k + 1] + 1) >> 1;
            }
            SRC(x, y) = (uint8_t)v;
        }
    }
}
/* Mode 8, Horizontal_Up.
 *
 * Samples are classified by z = x + 2*y, with k = y + (x >> 1):
 *   z >  13        : plain copy of L(7) (20 samples in the lower right)
 *   z == 13        : L(6) + 3*L(7), the end-of-column three-tap form
 *   z odd,  z < 13 : 1-2-1 smoothing of L(k), L(k + 1), L(k + 2)
 *   z even, z < 13 : rounded average of L(k) and L(k + 1)
 * Reads L(0)..L(7) only; the top row and corner are not used.
 *
 * dst    - top-left sample of the 8x8 block to fill
 * stride - distance in bytes between vertically adjacent samples
 */
void daedalus_h264_pred_8x8l_hu_ref(uint8_t *dst, ptrdiff_t stride)
{
    uint8_t filt[25];
    int left[8];

    filter_refs(dst, stride, filt);

    for (int j = 0; j < 8; j++) {
        left[j] = L(j);
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int z = x + 2 * y;
            int k = y + (x >> 1);
            int v;

            if (z > 13) {
                v = left[7];
            } else if (z == 13) {
                v = (left[6] + 3 * left[7] + 2) >> 2;
            } else if (z & 1) {
                v = (left[k] + 2 * left[k + 1] + left[k + 2] + 2) >> 2;
            } else {
                v = (left[k] + left[k + 1] + 1) >> 1;
            }
            SRC(x, y) = (uint8_t)v;
        }
    }
}
/* Drop the short helper names again so SRC / T / L / LT cannot leak into
 * any code that follows. */
#undef SRC
#undef T
#undef L
#undef LT
+32 -1
View File
@@ -17,6 +17,12 @@
/* Reference Intra_8x8 luma predictors exercised by this test; declared
 * here, defined elsewhere.  Each one takes a pointer to the top-left
 * sample of the block and the row stride of the surrounding buffer. */
extern void daedalus_h264_pred_8x8l_vertical_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_horizontal_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_dc_ref(uint8_t *dst, ptrdiff_t stride);
/* Directional modes 3..8. */
extern void daedalus_h264_pred_8x8l_ddl_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_ddr_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_vr_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_hd_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_vl_ref(uint8_t *dst, ptrdiff_t stride);
extern void daedalus_h264_pred_8x8l_hu_ref(uint8_t *dst, ptrdiff_t stride);
/* Geometry of the test buffer buf[ROWS][STRIDE].  The predictors are
 * invoked on &buf[1][1], which leaves row 0 and column 0 for neighbour
 * samples.  Presumably 17 = 1 left column + 16 top/top-right samples and
 * 9 = 1 top row + 8 block rows — confirm against set_ctx(). */
#define STRIDE 17
#define ROWS 9
@@ -133,7 +139,32 @@ int main(void)
fail |= (diff == 0) ? 0 : 1;
}
if (fail == 0) printf("\nALL Intra_8x8 luma PASS (3 modes — V, H, DC)\n");
/* Directional modes — uniform-context sanity tests. With all
* neighbours = N, the 1-2-1 filter produces uniform N, and any
* 3-tap / 2-tap on uniform N produces N. So every directional
* mode should output uniform N on uniform input. */
{
typedef void (*pred_fn_t)(uint8_t *dst, ptrdiff_t stride);
struct { const char *name; pred_fn_t fn; } modes[] = {
{ "DDL (mode 3, uniform)", daedalus_h264_pred_8x8l_ddl_ref },
{ "DDR (mode 4, uniform)", daedalus_h264_pred_8x8l_ddr_ref },
{ "VR (mode 5, uniform)", daedalus_h264_pred_8x8l_vr_ref },
{ "HD (mode 6, uniform)", daedalus_h264_pred_8x8l_hd_ref },
{ "VL (mode 7, uniform)", daedalus_h264_pred_8x8l_vl_ref },
{ "HU (mode 8, uniform)", daedalus_h264_pred_8x8l_hu_ref },
};
for (size_t i = 0; i < sizeof(modes)/sizeof(modes[0]); i++) {
uint8_t buf[ROWS][STRIDE];
int t[16], l[8];
for (int k = 0; k < 16; k++) t[k] = 120;
for (int k = 0; k < 8; k++) l[k] = 120;
set_ctx(buf, 120, t, l);
modes[i].fn(&buf[1][1], STRIDE);
fail |= check_uniform(buf, modes[i].name, 120);
}
}
if (fail == 0) printf("\nALL Intra_8x8 luma PASS (9 modes — V, H, DC, DDL, DDR, VR, HD, VL, HU)\n");
else fprintf(stderr, "\n%d test(s) FAILED\n", fail);
return fail ? 1 : 0;
}