diff --git a/CMakeLists.txt b/CMakeLists.txt
index 38ced99..971a63e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -525,6 +525,7 @@ add_executable(test_api_h264
     tests/h264_qpel8_mc20_ref.c
     tests/h264_qpel8_mc02_ref.c
     tests/h264_qpel8_mc22_ref.c
+    tests/h264_qpel8_quarter_axis_ref.c
 )
 target_link_libraries(test_api_h264 PRIVATE daedalus_core)
 target_compile_options(test_api_h264 PRIVATE -O2)
diff --git a/include/daedalus.h b/include/daedalus.h
index d827d3e..46f9fce 100644
--- a/include/daedalus.h
+++ b/include/daedalus.h
@@ -436,6 +436,45 @@ int daedalus_dispatch_h264_qpel_mc22(daedalus_ctx *ctx, daedalus_substrate sub,
     uint8_t *dst, const uint8_t *src, size_t stride,
     size_t n_blocks, const daedalus_h264_qpel_meta *meta);
 
+/* H.264 luma single-axis quarter-pel qpel positions ("put"):
+ *   mc10  ¼-H ("a" position): clip255(mc20(s)) avg src[r,c]
+ *   mc30  ¾-H ("c" position): clip255(mc20(s)) avg src[r,c+1]
+ *   mc01  ¼-V ("d" position): clip255(mc02(s)) avg src[r,c]
+ *   mc03  ¾-V ("n" position): clip255(mc02(s)) avg src[r+1,c]
+ *
+ * Each is a half-pel lowpass clipped to u8 then averaged with an
+ * integer-aligned source pixel (rounded +1 >> 1).  Same edge
+ * context contract as mc20/mc02.  CPU-only for now (no QPU shaders):
+ * explicit SUBSTRATE_QPU returns -1, or runs on CPU if ctx has no QPU.
+ */
+int daedalus_recipe_dispatch_h264_qpel_mc10(daedalus_ctx *ctx,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+int daedalus_dispatch_h264_qpel_mc10(daedalus_ctx *ctx, daedalus_substrate sub,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+
+int daedalus_recipe_dispatch_h264_qpel_mc30(daedalus_ctx *ctx,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+int daedalus_dispatch_h264_qpel_mc30(daedalus_ctx *ctx, daedalus_substrate sub,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+
+int daedalus_recipe_dispatch_h264_qpel_mc01(daedalus_ctx *ctx,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+int daedalus_dispatch_h264_qpel_mc01(daedalus_ctx *ctx, daedalus_substrate sub,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+
+int daedalus_recipe_dispatch_h264_qpel_mc03(daedalus_ctx *ctx,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+int daedalus_dispatch_h264_qpel_mc03(daedalus_ctx *ctx, daedalus_substrate sub,
+    uint8_t *dst, const uint8_t *src, size_t stride,
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+
 /* -------------------------------------------------------------------
  * Recipe query — what does the API recommend for each kernel?
  * ----------------------------------------------------------------- */
@@ -458,6 +497,10 @@ typedef enum {
     DAEDALUS_KERNEL_H264_DEBLOCK_CH_INTRA = 16,
     DAEDALUS_KERNEL_H264_QPEL_MC02        = 17,
     DAEDALUS_KERNEL_H264_QPEL_MC22        = 18,
+    DAEDALUS_KERNEL_H264_QPEL_MC10        = 19,
+    DAEDALUS_KERNEL_H264_QPEL_MC30        = 20,
+    DAEDALUS_KERNEL_H264_QPEL_MC01        = 21,
+    DAEDALUS_KERNEL_H264_QPEL_MC03        = 22,
 } daedalus_kernel;
 
 daedalus_substrate daedalus_recipe_substrate_for(daedalus_kernel k);
diff --git a/src/daedalus_core.c b/src/daedalus_core.c
index 45ecff5..69edcc1 100644
--- a/src/daedalus_core.c
+++ b/src/daedalus_core.c
@@ -140,6 +140,10 @@ daedalus_substrate daedalus_recipe_substrate_for(daedalus_kernel k)
     case DAEDALUS_KERNEL_H264_QPEL_MC20:   return DAEDALUS_SUBSTRATE_QPU;	/* v3d_h264_qpel_mc20.spv */
     case DAEDALUS_KERNEL_H264_QPEL_MC02:   return DAEDALUS_SUBSTRATE_CPU;	/* QPU mc02 shader pending */
     case DAEDALUS_KERNEL_H264_QPEL_MC22:   return DAEDALUS_SUBSTRATE_CPU;	/* QPU mc22 shader pending (hv lowpass) */
+    case DAEDALUS_KERNEL_H264_QPEL_MC10:   return DAEDALUS_SUBSTRATE_CPU;	/* ¼-H L2 */
+    case DAEDALUS_KERNEL_H264_QPEL_MC30:   return DAEDALUS_SUBSTRATE_CPU;	/* ¾-H L2 */
+    case DAEDALUS_KERNEL_H264_QPEL_MC01:   return DAEDALUS_SUBSTRATE_CPU;	/* ¼-V L2 */
+    case DAEDALUS_KERNEL_H264_QPEL_MC03:   return DAEDALUS_SUBSTRATE_CPU;	/* ¾-V L2 */
     }
     return DAEDALUS_SUBSTRATE_CPU;
 }
@@ -184,6 +188,14 @@ extern void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t stride);
 extern void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src,
                                          ptrdiff_t stride);
+extern void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+extern void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+extern void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+extern void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
 
 /* -------------------- CPU dispatch implementations -------------- */
 
@@ -437,6 +449,28 @@ static int dispatch_h264_qpel_mc22_cpu(daedalus_ctx *ctx,
     return 0;
 }
 
+/* Stamps out dispatch_h264_qpel_<suffix>_cpu() for mc10/mc30/mc01/mc03: one
+ * neon_fn call per meta[i] (dst_off/src_off, shared stride); always returns 0. */
+#define DEFINE_QPEL_CPU_DISPATCH(suffix, neon_fn)                              \
+static int dispatch_h264_qpel_ ## suffix ## _cpu(daedalus_ctx *ctx,            \
+    uint8_t *dst, const uint8_t *src, size_t stride,                           \
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta)                      \
+{                                                                              \
+    (void) ctx;                                                                \
+    for (size_t i = 0; i < n_blocks; i++) {                                    \
+        neon_fn(dst + meta[i].dst_off, src + meta[i].src_off,                  \
+                (ptrdiff_t) stride);                                           \
+    }                                                                          \
+    return 0;                                                                  \
+}
+
+DEFINE_QPEL_CPU_DISPATCH(mc10, ff_put_h264_qpel8_mc10_neon)
+DEFINE_QPEL_CPU_DISPATCH(mc30, ff_put_h264_qpel8_mc30_neon)
+DEFINE_QPEL_CPU_DISPATCH(mc01, ff_put_h264_qpel8_mc01_neon)
+DEFINE_QPEL_CPU_DISPATCH(mc03, ff_put_h264_qpel8_mc03_neon)
+
+#undef DEFINE_QPEL_CPU_DISPATCH
+
 /* -------------------- IDCT QPU dispatch (cycle 1 v4 shader) ---- */
 
 typedef struct {
@@ -1436,6 +1470,28 @@ int daedalus_dispatch_h264_qpel_mc22(daedalus_ctx *ctx, daedalus_substrate sub,
     return dispatch_h264_qpel_mc22_cpu(ctx, dst, src, stride, n_blocks, meta);
 }
 
+#define DEFINE_QPEL_DISPATCH(suffix, kernel)                                   \
+int daedalus_dispatch_h264_qpel_ ## suffix(daedalus_ctx *ctx,                  \
+    daedalus_substrate sub, uint8_t *dst, const uint8_t *src, size_t stride,   \
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta)                      \
+{ /* Resolve substrate; only the CPU path exists for these kernels. */         \
+    daedalus_substrate eff = sub;                                              \
+    if (eff == DAEDALUS_SUBSTRATE_AUTO)                                        \
+        eff = daedalus_recipe_substrate_for(kernel);                           \
+    if (eff == DAEDALUS_SUBSTRATE_QPU && !daedalus_ctx_has_qpu(ctx))           \
+        eff = DAEDALUS_SUBSTRATE_CPU; /* no QPU in ctx: fall back */           \
+    if (eff == DAEDALUS_SUBSTRATE_QPU) return -1; /* no QPU shader yet */      \
+    return dispatch_h264_qpel_ ## suffix ## _cpu(ctx, dst, src, stride,        \
+                                                  n_blocks, meta);             \
+}
+
+DEFINE_QPEL_DISPATCH(mc10, DAEDALUS_KERNEL_H264_QPEL_MC10)
+DEFINE_QPEL_DISPATCH(mc30, DAEDALUS_KERNEL_H264_QPEL_MC30)
+DEFINE_QPEL_DISPATCH(mc01, DAEDALUS_KERNEL_H264_QPEL_MC01)
+DEFINE_QPEL_DISPATCH(mc03, DAEDALUS_KERNEL_H264_QPEL_MC03)
+
+#undef DEFINE_QPEL_DISPATCH
+
 /* -------------------- Recipe convenience wrappers --------------- */
 
 int daedalus_recipe_dispatch_vp9_idct8(daedalus_ctx *ctx,
@@ -1570,3 +1626,19 @@ int daedalus_recipe_dispatch_h264_qpel_mc22(daedalus_ctx *ctx,
     return daedalus_dispatch_h264_qpel_mc22(ctx, DAEDALUS_SUBSTRATE_AUTO,
                                              dst, src, stride, n_blocks, meta);
 }
+
+#define DEFINE_QPEL_RECIPE(suffix)                                             \
+int daedalus_recipe_dispatch_h264_qpel_ ## suffix(daedalus_ctx *ctx,           \
+    uint8_t *dst, const uint8_t *src, size_t stride,                           \
+    size_t n_blocks, const daedalus_h264_qpel_meta *meta)                      \
+{ /* AUTO lets the dispatcher pick via daedalus_recipe_substrate_for(). */     \
+    return daedalus_dispatch_h264_qpel_ ## suffix(ctx, DAEDALUS_SUBSTRATE_AUTO,\
+                                                   dst, src, stride, n_blocks, meta); \
+}
+
+DEFINE_QPEL_RECIPE(mc10)
+DEFINE_QPEL_RECIPE(mc30)
+DEFINE_QPEL_RECIPE(mc01)
+DEFINE_QPEL_RECIPE(mc03)
+
+#undef DEFINE_QPEL_RECIPE
diff --git a/tests/h264_qpel8_quarter_axis_ref.c b/tests/h264_qpel8_quarter_axis_ref.c
new file mode 100644
index 0000000..e581006
--- /dev/null
+++ b/tests/h264_qpel8_quarter_axis_ref.c
@@ -0,0 +1,82 @@
+/*
+ * Standalone bit-exact C references for the four single-axis quarter-
+ * pel luma qpel positions (H.264 §8.4.2.2.1, "put" variants).  Each
+ * is a half-pel lowpass clipped to u8 followed by an L2 rounded-average
+ * with an integer-position source pixel.
+ *
+ *   mc10 ("a" pos, ¼ horiz): a = clip255(mc20(s)); dst = (a + s[r,c]   + 1) >> 1
+ *   mc30 ("c" pos, ¾ horiz): a = clip255(mc20(s)); dst = (a + s[r,c+1] + 1) >> 1
+ *   mc01 ("d" pos, ¼ vert ): a = clip255(mc02(s)); dst = (a + s[r,  c] + 1) >> 1
+ *   mc03 ("n" pos, ¾ vert ): a = clip255(mc02(s)); dst = (a + s[r+1,c] + 1) >> 1
+ *
+ * Mirror FFmpeg's `ff_put_h264_qpel8_mc{10,30,01,03}_neon` (in
+ * external/ffmpeg-snapshot/libavcodec/aarch64/h264qpel_neon.S
+ * lines 587, 603, 611, 729 — each tail-calls the corresponding
+ * lowpass_l2 helper).
+ *
+ * Same single-stride convention as mc20/mc02 — dst and src share the
+ * same stride; src + src_off points at row 0 col 0 of the output
+ * block, with appropriate edge context already in-buffer.
+ *
+ * License: LGPL-2.1-or-later.
+ */
+#include <stdint.h>
+#include <stddef.h>
+
+static inline int clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }  /* saturate to 0..255 */
+
+/* Horizontal half-pel at (r, c): taps 1,-5,20,20,-5,1 over cols c-2..c+3 — same as mc20. */
+static inline uint8_t hpel_h(const uint8_t *s, int r, int c, ptrdiff_t stride)
+{
+    int v = (int) s[r*stride + c-2] - 5 * (int) s[r*stride + c-1]
+          + 20 * (int) s[r*stride + c] + 20 * (int) s[r*stride + c+1]
+          - 5 * (int) s[r*stride + c+2] + (int) s[r*stride + c+3]
+          + 16;   /* rounding bias for the >> 5 below */
+    return (uint8_t) clip_u8(v >> 5);   /* v may be negative: assumes arithmetic >> */
+}
+
+/* Vertical half-pel at (r, c): taps 1,-5,20,20,-5,1 over rows r-2..r+3 — same as mc02. */
+static inline uint8_t hpel_v(const uint8_t *s, int r, int c, ptrdiff_t stride)
+{
+    int v = (int) s[(r-2)*stride + c] - 5 * (int) s[(r-1)*stride + c]
+          + 20 * (int) s[r*stride + c] + 20 * (int) s[(r+1)*stride + c]
+          - 5 * (int) s[(r+2)*stride + c] + (int) s[(r+3)*stride + c]
+          + 16;   /* rounding bias for the >> 5 below */
+    return (uint8_t) clip_u8(v >> 5);   /* v may be negative: assumes arithmetic >> */
+}
+
+void daedalus_put_h264_qpel8_mc10_ref(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
+{   /* "a" (1/4 H), 8x8: horizontal half-pel rounded-averaged with src[r, c]; dst shares stride */
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) {
+            uint8_t a = hpel_h(src, r, c, stride);
+            dst[r*stride + c] = (uint8_t) ((a + src[r*stride + c    ] + 1) >> 1);
+        }
+}
+
+void daedalus_put_h264_qpel8_mc30_ref(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
+{   /* "c" (3/4 H), 8x8: horizontal half-pel rounded-averaged with src[r, c+1]; dst shares stride */
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) {
+            uint8_t a = hpel_h(src, r, c, stride);
+            dst[r*stride + c] = (uint8_t) ((a + src[r*stride + c + 1] + 1) >> 1);
+        }
+}
+
+void daedalus_put_h264_qpel8_mc01_ref(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
+{   /* "d" (1/4 V), 8x8: vertical half-pel rounded-averaged with src[r, c]; dst shares stride */
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) {
+            uint8_t a = hpel_v(src, r, c, stride);
+            dst[r*stride + c] = (uint8_t) ((a + src[(r    )*stride + c] + 1) >> 1);
+        }
+}
+
+void daedalus_put_h264_qpel8_mc03_ref(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
+{   /* "n" (3/4 V), 8x8: vertical half-pel rounded-averaged with src[r+1, c]; dst shares stride */
+    for (int r = 0; r < 8; r++)
+        for (int c = 0; c < 8; c++) {
+            uint8_t a = hpel_v(src, r, c, stride);
+            dst[r*stride + c] = (uint8_t) ((a + src[(r + 1)*stride + c] + 1) >> 1);
+        }
+}
diff --git a/tests/test_api_h264.c b/tests/test_api_h264.c
index 275a556..370f34b 100644
--- a/tests/test_api_h264.c
+++ b/tests/test_api_h264.c
@@ -36,6 +36,14 @@ extern void daedalus_put_h264_qpel8_mc02_ref(uint8_t *dst, const uint8_t *src,
                                                 ptrdiff_t stride);
 extern void daedalus_put_h264_qpel8_mc22_ref(uint8_t *dst, const uint8_t *src,
                                                 ptrdiff_t stride);
+extern void daedalus_put_h264_qpel8_mc10_ref(uint8_t *dst, const uint8_t *src,
+                                                ptrdiff_t stride);
+extern void daedalus_put_h264_qpel8_mc30_ref(uint8_t *dst, const uint8_t *src,
+                                                ptrdiff_t stride);
+extern void daedalus_put_h264_qpel8_mc01_ref(uint8_t *dst, const uint8_t *src,
+                                                ptrdiff_t stride);
+extern void daedalus_put_h264_qpel8_mc03_ref(uint8_t *dst, const uint8_t *src,
+                                                ptrdiff_t stride);
 extern void daedalus_put_h264_qpel8_mc20_ref(uint8_t *dst, const uint8_t *src,
                                               ptrdiff_t stride);
 
@@ -483,6 +491,63 @@ static int test_qpel_mc22(void)
     return diff == 0 ? 0 : 1;
 }
 
+/* Generic harness for the 4 single-axis quarter-pel positions; same
+ * tile geometry as mc22.  Per output pixel the lowpass reads cols -2..+3
+ * (mc10/mc30) or rows -2..+3 (mc01/mc03); the averaged integer pixel
+ * (col +1 for mc30, row +1 for mc03) lies inside that same window. */
+typedef void (*qpel_ref_fn)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+typedef int  (*qpel_dispatch_fn)(daedalus_ctx *ctx, uint8_t *dst,
+                                  const uint8_t *src, size_t stride,
+                                  size_t n_blocks, const daedalus_h264_qpel_meta *meta);
+
+/* Runs `dispatch` over N random tiles and compares every output byte with
+ * `ref`.  Returns 0 only when dispatch succeeds and all bytes match, else 1. */
+static int run_quarter_axis_qpel(const char *name,
+                                  qpel_ref_fn ref, qpel_dispatch_fn dispatch)
+{
+    enum { N = 8, TILE_STRIDE = 16, TILE_ROWS = 16,
+           TILE_BYTES = TILE_ROWS * TILE_STRIDE, TOTAL = N * TILE_BYTES,
+           SRC_ROW = 3, SRC_COL = 3 };
+    daedalus_ctx *ctx = daedalus_ctx_create();
+    if (!ctx) return 1;
+
+    uint8_t src[TOTAL], dst[TOTAL], dst_ref[TOTAL];
+    daedalus_h264_qpel_meta meta[N];
+
+    for (int i = 0; i < TOTAL; i++) src[i] = (uint8_t)(xs() & 0xff);
+    memset(dst, 0, sizeof(dst));
+    memset(dst_ref, 0, sizeof(dst_ref));
+
+    for (int i = 0; i < N; i++) {
+        meta[i].src_off = (uint32_t)(i * TILE_BYTES + SRC_ROW * TILE_STRIDE + SRC_COL);
+        meta[i].dst_off = (uint32_t)(i * TILE_BYTES + SRC_ROW * TILE_STRIDE + SRC_COL);
+        ref(dst_ref + meta[i].dst_off, src + meta[i].src_off, TILE_STRIDE);
+    }
+
+    int rc = dispatch(ctx, dst, src, TILE_STRIDE, N, meta);
+    daedalus_ctx_destroy(ctx);  /* release on every path, including dispatch failure */
+    if (rc) { fprintf(stderr, "%s dispatch rc=%d\n", name, rc); return 1; }
+    int diff = 0;
+    for (int i = 0; i < TOTAL; i++) if (dst[i] != dst_ref[i]) diff++;
+    printf("  H.264 qpel %s: %d/%d bytes bit-exact (%.4f%%)\n",
+           name, TOTAL - diff, TOTAL, 100.0 * (TOTAL - diff) / TOTAL);
+    return diff == 0 ? 0 : 1;
+}
+
+static int test_qpel_quarter_axis_all(void)
+{
+    int fail = 0;   /* OR-accumulated: all four positions run even if one fails */
+    fail |= run_quarter_axis_qpel("mc10", daedalus_put_h264_qpel8_mc10_ref,
+                                          daedalus_recipe_dispatch_h264_qpel_mc10);
+    fail |= run_quarter_axis_qpel("mc30", daedalus_put_h264_qpel8_mc30_ref,
+                                          daedalus_recipe_dispatch_h264_qpel_mc30);
+    fail |= run_quarter_axis_qpel("mc01", daedalus_put_h264_qpel8_mc01_ref,
+                                          daedalus_recipe_dispatch_h264_qpel_mc01);
+    fail |= run_quarter_axis_qpel("mc03", daedalus_put_h264_qpel8_mc03_ref,
+                                          daedalus_recipe_dispatch_h264_qpel_mc03);
+    return fail;
+}
+
 int main(void)
 {
     printf("=== Phase 8a API smoke: H.264 kernels via recipe dispatch ===\n");
@@ -515,5 +580,6 @@ int main(void)
     fail |= test_qpel_mc20();
     fail |= test_qpel_mc02();
     fail |= test_qpel_mc22();
+    fail |= test_qpel_quarter_axis_all();
     return fail;
 }