From c4797b1dbfb99760b8134b7c2fd8dcd4812a55ae Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Thu, 7 May 2026 19:49:57 +0200 Subject: [PATCH] bes2600: deliver RX SKBs directly into wsm_handle_rx from sdio_rx_work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch C — collapse the sdio_rx_work → rx_queue → bh_work relay into a direct call from bes2600_sdio_extract_packets into a new helper bes2600_bh_handle_rx_skb that performs the per-SKB bookkeeping previously done inside bes2600_bh_rx_helper after pipe_read. What this saves per RX frame: - two spinlock acquires on self->rx_queue->lock (skb_queue_tail in extract_packets + skb_dequeue in pipe_read) - one bh wait-queue wake-up per IRQ batch (sdio_rx_work no longer calls self->irq_handler) Pre-patch baseline on ohm (4 MB/s sender, ~5 min, srcversion 1B3B3ED0): - 387,532 RX packets, 578 MB, 1.36 MB/s observed receive - sdio_rx_work dispatched 34,994 times = 86.4/s = 90.3 per 1000 RX pkts - sdio_tx_work dispatched 111,770 times = 276.1/s - bes2600_bh_work redispatches: 0 (single long-lived work item, refutes earlier review claim of "9 events per frame") API contract for wsm_handle_rx (cited in bh.c block comment): - declared wsm.h:2108, defined wsm.c:2211, EXPORT_SYMBOL wsm.c:2463 - process context, sleepable - caller MUST hold no bes2600 spinlock; SDIO mutex is released at bes2600_sdio.c before extract_packets is called - SKB ownership: zeroes *skb_p if consumed, caller frees otherwise. bes2600_bh_handle_rx_skb frees on every path (success + error) After patch, bh thread retains responsibility for TX work. TX-confirm packets that release a TX buffer wake the bh thread via bes2600_bh_wakeup() inside bes2600_bh_handle_rx_skb (mirrors the in-bh tx=1 signaling the old bh_rx_helper used to do). Early-boot IRQs before fw_started are still served via the GPIO IRQ handler's fallback self->irq_handler path; that branch is unchanged. Minimum-diff scope: - struct sbus_priv->rx_queue + ->rx_queue_lock are still defined and initialised (allocated but never used after this patch). Removed in a follow-up hygiene patch, not bundled here. - bes2600_sdio_pipe_read still exists; returns NULL after this patch (rx_queue always empty). Removed in same follow-up. - bes2600_bh_rx_helper still exists and still calls pipe_read; it now always returns 0. Wasted 1 spinlock + skb_dequeue per bh wake; quantified in Phase 7 to decide whether worth a follow-up. Awaiting Phase 7 verification on ohm. Per Phase 4 plan §4.5 predicted delta: - rx_queue->lock acquire rate: 1914/s -> 0 (high confidence) - _raw_spin_unlock_irqrestore CPU%: 20% -> 12-15% (medium) - observed RX KB/s @ 4 MB/s: floor lifts toward >= 1 MB/s sustained (medium; rep 3's 75 KB/s death has multiple causes) Phase 7 will N=3 ramp 1 -> 2 -> 4 -> 8 MB/s with identical instrumentation pre/post. See notes/patch-c-phase4-plan-2026-05-07.md in marfrit/besser for the plan; this commit is the Phase 6 deliverable. --- bes2600/bes2600_sdio.c | 37 ++++++++++---- bes2600/bh.c | 109 +++++++++++++++++++++++++++++++++++++++++ bes2600/bh.h | 7 +++ 3 files changed, 144 insertions(+), 9 deletions(-) diff --git a/bes2600/bes2600_sdio.c b/bes2600/bes2600_sdio.c index b998381..c8dfb89 100644 --- a/bes2600/bes2600_sdio.c +++ b/bes2600/bes2600_sdio.c @@ -29,6 +29,7 @@ #include #include "bes2600.h" +#include "bh.h" #include "sbus.h" #include "bes2600_plat.h" #include "bes2600_factory.h" @@ -812,10 +813,23 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8 skb_put(skb, packet_len); memcpy(skb->data, &data[pos], packet_len); bes_devel("%s, %d,%d\n", __func__, packet_len, pos); - spin_lock(&self->rx_queue_lock); - skb_queue_tail(&self->rx_queue, skb); self->rx_data_cnt++; - spin_unlock(&self->rx_queue_lock); + /* + * Patch C: deliver SKB directly into the WSM/mac80211 stack + * instead of skb_queue_tail-ing onto self->rx_queue for later + * pickup by the bh thread. Removes two spinlock acquires + * (rx_queue->lock at queue-tail + at dequeue) per RX frame + * and one bh wait-queue wake-up per IRQ batch. + * + * bes2600_bh_handle_rx_skb owns the SKB on every path. + * Contract: process context, sleepable, caller holds no + * bes2600 spinlock. See bh.c for the contract block. + * + * Pre-condition satisfied here: bes2600_sdio_unlock(self) + * was called at the bottom of the SDIO read sequence in + * sdio_rx_work, so we hold no bes2600 mutex either. + */ + bes2600_bh_handle_rx_skb(self->core, skb); packet_len = (packet_len + 3) & (~0x3); pos += packet_len; #ifdef BES_SDIO_OPTIMIZED_LEN @@ -898,12 +912,17 @@ static void sdio_rx_work(struct work_struct *work) ctrl_reg = 0; - if (likely(self->irq_handler)) { - self->irq_handler(self->irq_priv); - } else { - bes_err("%s,%d\n", __func__, __LINE__); - goto failed; - } + /* + * Patch C: with direct delivery in extract_packets, the bh + * thread no longer drives RX consumption — there is no + * rx_queue to drain. Calling self->irq_handler() here would + * wake the bh thread for nothing on every IRQ batch. TX + * wakes still flow through bes2600_bh_wakeup() from TX + * submitters and from bes2600_bh_handle_rx_skb when a + * confirm releases a TX buffer; early-boot IRQs (before + * fw_started) still go through self->irq_handler from the + * GPIO IRQ handler's fallback branch. + */ } while (again); diff --git a/bes2600/bh.c b/bes2600/bh.c index 6385312..00c6832 100644 --- a/bes2600/bh.c +++ b/bes2600/bh.c @@ -958,6 +958,115 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct } } +/* + * Direct-deliver an RX SKB into the WSM/mac80211 stack. + * + * Patch C (sdio_rx_work direct delivery): this function does the + * per-SKB bookkeeping (sequence-number check, exception handling, + * tx-confirm accounting, mac80211 hand-off via wsm_handle_rx) that + * previously ran inside bes2600_bh_rx_helper after pipe_read dequeued + * an SKB from sbus_priv->rx_queue. It is now called inline from + * bes2600_sdio_extract_packets, eliminating the queue + bh-wakeup + * relay (one wait-queue wake-up + two rx_queue->lock acquires per + * RX frame). + * + * Contract: + * - process context, sleepable. wsm_handle_rx (wsm.c:2211, exported + * at wsm.c:2463) acquires wsm_cmd.lock, may call into mac80211 + * and may sleep on wait_event_timeout (wsm.c:2036, 2091). + * - caller MUST hold no bes2600 spinlock. Reference precedent: + * bes2600_bh_rx_helper (this file) called from the bh thread. + * The SDIO mutex is released at bes2600_sdio.c before + * extract_packets is called, so this is satisfied. + * - SKB ownership: function frees on every path (success and error). + * - Returns 0 on success, negative on error. When the SKB carries + * a confirm that releases a TX buffer, the function asynchronously + * wakes the bh thread to drain TX (matches the in-bh tx=1 + * signaling that bh_rx_helper used). + */ +int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb) +{ + struct wsm_hdr *wsm; + size_t wsm_len; + u16 wsm_id; + u8 wsm_seq; + int tx = 0; + u32 confirm_label = 0x0; + + if (!skb) + return 0; + + wsm = (struct wsm_hdr *)skb->data; + wsm_len = __le16_to_cpu(wsm->len); + if (WARN_ON(wsm_len > skb->len)) { + bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len); + dev_kfree_skb(skb); + return -1; + } + + if (priv->wsm_enable_wsm_dumps) + print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1, + skb->data, wsm_len, false); + + wsm_id = __le16_to_cpu(wsm->id) & 0xFFF; + wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7; + bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n", + wsm_id, wsm_seq); + + skb_trim(skb, wsm_len); + + if (wsm_id == 0x0800) { + wsm_handle_exception(priv, + &skb->data[sizeof(*wsm)], + wsm_len - sizeof(*wsm)); + bes_err("wsm exception\n"); + dev_kfree_skb(skb); + return -1; + } else if ((wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)])) { + bes_err("seq error! %u. %u. 0x%x.", wsm_seq, + priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id); + dev_kfree_skb(skb); + return -1; + } + + bes2600_bh_parse_wakeup_event(priv, skb); + + priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7; + + if (IS_DRIVER_TO_MCU_CMD(wsm_id)) + confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label); + + if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) { + int rc = wsm_release_tx_buffer(priv, 1); + bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id)); + + if (rc < 0) { + bes_err("wsm_release_tx_buffer failed: %d\n", rc); + dev_kfree_skb(skb); + return rc; + } else if (rc > 0) { + tx = 1; + } + } + + /* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */ + if (wsm_handle_rx(priv, wsm_id, wsm, &skb)) { + bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id); + if (skb) + dev_kfree_skb(skb); + return -1; + } + + if (skb) + dev_kfree_skb(skb); + + if (tx) + bes2600_bh_wakeup(priv); + + return 0; +} +EXPORT_SYMBOL(bes2600_bh_handle_rx_skb); + static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx) { struct sk_buff *skb = NULL; diff --git a/bes2600/bh.h b/bes2600/bh.h index 7be82dc..032b9d8 100644 --- a/bes2600/bh.h +++ b/bes2600/bh.h @@ -36,6 +36,13 @@ void bes2600_enable_powersave(struct bes2600_vif *priv, int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count); int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id, int count); +/* + * Direct-deliver an RX SKB into the WSM/mac80211 stack. + * Process context, sleepable, caller holds no bes2600 spinlock. + * Function frees skb on every path. See bh.c for full contract. + */ +int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv, + struct sk_buff *skb); int bes2600_bh_sw_process(struct bes2600_common *hw_priv, struct wsm_tx_confirm *tx_confirm);