bes2600: deliver RX SKBs directly into wsm_handle_rx from sdio_rx_work

Patch C — collapse the sdio_rx_work → rx_queue → bh_work relay into a
direct call from bes2600_sdio_extract_packets into a new helper
bes2600_bh_handle_rx_skb that performs the per-SKB bookkeeping
previously done inside bes2600_bh_rx_helper after pipe_read.

What this saves per RX frame:
  - two spinlock acquires on self->rx_queue->lock
    (skb_queue_tail in extract_packets + skb_dequeue in pipe_read)
  - one bh wait-queue wake-up per IRQ batch
    (sdio_rx_work no longer calls self->irq_handler)

Pre-patch baseline on ohm (4 MB/s sender, ~5 min, srcversion 1B3B3ED0):
  - 387,532 RX packets, 578 MB, 1.36 MB/s observed receive
  - sdio_rx_work dispatched 34,994 times = 86.4/s = 90.3 per 1000 RX pkts
  - sdio_tx_work dispatched 111,770 times = 276.1/s
  - bes2600_bh_work redispatches: 0  (single long-lived work item,
    refutes earlier review claim of "9 events per frame")

API contract for wsm_handle_rx (cited in bh.c block comment):
  - declared wsm.h:2108, defined wsm.c:2211, EXPORT_SYMBOL wsm.c:2463
  - process context, sleepable
  - caller MUST hold no bes2600 spinlock; SDIO mutex is released at
    bes2600_sdio.c before extract_packets is called
  - SKB ownership: zeroes *skb_p if consumed, caller frees otherwise.
    bes2600_bh_handle_rx_skb frees on every path (success + error)

After patch, bh thread retains responsibility for TX work.  TX-confirm
packets that release a TX buffer wake the bh thread via
bes2600_bh_wakeup() inside bes2600_bh_handle_rx_skb (mirrors the in-bh
tx=1 signaling the old bh_rx_helper used to do).  Early-boot IRQs
before fw_started are still served via the GPIO IRQ handler's fallback
self->irq_handler path; that branch is unchanged.

Minimum-diff scope:
  - struct sbus_priv->rx_queue + ->rx_queue_lock are still defined and
    initialised (allocated but never used after this patch).  Removed
    in a follow-up hygiene patch, not bundled here.
  - bes2600_sdio_pipe_read still exists; returns NULL after this patch
    (rx_queue always empty).  Removed in same follow-up.
  - bes2600_bh_rx_helper still exists and still calls pipe_read; it
    now always returns 0.  This wastes one spinlock acquire plus one
    skb_dequeue per bh wake; the cost will be quantified in Phase 7 to
    decide whether a follow-up is worthwhile.

Awaiting Phase 7 verification on ohm.  Per Phase 4 plan §4.5 predicted
delta:
  - rx_queue->lock acquire rate: 1914/s -> 0  (high confidence)
  - _raw_spin_unlock_irqrestore CPU%: 20% -> 12-15%  (medium)
  - observed RX KB/s @ 4 MB/s: floor lifts toward >= 1 MB/s sustained
    (medium; rep 3's 75 KB/s death has multiple causes)

Phase 7 will run an N=3 ramp (1 -> 2 -> 4 -> 8 MB/s) with identical
instrumentation before and after the patch.  See
notes/patch-c-phase4-plan-2026-05-07.md in marfrit/besser for the plan;
this commit is the Phase 6 deliverable.
This commit is contained in:
2026-05-07 19:49:57 +02:00
parent 90f50b375f
commit c4797b1dbf
3 changed files with 144 additions and 9 deletions
+28 -9
View File
@@ -29,6 +29,7 @@
#include <linux/of_gpio.h> #include <linux/of_gpio.h>
#include "bes2600.h" #include "bes2600.h"
#include "bh.h"
#include "sbus.h" #include "sbus.h"
#include "bes2600_plat.h" #include "bes2600_plat.h"
#include "bes2600_factory.h" #include "bes2600_factory.h"
@@ -812,10 +813,23 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8
skb_put(skb, packet_len); skb_put(skb, packet_len);
memcpy(skb->data, &data[pos], packet_len); memcpy(skb->data, &data[pos], packet_len);
bes_devel("%s, %d,%d\n", __func__, packet_len, pos); bes_devel("%s, %d,%d\n", __func__, packet_len, pos);
spin_lock(&self->rx_queue_lock);
skb_queue_tail(&self->rx_queue, skb);
self->rx_data_cnt++; self->rx_data_cnt++;
spin_unlock(&self->rx_queue_lock); /*
* Patch C: deliver SKB directly into the WSM/mac80211 stack
* instead of skb_queue_tail-ing onto self->rx_queue for later
* pickup by the bh thread. Removes two spinlock acquires
* (rx_queue->lock at queue-tail + at dequeue) per RX frame
* and one bh wait-queue wake-up per IRQ batch.
*
* bes2600_bh_handle_rx_skb owns the SKB on every path.
* Contract: process context, sleepable, caller holds no
* bes2600 spinlock. See bh.c for the contract block.
*
* Pre-condition satisfied here: bes2600_sdio_unlock(self)
* was called at the bottom of the SDIO read sequence in
* sdio_rx_work, so we hold no bes2600 mutex either.
*/
bes2600_bh_handle_rx_skb(self->core, skb);
packet_len = (packet_len + 3) & (~0x3); packet_len = (packet_len + 3) & (~0x3);
pos += packet_len; pos += packet_len;
#ifdef BES_SDIO_OPTIMIZED_LEN #ifdef BES_SDIO_OPTIMIZED_LEN
@@ -898,12 +912,17 @@ static void sdio_rx_work(struct work_struct *work)
ctrl_reg = 0; ctrl_reg = 0;
if (likely(self->irq_handler)) { /*
self->irq_handler(self->irq_priv); * Patch C: with direct delivery in extract_packets, the bh
} else { * thread no longer drives RX consumption — there is no
bes_err("%s,%d\n", __func__, __LINE__); * rx_queue to drain. Calling self->irq_handler() here would
goto failed; * wake the bh thread for nothing on every IRQ batch. TX
} * wakes still flow through bes2600_bh_wakeup() from TX
* submitters and from bes2600_bh_handle_rx_skb when a
* confirm releases a TX buffer; early-boot IRQs (before
* fw_started) still go through self->irq_handler from the
* GPIO IRQ handler's fallback branch.
*/
} while (again); } while (again);
+109
View File
@@ -958,6 +958,115 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct
} }
} }
/*
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
*
* Patch C (sdio_rx_work direct delivery): this function does the
* per-SKB bookkeeping (sequence-number check, exception handling,
* tx-confirm accounting, mac80211 hand-off via wsm_handle_rx) that
* previously ran inside bes2600_bh_rx_helper after pipe_read dequeued
* an SKB from sbus_priv->rx_queue. It is now called inline from
* bes2600_sdio_extract_packets, eliminating the queue + bh-wakeup
* relay (one wait-queue wake-up + two rx_queue->lock acquires per
* RX frame).
*
* Contract:
* - process context, sleepable. wsm_handle_rx (wsm.c:2211, exported
* at wsm.c:2463) acquires wsm_cmd.lock, may call into mac80211
* and may sleep on wait_event_timeout (wsm.c:2036, 2091).
* - caller MUST hold no bes2600 spinlock. Reference precedent:
* bes2600_bh_rx_helper (this file) called from the bh thread.
* The SDIO mutex is released at bes2600_sdio.c before
* extract_packets is called, so this is satisfied.
* - SKB ownership: function frees on every path (success and error).
* - Returns 0 on success, negative on error. When the SKB carries
* a confirm that releases a TX buffer, the function asynchronously
* wakes the bh thread to drain TX (matches the in-bh tx=1
* signaling that bh_rx_helper used).
*/
int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb)
{
struct wsm_hdr *wsm;
size_t wsm_len;
u16 wsm_id;
u8 wsm_seq;
int tx = 0;
u32 confirm_label = 0x0;
if (!skb)
return 0;
wsm = (struct wsm_hdr *)skb->data;
wsm_len = __le16_to_cpu(wsm->len);
if (WARN_ON(wsm_len > skb->len)) {
bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len);
dev_kfree_skb(skb);
return -1;
}
if (priv->wsm_enable_wsm_dumps)
print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1,
skb->data, wsm_len, false);
wsm_id = __le16_to_cpu(wsm->id) & 0xFFF;
wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7;
bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n",
wsm_id, wsm_seq);
skb_trim(skb, wsm_len);
if (wsm_id == 0x0800) {
wsm_handle_exception(priv,
&skb->data[sizeof(*wsm)],
wsm_len - sizeof(*wsm));
bes_err("wsm exception\n");
dev_kfree_skb(skb);
return -1;
} else if ((wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)])) {
bes_err("seq error! %u. %u. 0x%x.", wsm_seq,
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id);
dev_kfree_skb(skb);
return -1;
}
bes2600_bh_parse_wakeup_event(priv, skb);
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7;
if (IS_DRIVER_TO_MCU_CMD(wsm_id))
confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label);
if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) {
int rc = wsm_release_tx_buffer(priv, 1);
bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id));
if (rc < 0) {
bes_err("wsm_release_tx_buffer failed: %d\n", rc);
dev_kfree_skb(skb);
return rc;
} else if (rc > 0) {
tx = 1;
}
}
/* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */
if (wsm_handle_rx(priv, wsm_id, wsm, &skb)) {
bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id);
if (skb)
dev_kfree_skb(skb);
return -1;
}
if (skb)
dev_kfree_skb(skb);
if (tx)
bes2600_bh_wakeup(priv);
return 0;
}
EXPORT_SYMBOL(bes2600_bh_handle_rx_skb);
static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx) static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx)
{ {
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
+7
View File
@@ -36,6 +36,13 @@ void bes2600_enable_powersave(struct bes2600_vif *priv,
int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count); int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count);
int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id, int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id,
int count); int count);
/*
 * Direct-deliver an RX SKB into the WSM/mac80211 stack.
 * Process context, sleepable, caller holds no bes2600 spinlock.
 * Function frees skb on every path. See bh.c for full contract.
 *
 * A NULL skb is accepted and returns 0.  Otherwise returns 0 on
 * success and a negative value on error.
 */
int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv,
struct sk_buff *skb);
int bes2600_bh_sw_process(struct bes2600_common *hw_priv, int bes2600_bh_sw_process(struct bes2600_common *hw_priv,
struct wsm_tx_confirm *tx_confirm); struct wsm_tx_confirm *tx_confirm);