bes2600: deliver RX SKBs directly into wsm_handle_rx from sdio_rx_work
Patch C — collapse the sdio_rx_work → rx_queue → bh_work relay into a
direct call from bes2600_sdio_extract_packets into a new helper
bes2600_bh_handle_rx_skb that performs the per-SKB bookkeeping
previously done inside bes2600_bh_rx_helper after pipe_read.
What this saves per RX frame:
- two spinlock acquires on self->rx_queue->lock
(skb_queue_tail in extract_packets + skb_dequeue in pipe_read)
- one bh wait-queue wake-up per IRQ batch
(sdio_rx_work no longer calls self->irq_handler)
Pre-patch baseline on ohm (4 MB/s sender, ~5 min, srcversion 1B3B3ED0):
- 387,532 RX packets, 578 MB, 1.36 MB/s observed receive
- sdio_rx_work dispatched 34,994 times = 86.4/s = 90.3 per 1000 RX pkts
- sdio_tx_work dispatched 111,770 times = 276.1/s
- bes2600_bh_work redispatches: 0 (single long-lived work item,
refutes earlier review claim of "9 events per frame")
API contract for wsm_handle_rx (cited in bh.c block comment):
- declared wsm.h:2108, defined wsm.c:2211, EXPORT_SYMBOL wsm.c:2463
- process context, sleepable
- caller MUST hold no bes2600 spinlock; SDIO mutex is released at
bes2600_sdio.c before extract_packets is called
- SKB ownership: zeroes *skb_p if consumed, caller frees otherwise.
bes2600_bh_handle_rx_skb frees on every path (success + error)
After patch, bh thread retains responsibility for TX work. TX-confirm
packets that release a TX buffer wake the bh thread via
bes2600_bh_wakeup() inside bes2600_bh_handle_rx_skb (mirrors the in-bh
tx=1 signaling the old bh_rx_helper used to do). Early-boot IRQs
before fw_started are still served via the GPIO IRQ handler's fallback
self->irq_handler path; that branch is unchanged.
Minimum-diff scope:
- struct sbus_priv->rx_queue + ->rx_queue_lock are still defined and
initialised (allocated but never used after this patch). Removed
in a follow-up hygiene patch, not bundled here.
- bes2600_sdio_pipe_read still exists; returns NULL after this patch
(rx_queue always empty). Removed in same follow-up.
- bes2600_bh_rx_helper still exists and still calls pipe_read; it
now always returns 0. Wasted 1 spinlock + skb_dequeue per bh wake;
quantified in Phase 7 to decide whether worth a follow-up.
Awaiting Phase 7 verification on ohm. Per Phase 4 plan §4.5 predicted
delta:
- rx_queue->lock acquire rate: 1914/s -> 0 (high confidence)
- _raw_spin_unlock_irqrestore CPU%: 20% -> 12-15% (medium)
- observed RX KB/s @ 4 MB/s: floor lifts toward >= 1 MB/s sustained
(medium; rep 3's 75 KB/s death has multiple causes)
Phase 7 will N=3 ramp 1 -> 2 -> 4 -> 8 MB/s with identical
instrumentation pre/post. See notes/patch-c-phase4-plan-2026-05-07.md
in marfrit/besser for the plan; this commit is the Phase 6 deliverable.
This commit is contained in:
+28
-9
@@ -29,6 +29,7 @@
|
||||
#include <linux/of_gpio.h>
|
||||
|
||||
#include "bes2600.h"
|
||||
#include "bh.h"
|
||||
#include "sbus.h"
|
||||
#include "bes2600_plat.h"
|
||||
#include "bes2600_factory.h"
|
||||
@@ -812,10 +813,23 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8
|
||||
skb_put(skb, packet_len);
|
||||
memcpy(skb->data, &data[pos], packet_len);
|
||||
bes_devel("%s, %d,%d\n", __func__, packet_len, pos);
|
||||
spin_lock(&self->rx_queue_lock);
|
||||
skb_queue_tail(&self->rx_queue, skb);
|
||||
self->rx_data_cnt++;
|
||||
spin_unlock(&self->rx_queue_lock);
|
||||
/*
|
||||
* Patch C: deliver SKB directly into the WSM/mac80211 stack
|
||||
* instead of skb_queue_tail-ing onto self->rx_queue for later
|
||||
* pickup by the bh thread. Removes two spinlock acquires
|
||||
* (rx_queue->lock at queue-tail + at dequeue) per RX frame
|
||||
* and one bh wait-queue wake-up per IRQ batch.
|
||||
*
|
||||
* bes2600_bh_handle_rx_skb owns the SKB on every path.
|
||||
* Contract: process context, sleepable, caller holds no
|
||||
* bes2600 spinlock. See bh.c for the contract block.
|
||||
*
|
||||
* Pre-condition satisfied here: bes2600_sdio_unlock(self)
|
||||
* was called at the bottom of the SDIO read sequence in
|
||||
* sdio_rx_work, so we hold no bes2600 mutex either.
|
||||
*/
|
||||
bes2600_bh_handle_rx_skb(self->core, skb);
|
||||
packet_len = (packet_len + 3) & (~0x3);
|
||||
pos += packet_len;
|
||||
#ifdef BES_SDIO_OPTIMIZED_LEN
|
||||
@@ -898,12 +912,17 @@ static void sdio_rx_work(struct work_struct *work)
|
||||
|
||||
ctrl_reg = 0;
|
||||
|
||||
if (likely(self->irq_handler)) {
|
||||
self->irq_handler(self->irq_priv);
|
||||
} else {
|
||||
bes_err("%s,%d\n", __func__, __LINE__);
|
||||
goto failed;
|
||||
}
|
||||
/*
|
||||
* Patch C: with direct delivery in extract_packets, the bh
|
||||
* thread no longer drives RX consumption — there is no
|
||||
* rx_queue to drain. Calling self->irq_handler() here would
|
||||
* wake the bh thread for nothing on every IRQ batch. TX
|
||||
* wakes still flow through bes2600_bh_wakeup() from TX
|
||||
* submitters and from bes2600_bh_handle_rx_skb when a
|
||||
* confirm releases a TX buffer; early-boot IRQs (before
|
||||
* fw_started) still go through self->irq_handler from the
|
||||
* GPIO IRQ handler's fallback branch.
|
||||
*/
|
||||
|
||||
} while (again);
|
||||
|
||||
|
||||
+109
@@ -958,6 +958,115 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
|
||||
*
|
||||
* Patch C (sdio_rx_work direct delivery): this function does the
|
||||
* per-SKB bookkeeping (sequence-number check, exception handling,
|
||||
* tx-confirm accounting, mac80211 hand-off via wsm_handle_rx) that
|
||||
* previously ran inside bes2600_bh_rx_helper after pipe_read dequeued
|
||||
* an SKB from sbus_priv->rx_queue. It is now called inline from
|
||||
* bes2600_sdio_extract_packets, eliminating the queue + bh-wakeup
|
||||
* relay (one wait-queue wake-up + two rx_queue->lock acquires per
|
||||
* RX frame).
|
||||
*
|
||||
* Contract:
|
||||
* - process context, sleepable. wsm_handle_rx (wsm.c:2211, exported
|
||||
* at wsm.c:2463) acquires wsm_cmd.lock, may call into mac80211
|
||||
* and may sleep on wait_event_timeout (wsm.c:2036, 2091).
|
||||
* - caller MUST hold no bes2600 spinlock. Reference precedent:
|
||||
* bes2600_bh_rx_helper (this file) called from the bh thread.
|
||||
* The SDIO mutex is released at bes2600_sdio.c before
|
||||
* extract_packets is called, so this is satisfied.
|
||||
* - SKB ownership: function frees on every path (success and error).
|
||||
* - Returns 0 on success, negative on error. When the SKB carries
|
||||
* a confirm that releases a TX buffer, the function asynchronously
|
||||
* wakes the bh thread to drain TX (matches the in-bh tx=1
|
||||
* signaling that bh_rx_helper used).
|
||||
*/
|
||||
int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb)
|
||||
{
|
||||
struct wsm_hdr *wsm;
|
||||
size_t wsm_len;
|
||||
u16 wsm_id;
|
||||
u8 wsm_seq;
|
||||
int tx = 0;
|
||||
u32 confirm_label = 0x0;
|
||||
|
||||
if (!skb)
|
||||
return 0;
|
||||
|
||||
wsm = (struct wsm_hdr *)skb->data;
|
||||
wsm_len = __le16_to_cpu(wsm->len);
|
||||
if (WARN_ON(wsm_len > skb->len)) {
|
||||
bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len);
|
||||
dev_kfree_skb(skb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (priv->wsm_enable_wsm_dumps)
|
||||
print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1,
|
||||
skb->data, wsm_len, false);
|
||||
|
||||
wsm_id = __le16_to_cpu(wsm->id) & 0xFFF;
|
||||
wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7;
|
||||
bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n",
|
||||
wsm_id, wsm_seq);
|
||||
|
||||
skb_trim(skb, wsm_len);
|
||||
|
||||
if (wsm_id == 0x0800) {
|
||||
wsm_handle_exception(priv,
|
||||
&skb->data[sizeof(*wsm)],
|
||||
wsm_len - sizeof(*wsm));
|
||||
bes_err("wsm exception\n");
|
||||
dev_kfree_skb(skb);
|
||||
return -1;
|
||||
} else if ((wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)])) {
|
||||
bes_err("seq error! %u. %u. 0x%x.", wsm_seq,
|
||||
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id);
|
||||
dev_kfree_skb(skb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
bes2600_bh_parse_wakeup_event(priv, skb);
|
||||
|
||||
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7;
|
||||
|
||||
if (IS_DRIVER_TO_MCU_CMD(wsm_id))
|
||||
confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label);
|
||||
|
||||
if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) {
|
||||
int rc = wsm_release_tx_buffer(priv, 1);
|
||||
bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id));
|
||||
|
||||
if (rc < 0) {
|
||||
bes_err("wsm_release_tx_buffer failed: %d\n", rc);
|
||||
dev_kfree_skb(skb);
|
||||
return rc;
|
||||
} else if (rc > 0) {
|
||||
tx = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */
|
||||
if (wsm_handle_rx(priv, wsm_id, wsm, &skb)) {
|
||||
bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id);
|
||||
if (skb)
|
||||
dev_kfree_skb(skb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (skb)
|
||||
dev_kfree_skb(skb);
|
||||
|
||||
if (tx)
|
||||
bes2600_bh_wakeup(priv);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(bes2600_bh_handle_rx_skb);
|
||||
|
||||
static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx)
|
||||
{
|
||||
struct sk_buff *skb = NULL;
|
||||
|
||||
@@ -36,6 +36,13 @@ void bes2600_enable_powersave(struct bes2600_vif *priv,
|
||||
int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count);
|
||||
int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id,
|
||||
int count);
|
||||
/*
|
||||
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
|
||||
* Process context, sleepable, caller holds no bes2600 spinlock.
|
||||
* Function frees skb on every path. See bh.c for full contract.
|
||||
*/
|
||||
int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv,
|
||||
struct sk_buff *skb);
|
||||
int bes2600_bh_sw_process(struct bes2600_common *hw_priv,
|
||||
struct wsm_tx_confirm *tx_confirm);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user