Compare commits

..

1 Commits

Author SHA1 Message Date
test0r c4797b1dbf bes2600: deliver RX SKBs directly into wsm_handle_rx from sdio_rx_work
Patch C — collapse the sdio_rx_work → rx_queue → bh_work relay into a
direct call from bes2600_sdio_extract_packets into a new helper
bes2600_bh_handle_rx_skb that performs the per-SKB bookkeeping
previously done inside bes2600_bh_rx_helper after pipe_read.

What this saves per RX frame:
  - two spinlock acquires on self->rx_queue->lock
    (skb_queue_tail in extract_packets + skb_dequeue in pipe_read)
  - one bh wait-queue wake-up per IRQ batch
    (sdio_rx_work no longer calls self->irq_handler)

Pre-patch baseline on ohm (4 MB/s sender, ~5 min, srcversion 1B3B3ED0):
  - 387,532 RX packets, 578 MB, 1.36 MB/s observed receive
  - sdio_rx_work dispatched 34,994 times = 86.4/s = 90.3 per 1000 RX pkts
  - sdio_tx_work dispatched 111,770 times = 276.1/s
  - bes2600_bh_work redispatches: 0  (single long-lived work item,
    refutes earlier review claim of "9 events per frame")

API contract for wsm_handle_rx (cited in bh.c block comment):
  - declared wsm.h:2108, defined wsm.c:2211, EXPORT_SYMBOL wsm.c:2463
  - process context, sleepable
  - caller MUST hold no bes2600 spinlock; SDIO mutex is released at
    bes2600_sdio.c before extract_packets is called
  - SKB ownership: zeroes *skb_p if consumed, caller frees otherwise.
    bes2600_bh_handle_rx_skb frees on every path (success + error)

After patch, bh thread retains responsibility for TX work.  TX-confirm
packets that release a TX buffer wake the bh thread via
bes2600_bh_wakeup() inside bes2600_bh_handle_rx_skb (mirrors the in-bh
tx=1 signaling the old bh_rx_helper used to do).  Early-boot IRQs
before fw_started are still served via the GPIO IRQ handler's fallback
self->irq_handler path; that branch is unchanged.

Minimum-diff scope:
  - struct sbus_priv->rx_queue + ->rx_queue_lock are still defined and
    initialised (allocated but never used after this patch).  Removed
    in a follow-up hygiene patch, not bundled here.
  - bes2600_sdio_pipe_read still exists; returns NULL after this patch
    (rx_queue always empty).  Removed in same follow-up.
  - bes2600_bh_rx_helper still exists and still calls pipe_read; it
    now always returns 0.  Wasted 1 spinlock + skb_dequeue per bh wake;
    quantified in Phase 7 to decide whether worth a follow-up.

Awaiting Phase 7 verification on ohm.  Per Phase 4 plan §4.5 predicted
delta:
  - rx_queue->lock acquire rate: 1914/s -> 0  (high confidence)
  - _raw_spin_unlock_irqrestore CPU%: 20% -> 12-15%  (medium)
  - observed RX KB/s @ 4 MB/s: floor lifts toward >= 1 MB/s sustained
    (medium; rep 3's 75 KB/s death has multiple causes)

Phase 7 will N=3 ramp 1 -> 2 -> 4 -> 8 MB/s with identical
instrumentation pre/post.  See notes/patch-c-phase4-plan-2026-05-07.md
in marfrit/besser for the plan; this commit is the Phase 6 deliverable.
2026-05-07 19:49:57 +02:00
6 changed files with 167 additions and 41 deletions
+28 -9
View File
@@ -29,6 +29,7 @@
#include <linux/of_gpio.h>
#include "bes2600.h"
#include "bh.h"
#include "sbus.h"
#include "bes2600_plat.h"
#include "bes2600_factory.h"
@@ -812,10 +813,23 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8
skb_put(skb, packet_len);
memcpy(skb->data, &data[pos], packet_len);
bes_devel("%s, %d,%d\n", __func__, packet_len, pos);
spin_lock(&self->rx_queue_lock);
skb_queue_tail(&self->rx_queue, skb);
self->rx_data_cnt++;
spin_unlock(&self->rx_queue_lock);
/*
* Patch C: deliver SKB directly into the WSM/mac80211 stack
* instead of skb_queue_tail-ing onto self->rx_queue for later
* pickup by the bh thread. Removes two spinlock acquires
* (rx_queue->lock at queue-tail + at dequeue) per RX frame
* and one bh wait-queue wake-up per IRQ batch.
*
* bes2600_bh_handle_rx_skb owns the SKB on every path.
* Contract: process context, sleepable, caller holds no
* bes2600 spinlock. See bh.c for the contract block.
*
* Pre-condition satisfied here: bes2600_sdio_unlock(self)
* was called at the bottom of the SDIO read sequence in
* sdio_rx_work, so we hold no bes2600 mutex either.
*/
bes2600_bh_handle_rx_skb(self->core, skb);
packet_len = (packet_len + 3) & (~0x3);
pos += packet_len;
#ifdef BES_SDIO_OPTIMIZED_LEN
@@ -898,12 +912,17 @@ static void sdio_rx_work(struct work_struct *work)
ctrl_reg = 0;
if (likely(self->irq_handler)) {
self->irq_handler(self->irq_priv);
} else {
bes_err("%s,%d\n", __func__, __LINE__);
goto failed;
}
/*
* Patch C: with direct delivery in extract_packets, the bh
* thread no longer drives RX consumption — there is no
* rx_queue to drain. Calling self->irq_handler() here would
* wake the bh thread for nothing on every IRQ batch. TX
* wakes still flow through bes2600_bh_wakeup() from TX
* submitters and from bes2600_bh_handle_rx_skb when a
* confirm releases a TX buffer; early-boot IRQs (before
* fw_started) still go through self->irq_handler from the
* GPIO IRQ handler's fallback branch.
*/
} while (again);
+115 -6
View File
@@ -101,7 +101,7 @@ void bes2600_unregister_bh(struct bes2600_common *hw_priv)
coex_deinit_mode(hw_priv);
#endif
atomic_inc(&hw_priv->bh_term);
atomic_add(1, &hw_priv->bh_term);
wake_up(&hw_priv->bh_wq);
flush_workqueue(hw_priv->bh_workqueue);
@@ -590,7 +590,7 @@ static int bes2600_bh(void *arg)
bes_devel("[BH] Device resume.\n");
atomic_set(&hw_priv->bh_suspend, BES2600_BH_RESUMED);
wake_up(&hw_priv->bh_evt_wq);
atomic_inc(&hw_priv->bh_rx);
atomic_add(1, &hw_priv->bh_rx);
continue;
}
@@ -758,9 +758,9 @@ tx:
#if 0 /* count is not implemented */
if (ret > 1)
atomic_inc(&hw_priv->bh_tx);
atomic_add(1, &hw_priv->bh_tx);
#else
atomic_inc(&hw_priv->bh_tx);
atomic_add(1, &hw_priv->bh_tx);
#endif
#if defined(CONFIG_BES2600_NON_POWER_OF_TWO_BLOCKSIZES)
@@ -958,6 +958,115 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct
}
}
/*
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
*
* Patch C (sdio_rx_work direct delivery): this function does the
* per-SKB bookkeeping (sequence-number check, exception handling,
* tx-confirm accounting, mac80211 hand-off via wsm_handle_rx) that
* previously ran inside bes2600_bh_rx_helper after pipe_read dequeued
* an SKB from sbus_priv->rx_queue. It is now called inline from
* bes2600_sdio_extract_packets, eliminating the queue + bh-wakeup
* relay (one wait-queue wake-up + two rx_queue->lock acquires per
* RX frame).
*
* Contract:
* - process context, sleepable. wsm_handle_rx (wsm.c:2211, exported
* at wsm.c:2463) acquires wsm_cmd.lock, may call into mac80211
* and may sleep on wait_event_timeout (wsm.c:2036, 2091).
* - caller MUST hold no bes2600 spinlock. Reference precedent:
* bes2600_bh_rx_helper (this file) called from the bh thread.
* The SDIO mutex is released at bes2600_sdio.c before
* extract_packets is called, so this is satisfied.
* - SKB ownership: function frees on every path (success and error).
* - Returns 0 on success, negative on error. When the SKB carries
* a confirm that releases a TX buffer, the function asynchronously
* wakes the bh thread to drain TX (matches the in-bh tx=1
* signaling that bh_rx_helper used).
*/
int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb)
{
struct wsm_hdr *wsm;
size_t wsm_len;
u16 wsm_id;
u8 wsm_seq;
int tx = 0;
u32 confirm_label = 0x0;
if (!skb)
return 0;
wsm = (struct wsm_hdr *)skb->data;
wsm_len = __le16_to_cpu(wsm->len);
if (WARN_ON(wsm_len > skb->len)) {
bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len);
dev_kfree_skb(skb);
return -1;
}
if (priv->wsm_enable_wsm_dumps)
print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1,
skb->data, wsm_len, false);
wsm_id = __le16_to_cpu(wsm->id) & 0xFFF;
wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7;
bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n",
wsm_id, wsm_seq);
skb_trim(skb, wsm_len);
if (wsm_id == 0x0800) {
wsm_handle_exception(priv,
&skb->data[sizeof(*wsm)],
wsm_len - sizeof(*wsm));
bes_err("wsm exception\n");
dev_kfree_skb(skb);
return -1;
} else if ((wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)])) {
bes_err("seq error! %u. %u. 0x%x.", wsm_seq,
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id);
dev_kfree_skb(skb);
return -1;
}
bes2600_bh_parse_wakeup_event(priv, skb);
priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7;
if (IS_DRIVER_TO_MCU_CMD(wsm_id))
confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label);
if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) {
int rc = wsm_release_tx_buffer(priv, 1);
bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id));
if (rc < 0) {
bes_err("wsm_release_tx_buffer failed: %d\n", rc);
dev_kfree_skb(skb);
return rc;
} else if (rc > 0) {
tx = 1;
}
}
/* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */
if (wsm_handle_rx(priv, wsm_id, wsm, &skb)) {
bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id);
if (skb)
dev_kfree_skb(skb);
return -1;
}
if (skb)
dev_kfree_skb(skb);
if (tx)
bes2600_bh_wakeup(priv);
return 0;
}
EXPORT_SYMBOL(bes2600_bh_handle_rx_skb);
static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx)
{
struct sk_buff *skb = NULL;
@@ -1134,7 +1243,7 @@ static int bes2600_bh_tx_helper(struct bes2600_common *hw_priv,
tx_len += 4;
#endif
atomic_inc(&hw_priv->bh_tx);
atomic_add(1, &hw_priv->bh_tx);
tx_len = hw_priv->sbus_ops->align_size(
hw_priv->sbus_priv, tx_len);
@@ -1435,7 +1544,7 @@ static int bes2600_bh(void *arg)
bes_devel("[BH] Device resume.\n");
atomic_set(&hw_priv->bh_suspend, BES2600_BH_RESUMED);
wake_up(&hw_priv->bh_evt_wq);
atomic_inc(&hw_priv->bh_rx);
atomic_add(1, &hw_priv->bh_rx);
goto done;
}
+7
View File
@@ -36,6 +36,13 @@ void bes2600_enable_powersave(struct bes2600_vif *priv,
int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count);
int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id,
int count);
/*
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
* Process context, sleepable, caller holds no bes2600 spinlock.
* Function frees skb on every path. See bh.c for full contract.
*/
int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv,
struct sk_buff *skb);
int bes2600_bh_sw_process(struct bes2600_common *hw_priv,
struct wsm_tx_confirm *tx_confirm);
+1 -1
View File
@@ -570,7 +570,7 @@ int bes2600_itp_get_tx(struct bes2600_common *priv, u8 **data,
*burst = 2;
atomic_set(&priv->bh_tx, 1);
ktime_get_ts(&itp->last_sent);
atomic_inc(&itp->awaiting_confirm);
atomic_add(1, &itp->awaiting_confirm);
spin_unlock_bh(&itp->tx_lock);
return 1;
-2
View File
@@ -497,7 +497,6 @@ static struct ieee80211_hw *bes2600_init_common(size_t hw_priv_data_len)
WLAN_LINK_ID_MAX,
bes2600_skb_dtor,
hw_priv))) {
destroy_workqueue(hw_priv->workqueue);
ieee80211_free_hw(hw);
return NULL;
}
@@ -509,7 +508,6 @@ static struct ieee80211_hw *bes2600_init_common(size_t hw_priv_data_len)
for (; i > 0; i--)
bes2600_queue_deinit(&hw_priv->tx_queue[i - 1]);
bes2600_queue_stats_deinit(&hw_priv->tx_queue_stats);
destroy_workqueue(hw_priv->workqueue);
ieee80211_free_hw(hw);
return NULL;
}
+16 -23
View File
@@ -257,21 +257,18 @@ int bes2600_hw_scan(struct ieee80211_hw *hw,
bes2600_pwr_set_busy_event(hw_priv, BES_PWR_LOCK_ON_SCAN);
/* will be unlocked in bes2600_scan_work() */
down(&hw_priv->scan.lock);
down(&hw_priv->conf_lock);
frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0,
req->ie_len);
if (!frame.skb) {
up(&hw_priv->conf_lock);
up(&hw_priv->scan.lock);
if (!frame.skb)
return -ENOMEM;
}
if (req->ie_len)
skb_put_data(frame.skb, req->ie, req->ie_len);
/* will be unlocked in bes2600_scan_work() */
down(&hw_priv->scan.lock);
down(&hw_priv->conf_lock);
if (frame.skb) {
int ret;
//if (priv->if_id == 0)
@@ -289,9 +286,9 @@ int bes2600_hw_scan(struct ieee80211_hw *hw,
}
#endif
if (ret) {
dev_kfree_skb(frame.skb);
up(&hw_priv->conf_lock);
up(&hw_priv->scan.lock);
dev_kfree_skb(frame.skb);
return ret;
}
}
@@ -321,10 +318,10 @@ int bes2600_hw_scan(struct ieee80211_hw *hw,
++hw_priv->scan.n_ssids;
}
up(&hw_priv->conf_lock);
if (frame.skb)
dev_kfree_skb(frame.skb);
up(&hw_priv->conf_lock);
#ifdef WIFI_BT_COEXIST_EPTA_ENABLE
bwifi_change_current_status(hw_priv, BWIFI_STATUS_SCANNING);
#endif
@@ -365,18 +362,14 @@ int bes2600_hw_sched_scan_start(struct ieee80211_hw *hw,
if (req->n_ssids > hw->wiphy->max_scan_ssids)
return -EINVAL;
frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0,
req->ie_len);
if (!frame.skb)
return -ENOMEM;
/* will be unlocked in bes2600_scan_work() */
down(&hw_priv->scan.lock);
down(&hw_priv->conf_lock);
frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0,
req->ie_len);
if (!frame.skb) {
up(&hw_priv->conf_lock);
up(&hw_priv->scan.lock);
return -ENOMEM;
}
if (frame.skb) {
int ret;
if (priv->if_id == 0)
@@ -387,9 +380,9 @@ int bes2600_hw_sched_scan_start(struct ieee80211_hw *hw,
ret = wsm_set_probe_responder(priv, true);
}
if (ret) {
dev_kfree_skb(frame.skb);
up(&hw_priv->conf_lock);
up(&hw_priv->scan.lock);
dev_kfree_skb(frame.skb);
return ret;
}
}
@@ -421,10 +414,10 @@ int bes2600_hw_sched_scan_start(struct ieee80211_hw *hw,
}
}
up(&hw_priv->conf_lock);
if (frame.skb)
dev_kfree_skb(frame.skb);
up(&hw_priv->conf_lock);
queue_work(hw_priv->workqueue, &hw_priv->scan.swork);
wiphy_warn(hw->wiphy, "<--[SCAN] Scheduled scan request.\n");
return 0;