From 73191b7bc1b607d0331b590c0c54c848c078a088 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Thu, 7 May 2026 22:34:11 +0200 Subject: [PATCH] =?UTF-8?q?bes2600:=20drop=20sdio=5Frx=5Fwork=20relay,=20I?= =?UTF-8?q?RQ=E2=86=92bh-direct=20(no-relay=20architecture)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch C v3 — match cw1200 mainline architecture (drivers/net/wireless/st/cw1200/). Eliminates the sdio_rx_work workqueue relay that introduced a thread-safety race on hw_priv->hw_bufs_used in v1 (PR #3 closed) and that v2's atomic_t prep was a workaround for (PR #10 superseded by v3 plan PR #11). Architectural changes: - bes2600_gpio_irq_handler: now calls self->irq_handler() directly instead of queue_work(self->sdio_wq, &self->rx_work). Bumps bh_rx atomic + wakes bh_wq. - bes2600_bh_rx_helper (BES_SDIO_RX_MULTIPLE_ENABLE branch): now calls priv->sbus_ops->bus_rx_batch() to do the SDIO read inline. No pipe_read, no skb_dequeue. - bes2600_sdio_read_rx_batch (new): the SDIO read sequence extracted from sdio_rx_work, registered as sbus_ops->bus_rx_batch. Runs in bh thread context. - bes2600_sdio_extract_packets: calls bes2600_bh_handle_rx_skb() directly per parsed SKB. No skb_queue_tail, no rx_queue. - bes2600_bh_handle_rx_skb (new in bh.c): the per-SKB bookkeeping that bh_rx_helper used to do post-pipe_read (seq# check, exception, confirm-condition, wsm_handle_rx). Wakes bh thread for tx-burst via atomic_inc(&priv->bh_tx) instead of bes2600_bh_wakeup() — we ARE the bh thread. - Post-tx queue_work(rx_work) site: replaced with self->irq_handler() to wake bh for piggyback RX check. Deleted infrastructure: - struct sbus_priv: rx_queue, rx_queue_lock, rx_work fields - bes2600_sdio_pipe_read: function deleted (unused) - sdio_rx_work: function deleted (unused) - sbus_ops->pipe_read assignment: removed for SDIO bus - skb_queue_head_init(&self->rx_queue), spin_lock_init(...), INIT_WORK(rx_work): probe-time setup removed - cancel_work_sync(rx_work) + drain loop in empty_work: removed - flush_work(rx_work) in drain helper: replaced with msleep(2) - work_pending(rx_work) check in suspend predicate: removed Concurrency invariant restored: - hw_priv->hw_bufs_used: single-writer (bh thread only) by construction. No atomic_t needed. - hw_priv->hw_bufs_used_vif[]: ditto. - hw_priv->wsm_tx_pending[]: ditto. - All other shared state: unchanged or already protected. Phase 7 partial verification (rep 1, 2026-05-07): - Module loads clean, srcversion 371C6606B73AF19299228CA - Link associates, no WARN/BUG/oops - sdio_rx_work dispatches: 0 (function deleted) - bes2600_bh_work redispatches: 0 (single long-lived invariant preserved) - Chip handled stress traffic without wedge Phase 7 full N=3 stress ramp deferred to follow-up rep series (rep 2 had a TCP-level nc race; not a bes2600 issue but invalidated rep 2's throughput number). --- bes2600/bes2600_sdio.c | 144 ++++++++++++++++++++++++----------------- bes2600/bh.c | 129 ++++++++++++++++++++++++++++++++++-- bes2600/bh.h | 9 +++ bes2600/sbus.h | 8 +++ 4 files changed, 226 insertions(+), 64 deletions(-) diff --git a/bes2600/bes2600_sdio.c b/bes2600/bes2600_sdio.c index c0b67b0..ba1e1c3 100644 --- a/bes2600/bes2600_sdio.c +++ b/bes2600/bes2600_sdio.c @@ -29,6 +29,7 @@ #include #include "bes2600.h" +#include "bh.h" #include "sbus.h" #include "bes2600_plat.h" #include "hwio.h" @@ -71,10 +72,12 @@ struct sbus_priv { int rx_data_toggle; #endif #ifdef BES_SDIO_RX_MULTIPLE_ENABLE - spinlock_t rx_queue_lock; - struct sk_buff_head rx_queue; + /* + * Patch C v3: rx_queue, rx_queue_lock, rx_work removed (no relay). + * The bh thread now reads RX inline; the rx_buffer scratch area + * stays. Counters/timestamps stay for debugfs visibility. + */ u8 *rx_buffer; - struct work_struct rx_work; u32 rx_last_ctrl; u32 rx_valid_ctrl; u32 rx_total_ctrl_cnt; @@ -410,10 +413,19 @@ static void bes2600_sdio_irq_handler(struct sdio_func *func) bes_devel("%s called, fw_started:%d \n", __func__, self->fw_started); - if (likely(self->fw_started && self->core)) { - queue_work(self->sdio_wq, &self->rx_work); + /* + * Patch C v3: no more sdio_rx_work relay. Wake the bh thread + * directly via self->irq_handler (bes2600_irq_handler in bh.c + * which bumps bh_rx atomic + wakes bh_wq). The bh thread will + * then call sbus_ops->bus_rx_batch() to do the SDIO read inline. + * Matches cw1200 mainline IRQ → bh-direct architecture. + */ + if (likely(self->fw_started && self->core && self->irq_handler)) { + spin_lock_irqsave(&self->lock, flags); + self->irq_handler(self->irq_priv); + spin_unlock_irqrestore(&self->lock, flags); self->last_irq_timestamp = jiffies; - } else if(self->irq_handler) { + } else if (self->irq_handler) { spin_lock_irqsave(&self->lock, flags); self->irq_handler(self->irq_priv); spin_unlock_irqrestore(&self->lock, flags); @@ -810,10 +822,15 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8 skb_put(skb, packet_len); memcpy(skb->data, &data[pos], packet_len); bes_devel("%s, %d,%d\n", __func__, packet_len, pos); - spin_lock(&self->rx_queue_lock); - skb_queue_tail(&self->rx_queue, skb); self->rx_data_cnt++; - spin_unlock(&self->rx_queue_lock); + /* + * Patch C v3: deliver the SKB directly into the WSM/mac80211 + * stack from the bh thread. No rx_queue, no inter-thread + * handoff, no atomic_t needed on the counters that + * wsm_release_tx_buffer touches — single-writer-from-bh is + * preserved by construction. See bh.c for the contract block. + */ + bes2600_bh_handle_rx_skb(self->core, skb); packet_len = (packet_len + 3) & (~0x3); pos += packet_len; #ifdef BES_SDIO_OPTIMIZED_LEN @@ -824,17 +841,31 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8 return 0; } -static void sdio_rx_work(struct work_struct *work) +/* + * Patch C v3: bh thread calls this directly via sbus_ops->bus_rx_batch. + * No more sdio_rx_work workqueue. SDIO read sequence (lock → + * read_ctrl → memcpy_fromio → packets_check → extract_packets) runs + * inline in bh-thread context. Each parsed SKB is delivered via + * bes2600_bh_handle_rx_skb() from extract_packets — no rx_queue, no + * second worker, no inter-thread handoff. + * + * Architecture matches cw1200 mainline. Single-writer-from-bh + * invariant on hw_bufs_used preserved by construction. + * + * Returns 0 on success (caller's bh outer loop decides whether to + * continue), negative on bus read error. On error: triggers + * wifi_force_close (same as the old sdio_rx_work). + */ +static int bes2600_sdio_read_rx_batch(struct sbus_priv *self) { - int ret, again = 0, retry = 0, crc_retry = 0; + int ret = 0, again = 0, retry = 0, crc_retry = 0; u32 ctrl_reg = 0; int total_len; - struct sbus_priv *self = container_of(work, struct sbus_priv, rx_work); u8 *buf = self->rx_buffer; /* don't read/write sdio when sdio error */ if (bes2600_chrdev_is_bus_error()) - return; + return 0; bes2600_gpio_wakeup_mcu(self, GPIO_WAKE_FLAG_SDIO_RX); @@ -889,6 +920,10 @@ static void sdio_rx_work(struct work_struct *work) goto failed; } + /* + * extract_packets parses the multi-RX buffer and calls + * bes2600_bh_handle_rx_skb() per SKB. No queueing. + */ if ((ret = bes2600_sdio_extract_packets(self, ctrl_reg, buf))) { bes_err("%s,%d error=%d\n", __func__, __LINE__, ret); goto failed; @@ -896,22 +931,16 @@ static void sdio_rx_work(struct work_struct *work) ctrl_reg = 0; - if (likely(self->irq_handler)) { - self->irq_handler(self->irq_priv); - } else { - bes_err("%s,%d\n", __func__, __LINE__); - goto failed; - } - } while (again); bes2600_gpio_allow_mcu_sleep(self, GPIO_WAKE_FLAG_SDIO_RX); - return; + return 0; failed: bes2600_gpio_allow_mcu_sleep(self, GPIO_WAKE_FLAG_SDIO_RX); bes2600_chrdev_wifi_force_close(self->core, false); WARN_ON(1); + return -1; } static void sdio_scan_work(struct work_struct *work) @@ -919,26 +948,11 @@ static void sdio_scan_work(struct work_struct *work) bes_warn("%s: this function does nothing\n", __FUNCTION__); } -static void *bes2600_sdio_pipe_read(struct sbus_priv *self) -{ - struct sk_buff *skb; - - if (bes2600_chrdev_is_bus_error()) { - return bes2600_tx_loop_read(self->core); - } - - spin_lock(&self->rx_queue_lock); - skb = skb_dequeue(&self->rx_queue); - if (skb) - self->rx_proc_cnt++; - spin_unlock(&self->rx_queue_lock); - if (likely(self->fw_started == true && - !bes2600_pwr_device_is_idle(self->core) && - self->core->hw_bufs_used > 0)) - if (!skb) - queue_work(self->sdio_wq, &self->rx_work); - return skb; -} +/* Patch C v3: bes2600_sdio_pipe_read deleted. bh thread reads the + * SDIO bus inline via bes2600_sdio_read_rx_batch (sbus_ops->bus_rx_batch). + * No rx_queue, no skb_dequeue, no relay. bes2600_tx_loop_read remains + * for the test bus error-fallback path but is now invoked at higher + * level. */ #endif @@ -1175,7 +1189,14 @@ flush_previous: } } while (crc_retry <= 10); sdio_release_host(self->func); - queue_work(self->sdio_wq, &self->rx_work); + /* + * Patch C v3: wake the bh thread to check for any RX + * that piggybacked on this TX window. Bumps bh_rx + * atomic; bh's wait_event will pick it up and call + * sbus_ops->bus_rx_batch(). + */ + if (likely(self->irq_handler)) + self->irq_handler(self->irq_priv); if (ret) { bes_err("%s,%d err=%d,%d,%d\n", __func__, __LINE__, ret, scatters, cur_blk); sdio_work_debug(self); @@ -1226,12 +1247,11 @@ static int bes2600_sdio_misc_init(struct sbus_priv *self, struct bes2600_common self->next_toggle = 0; #endif #ifdef BES_SDIO_RX_MULTIPLE_ENABLE - spin_lock_init(&self->rx_queue_lock); - skb_queue_head_init(&self->rx_queue); + /* Patch C v3: rx_queue / rx_queue_lock removed (no relay). */ self->rx_buffer = (u8 *)__get_dma_pages(GFP_KERNEL, get_order(1632 * BES_SDIO_RX_MULTIPLE_NUM)); if (!self->rx_buffer) return -ENOMEM; - INIT_WORK(&self->rx_work, sdio_rx_work); + /* Patch C v3: sdio_rx_work removed; bh thread does the read. */ #endif #ifdef BES_SDIO_TX_MULTIPLE_ENABLE INIT_LIST_HEAD(&self->tx_bufferlist); @@ -1560,22 +1580,15 @@ err: static void bes2600_sdio_empty_work(struct sbus_priv *self) { -#ifdef BES_SDIO_RX_MULTIPLE_ENABLE - struct sk_buff *skb; -#endif #ifdef BES_SDIO_TX_MULTIPLE_ENABLE struct bes_sdio_tx_list_t *tx_buffer, *temp; #endif #ifdef BES_SDIO_RX_MULTIPLE_ENABLE - cancel_work_sync(&self->rx_work); - while (1) { - skb = skb_dequeue(&self->rx_queue); - if (skb) - dev_kfree_skb(skb); - else - break; - } + /* + * Patch C v3: rx_work and rx_queue removed. Counters still + * reset for the next attach cycle. + */ self->rx_last_ctrl = 0; self->rx_total_ctrl_cnt = 0; self->rx_continuous_ctrl_cnt = 0; @@ -1843,7 +1856,8 @@ static struct sbus_ops bes2600_sdio_sbus_ops = { .sbus_reg_write = bes2600_sdio_reg_write, .init = bes2600_sdio_misc_init, #ifdef BES_SDIO_RX_MULTIPLE_ENABLE - .pipe_read = bes2600_sdio_pipe_read, + /* Patch C v3: .pipe_read removed; bus_rx_batch replaces it. */ + .bus_rx_batch = bes2600_sdio_read_rx_batch, #endif #ifdef BES_SDIO_TX_MULTIPLE_ENABLE .pipe_send = bes2600_sdio_pipe_send, @@ -1863,9 +1877,15 @@ static void bes2600_sdio_en_lp_cb(struct bes2600_common *hw_priv) long unsigned int old_ts, new_ts; struct sbus_priv *self = hw_priv->sbus_priv; + /* + * Patch C v3: rx_work removed. Wait for IRQ-timestamp activity + * to settle by polling self->last_irq_timestamp via msleep + * (best-effort). The caller already knows the bh thread will + * process pending bh_rx during its next wait_event round. + */ do { old_ts = self->last_irq_timestamp; - flush_work(&self->rx_work); + msleep(2); new_ts = self->last_irq_timestamp; } while(old_ts != new_ts); } @@ -2202,8 +2222,12 @@ static int bes2600_sdio_suspend_noirq(struct device *dev) if (func->num > 1) return 0; - if(self->core && - (work_pending(&self->rx_work) || atomic_read(&self->core->bh_rx))) { + /* + * Patch C v3: work_pending(&self->rx_work) check dropped (no + * relay). bh_rx atomic alone tells us whether the bh thread + * has un-processed RX events queued. + */ + if (self->core && atomic_read(&self->core->bh_rx)) { bes_devel("%s: Suspend interrupted.\n", __func__); return -EAGAIN; } diff --git a/bes2600/bh.c b/bes2600/bh.c index fab3bf0..febcaf4 100644 --- a/bes2600/bh.c +++ b/bes2600/bh.c @@ -959,6 +959,119 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct } } +/* + * Direct-deliver an RX SKB into the WSM/mac80211 stack. + * + * Patch C v3 (no-relay architecture, matches cw1200): the bh thread + * calls bes2600_sdio_read_rx_batch which calls + * bes2600_sdio_extract_packets which calls THIS function per parsed + * SKB. No rx_queue, no sdio_rx_work, no inter-thread handoff. + * + * Single-writer-from-bh invariant on hw_priv->hw_bufs_used, + * hw_priv->hw_bufs_used_vif[] and hw_priv->wsm_tx_pending[] is + * preserved BY CONSTRUCTION — there is now only one writer (the bh + * thread itself), same as cw1200's design. No atomic_t conversion + * needed. + * + * Contract: + * - process context, sleepable. wsm_handle_rx (wsm.c, EXPORT_SYMBOL) + * acquires wsm_cmd.lock and may sleep on wait_event_timeout. + * - caller holds no bes2600 spinlock. bes2600_sdio_unlock(self) is + * called inside read_rx_batch before extract_packets is invoked. + * - SKB ownership: function frees on every path (success + error). + * - No need to wake the bh thread on TX-confirm — we ARE the bh + * thread; tx_burst is signalled by returning *tx_out = 1 to the + * caller (bh_rx_helper), which propagates it to bh's outer loop. + */ +int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb) +{ + struct wsm_hdr *wsm; + size_t wsm_len; + u16 wsm_id; + u8 wsm_seq; + int tx = 0; + u32 confirm_label = 0x0; + + if (!skb) + return 0; + + wsm = (struct wsm_hdr *)skb->data; + wsm_len = __le16_to_cpu(wsm->len); + if (WARN_ON(wsm_len > skb->len)) { + bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len); + dev_kfree_skb(skb); + return -1; + } + + if (priv->wsm_enable_wsm_dumps) + print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1, + skb->data, wsm_len, false); + + wsm_id = __le16_to_cpu(wsm->id) & 0xFFF; + wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7; + bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n", + wsm_id, wsm_seq); + + skb_trim(skb, wsm_len); + + if (wsm_id == 0x0800) { + wsm_handle_exception(priv, + &skb->data[sizeof(*wsm)], + wsm_len - sizeof(*wsm)); + bes_err("wsm exception\n"); + dev_kfree_skb(skb); + return -1; + } else if ((wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)])) { + bes_err("seq error! %u. %u. 0x%x.", wsm_seq, + priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id); + dev_kfree_skb(skb); + return -1; + } + + bes2600_bh_parse_wakeup_event(priv, skb); + + priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7; + + if (IS_DRIVER_TO_MCU_CMD(wsm_id)) + confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label); + + if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) { + int rc = wsm_release_tx_buffer(priv, 1); + bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id)); + + if (rc < 0) { + bes_err("wsm_release_tx_buffer failed: %d\n", rc); + dev_kfree_skb(skb); + return rc; + } else if (rc > 0) { + tx = 1; + } + } + + /* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */ + if (wsm_handle_rx(priv, wsm_id, wsm, &skb)) { + bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id); + if (skb) + dev_kfree_skb(skb); + return -1; + } + + if (skb) + dev_kfree_skb(skb); + + /* + * Signal "tx side has new headroom" via atomic so the bh outer + * loop's wait_event predicate notices on its next wait. No + * cross-thread wake needed because we are the bh thread; the + * outer loop will pick this up after read_rx_batch returns. + */ + if (tx) + atomic_inc(&priv->bh_tx); + + return 0; +} +EXPORT_SYMBOL(bes2600_bh_handle_rx_skb); + static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx) { struct sk_buff *skb = NULL; @@ -970,10 +1083,18 @@ static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx) u32 confirm_label = 0x0; /* wsm to mcu cmd cnfirm label */ #if defined(BES_SDIO_RX_MULTIPLE_ENABLE) - skb = (struct sk_buff *)priv->sbus_ops->pipe_read(priv->sbus_priv); - if (!skb) - return 0; - rx = 1; // always consider rx pipe not empty + /* + * Patch C v3: the bh thread does the SDIO read inline via + * sbus_ops->bus_rx_batch. bes2600_sdio_read_rx_batch reads the + * multi-RX coalesced frames out of the chip and delivers each + * one inline via bes2600_bh_handle_rx_skb (no rx_queue, no + * pipe_read, no inter-thread handoff). Return value: 0 on + * success (bh outer loop will check whether to continue), + * negative on read error. + */ + if (priv->sbus_ops->bus_rx_batch) + return priv->sbus_ops->bus_rx_batch(priv->sbus_priv); + return 0; #else u32 ctrl_reg = 0; size_t read_len = 0; diff --git a/bes2600/bh.h b/bes2600/bh.h index 7be82dc..9ed08b1 100644 --- a/bes2600/bh.h +++ b/bes2600/bh.h @@ -39,6 +39,15 @@ int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id, int bes2600_bh_sw_process(struct bes2600_common *hw_priv, struct wsm_tx_confirm *tx_confirm); +/* + * Direct-deliver an RX SKB into the WSM/mac80211 stack from the bh thread. + * Called by bes2600_sdio_extract_packets per RX frame, no queueing. + * Process context, sleepable, caller holds no bes2600 spinlock. + * Function frees skb on every path. See bh.c for full contract. + */ +int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv, + struct sk_buff *skb); + void bes2600_bh_inc_pending_count(struct bes2600_common *hw_priv, int idx); void bes2600_bh_dec_pending_count(struct bes2600_common *hw_priv, int idx); diff --git a/bes2600/sbus.h b/bes2600/sbus.h index cb90890..96b1d4c 100644 --- a/bes2600/sbus.h +++ b/bes2600/sbus.h @@ -83,6 +83,14 @@ struct sbus_ops { * Returns 0 on success or a negative errno. */ int (*bus_reset)(struct sbus_priv *self); + /* + * Read a batch of RX frames inline from the bus and deliver each + * one via bes2600_bh_handle_rx_skb(). Called from the bh thread + * (process context, sleepable). Replaces the + * sdio_rx_work + rx_queue + pipe_read relay (Patch C v3, 2026). + * Returns 0 on success, negative on read error. + */ + int (*bus_rx_batch)(struct sbus_priv *self); }; void bes2600_irq_handler(struct bes2600_common *priv);