Compare commits

..

1 Commits

Author SHA1 Message Date
claude-noether cdfdac987a bes2600: bus_reset on connection-loss storm to dodge assoc-comeback blackhole
When mac80211 declares connection loss against this AP (typically driven
by inactivity-deauth or beacon-loss), the userspace reauth that follows
sometimes enters a long blackhole: the AP responds to auth with success
but defers assoc with the 802.11w "assoc comeback" timer; ohm retries
faster than the comeback grants permission; the AP eventually fires an
unprotected deauth-reason-6 ("Class 2 frame received from non-
authenticated station"), and recovery only completes via cross-SSID or
cross-channel fallback. Evidence: a ~86 s blackhole was observed in the
phase-7 rep on 2026-05-07 02:42, with three subsequent BSSIDs returning
assoc comeback timeouts before reason-9 (STA_REQ_ASSOC_WITHOUT_AUTH)
fired. Documented in marfrit/besser:notes/phase4-2026-05-07.md.

When N=3 driver-side connection_loss decisions fire within a 60 s window
on the same vif, skip the ieee80211_connection_loss() path and trigger
the c5.2-introduced bes2600_chrdev_do_bus_reset() instead. The bus
reset removes and re-probes the chip; userspace re-associates with a
fresh chip state, dodging the AP's comeback-timer rejection cycle.

Predicted Phase 7 delta vs current baseline:
- api_connection_loss rate: unchanged (we don't address the trigger)
- conditional probability of >5 s blackhole given event: <= 30 %
- worst-case recovery: 86 s -> < 10 s

Contract pin: bes2600_chrdev_do_bus_reset(sbus_ops, sbus_priv) at
bes2600/bes_chardev.c:455, introduced by c5.2. The function returns
before the re-probe has completed: sbus_ops->bus_reset() schedules an
SDIO rescan; the helper waits up to 3 s for the remove() callback to
clear sbus_priv, then returns. Per-vif state is gone after this point,
so the recover work lives on bes2600_common (hw_priv) and uses the
global bes2600_cdev for the bus_reset call rather than dereferencing
per-vif state.

Threshold (3 / 60 s) is well above the steady-state per-vif
connection_loss rate observed in the patch-A phase-7 rep (0.86/h under
sustained load), so a true storm is required to trip it.

Files touched:
- bes2600/bes2600.h: 3 counter fields on struct bes2600_vif, 1
  work_struct on struct bes2600_common, 3 prototypes
- bes2600/sta.c: 3 helpers + storm-account hook in
  bes2600_connection_loss_work + storm-init in bes2600_vif_setup +
  cancel_work_sync in the hw_priv shutdown path; #include bes_chardev.h
  was already pulled in by an earlier c-stack patch
- bes2600/main.c: INIT_WORK alongside other hw_priv work_structs
- bes2600/debug.c: ConnectionLossStormRecoveries seq_printf in the
  per-vif status seq_file output

The cw1200/cw1260 ancestor has no equivalent; this is a clean
addition. checkpatch.pl --no-tree --strict: clean (0/0/0).

Signed-off-by: Claude (noether) <claude@reauktion.de>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 11:30:09 +02:00
6 changed files with 10 additions and 158 deletions
+9 -28
View File
@@ -29,7 +29,6 @@
#include <linux/of_gpio.h>
#include "bes2600.h"
#include "bh.h"
#include "sbus.h"
#include "bes2600_plat.h"
#include "bes2600_factory.h"
@@ -813,23 +812,10 @@ static int bes2600_sdio_extract_packets(struct sbus_priv *self, u32 ctrl_reg, u8
skb_put(skb, packet_len);
memcpy(skb->data, &data[pos], packet_len);
bes_devel("%s, %d,%d\n", __func__, packet_len, pos);
spin_lock(&self->rx_queue_lock);
skb_queue_tail(&self->rx_queue, skb);
self->rx_data_cnt++;
/*
* Patch C: deliver SKB directly into the WSM/mac80211 stack
* instead of skb_queue_tail-ing onto self->rx_queue for later
* pickup by the bh thread. Removes two spinlock acquires
* (rx_queue->lock at queue-tail + at dequeue) per RX frame
* and one bh wait-queue wake-up per IRQ batch.
*
* bes2600_bh_handle_rx_skb owns the SKB on every path.
* Contract: process context, sleepable, caller holds no
* bes2600 spinlock. See bh.c for the contract block.
*
* Pre-condition satisfied here: bes2600_sdio_unlock(self)
* was called at the bottom of the SDIO read sequence in
* sdio_rx_work, so we hold no bes2600 mutex either.
*/
bes2600_bh_handle_rx_skb(self->core, skb);
spin_unlock(&self->rx_queue_lock);
packet_len = (packet_len + 3) & (~0x3);
pos += packet_len;
#ifdef BES_SDIO_OPTIMIZED_LEN
@@ -912,17 +898,12 @@ static void sdio_rx_work(struct work_struct *work)
ctrl_reg = 0;
/*
* Patch C: with direct delivery in extract_packets, the bh
* thread no longer drives RX consumption — there is no
* rx_queue to drain. Calling self->irq_handler() here would
* wake the bh thread for nothing on every IRQ batch. TX
* wakes still flow through bes2600_bh_wakeup() from TX
* submitters and from bes2600_bh_handle_rx_skb when a
* confirm releases a TX buffer; early-boot IRQs (before
* fw_started) still go through self->irq_handler from the
* GPIO IRQ handler's fallback branch.
*/
if (likely(self->irq_handler)) {
self->irq_handler(self->irq_priv);
} else {
bes_err("%s,%d\n", __func__, __LINE__);
goto failed;
}
} while (again);
-12
View File
@@ -484,18 +484,6 @@ int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_pri
return 0;
}
/*
 * Bus-reset entry point for code outside this compilation unit.
 *
 * Forwards the sbus_ops / sbus_priv pair stored in the file-global
 * bes2600_cdev to bes2600_chrdev_do_bus_reset(), so host-side recovery
 * paths (e.g. the sta.c connection-loss-storm fast-recover) never have
 * to touch the static bes2600_cdev themselves.
 *
 * Returns whatever bes2600_chrdev_do_bus_reset() returns.
 */
int bes2600_chrdev_trigger_bus_reset(void)
{
	const struct sbus_ops *ops = bes2600_cdev.sbus_ops;
	struct sbus_priv *bus_priv = bes2600_cdev.sbus_priv;

	return bes2600_chrdev_do_bus_reset(ops, bus_priv);
}
bool bes2600_chrdev_is_wifi_opened(void)
{
bool wifi_opened = false;
-1
View File
@@ -61,7 +61,6 @@ struct sbus_priv *bes2600_chrdev_get_sbus_priv_data(void);
int bes2600_chrdev_check_system_close(void);
int bes2600_chrdev_do_system_close(const struct sbus_ops *sbus_ops, struct sbus_priv *priv);
int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_priv *priv);
int bes2600_chrdev_trigger_bus_reset(void);
void bes2600_chrdev_wakeup_bt(void);
void bes2600_chrdev_wifi_force_close(struct bes2600_common *hw_priv, bool halt_dev);
void bes2600_chrdev_usb_remove(struct bes2600_common *hw_priv);
-109
View File
@@ -958,115 +958,6 @@ static void bes2600_bh_parse_wakeup_event(struct bes2600_common *hw_priv, struct
}
}
/*
 * Direct-deliver an RX SKB into the WSM/mac80211 stack.
 *
 * Patch C (sdio_rx_work direct delivery): this function does the
 * per-SKB bookkeeping (length validation, sequence-number check,
 * exception handling, tx-confirm accounting, mac80211 hand-off via
 * wsm_handle_rx) that previously ran inside bes2600_bh_rx_helper after
 * pipe_read dequeued an SKB from sbus_priv->rx_queue. It is now called
 * inline from bes2600_sdio_extract_packets, eliminating the queue +
 * bh-wakeup relay (one wait-queue wake-up + two rx_queue->lock
 * acquires per RX frame).
 *
 * Contract:
 *  - process context, sleepable. wsm_handle_rx (wsm.c:2211, exported
 *    at wsm.c:2463) acquires wsm_cmd.lock, may call into mac80211
 *    and may sleep on wait_event_timeout (wsm.c:2036, 2091).
 *  - caller MUST hold no bes2600 spinlock. Reference precedent:
 *    bes2600_bh_rx_helper (this file) called from the bh thread.
 *    The SDIO mutex is released at bes2600_sdio.c before
 *    extract_packets is called, so this is satisfied.
 *  - SKB ownership: function frees on every path (success and error).
 *    A NULL skb is accepted and treated as a successful no-op.
 *  - Returns 0 on success, negative on error (-1 for malformed,
 *    exception or out-of-sequence frames and for wsm_handle_rx
 *    failures; the wsm_release_tx_buffer error code when that call
 *    fails). When the SKB carries a confirm that releases a TX buffer,
 *    the function asynchronously wakes the bh thread to drain TX
 *    (matches the in-bh tx=1 signaling that bh_rx_helper used).
 */
int bes2600_bh_handle_rx_skb(struct bes2600_common *priv, struct sk_buff *skb)
{
	struct wsm_hdr *wsm;
	size_t wsm_len;
	u16 wsm_id;
	u8 wsm_seq;
	int tx = 0;
	int ret;
	u32 confirm_label = 0x0;

	if (!skb)
		return 0;

	/* The WSM header must be fully present before its fields are read. */
	if (WARN_ON(skb->len < sizeof(*wsm))) {
		bes_err("wsm hdr truncated %d\n", (int)skb->len);
		dev_kfree_skb(skb);
		return -1;
	}

	wsm = (struct wsm_hdr *)skb->data;
	wsm_len = __le16_to_cpu(wsm->len);

	/*
	 * Reject lengths that overrun the SKB, and lengths shorter than the
	 * header itself: the latter would make "wsm_len - sizeof(*wsm)"
	 * below wrap around to a huge size_t.
	 */
	if (WARN_ON(wsm_len < sizeof(*wsm) || wsm_len > skb->len)) {
		bes_err("wsm_len err %d %d\n", (int)wsm_len, (int)skb->len);
		dev_kfree_skb(skb);
		return -1;
	}

	if (priv->wsm_enable_wsm_dumps)
		print_hex_dump(KERN_DEBUG, "<-- ", DUMP_PREFIX_NONE, 16, 1,
			       skb->data, wsm_len, false);

	/* Low 12 bits carry the message id, bits 13..15 the RX sequence. */
	wsm_id = __le16_to_cpu(wsm->id) & 0xFFF;
	wsm_seq = (__le16_to_cpu(wsm->id) >> 13) & 7;
	bes_devel("bes2600_bh_handle_rx_skb wsm_id:0x%04x seq:%d\n",
		  wsm_id, wsm_seq);

	/* Drop any trailing bytes beyond the WSM message. */
	skb_trim(skb, wsm_len);

	if (wsm_id == 0x0800) {
		wsm_handle_exception(priv,
				     &skb->data[sizeof(*wsm)],
				     wsm_len - sizeof(*wsm));
		bes_err("wsm exception\n");
		dev_kfree_skb(skb);
		return -1;
	}

	if (wsm_seq != priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)]) {
		bes_err("seq error! %u. %u. 0x%x.\n", wsm_seq,
			priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)], wsm_id);
		dev_kfree_skb(skb);
		return -1;
	}

	bes2600_bh_parse_wakeup_event(priv, skb);
	priv->wsm_rx_seq[WSM_TXRX_SEQ_IDX(wsm_id)] = (wsm_seq + 1) & 7;

	if (IS_DRIVER_TO_MCU_CMD(wsm_id))
		confirm_label = __le32_to_cpu(((struct wsm_mcu_hdr *)wsm)->handle_label);

	if (WSM_CONFIRM_CONDITION(wsm_id, confirm_label)) {
		int rc = wsm_release_tx_buffer(priv, 1);

		/*
		 * NOTE(review): this passes the raw little-endian wsm->id,
		 * whereas every other WSM_TXRX_SEQ_IDX() use in this function
		 * passes the masked host-order wsm_id. Kept as-is because the
		 * macro is not visible here - confirm the two are equivalent.
		 */
		bes2600_bh_dec_pending_count(priv, WSM_TXRX_SEQ_IDX(wsm->id));
		if (rc < 0) {
			bes_err("wsm_release_tx_buffer failed: %d\n", rc);
			dev_kfree_skb(skb);
			return rc;
		}
		if (rc > 0)
			tx = 1;
	}

	/* wsm_handle_rx takes care of SKB lifetime: zeroes *skb_p if consumed. */
	ret = wsm_handle_rx(priv, wsm_id, wsm, &skb);
	if (ret)
		bes_err("wsm_handle_rx failed (id=0x%04x)\n", wsm_id);

	/* Whatever wsm_handle_rx did not consume is still ours to free. */
	if (skb)
		dev_kfree_skb(skb);

	if (ret)
		return -1;

	if (tx)
		bes2600_bh_wakeup(priv);

	return 0;
}
EXPORT_SYMBOL(bes2600_bh_handle_rx_skb);
static int bes2600_bh_rx_helper(struct bes2600_common *priv, int *tx)
{
struct sk_buff *skb = NULL;
-7
View File
@@ -36,13 +36,6 @@ void bes2600_enable_powersave(struct bes2600_vif *priv,
int wsm_release_tx_buffer(struct bes2600_common *hw_priv, int count);
int wsm_release_vif_tx_buffer(struct bes2600_common *hw_priv, int if_id,
int count);
/*
* Direct-deliver an RX SKB into the WSM/mac80211 stack.
* Process context, sleepable, caller holds no bes2600 spinlock.
* Function frees skb on every path. See bh.c for full contract.
*/
int bes2600_bh_handle_rx_skb(struct bes2600_common *hw_priv,
struct sk_buff *skb);
int bes2600_bh_sw_process(struct bes2600_common *hw_priv,
struct wsm_tx_confirm *tx_confirm);
+1 -1
View File
@@ -1692,7 +1692,7 @@ report:
void bes2600_connection_loss_storm_recover(struct work_struct *work)
{
bes_warn("[bes2600] connection-loss-storm fast-recover: bus_reset\n");
bes2600_chrdev_trigger_bus_reset();
bes2600_chrdev_do_bus_reset(bes2600_cdev.sbus_ops, bes2600_cdev.sbus_priv);
/*
* After bes2600_chrdev_do_bus_reset() returns, the SDIO core has
* scheduled a remove + rescan; per-vif state may already be gone.