Compare commits

..

1 Commits

Author SHA1 Message Date
test0r db4ea70fb5 bes2600: widen scan-defer backoff to 30s and decay count on quiet
The scan-defer logic added in the previous patch ("bes2600: defer
scan and soften WARN on firmware reject") used a 10-second backoff
window and never cleared reject_count outside of a successful scan.
Field testing on a PineTab2 (linux-pinetab2 6.19.10-danctnix1) shows
two distinct mac80211 scan-retry cadences in practice:

  * Idle background scans every ~5 minutes when associated -- well
    outside any plausible backoff, the defer guard correctly falls
    through to a real WSM scan attempt.

  * Roam-evaluation bursts triggered when mac80211 wants to find a
    candidate AP for handover (signal degradation, beacon loss,
    locally-generated DEAUTH_LEAVING reason=3). Cadence is ~12 s, and
    one boot reproduced 14 such rejected scans in 3 minutes during a
    single burst, none of which engaged the defer guard because every
    retry landed just outside the 10 s window.

Two-line behaviour change to fix that:

  1. BES2600_SCAN_BACKOFF_JIFFIES grows from 10*HZ to 30*HZ, so a
     12 s-cadence burst stays inside the window across consecutive
     rejects and the third reject in the burst trips the threshold
     guard. The 5 min idle case is still naturally past the window
     and is unaffected.

  2. bes2600_scan_should_defer() resets reject_count to 0 when
     time_after(jiffies, backoff_until). Without this, reject_count
     accumulated indefinitely across the slow-cadence rejects, so an
     isolated reject after long quiet would have tripped the
     threshold the moment it arrived. After the change, count is
     latched only inside an active burst and decays cleanly when the
     burst ends.

Net effect on a roam burst:

  * t=0   reject #1 (count 1, backoff_until = t0 + 30s)
  * t=12  reject #2 (count 2, backoff_until = t1 + 30s)
  * t=24  reject #3 (count 3, threshold met, next scan deferred)
  * t=36  defer fires, no WSM round-trip, reject not sent
  * ...   defers continue until the firmware-policy state clears
  * scan succeeds -> reject_count = 0, normal cadence resumes

WSM 0x0007 confirm rejections in a burst drop from ~14 to ~3 (just
the scans needed to reach the threshold). wpa_supplicant's reason=3
locally-generated disconnects driven by exhausted roam candidates
during the same burst window also drop.

No new state, no new symbols, no change to mac80211-facing semantics:
the deferred scan still completes via the existing fail: path with
status=-EBUSY, the same response a real firmware-busy would produce.

Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
2026-04-28 14:33:00 +02:00
5 changed files with 17 additions and 97 deletions
-29
View File
@@ -16,7 +16,6 @@
#include <linux/mmc/host.h>
#include <linux/mmc/sdio_func.h>
#include <linux/mmc/card.h>
#include <linux/mmc/core.h>
#include <linux/mmc/sdio.h>
#include <linux/spinlock.h>
#include <net/mac80211.h>
@@ -1778,33 +1777,6 @@ static void bes2600_sdio_halt_device(struct sbus_priv *self)
sdio_work_debug(self);
}
/*
* Trigger an SDIO bus reset via mmc_hw_reset().
*
* With multiple SDIO functions probed (PineTab2 binds func 1 for WLAN and
* func 2 for the BT-companion path) mmc_sdio_hw_reset() takes the
* remove-and-rescan path: it marks the card removed and schedules
* mmc_rescan, which tears down the bound function drivers and re-detects
* the card on the next sweep, in turn reinvoking bes2600_sdio_probe().
*
* With a single function probed it instead invokes mmc_power_cycle()
* directly, which on PineTab2 toggles the wifi-reset GPIO via sdio_pwrseq.
*
* In both cases the chip ends up in a freshly reset state, which is the
* goal of the recovery path.
*
* mmc_hw_reset() must be called without holding the SDIO host claim --
* the multi-func remove-and-rescan path acquires the host claim via the
* mmc workqueue.
*/
static int bes2600_sdio_bus_reset(struct sbus_priv *self)
{
if (!self || !self->func || !self->func->card)
return -EINVAL;
return mmc_hw_reset(self->func->card);
}
static bool bes2600_sdio_wakeup_source(struct sbus_priv *self)
{
struct bes2600_platform_data_sdio *pdata = bes2600_get_platform_data();
@@ -1843,7 +1815,6 @@ static struct sbus_ops bes2600_sdio_sbus_ops = {
.gpio_sleep = bes2600_gpio_allow_mcu_sleep,
.halt_device = bes2600_sdio_halt_device,
.wakeup_source = bes2600_sdio_wakeup_source,
.bus_reset = bes2600_sdio_bus_reset,
};
static void bes2600_sdio_en_lp_cb(struct bes2600_common *hw_priv)
+2 -57
View File
@@ -442,48 +442,6 @@ int bes2600_chrdev_do_system_close(const struct sbus_ops *sbus_ops, struct sbus_
return ret;
}
/*
* Hard-reset the bus and wait for the bus core to remove the chip.
*
* Used by the firmware-wedge recovery path on platforms where the normal
* power_switch(0) sequence has no effective chip-reset signal. The bus
* implementation triggers an asynchronous re-detect; this helper waits for
* the resulting remove() callback to clear bes2600_cdev.sbus_priv so that a
* subsequent bes2600_switch_wifi(true) sees a clean state and can wait on
* the fresh probe.
*/
int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_priv *priv)
{
int ret;
long status;
if (!sbus_ops || !priv)
return -EINVAL;
if (!sbus_ops->bus_reset)
return -EOPNOTSUPP;
bes_info("trigger bus reset to recover wedged firmware.\n");
ret = sbus_ops->bus_reset(priv);
if (ret) {
bes_err("bus_reset failed: %d\n", ret);
return ret;
}
/*
* The bus reset is asynchronous: the bus core schedules a rescan
* which removes the bound function drivers and then re-detects the
* chip. Wait for the remove callback to clear sbus_priv. Do not
* dereference 'priv' after this point -- it may already be freed.
*/
status = wait_event_timeout(bes2600_cdev.probe_done_wq,
!bes2600_cdev.sbus_priv, HZ * 3);
WARN_ON(status <= 0);
return 0;
}
bool bes2600_chrdev_is_wifi_opened(void)
{
bool wifi_opened = false;
@@ -582,21 +540,8 @@ static void bes2600_chrdev_wifi_force_close_work(struct work_struct *work)
/* unregister wifi */
bes2600_switch_wifi(0);
/*
* Hard exception with a bus_reset implementation: tear the
* bus down via mmc_hw_reset() (or equivalent) so the next
* bringup probes a freshly reset chip. On PineTab2 this is
* the only effective recovery path -- the existing
* power_switch(0)/(1) sequence has no chip-reset signal of
* its own (sdio_pwrseq owns wifi_reset).
*
* Soft close, or hard close on a board without bus_reset:
* fall back to the legacy power_switch(0) sequence.
*/
if (bes2600_cdev.halt_dev && bes2600_cdev.sbus_ops->bus_reset) {
bes2600_chrdev_do_bus_reset(bes2600_cdev.sbus_ops,
bes2600_cdev.sbus_priv);
} else if (bes2600_chrdev_check_system_close()) {
/* power down device if wifi is only opened */
if (bes2600_chrdev_check_system_close()) {
bes2600_chrdev_do_system_close(bes2600_cdev.sbus_ops,
bes2600_cdev.sbus_priv);
}
-1
View File
@@ -60,7 +60,6 @@ struct sbus_priv *bes2600_chrdev_get_sbus_priv_data(void);
/* used to control device power down */
int bes2600_chrdev_check_system_close(void);
int bes2600_chrdev_do_system_close(const struct sbus_ops *sbus_ops, struct sbus_priv *priv);
int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_priv *priv);
void bes2600_chrdev_wakeup_bt(void);
void bes2600_chrdev_wifi_force_close(struct bes2600_common *hw_priv, bool halt_dev);
void bes2600_chrdev_usb_remove(struct bes2600_common *hw_priv);
-8
View File
@@ -75,14 +75,6 @@ struct sbus_ops {
void (*halt_device)(struct sbus_priv *self);
bool (*wakeup_source)(struct sbus_priv *self);
int (*reboot)(struct sbus_priv *self);
/*
* Force the host bus to re-detect and re-probe the chip. Called
* from the firmware-wedge recovery path when power_switch() has no
* effective chip-reset signal of its own (e.g. PineTab2, where the
* wifi-reset GPIO is owned by sdio_pwrseq, not the bes2600 node).
* Returns 0 on success or a negative errno.
*/
int (*bus_reset)(struct sbus_priv *self);
};
void bes2600_irq_handler(struct bes2600_common *priv);
+15 -2
View File
@@ -22,9 +22,17 @@
* After this many consecutive WSM scan rejections from firmware, stop
* issuing new scans for BES2600_SCAN_BACKOFF_JIFFIES and let the state
* that's rejecting them (coex window, firmware-internal busy) clear.
*
* The backoff has to be at least as long as the natural mac80211 scan-
* retry cadence, otherwise the next attempt lands outside the window
* and bypasses the defer guard. Observed in the wild on PineTab2:
* roam-evaluation bursts at ~12 s cadence, idle background scans at
* ~5 min cadence. 30 s catches the burst and leaves the slow case
* alone (the firmware-policy state has had minutes to clear by then
* anyway).
*/
#define BES2600_SCAN_REJECT_THRESHOLD 3
#define BES2600_SCAN_BACKOFF_JIFFIES (10 * HZ)
#define BES2600_SCAN_BACKOFF_JIFFIES (30 * HZ)
static void bes2600_scan_restart_delayed(struct bes2600_vif *priv);
@@ -40,7 +48,9 @@ static void bes2600_scan_restart_delayed(struct bes2600_vif *priv);
* 2. We already saw >= BES2600_SCAN_REJECT_THRESHOLD consecutive
* rejections on recent scan attempts and the backoff window has
* not yet elapsed. Whatever was rejecting them is likely still
* rejecting them; give it time.
* rejecting them; give it time. If the backoff has elapsed without
* a fresh reject refreshing it, the burst is over and we reset the
* count so an isolated reject doesn't immediately re-trip.
*
* Returns true if the caller should abandon the scan iteration.
*/
@@ -51,6 +61,9 @@ static bool bes2600_scan_should_defer(struct bes2600_common *hw_priv)
return true;
#endif
if (time_after(jiffies, hw_priv->scan.backoff_until))
hw_priv->scan.reject_count = 0;
if (hw_priv->scan.reject_count >= BES2600_SCAN_REJECT_THRESHOLD &&
time_before(jiffies, hw_priv->scan.backoff_until))
return true;