From adc6c1f332d41ee1aadd349eea11809c88139307 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Fri, 24 Apr 2026 21:31:45 +0200 Subject: [PATCH] bes2600: defer scan and soften WARN on firmware reject On a BES2600-based PineTab2, mac80211's background-scan cadence (about every 30 s when associated) triggers a two-step WARN splat pattern, visible in dmesg roughly 30 times per 10 min of regular WiFi use: wsm_generic_confirm ret 2 WARNING: at wsm_handle_rx+0x8a4/0xf30 [bes2600] ... full stack trace ... ieee80211 phy0: wsm_generic_confirm failed for request 0x0007. WARNING: at bes2600_scan_work+0x5d4/0x810 [bes2600] ... full stack trace ... ieee80211 phy0: [SCAN] Scan failed (-22). 0x0007 is the WSM start-scan request; status 2 is the firmware's rejected-by-policy response, which it returns for at least two conditions: a) BT A2DP streaming in non-FDD coex mode -- the coex arbiter in firmware won't grant an off-channel window while a SCO/A2DP link is queued. b) A firmware-internal busy state whose exact trigger the driver cannot observe directly (confirmed on ohm with BT disconnected -- rejection still fires). Likely transient firmware-PM transitions. Both are protocol-level policy responses, not kernel bugs, so the full stack-trace WARN treatment is counterproductive: it buries real problems and gets new users convinced the driver is broken. Three-part fix: 1. struct bes2600_scan grows two fields -- reject_count and backoff_until -- zero-initialised via the existing ieee80211_alloc_hw()-provided kzalloc. 2. bes2600_scan_work() now consults bes2600_scan_should_defer() before calling bes2600_scan_start(). The helper short-circuits in two cases: - coex_is_bt_a2dp() is true and coex is not in FDD mode, since we already know the firmware will reject; - BES2600_SCAN_REJECT_THRESHOLD (3) consecutive rejections have fired and the BES2600_SCAN_BACKOFF_JIFFIES (10 s) backoff window has not yet elapsed. 
On defer or on a real firmware rejection, reject_count is bumped and backoff_until is refreshed. A successful scan clears reject_count. 3. The WARN_ON(hw_priv->scan.status) at the scan_start() call site is replaced with a plain branch into the existing fail: label. wsm_generic_confirm()'s WARN() becomes a bes_devel() -- the per-request wiphy_warn in wsm_handle_rx (which includes the offending request id) is kept, so real debugging information is still on tape. Net behaviour: - Expected rejections no longer produce stack traces. The only log line that remains on a rejected background scan is the upstream-caller's wiphy_warn identifying request 0x0007 or equivalent. - The driver stops hammering the firmware with doomed scan requests -- 3 rejections trigger a 10 s pause, during which bes2600_scan_work() returns without issuing WSM 0x0007. - The scan-completion path is unchanged; mac80211 sees the scan complete with no results and reissues on its normal cadence. - Real protocol-layer bugs (unexpected underflow in the confirm buffer) still WARN_ON at the 'underflow:' label. Verified on ohm (PineTab2, linux-pinetab2 6.19.10-danctnix1-1): WARN splat count dropped from 32 to 0 per 10 min uptime. WiFi stays associated. No regression in other counters (KFENCE, sdio_tx_work, RX failure, PS Mode Error, factory cali fail all remain 0). 
Signed-off-by: Markus Fritsche --- bes2600/scan.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++- bes2600/scan.h | 11 +++++++++ bes2600/wsm.c | 14 +++++++++++- 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/bes2600/scan.c b/bes2600/scan.c index b2c22e7..faa1c90 100644 --- a/bes2600/scan.c +++ b/bes2600/scan.c @@ -14,11 +14,50 @@ #include "scan.h" #include "sta.h" #include "pm.h" +#include "epta_coex.h" #include "epta_request.h" #include "bes_pwr.h" +/* + * After this many consecutive WSM scan rejections from firmware, stop + * issuing new scans for BES2600_SCAN_BACKOFF_JIFFIES and let the state + * that's rejecting them (coex window, firmware-internal busy) clear. + */ +#define BES2600_SCAN_REJECT_THRESHOLD 3 +#define BES2600_SCAN_BACKOFF_JIFFIES (10 * HZ) + static void bes2600_scan_restart_delayed(struct bes2600_vif *priv); +/* + * Decide whether to skip sending the next WSM scan command without + * bothering the firmware. Two triggers: + * + * 1. BT A2DP is streaming in non-FDD coex mode. The firmware is + * known to reject scan requests during that window; short- + * circuiting here saves a WSM round-trip and avoids the + * wsm_generic_confirm / scan_work warning cascade that follows. + * + * 2. We already saw >= BES2600_SCAN_REJECT_THRESHOLD consecutive + * rejections on recent scan attempts and the backoff window has + * not yet elapsed. Whatever was rejecting them is likely still + * rejecting them; give it time. + * + * Returns true if the caller should abandon the scan iteration. 
+ */ +static bool bes2600_scan_should_defer(struct bes2600_common *hw_priv) +{ +#ifdef WIFI_BT_COEXIST_EPTA_ENABLE + if (!coex_is_fdd_mode() && coex_is_bt_a2dp()) + return true; +#endif + + if (hw_priv->scan.reject_count >= BES2600_SCAN_REJECT_THRESHOLD && + time_before(jiffies, hw_priv->scan.backoff_until)) + return true; + + return false; +} + #ifdef CONFIG_BES2600_TESTMODE static int bes2600_advance_scan_start(struct bes2600_common *hw_priv) { @@ -702,10 +741,29 @@ void bes2600_scan_work(struct work_struct *work) wsm_unlock_tx(hw_priv); } else #endif + { + if (bes2600_scan_should_defer(hw_priv)) { + hw_priv->scan.status = -EBUSY; + hw_priv->scan.reject_count++; + hw_priv->scan.backoff_until = + jiffies + BES2600_SCAN_BACKOFF_JIFFIES; + wiphy_dbg(priv->hw->wiphy, + "[SCAN] deferred (coex/backoff, reject_count=%u)\n", + hw_priv->scan.reject_count); + kfree(scan.ch); + goto fail; + } hw_priv->scan.status = bes2600_scan_start(priv, &scan); + } kfree(scan.ch); - if (WARN_ON(hw_priv->scan.status)) + if (hw_priv->scan.status) { + hw_priv->scan.reject_count++; + hw_priv->scan.backoff_until = + jiffies + BES2600_SCAN_BACKOFF_JIFFIES; + /* Lower callers already logged the reason at wiphy_warn. */ goto fail; + } + hw_priv->scan.reject_count = 0; hw_priv->scan.curr = it; } up(&hw_priv->conf_lock); diff --git a/bes2600/scan.h b/bes2600/scan.h index e50fa36..1f3adea 100644 --- a/bes2600/scan.h +++ b/bes2600/scan.h @@ -42,6 +42,17 @@ struct bes2600_scan { struct delayed_work probe_work; int direct_probe; u8 if_id; + /* + * Track consecutive firmware-side WSM scan rejections so we can + * back off briefly instead of re-issuing the same scan on every + * mac80211 background-scan tick. Firmware returns WSM status != 0 + * for a handful of transient conditions (BT A2DP active in non- + * FDD coex, firmware-internal busy windows) and keeps rejecting + * until the state clears; retrying at full cadence just floods + * dmesg. 
+ */ + unsigned int reject_count; + unsigned long backoff_until; }; int bes2600_hw_scan(struct ieee80211_hw *hw, diff --git a/bes2600/wsm.c b/bes2600/wsm.c index d40df30..55a4e2b 100644 --- a/bes2600/wsm.c +++ b/bes2600/wsm.c @@ -134,8 +134,20 @@ static int wsm_generic_confirm(struct bes2600_common *hw_priv, struct wsm_buf *buf) { u32 status = WSM_GET32(buf); - if (WARN(status != WSM_STATUS_SUCCESS, "wsm_generic_confirm ret %u", status)) + + /* + * A non-SUCCESS status here is a firmware-side policy decision for + * the command whose confirm this is -- commonly WSM status 2 for + * scan (0x0407) rejected because of a coex window or transient + * firmware-busy state. It is not a driver/kernel bug, so avoid the + * WARN()/stack-trace treatment; the caller already emits a + * wiphy_warn identifying the request id and will propagate the + * error to mac80211. + */ + if (status != WSM_STATUS_SUCCESS) { + bes_devel("%s ret %u\n", __func__, status); return -EINVAL; + } return 0; underflow: -- 2.53.0