bes2600: defer scan and soften WARN on firmware reject
On a BES2600-based PineTab2, mac80211's background-scan cadence
(about every 30 s when associated) triggers a two-step WARN splat
pattern, visible in dmesg roughly 30 times per 10 min of regular
WiFi use:
wsm_generic_confirm ret 2
WARNING: at wsm_handle_rx+0x8a4/0xf30 [bes2600]
... full stack trace ...
ieee80211 phy0: wsm_generic_confirm failed for request 0x0007.
WARNING: at bes2600_scan_work+0x5d4/0x810 [bes2600]
... full stack trace ...
ieee80211 phy0: [SCAN] Scan failed (-22).
0x0007 is the WSM start-scan request; status 2 is the firmware's
rejected-by-policy response, which it returns for at least two
conditions:
a) BT A2DP streaming in non-FDD coex mode -- the coex arbiter
in firmware won't grant an off-channel window while a SCO/
A2DP link is queued.
b) A firmware-internal busy state whose exact trigger the
driver cannot observe directly (confirmed on the PineTab2
test machine "ohm" with BT disconnected -- the rejection
still fires). Likely transient firmware-PM transitions.
Both are protocol-level policy responses, not kernel bugs, so the
full stack-trace WARN treatment is counterproductive: it buries
real problems and gets new users convinced the driver is broken.
Three-part fix:
1. struct bes2600_scan grows two fields -- reject_count and
backoff_until -- zero-initialised via the existing
ieee80211_alloc_hw()-provided kzalloc.
2. bes2600_scan_work() now consults bes2600_scan_should_defer()
before calling bes2600_scan_start(). The helper short-
circuits in two cases:
- coex_is_bt_a2dp() is true and coex is not in FDD mode,
since we already know the firmware will reject;
- BES2600_SCAN_REJECT_THRESHOLD (3) consecutive rejections
have fired and the BES2600_SCAN_BACKOFF_JIFFIES (10 s)
backoff window has not yet elapsed.
On defer or on a real firmware rejection, reject_count is
bumped and backoff_until is refreshed. A successful scan
clears reject_count.
3. The WARN_ON(hw_priv->scan.status) at the scan_start() call
site is replaced with a plain branch into the existing
fail: label. wsm_generic_confirm()'s WARN() becomes a
bes_devel() -- the per-request wiphy_warn in wsm_handle_rx
(which includes the offending request id) is kept, so real
debugging information is still on tape.
Net behaviour:
- Expected rejections no longer produce stack traces. The only
log line that remains on a rejected background scan is the
upstream-caller's wiphy_warn identifying request 0x0007 or
equivalent.
- The driver stops hammering the firmware with doomed scan
requests -- 3 consecutive rejections trigger a pause of at
least 10 s (each deferred attempt refreshes the window),
during which bes2600_scan_work() returns without issuing
WSM 0x0007.
- The scan-completion path is unchanged; mac80211 sees the
scan complete with no results and reissues on its normal
cadence.
- Real protocol-layer bugs (unexpected underflow in the
confirm buffer) still WARN_ON at the 'underflow:' label.
Verified on ohm (PineTab2, linux-pinetab2 6.19.10-danctnix1-1):
WARN splat count dropped from 32 to 0 per 10 min uptime. WiFi
stays associated. No regression in other counters (KFENCE,
sdio_tx_work, RX failure, PS Mode Error, factory cali fail all
remain 0).
Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
This commit is contained in:
+60
-2
@@ -14,11 +14,50 @@
|
||||
#include "scan.h"
|
||||
#include "sta.h"
|
||||
#include "pm.h"
|
||||
#include "epta_coex.h"
|
||||
#include "epta_request.h"
|
||||
#include "bes_pwr.h"
|
||||
|
||||
/*
|
||||
* After this many consecutive WSM scan rejections from firmware, stop
|
||||
* issuing new scans for BES2600_SCAN_BACKOFF_JIFFIES and let the state
|
||||
* that's rejecting them (coex window, firmware-internal busy) clear.
|
||||
*/
|
||||
#define BES2600_SCAN_REJECT_THRESHOLD 3
|
||||
#define BES2600_SCAN_BACKOFF_JIFFIES (10 * HZ)
|
||||
|
||||
static void bes2600_scan_restart_delayed(struct bes2600_vif *priv);
|
||||
|
||||
/*
|
||||
* Decide, without querying the firmware, whether to skip sending the
|
||||
* next WSM scan command. Two triggers:
|
||||
*
|
||||
* 1. BT A2DP is streaming in non-FDD coex mode. The firmware is
|
||||
* known to reject scan requests during that window; short-
|
||||
* circuiting here saves a WSM round-trip and avoids the
|
||||
* wsm_generic_confirm / scan_work warning cascade that follows.
|
||||
*
|
||||
* 2. We already saw >= BES2600_SCAN_REJECT_THRESHOLD consecutive
|
||||
* rejections on recent scan attempts and the backoff window has
|
||||
* not yet elapsed. Whatever was rejecting them is likely still
|
||||
* rejecting them; give it time.
|
||||
*
|
||||
* Returns true if the caller should abandon the scan iteration.
|
||||
*/
|
||||
static bool bes2600_scan_should_defer(struct bes2600_common *hw_priv)
|
||||
{
|
||||
#ifdef WIFI_BT_COEXIST_EPTA_ENABLE
|
||||
if (!coex_is_fdd_mode() && coex_is_bt_a2dp())
|
||||
return true;
|
||||
#endif
|
||||
|
||||
if (hw_priv->scan.reject_count >= BES2600_SCAN_REJECT_THRESHOLD &&
|
||||
time_before(jiffies, hw_priv->scan.backoff_until))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BES2600_TESTMODE
|
||||
static int bes2600_advance_scan_start(struct bes2600_common *hw_priv)
|
||||
{
|
||||
@@ -703,10 +742,29 @@ void bes2600_scan_work(struct work_struct *work)
|
||||
wsm_unlock_tx(hw_priv);
|
||||
} else
|
||||
#endif
|
||||
hw_priv->scan.status = bes2600_scan_start(priv, &scan);
|
||||
{
|
||||
if (bes2600_scan_should_defer(hw_priv)) {
|
||||
hw_priv->scan.status = -EBUSY;
|
||||
hw_priv->scan.reject_count++;
|
||||
hw_priv->scan.backoff_until =
|
||||
jiffies + BES2600_SCAN_BACKOFF_JIFFIES;
|
||||
wiphy_dbg(priv->hw->wiphy,
|
||||
"[SCAN] deferred (coex/backoff, reject_count=%u)\n",
|
||||
hw_priv->scan.reject_count);
|
||||
kfree(scan.ch);
|
||||
if (WARN_ON(hw_priv->scan.status))
|
||||
goto fail;
|
||||
}
|
||||
hw_priv->scan.status = bes2600_scan_start(priv, &scan);
|
||||
}
|
||||
kfree(scan.ch);
|
||||
if (hw_priv->scan.status) {
|
||||
hw_priv->scan.reject_count++;
|
||||
hw_priv->scan.backoff_until =
|
||||
jiffies + BES2600_SCAN_BACKOFF_JIFFIES;
|
||||
/* Lower callers already logged the reason at wiphy_warn. */
|
||||
goto fail;
|
||||
}
|
||||
hw_priv->scan.reject_count = 0;
|
||||
hw_priv->scan.curr = it;
|
||||
}
|
||||
up(&hw_priv->conf_lock);
|
||||
|
||||
@@ -42,6 +42,17 @@ struct bes2600_scan {
|
||||
struct delayed_work probe_work;
|
||||
int direct_probe;
|
||||
u8 if_id;
|
||||
/*
|
||||
* Track consecutive firmware-side WSM scan rejections so we can
|
||||
* back off briefly instead of re-issuing the same scan on every
|
||||
* mac80211 background-scan tick. Firmware returns WSM status != 0
|
||||
* for a handful of transient conditions (BT A2DP active in non-
|
||||
* FDD coex, firmware-internal busy windows) and keeps rejecting
|
||||
* until the state clears; retrying at full cadence just floods
|
||||
* dmesg.
|
||||
*/
|
||||
unsigned int reject_count;
|
||||
unsigned long backoff_until;
|
||||
};
|
||||
|
||||
int bes2600_hw_scan(struct ieee80211_hw *hw,
|
||||
|
||||
+13
-1
@@ -134,8 +134,20 @@ static int wsm_generic_confirm(struct bes2600_common *hw_priv,
|
||||
struct wsm_buf *buf)
|
||||
{
|
||||
u32 status = WSM_GET32(buf);
|
||||
if (WARN(status != WSM_STATUS_SUCCESS, "wsm_generic_confirm ret %u", status))
|
||||
|
||||
/*
|
||||
* A non-SUCCESS status here is a firmware-side policy decision for
|
||||
* the command whose confirm this is -- commonly WSM status 2 for
|
||||
* scan (0x0407) rejected because of a coex window or transient
|
||||
* firmware-busy state. It is not a driver/kernel bug, so avoid the
|
||||
* WARN()/stack-trace treatment; the caller already emits a
|
||||
* wiphy_warn identifying the request id and will propagate the
|
||||
* error to mac80211.
|
||||
*/
|
||||
if (status != WSM_STATUS_SUCCESS) {
|
||||
bes_devel("%s ret %u\n", __func__, status);
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
|
||||
underflow:
|
||||
|
||||
Reference in New Issue
Block a user