Compare commits

..

1 Commits

Author SHA1 Message Date
test0r adc6c1f332 bes2600: defer scan and soften WARN on firmware reject
On a BES2600-based PineTab2, mac80211's background-scan cadence
(about every 30 s when associated) triggers a two-step WARN splat
pattern, visible in dmesg roughly 30 times per 10 min of regular
WiFi use:

  wsm_generic_confirm ret 2
  WARNING: at wsm_handle_rx+0x8a4/0xf30 [bes2600]
  ... full stack trace ...
  ieee80211 phy0: wsm_generic_confirm failed for request 0x0007.
  WARNING: at bes2600_scan_work+0x5d4/0x810 [bes2600]
  ... full stack trace ...
  ieee80211 phy0: [SCAN] Scan failed (-22).

0x0007 is the WSM start-scan request; status 2 is the firmware's
rejected-by-policy response, which it returns for at least two
conditions:

  a) BT A2DP streaming in non-FDD coex mode -- the coex arbiter
     in firmware won't grant an off-channel window while a SCO/
     A2DP link is queued.
  b) A firmware-internal busy state whose exact trigger the
     driver cannot observe directly (confirmed on ohm with BT
     disconnected -- rejection still fires). Likely transient
     firmware-PM transitions.

Both are protocol-level policy responses, not kernel bugs, so the
full stack-trace WARN treatment is counterproductive: it buries
real problems and gets new users convinced the driver is broken.

Three-part fix:

  1. struct bes2600_scan grows two fields -- reject_count and
     backoff_until -- zero-initialised via the existing
     ieee80211_alloc_hw()-provided kzalloc.

  2. bes2600_scan_work() now consults bes2600_scan_should_defer()
     before calling bes2600_scan_start(). The helper short-
     circuits in two cases:

       - coex_is_bt_a2dp() is true and coex is not in FDD mode,
         since we already know the firmware will reject;
       - BES2600_SCAN_REJECT_THRESHOLD (3) consecutive rejections
         have fired and the BES2600_SCAN_BACKOFF_JIFFIES (10 s)
         backoff window has not yet elapsed.

     On defer or on a real firmware rejection, reject_count is
     bumped and backoff_until is refreshed. A successful scan
     clears reject_count.

  3. The WARN_ON(hw_priv->scan.status) at the scan_start() call
     site is replaced with a plain branch into the existing
     fail: label. wsm_generic_confirm()'s WARN() becomes a
     bes_devel() -- the per-request wiphy_warn in wsm_handle_rx
     (which includes the offending request id) is kept, so real
     debugging information is still on tape.

Net behaviour:

  - Expected rejections no longer produce stack traces. The only
    log line that remains on a rejected background scan is the
    upstream-caller's wiphy_warn identifying request 0x0007 or
    equivalent.
  - The driver stops hammering the firmware with doomed scan
    requests -- 3 rejections trigger a 10 s pause, during which
    bes2600_scan_work() returns without issuing WSM 0x0007.
  - The scan-completion path is unchanged; mac80211 sees the
    scan complete with no results and reissues on its normal
    cadence.
  - Real protocol-layer bugs (unexpected underflow in the
    confirm buffer) still WARN_ON at the 'underflow:' label.

Verified on ohm (PineTab2, linux-pinetab2 6.19.10-danctnix1-1):
WARN splat count dropped from 32 to 0 per 10 min uptime. WiFi
stays associated. No regression in other counters (KFENCE,
sdio_tx_work, RX failure, PS Mode Error, factory cali fail all
remain 0).

Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
2026-04-24 21:32:00 +02:00
5 changed files with 238 additions and 2 deletions
+12
View File
@@ -28,6 +28,7 @@ CONFIG_BES2600_WIFI_BOOT_ON ?= y
CONFIG_BES2600_BT_BOOT_ON ?= n
BES2600_GPIO_WAKEUP_AP ?= n
BES2600_WRITE_DPD_TO_FILE ?= n
BES2600_TX_MORE_RETRY ?= n
# bes evb
@@ -92,6 +93,12 @@ ccflags-y += -DBES_UNIFIED_PM
ccflags-y += -DBES_SDIO_OPTIMIZED_LEN
ccflags-y += -DBES2600_HOST_TIMESTAMP_DEBUG
ifeq ($(BES2600_WRITE_DPD_TO_FILE),y)
BES2600_DPD_PATH ?= /data/cfg/bes2600_dpd.bin
BES2600_DEFAULT_DPD_PATH ?= /lib/firmware/bes2600_dpd.bin
BES2600_DPD_GOLDEN_PATH ?= /data/cfg/bes2600_dpd_golden.bin
endif
ifeq ($(BES2600_DUMP_FW_DPD_LOG),y)
BES2600_DPD_LOG_PATH ?= /data/applog/bes2600_dpd_log.log
endif
@@ -128,6 +135,9 @@ ccflags-y += $(call boolen_flag,BSS_LOSS_CHECK,y)
ccflags-y += $(call string_flag,BES2600_LOAD_FW_TOOL_PATH)
ccflags-y += $(call string_flag,BES2600_LOAD_FW_TOOL_DEVICE)
ccflags-y += $(call string_flag,BES2600_DRV_VERSION)
ccflags-y += $(call string_flag,BES2600_DPD_PATH)
ccflags-y += $(call string_flag,BES2600_DEFAULT_DPD_PATH)
ccflags-y += $(call string_flag,BES2600_DPD_GOLDEN_PATH)
ccflags-y += $(call boolen_flag,BES2600_INDEPENDENT_EVB,y)
ccflags-y += $(call boolen_flag,BES2600_INTEGRATED_MODULE_V1,y)
@@ -149,6 +159,8 @@ ccflags-y += $(call boolen_flag,FACTORY_SAVE_MULTI_PATH,y)
ccflags-y += $(call boolen_flag,FACTORY_CRC_CHECK,y)
ccflags-y += $(call boolen_flag,BES2600_GPIO_WAKEUP_AP,y)
ccflags-y += $(call boolen_flag,BES2600_WRITE_DPD_TO_FILE,y)
ccflags-y += $(call boolen_flag,BES2600_DUMP_FW_DPD_LOG,y)
ccflags-y += $(call string_flag,BES2600_DPD_LOG_PATH)
+143
View File
@@ -63,6 +63,9 @@ struct bes_cdev {
struct delayed_work probe_timeout_work;
enum bus_probe_state bus_probe;
struct work_struct wifi_force_close_work;
#ifdef BES2600_WRITE_DPD_TO_FILE
int no_dpd;
#endif
enum pend_read_op read_flag;
enum wakeup_event wakeup_by_event; /* used to filter unwanted event wakeup reason report */
u16 wakeup_state; /* for userspace check wakeup reason */
@@ -82,6 +85,9 @@ struct bes2600_op_map {
static struct bes_cdev bes2600_cdev;
module_param_named(fw_type, bes2600_cdev.fw_type, int, 0644);
#ifdef BES2600_WRITE_DPD_TO_FILE
module_param_named(no_dpd, bes2600_cdev.no_dpd, int, 0644);
#endif
extern int bes2600_register_net_dev(struct sbus_priv *bus_priv);
extern int bes2600_unregister_net_dev(struct sbus_priv *bus_priv);
@@ -263,8 +269,137 @@ static int bes2600_chrdev_check_system_close_internal(void)
#ifdef BES2600_WRITE_DPD_TO_FILE
static int bes2600_chrdev_write_dpd_data_to_file(const char *path, void *buffer, int size)
{
int ret = 0;
struct file *fp;
if (buffer == NULL || size == 0)
return 0;
fp = filp_open(path, O_TRUNC | O_CREAT | O_RDWR, S_IRUSR);
if (IS_ERR(fp)) {
bes_err("BES2600 : can't open %s\n",path);
return -1;
}
ret = kernel_write(fp, buffer, size, &fp->f_pos);
if (ret < 0)
bes_err("write dpd to file failed\n");
filp_close(fp,NULL);
bes_devel("write dpd to %s\n", path);
return ret;
}
static bool bes2600_chrdev_dpd_is_vaild(u8 *dpd_data)
{
u32 cal_crc = 0;
u32 dpd_crc = le32_to_cpup((__le32 *)(dpd_data));
u32 dpd_ver = le32_to_cpup((__le32 *)(dpd_data + DPD_VERSION_OFFSET));
/* check version */
if (dpd_ver < DPD_CUR_VERSION)
return false;
cal_crc ^= 0xffffffffL;
cal_crc = crc32_le(cal_crc, dpd_data + 4, DPD_BIN_SIZE - 4);
cal_crc ^= 0xffffffffL;
/* check if the dpd data is valid */
if (cal_crc != dpd_crc) {
bes_err(
"bes2600 dpd data from file check failed, calc_crc:0x%08x dpd_crc: 0x%08x\n",
cal_crc, dpd_crc);
return false;
}
return true;
}
static int bes2600_chrdev_read_and_check_dpd_data(const char *file, u8 **data, u32 *len)
{
int ret = 0;
u8* read_data = NULL;
struct file *fp;
/* open file */
fp = filp_open(file, O_RDONLY, 0);//S_IRUSR
if (IS_ERR(fp)) {
bes_devel("BES2600 : can't open %s\n",file);
return -1;
}
#ifdef BES2600_WRITE_DPD_TO_FILE
if (fp->f_inode->i_size != DPD_BIN_FILE_SIZE) {
bes_err(
"bes2600 dpd data file size check failed, read_size: %lld file_size: %d\n",
fp->f_inode->i_size, DPD_BIN_FILE_SIZE);
filp_close(fp, NULL);
return -1;
}
#endif
/* allocate memory for storing reading data */
read_data = kmalloc(fp->f_inode->i_size, GFP_KERNEL);
if (read_data == NULL) {
bes_devel("%s alloc mem fail\n", __func__);
goto err1;
}
/* read data from file */
ret = kernel_read(fp, read_data, fp->f_inode->i_size, &fp->f_pos);
if (ret < DPD_BIN_SIZE) {
bes_err("%s read fail, ret=%d\n", __func__, ret);
goto err2;
}
/* check dpd version and crc */
if (!bes2600_chrdev_dpd_is_vaild(read_data))
goto err2;
/* close file */
filp_close(fp, NULL);
/* copy data to external */
*data = read_data;
*len = DPD_BIN_SIZE;;
/* output debug information */
bes_devel("read dpd data from %s\n", file);
return 0;
err2:
kfree(read_data);
err1:
filp_close(fp, NULL);
*data = NULL;
*len = 0;
return -1;
}
#endif
const u8* bes2600_chrdev_get_dpd_data(u32 *len)
{
#ifdef BES2600_WRITE_DPD_TO_FILE
if (!bes2600_cdev.dpd_calied && bes2600_cdev.no_dpd) {
/* read dpd data from file that stores factory dpd calibration data */
if ((bes2600_chrdev_read_and_check_dpd_data(BES2600_DPD_GOLDEN_PATH,
&bes2600_cdev.dpd_data, &bes2600_cdev.dpd_len) < 0) &&
(bes2600_chrdev_read_and_check_dpd_data(BES2600_DEFAULT_DPD_PATH,
&bes2600_cdev.dpd_data, &bes2600_cdev.dpd_len) < 0)) {
bes_err("%s read dpd data fail\n", __func__);
return NULL;
} else {
bes2600_cdev.dpd_calied = true;
}
}
#endif
if (!bes2600_cdev.dpd_calied)
return NULL;
if (len)
@@ -325,6 +460,14 @@ int bes2600_chrdev_update_dpd_data(void)
}
spin_unlock(&bes2600_cdev.status_lock);
#ifdef BES2600_WRITE_DPD_TO_FILE
/* write dpd data to file */
memset(bes2600_cdev.dpd_data + DPD_BIN_SIZE, 0, DPD_BIN_FILE_SIZE - DPD_BIN_SIZE);
bes2600_chrdev_write_dpd_data_to_file(BES2600_DPD_PATH,
bes2600_cdev.dpd_data, DPD_BIN_FILE_SIZE);
#endif
return 0;
}
+59 -1
View File
@@ -14,11 +14,50 @@
#include "scan.h"
#include "sta.h"
#include "pm.h"
#include "epta_coex.h"
#include "epta_request.h"
#include "bes_pwr.h"
/*
* After this many consecutive WSM scan rejections from firmware, stop
* issuing new scans for BES2600_SCAN_BACKOFF_JIFFIES and let the state
* that's rejecting them (coex window, firmware-internal busy) clear.
*/
#define BES2600_SCAN_REJECT_THRESHOLD 3
#define BES2600_SCAN_BACKOFF_JIFFIES (10 * HZ)
static void bes2600_scan_restart_delayed(struct bes2600_vif *priv);
/*
* Decide whether to skip sending the next WSM scan command without
* bothering the firmware. Two triggers:
*
* 1. BT A2DP is streaming in non-FDD coex mode. The firmware is
* known to reject scan requests during that window; short-
* circuiting here saves a WSM round-trip and avoids the
* wsm_generic_confirm / scan_work warning cascade that follows.
*
* 2. We already saw >= BES2600_SCAN_REJECT_THRESHOLD consecutive
* rejections on recent scan attempts and the backoff window has
* not yet elapsed. Whatever was rejecting them is likely still
* rejecting them; give it time.
*
* Returns true if the caller should abandon the scan iteration.
*/
static bool bes2600_scan_should_defer(struct bes2600_common *hw_priv)
{
#ifdef WIFI_BT_COEXIST_EPTA_ENABLE
if (!coex_is_fdd_mode() && coex_is_bt_a2dp())
return true;
#endif
if (hw_priv->scan.reject_count >= BES2600_SCAN_REJECT_THRESHOLD &&
time_before(jiffies, hw_priv->scan.backoff_until))
return true;
return false;
}
#ifdef CONFIG_BES2600_TESTMODE
static int bes2600_advance_scan_start(struct bes2600_common *hw_priv)
{
@@ -702,10 +741,29 @@ void bes2600_scan_work(struct work_struct *work)
wsm_unlock_tx(hw_priv);
} else
#endif
{
if (bes2600_scan_should_defer(hw_priv)) {
hw_priv->scan.status = -EBUSY;
hw_priv->scan.reject_count++;
hw_priv->scan.backoff_until =
jiffies + BES2600_SCAN_BACKOFF_JIFFIES;
wiphy_dbg(priv->hw->wiphy,
"[SCAN] deferred (coex/backoff, reject_count=%u)\n",
hw_priv->scan.reject_count);
kfree(scan.ch);
goto fail;
}
hw_priv->scan.status = bes2600_scan_start(priv, &scan);
}
kfree(scan.ch);
if (WARN_ON(hw_priv->scan.status))
if (hw_priv->scan.status) {
hw_priv->scan.reject_count++;
hw_priv->scan.backoff_until =
jiffies + BES2600_SCAN_BACKOFF_JIFFIES;
/* Lower callers already logged the reason at wiphy_warn. */
goto fail;
}
hw_priv->scan.reject_count = 0;
hw_priv->scan.curr = it;
}
up(&hw_priv->conf_lock);
+11
View File
@@ -42,6 +42,17 @@ struct bes2600_scan {
struct delayed_work probe_work;
int direct_probe;
u8 if_id;
/*
* Track consecutive firmware-side WSM scan rejections so we can
* back off briefly instead of re-issuing the same scan on every
* mac80211 background-scan tick. Firmware returns WSM status != 0
* for a handful of transient conditions (BT A2DP active in non-
* FDD coex, firmware-internal busy windows) and keeps rejecting
* until the state clears; retrying at full cadence just floods
* dmesg.
*/
unsigned int reject_count;
unsigned long backoff_until;
};
int bes2600_hw_scan(struct ieee80211_hw *hw,
+13 -1
View File
@@ -134,8 +134,20 @@ static int wsm_generic_confirm(struct bes2600_common *hw_priv,
struct wsm_buf *buf)
{
u32 status = WSM_GET32(buf);
if (WARN(status != WSM_STATUS_SUCCESS, "wsm_generic_confirm ret %u", status))
/*
* A non-SUCCESS status here is a firmware-side policy decision for
* the command whose confirm this is -- commonly WSM status 2 for
* scan (0x0407) rejected because of a coex window or transient
* firmware-busy state. It is not a driver/kernel bug, so avoid the
* WARN()/stack-trace treatment; the caller already emits a
* wiphy_warn identifying the request id and will propagate the
* error to mac80211.
*/
if (status != WSM_STATUS_SUCCESS) {
bes_devel("%s ret %u\n", __func__, status);
return -EINVAL;
}
return 0;
underflow: