bes2600: self-detect when firmware does not honor PSM and skip the cycle

The c6 series fixed several host-side bookkeeping bugs around PSM
transitions, but didn't address the underlying contract: this chip's
firmware (BES2600 with the Bestechnic Dec 2023 build that ships on
PineTab2 and most danctnix images) silently drops every WSM_set_pm
request without emitting the corresponding PM_INDICATION. The driver's
own power_down_work delayed work calls bes2600_pwr_enter_lp_mode every
~10s; without firmware acknowledgment each call burns 5s on
wait_for_completion_timeout(pm_enter_cmpl, 5*HZ) and produces a
recurring three-line cascade in dmesg:

  bes2600_pwr_enter_lp_mode, wait pm ind timeout
  bes2600_sdio_active failed, subsys:0
  bes2600_pwr_device_exit_lp_mode, active mcu fail

Confirmed by tripwire instrumentation on PineTab2 (linux-pinetab2
6.19.10-danctnix1, ohm) running the c5+c6 stack: zero
wsm_set_pm_indication() invocations across an entire boot, while
bes2600_pwr_enter_lp_mode timed out repeatedly, and
bes2600_sdio_active() consistently saw BES_SLAVE_STATUS_REG_ID return
0x2f (every "ready" bit set except MCU_WAKEUP_READY (bit 4) - the
firmware reports "I'm awake, there's nothing to wake from").

This patch makes the driver self-heal:

  * struct bes2600_pwr_t gains pm_unsupported (bool) and
    pm_consecutive_timeouts (unsigned int). Both initialised to
    0/false.

  * bes2600_pwr_enter_lp_mode early-returns -EOPNOTSUPP when
    pm_unsupported is set. Skips the per-VIF set_pm round-trip and
    the wait_for_completion entirely.

  * On the cmpxchg-success branch of the timeout path, we increment
    pm_consecutive_timeouts. When it crosses
    BES2600_PM_UNSUPPORTED_THRESHOLD (3, ~15s of trying), we latch
    pm_unsupported = true and force chip_pm_state = ACTIVE so that
    bes2600_pwr_device_exit_lp_mode's c6.2 skip branch covers the
    wake side (no gpio_wake / sbus_active / WSM_set_operational_mode
    reissue past the first one).

  * bes2600_pwr_notify_ps_changed resets pm_consecutive_timeouts to 0
    on any incoming PM indication, and clears pm_unsupported if it
    was previously latched. So a firmware update that fixes PM_IND
    delivery automatically re-enables PSM transitions without a
    driver rebuild.

mac80211's PSM requests via bes2600_set_pm() still flow to the
firmware unchanged; they just don't have host-side timeouts so they
remain silent regardless of firmware acknowledgment. Power
consumption goes up if the firmware actually CAN do PSM (we'd be
keeping the chip awake unnecessarily), but on a chip where the
counter trips this trade-off is forced anyway: the chip stayed awake
under the broken cascade as well, just with constant SDIO churn.

Net effect on dmesg: after ~15s of boot, the three-line cascade stops
firing entirely. The firmware-side wedge is observed once per boot
(captured by the pm_unsupported latch) instead of per-cycle.

Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
This commit is contained in:
2026-04-28 16:54:06 +02:00
parent 51d46a2e25
commit 9a0a4c0a46
2 changed files with 78 additions and 1 deletions
+69 -1
View File
@@ -467,6 +467,45 @@ static void bes2600_pwr_device_enter_lp_mode(struct bes2600_common *hw_priv)
bes_devel("device enter sleep\n"); bes_devel("device enter sleep\n");
} }
/*
* Number of consecutive bes2600_pwr_enter_lp_mode timeouts (with zero
* PM_INDICATIONs received) before we conclude the firmware does not
* honor host-driven PSM and switch to a sticky skip path.
*/
#define BES2600_PM_UNSUPPORTED_THRESHOLD 3
/*
* Latch pm_unsupported = true and force chip_pm_state = ACTIVE so the
* c6.2 wake-side skip branch covers bes2600_pwr_device_exit_lp_mode.
* Called after BES2600_PM_UNSUPPORTED_THRESHOLD consecutive enter_lp_mode
* timeouts with zero PM_INDICATIONs.
*/
static void bes2600_pwr_latch_pm_unsupported(struct bes2600_common *hw_priv)
{
bes_warn("PSM not honored (%u timeouts), switching to skip mode\n",
hw_priv->bes_power.pm_consecutive_timeouts);
hw_priv->bes_power.pm_unsupported = true;
atomic_set(&hw_priv->bes_power.chip_pm_state,
BES2600_CHIP_PM_ACTIVE);
/*
* Hold the MCU wake-flag bit permanently. Without this, every
* sdio_rx_work invocation hits bes2600_gpio_wakeup_mcu(SDIO_RX)
* when gpio_wakup_flags == 0, drives the GPIO high and msleeps
* 10 ms per RX. With ~50 RX/s of beacons + multicast that's
* ~50%% of the bes_sdio workqueue thread blocked in msleep,
* which directly caps RX throughput. Holding the MCU bit makes
* those calls bit-only bookkeeping (gpio_wakeup = (flags == 0)
* stays false, no GPIO toggle, no msleep). The bit is never
* cleared once pm_unsupported is set because
* bes2600_pwr_device_enter_lp_mode is unreachable under the
* early-return.
*/
if (hw_priv->sbus_ops->gpio_wake)
hw_priv->sbus_ops->gpio_wake(hw_priv->sbus_priv,
GPIO_WAKE_FLAG_MCU);
}
static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv)
{ {
int i = 0; int i = 0;
@@ -476,6 +515,17 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv)
char ip_str[20]; char ip_str[20];
unsigned long status = 0; unsigned long status = 0;
/*
* Sticky early-return when we've previously concluded the firmware
* doesn't honor PSM. Each attempt would otherwise burn 5s on a
* doomed wait_for_completion_timeout and produce a noisy three-line
* cascade in dmesg every time power_down_work retries (every
* ~10s). The chip stays in active mode, which on this firmware is
* the de-facto state anyway.
*/
if (hw_priv->bes_power.pm_unsupported)
return -EOPNOTSUPP;
/* set interface low power configuration */ /* set interface low power configuration */
bes2600_for_each_vif(hw_priv, priv, i) { bes2600_for_each_vif(hw_priv, priv, i) {
#ifdef P2P_MULTIVIF #ifdef P2P_MULTIVIF
@@ -569,6 +619,9 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv)
atomic_set(&hw_priv->bes_power.chip_pm_state, atomic_set(&hw_priv->bes_power.chip_pm_state,
BES2600_CHIP_PM_UNKNOWN); BES2600_CHIP_PM_UNKNOWN);
timeouts++; timeouts++;
if (++hw_priv->bes_power.pm_consecutive_timeouts
>= BES2600_PM_UNSUPPORTED_THRESHOLD)
bes2600_pwr_latch_pm_unsupported(hw_priv);
} }
} }
} else { } else {
@@ -607,7 +660,8 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv)
* GPIO stays high and the bit clear here is purely * GPIO stays high and the bit clear here is purely
* bookkeeping (so the next gpio_wake doesn't no-op). * bookkeeping (so the next gpio_wake doesn't no-op).
*/ */
if (hw_priv->sbus_ops->gpio_sleep) if (!hw_priv->bes_power.pm_unsupported &&
hw_priv->sbus_ops->gpio_sleep)
hw_priv->sbus_ops->gpio_sleep(hw_priv->sbus_priv, hw_priv->sbus_ops->gpio_sleep(hw_priv->sbus_priv,
GPIO_WAKE_FLAG_MCU); GPIO_WAKE_FLAG_MCU);
ret = -ETIMEDOUT; ret = -ETIMEDOUT;
@@ -930,6 +984,8 @@ void bes2600_pwr_init(struct bes2600_common *hw_priv)
mutex_init(&hw_priv->bes_power.pwr_mutex); mutex_init(&hw_priv->bes_power.pwr_mutex);
atomic_set(&hw_priv->bes_power.dev_state, 0); atomic_set(&hw_priv->bes_power.dev_state, 0);
atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_UNKNOWN); atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_UNKNOWN);
hw_priv->bes_power.pm_unsupported = false;
hw_priv->bes_power.pm_consecutive_timeouts = 0;
init_completion(&hw_priv->bes_power.pm_enter_cmpl); init_completion(&hw_priv->bes_power.pm_enter_cmpl);
sema_init(&hw_priv->bes_power.sync_lock, 1); sema_init(&hw_priv->bes_power.sync_lock, 1);
device_set_wakeup_capable(hw_priv->pdev, true); device_set_wakeup_capable(hw_priv->pdev, true);
@@ -1319,6 +1375,18 @@ void bes2600_pwr_notify_ps_changed(struct bes2600_common *hw_priv, u8 psmode)
* indication can prime a future wait against a freshly * indication can prime a future wait against a freshly
* reinit_completion()'ed state. * reinit_completion()'ed state.
*/ */
/*
* Any PM indication, whatever its psmode, proves the firmware is
* actually emitting them. Reset the consecutive-timeout counter
* so a transient stall doesn't permanently disable PSM, and clear
* pm_unsupported if a previous run had latched it.
*/
hw_priv->bes_power.pm_consecutive_timeouts = 0;
if (hw_priv->bes_power.pm_unsupported) {
bes_warn("PM indication arrived after pm_unsupported was set; re-enabling PSM transitions\n");
hw_priv->bes_power.pm_unsupported = false;
}
if ((psmode & 0x01) != WSM_PSM_ACTIVE) { if ((psmode & 0x01) != WSM_PSM_ACTIVE) {
atomic_set(&hw_priv->bes_power.chip_pm_state, atomic_set(&hw_priv->bes_power.chip_pm_state,
BES2600_CHIP_PM_LP); BES2600_CHIP_PM_LP);
+9
View File
@@ -121,6 +121,15 @@ struct bes2600_pwr_t
struct bes2600_pwr_event_t pwr_events[BES2600_DELAY_EVENT_NUM]; struct bes2600_pwr_event_t pwr_events[BES2600_DELAY_EVENT_NUM];
atomic_t pm_set_in_process; atomic_t pm_set_in_process;
atomic_t chip_pm_state; atomic_t chip_pm_state;
/*
* Sticky flag set after BES2600_PM_UNSUPPORTED_THRESHOLD
* consecutive enter_lp_mode timeouts with zero PM_INDICATIONs
* received from firmware. Indicates this chip's firmware does
* not honor host-driven PSM transitions; further attempts are
* skipped to avoid the 5s timeout cascade.
*/
bool pm_unsupported;
unsigned int pm_consecutive_timeouts;
}; };
#ifdef CONFIG_BES2600_WOWLAN #ifdef CONFIG_BES2600_WOWLAN