From 983bd62dd0ba9b25dfb986f39fc5a79145da6008 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Tue, 28 Apr 2026 16:54:06 +0200 Subject: [PATCH 18/29] bes2600: self-detect when firmware does not honor PSM and skip the cycle The c6 series fixed several host-side bookkeeping bugs around PSM transitions, but didn't address the underlying contract: this chip's firmware (BES2600 with the Bestechnic Dec 2023 build that ships on PineTab2 and most danctnix images) silently drops every WSM_set_pm request without emitting the corresponding PM_INDICATION. The driver's own power_down_work delayed work calls bes2600_pwr_enter_lp_mode every ~10s; without firmware acknowledgment each call burns 5s on wait_for_completion_timeout(pm_enter_cmpl, 5*HZ) and produces a recurring three-line cascade in dmesg: bes2600_pwr_enter_lp_mode, wait pm ind timeout bes2600_sdio_active failed, subsys:0 bes2600_pwr_device_exit_lp_mode, active mcu fail Confirmed by tripwire instrumentation on PineTab2 (linux-pinetab2 6.19.10-danctnix1, ohm) running the c5+c6 stack: zero wsm_set_pm_indication() invocations across an entire boot, while bes2600_pwr_enter_lp_mode timed out repeatedly, and bes2600_sdio_active() consistently saw BES_SLAVE_STATUS_REG_ID return 0x2f (every "ready" bit set except MCU_WAKEUP_READY (bit 4) - the firmware reports "I'm awake, there's nothing to wake from"). This patch makes the driver self-heal: * struct bes2600_pwr_t gains pm_unsupported (bool) and pm_consecutive_timeouts (unsigned int). Both initialised to 0/false. * bes2600_pwr_enter_lp_mode early-returns -EOPNOTSUPP when pm_unsupported is set. Skips the per-VIF set_pm round-trip and the wait_for_completion entirely. * On the cmpxchg-success branch of the timeout path, we increment pm_consecutive_timeouts. 
When it crosses BES2600_PM_UNSUPPORTED_THRESHOLD (3, ~15s of trying), we latch pm_unsupported = true and force chip_pm_state = ACTIVE so that bes2600_pwr_device_exit_lp_mode's c6.2 skip branch covers the wake side (no gpio_wake / sbus_active / WSM_set_operational_mode reissue past the first one). * bes2600_pwr_notify_ps_changed resets pm_consecutive_timeouts to 0 on any incoming PM indication, and clears pm_unsupported if it was previously latched. So a firmware update that fixes PM_IND delivery automatically re-enables PSM transitions without a driver rebuild. mac80211's PSM requests via bes2600_set_pm() still flow to the firmware unchanged; they just don't have host-side timeouts so they remain silent regardless of firmware acknowledgment. Power consumption goes up if the firmware actually CAN do PSM (we'd be keeping the chip awake unnecessarily), but on a chip where the counter trips this trade-off is forced anyway: the chip stayed awake under the broken cascade as well, just with constant SDIO churn. Net effect on dmesg: after ~15s of boot, the three-line cascade stops firing entirely. The firmware-side wedge is observed once per boot (captured by the pm_unsupported latch) instead of per-cycle. Signed-off-by: Markus Fritsche --- bes2600/bes_pwr.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++- bes2600/bes_pwr.h | 9 ++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/staging/bes2600/bes_pwr.c b/drivers/staging/bes2600/bes_pwr.c index b7b6c2f..620acef 100644 --- a/drivers/staging/bes2600/bes_pwr.c +++ b/drivers/staging/bes2600/bes_pwr.c @@ -467,6 +467,45 @@ static void bes2600_pwr_device_enter_lp_mode(struct bes2600_common *hw_priv) bes_devel("device enter sleep\n"); } +/* + * Number of consecutive bes2600_pwr_enter_lp_mode timeouts (with zero + * PM_INDICATIONs received) before we conclude the firmware does not + * honor host-driven PSM and switch to a sticky skip path. 
+ */ +#define BES2600_PM_UNSUPPORTED_THRESHOLD 3 + +/* + * Latch pm_unsupported = true and force chip_pm_state = ACTIVE so the + * c6.2 wake-side skip branch covers bes2600_pwr_device_exit_lp_mode. + * Called after BES2600_PM_UNSUPPORTED_THRESHOLD consecutive enter_lp_mode + * timeouts with zero PM_INDICATIONs. + */ +static void bes2600_pwr_latch_pm_unsupported(struct bes2600_common *hw_priv) +{ + bes_warn("PSM not honored (%u timeouts), switching to skip mode\n", + hw_priv->bes_power.pm_consecutive_timeouts); + hw_priv->bes_power.pm_unsupported = true; + atomic_set(&hw_priv->bes_power.chip_pm_state, + BES2600_CHIP_PM_ACTIVE); + + /* + * Hold the MCU wake-flag bit permanently. Without this, every + * sdio_rx_work invocation hits bes2600_gpio_wakeup_mcu(SDIO_RX) + * when gpio_wakup_flags == 0, drives the GPIO high and msleeps + * 10 ms per RX. With ~50 RX/s of beacons + multicast that's + * ~50% of the bes_sdio workqueue thread blocked in msleep, + * which directly caps RX throughput. Holding the MCU bit makes + * those calls bit-only bookkeeping (gpio_wakeup = (flags == 0) + * stays false, no GPIO toggle, no msleep). The bit is never + * cleared while pm_unsupported is set because + * bes2600_pwr_device_enter_lp_mode is unreachable under the + * early-return. + */ + if (hw_priv->sbus_ops->gpio_wake) + hw_priv->sbus_ops->gpio_wake(hw_priv->sbus_priv, + GPIO_WAKE_FLAG_MCU); +} + static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) { int i = 0; @@ -476,6 +515,17 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) char ip_str[20]; unsigned long status = 0; + /* + * Sticky early-return when we've previously concluded the firmware + * doesn't honor PSM. Each attempt would otherwise burn 5s on a + * doomed wait_for_completion_timeout and produce a noisy three-line + * cascade in dmesg every time power_down_work retries (every + * ~10s). The chip stays in active mode, which on this firmware is + * the de-facto state anyway. 
+ */ + if (hw_priv->bes_power.pm_unsupported) + return -EOPNOTSUPP; + /* set interface low power configuration */ bes2600_for_each_vif(hw_priv, priv, i) { #ifdef P2P_MULTIVIF @@ -571,6 +621,9 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_UNKNOWN); timeouts++; + if (++hw_priv->bes_power.pm_consecutive_timeouts + >= BES2600_PM_UNSUPPORTED_THRESHOLD) + bes2600_pwr_latch_pm_unsupported(hw_priv); } } } else { @@ -609,7 +662,8 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) * GPIO stays high and the bit clear here is purely * bookkeeping (so the next gpio_wake doesn't no-op). */ - if (hw_priv->sbus_ops->gpio_sleep) + if (!hw_priv->bes_power.pm_unsupported && + hw_priv->sbus_ops->gpio_sleep) hw_priv->sbus_ops->gpio_sleep(hw_priv->sbus_priv, GPIO_WAKE_FLAG_MCU); ret = -ETIMEDOUT; @@ -932,6 +986,8 @@ void bes2600_pwr_init(struct bes2600_common *hw_priv) mutex_init(&hw_priv->bes_power.pwr_mutex); atomic_set(&hw_priv->bes_power.dev_state, 0); atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_UNKNOWN); + hw_priv->bes_power.pm_unsupported = false; + hw_priv->bes_power.pm_consecutive_timeouts = 0; init_completion(&hw_priv->bes_power.pm_enter_cmpl); sema_init(&hw_priv->bes_power.sync_lock, 1); device_set_wakeup_capable(hw_priv->pdev, true); @@ -1321,6 +1377,18 @@ void bes2600_pwr_notify_ps_changed(struct bes2600_common *hw_priv, u8 psmode) * indication can prime a future wait against a freshly * reinit_completion()'ed state. */ + /* + * Any PM indication, whatever its psmode, proves the firmware is + * actually emitting them. Reset the consecutive-timeout counter + * so a transient stall doesn't permanently disable PSM, and clear + * pm_unsupported if a previous run had latched it. 
+ */ + hw_priv->bes_power.pm_consecutive_timeouts = 0; + if (hw_priv->bes_power.pm_unsupported) { + bes_warn("PM indication arrived after pm_unsupported was set; re-enabling PSM transitions\n"); + hw_priv->bes_power.pm_unsupported = false; + } + if ((psmode & 0x01) != WSM_PSM_ACTIVE) { atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_LP); diff --git a/drivers/staging/bes2600/bes_pwr.h b/drivers/staging/bes2600/bes_pwr.h index 6bc44ac..92de90b 100644 --- a/drivers/staging/bes2600/bes_pwr.h +++ b/drivers/staging/bes2600/bes_pwr.h @@ -121,6 +121,15 @@ struct bes2600_pwr_t struct bes2600_pwr_event_t pwr_events[BES2600_DELAY_EVENT_NUM]; atomic_t pm_set_in_process; atomic_t chip_pm_state; + /* + * Sticky flag set after BES2600_PM_UNSUPPORTED_THRESHOLD + * consecutive enter_lp_mode timeouts with zero PM_INDICATIONs + * received from firmware. Indicates this chip's firmware does + * not honor host-driven PSM transitions; further attempts are + * skipped to avoid the 5s timeout cascade. + */ + bool pm_unsupported; + unsigned int pm_consecutive_timeouts; }; #ifdef CONFIG_BES2600_WOWLAN -- 2.54.0