From e6a942a5df2de87a22dc7a3ea70c220192336cff Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Tue, 28 Apr 2026 15:27:05 +0200 Subject: [PATCH] patches: add c6.2 wake-path consumer of chip_pm_state (Mobian + danctnix) --- ...rcuit-wake-handshake-when-chip-is-co.patch | 174 ++++++++++++++++++ ...rcuit-wake-handshake-when-chip-is-co.patch | 174 ++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 patches/pm-wake-consume-state-danctnix/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch create mode 100644 patches/pm-wake-consume-state/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch diff --git a/patches/pm-wake-consume-state-danctnix/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch b/patches/pm-wake-consume-state-danctnix/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch new file mode 100644 index 000000000..06da38647 --- /dev/null +++ b/patches/pm-wake-consume-state-danctnix/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch @@ -0,0 +1,174 @@ +From 1c5a06832f1cf7605aaf6a45cd029d1dad40b055 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 28 Apr 2026 15:23:35 +0200 +Subject: [PATCH] bes2600: short-circuit wake handshake when chip is confirmed + ACTIVE + +The previous patch ("bes2600: gate PM indication completion on pending +request and track chip state") added enum bes2600_chip_pm_state and the +chip_pm_state field tracking what the host has *seen the firmware +confirm*. This patch makes the wake side use it. + +Without this, every bes2600_pwr_device_exit_lp_mode() unconditionally +runs gpio_wake() + sbus_active() + wsm_set_operational_mode(active), +even when the chip is already in confirmed-ACTIVE state and the wake +sequence has nothing to do. 
The visible failure mode on PineTab2: + + bes2600_pwr_enter_lp_mode, wait pm ind timeout + repeat set gpio_wake_flag, sub_sys:0 + bes2600_sdio_active failed, subsys:0 + bes2600_pwr_device_exit_lp_mode, active mcu fail + +cycling every ~9 s, ~22 cycles in 10 minutes. Five steps: + + 1. enter_lp_mode timed out (firmware indication lost). With c6.1, + chip_pm_state is now UNKNOWN. + 2. lock_device fires exit_lp_mode. + 3. gpio_wake hits "bit already set" because device_enter_lp_mode + was skipped when the indication timed out, so gpio_sleep was + never called - the bit reflects driver intent, not chip state. + gpio_wake silently no-ops (no GPIO edge), bit stays set. + 4. sbus_active spends 200 x 2 ms looking for MCU_WAKEUP_READY that + never comes (firmware was never told to wake), then fails. + 5. Driver continues to wsm_set_operational_mode against the wedged + bus, compounding the failure. + +This patch's three moves: + + * bes2600_pwr_device_exit_lp_mode() reads chip_pm_state at entry. + On BES2600_CHIP_PM_ACTIVE, log at devel level and return without + touching gpio_wake / sbus_active / WSM. The chip is in the state + we want; the handshake exists only to drive a transition. + + * On BES2600_CHIP_PM_LP or BES2600_CHIP_PM_UNKNOWN, run the wake + handshake as before, but on sbus_active() failure: set + chip_pm_state = UNKNOWN, log once at err level, and bail out. + Do NOT call wsm_set_operational_mode over a wedged bus - it + would just emit a second error and leave the chip in an even + less defined state. + + * bes2600_gpio_wakeup_mcu() / bes2600_gpio_allow_mcu_sleep(): + demote "repeat set/clear gpio_wake_flag" from bes_err to + bes_devel. Multi-subsystem wake-hold (e.g. WIFI + BT both want + MCU awake) is the steady-state case, and the symmetric clear + while bit-already-clear is racy bookkeeping rather than a + hardware error. 
The wake-side path also re-applies + the bit with |= BIT(flag). Because that branch is only reached + when the bit is already set, the assignment is a no-op today + and changes no behaviour; it is kept only so the bookkeeping + stays in one place if the structure ever grows to per-flag + refcounts. + +Net effect on the cycle: + + * If chip is genuinely ACTIVE (chip_pm_state == ACTIVE), wake skips + cleanly. Storm goes silent. + * If chip is genuinely LP, behaviour is unchanged. + * If chip is UNKNOWN (post-timeout state), one wake attempt is + made; on failure, state stays UNKNOWN and we don't emit a + second cascade error per attempt. Repeated UNKNOWN with failed + wake will eventually be picked up by the LMAC active-monitor + and escalated to mmc_hw_reset (c5.2). + +No new locks, no new state. Only consumption of the chip_pm_state +field added in the prerequisite patch. + +Signed-off-by: Markus Fritsche +--- + drivers/staging/bes2600/bes2600_sdio.c | 15 ++++++++++-- + drivers/staging/bes2600/bes_pwr.c | 34 +++++++++++++++++++++++++- + 2 files changed, 46 insertions(+), 3 deletions(-) + +diff --git a/drivers/staging/bes2600/bes2600_sdio.c b/drivers/staging/bes2600/bes2600_sdio.c +index b9d836fab7af..929503547cfd 100644 +--- a/drivers/staging/bes2600/bes2600_sdio.c ++++ b/drivers/staging/bes2600/bes2600_sdio.c +@@ -1388,7 +1388,14 @@ static void bes2600_gpio_wakeup_mcu(struct sbus_priv *self, int flag) + + /* error check */ + if((self->gpio_wakup_flags & BIT(flag)) != 0) { +- bes_err( "repeat set gpio_wake_flag, sub_sys:%d", flag); ++ /* ++ * Multiple subsystems holding wake is the steady-state case ++ * (e.g. WIFI + BT both want MCU awake). Demoted from bes_err ++ * to bes_devel since it isn't an error - the GPIO is already ++ * asserted high and the subsystem is now also tracked. 
++ */ ++ bes_devel("repeat set gpio_wake_flag, sub_sys:%d\n", flag); ++ self->gpio_wakup_flags |= BIT(flag); + mutex_unlock(&self->io_mutex); + return; + } +@@ -1420,7 +1427,11 @@ static void bes2600_gpio_allow_mcu_sleep(struct sbus_priv *self, int flag) + + /* error check */ + if((self->gpio_wakup_flags & BIT(flag)) == 0) { +- bes_err( "repeat clear gpio_wake_flag, sub_sys:%d", flag); ++ /* ++ * Mirror of the wake path: a clear when the bit is already ++ * clear is racy bookkeeping, not a hardware error. ++ */ ++ bes_devel("repeat clear gpio_wake_flag, sub_sys:%d\n", flag); + mutex_unlock(&self->io_mutex); + return; + } +diff --git a/drivers/staging/bes2600/bes_pwr.c b/drivers/staging/bes2600/bes_pwr.c +index 0da1be272392..e5e4d4d8944c 100644 +--- a/drivers/staging/bes2600/bes_pwr.c ++++ b/drivers/staging/bes2600/bes_pwr.c +@@ -597,19 +597,51 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + static void bes2600_pwr_device_exit_lp_mode(struct bes2600_common *hw_priv) + { + int ret = 0; ++ enum bes2600_chip_pm_state state; + struct wsm_operational_mode mode = { + .power_mode = wsm_power_mode_active, + .disableMoreFlagUsage = true, + }; + ++ /* ++ * Consult chip_pm_state set by bes2600_pwr_notify_ps_changed(). ++ * If we last saw the firmware confirm ACTIVE, skip the wake-up ++ * handshake entirely - the GPIO is already asserted, the SDIO ++ * MCU subsystem is already running, and another sbus_active() ++ * round-trip just hits its 200x2ms timeout because the firmware ++ * has nothing to do. This is the deterministic source of the ++ * "active mcu fail" cycle in dmesg when an enter_lp_mode timeout ++ * left bookkeeping desynced. 
++ */ ++ state = atomic_read(&hw_priv->bes_power.chip_pm_state); ++ if (state == BES2600_CHIP_PM_ACTIVE) { ++ bes_devel("device_exit_lp_mode: chip already ACTIVE, skipping wake handshake\n"); ++ return; ++ } ++ + bes_devel("host lock lmac\n"); + if(hw_priv->sbus_ops->gpio_wake) + hw_priv->sbus_ops->gpio_wake(hw_priv->sbus_priv, GPIO_WAKE_FLAG_MCU); + + if(hw_priv->sbus_ops->sbus_active) { + ret = hw_priv->sbus_ops->sbus_active(hw_priv->sbus_priv, SUBSYSTEM_MCU); +- if (ret) ++ if (ret) { ++ /* ++ * The firmware did not raise MCU_WAKEUP_READY within ++ * the SDIO handshake window. Mark chip state UNKNOWN ++ * and bail out before issuing wsm_set_operational_mode ++ * over a wedged bus - that would just compound the ++ * failure. The next exit_lp_mode call will see UNKNOWN ++ * and try the wake again from scratch; if it stays ++ * UNKNOWN across multiple cycles, the LMAC active- ++ * monitor will eventually escalate to bus_reset ++ * (c5.2's mmc_hw_reset path). ++ */ + bes_err("%s, active mcu fail\n", __func__); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_UNKNOWN); ++ return; ++ } + } + + ret = wsm_set_operational_mode(hw_priv, &mode, 0); +-- +2.53.0 + diff --git a/patches/pm-wake-consume-state/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch b/patches/pm-wake-consume-state/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch new file mode 100644 index 000000000..c67549e37 --- /dev/null +++ b/patches/pm-wake-consume-state/0001-bes2600-short-circuit-wake-handshake-when-chip-is-co.patch @@ -0,0 +1,174 @@ +From b6608f14fe81f38f53b5c3e7e6a739cdf20237f8 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 28 Apr 2026 15:23:34 +0200 +Subject: [PATCH] bes2600: short-circuit wake handshake when chip is confirmed + ACTIVE + +The previous patch ("bes2600: gate PM indication completion on pending +request and track chip state") added enum bes2600_chip_pm_state and the +chip_pm_state field tracking what the host has *seen 
the firmware +confirm*. This patch makes the wake side use it. + +Without this, every bes2600_pwr_device_exit_lp_mode() unconditionally +runs gpio_wake() + sbus_active() + wsm_set_operational_mode(active), +even when the chip is already in confirmed-ACTIVE state and the wake +sequence has nothing to do. The visible failure mode on PineTab2: + + bes2600_pwr_enter_lp_mode, wait pm ind timeout + repeat set gpio_wake_flag, sub_sys:0 + bes2600_sdio_active failed, subsys:0 + bes2600_pwr_device_exit_lp_mode, active mcu fail + +cycling every ~9 s, ~22 cycles in 10 minutes. Five steps: + + 1. enter_lp_mode timed out (firmware indication lost). With c6.1, + chip_pm_state is now UNKNOWN. + 2. lock_device fires exit_lp_mode. + 3. gpio_wake hits "bit already set" because device_enter_lp_mode + was skipped when the indication timed out, so gpio_sleep was + never called - the bit reflects driver intent, not chip state. + gpio_wake silently no-ops (no GPIO edge), bit stays set. + 4. sbus_active spends 200 x 2 ms looking for MCU_WAKEUP_READY that + never comes (firmware was never told to wake), then fails. + 5. Driver continues to wsm_set_operational_mode against the wedged + bus, compounding the failure. + +This patch's three moves: + + * bes2600_pwr_device_exit_lp_mode() reads chip_pm_state at entry. + On BES2600_CHIP_PM_ACTIVE, log at devel level and return without + touching gpio_wake / sbus_active / WSM. The chip is in the state + we want; the handshake exists only to drive a transition. + + * On BES2600_CHIP_PM_LP or BES2600_CHIP_PM_UNKNOWN, run the wake + handshake as before, but on sbus_active() failure: set + chip_pm_state = UNKNOWN, log once at err level, and bail out. + Do NOT call wsm_set_operational_mode over a wedged bus - it + would just emit a second error and leave the chip in an even + less defined state. + + * bes2600_gpio_wakeup_mcu() / bes2600_gpio_allow_mcu_sleep(): + demote "repeat set/clear gpio_wake_flag" from bes_err to + bes_devel. 
Multi-subsystem wake-hold (e.g. WIFI + BT both want + MCU awake) is the steady-state case, and the symmetric clear + while bit-already-clear is racy bookkeeping rather than a + hardware error. The wake-side path also re-applies + the bit with |= BIT(flag). Because that branch is only reached + when the bit is already set, the assignment is a no-op today + and changes no behaviour; it is kept only so the bookkeeping + stays in one place if the structure ever grows to per-flag + refcounts. + +Net effect on the cycle: + + * If chip is genuinely ACTIVE (chip_pm_state == ACTIVE), wake skips + cleanly. Storm goes silent. + * If chip is genuinely LP, behaviour is unchanged. + * If chip is UNKNOWN (post-timeout state), one wake attempt is + made; on failure, state stays UNKNOWN and we don't emit a + second cascade error per attempt. Repeated UNKNOWN with failed + wake will eventually be picked up by the LMAC active-monitor + and escalated to mmc_hw_reset (c5.2). + +No new locks, no new state. Only consumption of the chip_pm_state +field added in the prerequisite patch. + +Signed-off-by: Markus Fritsche +--- + bes2600/bes2600_sdio.c | 15 +++++++++++++-- + bes2600/bes_pwr.c | 34 +++++++++++++++++++++++++++++++++- + 2 files changed, 46 insertions(+), 3 deletions(-) + +diff --git a/bes2600/bes2600_sdio.c b/bes2600/bes2600_sdio.c +index 3e04e8c..acc0f19 100644 +--- a/bes2600/bes2600_sdio.c ++++ b/bes2600/bes2600_sdio.c +@@ -1388,7 +1388,14 @@ static void bes2600_gpio_wakeup_mcu(struct sbus_priv *self, int flag) + + /* error check */ + if((self->gpio_wakup_flags & BIT(flag)) != 0) { +- bes_err( "repeat set gpio_wake_flag, sub_sys:%d", flag); ++ /* ++ * Multiple subsystems holding wake is the steady-state case ++ * (e.g. WIFI + BT both want MCU awake). Demoted from bes_err ++ * to bes_devel since it isn't an error - the GPIO is already ++ * asserted high and the subsystem is now also tracked. 
++ */ ++ bes_devel("repeat set gpio_wake_flag, sub_sys:%d\n", flag); ++ self->gpio_wakup_flags |= BIT(flag); + mutex_unlock(&self->io_mutex); + return; + } +@@ -1420,7 +1427,11 @@ static void bes2600_gpio_allow_mcu_sleep(struct sbus_priv *self, int flag) + + /* error check */ + if((self->gpio_wakup_flags & BIT(flag)) == 0) { +- bes_err( "repeat clear gpio_wake_flag, sub_sys:%d", flag); ++ /* ++ * Mirror of the wake path: a clear when the bit is already ++ * clear is racy bookkeeping, not a hardware error. ++ */ ++ bes_devel("repeat clear gpio_wake_flag, sub_sys:%d\n", flag); + mutex_unlock(&self->io_mutex); + return; + } +diff --git a/bes2600/bes_pwr.c b/bes2600/bes_pwr.c +index cd1b8cd..22074db 100644 +--- a/bes2600/bes_pwr.c ++++ b/bes2600/bes_pwr.c +@@ -597,19 +597,51 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + static void bes2600_pwr_device_exit_lp_mode(struct bes2600_common *hw_priv) + { + int ret = 0; ++ enum bes2600_chip_pm_state state; + struct wsm_operational_mode mode = { + .power_mode = wsm_power_mode_active, + .disableMoreFlagUsage = true, + }; + ++ /* ++ * Consult chip_pm_state set by bes2600_pwr_notify_ps_changed(). ++ * If we last saw the firmware confirm ACTIVE, skip the wake-up ++ * handshake entirely - the GPIO is already asserted, the SDIO ++ * MCU subsystem is already running, and another sbus_active() ++ * round-trip just hits its 200x2ms timeout because the firmware ++ * has nothing to do. This is the deterministic source of the ++ * "active mcu fail" cycle in dmesg when an enter_lp_mode timeout ++ * left bookkeeping desynced. 
++ */ ++ state = atomic_read(&hw_priv->bes_power.chip_pm_state); ++ if (state == BES2600_CHIP_PM_ACTIVE) { ++ bes_devel("device_exit_lp_mode: chip already ACTIVE, skipping wake handshake\n"); ++ return; ++ } ++ + bes_devel("host lock lmac\n"); + if(hw_priv->sbus_ops->gpio_wake) + hw_priv->sbus_ops->gpio_wake(hw_priv->sbus_priv, GPIO_WAKE_FLAG_MCU); + + if(hw_priv->sbus_ops->sbus_active) { + ret = hw_priv->sbus_ops->sbus_active(hw_priv->sbus_priv, SUBSYSTEM_MCU); +- if (ret) ++ if (ret) { ++ /* ++ * The firmware did not raise MCU_WAKEUP_READY within ++ * the SDIO handshake window. Mark chip state UNKNOWN ++ * and bail out before issuing wsm_set_operational_mode ++ * over a wedged bus - that would just compound the ++ * failure. The next exit_lp_mode call will see UNKNOWN ++ * and try the wake again from scratch; if it stays ++ * UNKNOWN across multiple cycles, the LMAC active- ++ * monitor will eventually escalate to bus_reset ++ * (c5.2's mmc_hw_reset path). ++ */ + bes_err("%s, active mcu fail\n", __func__); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_UNKNOWN); ++ return; ++ } + } + + ret = wsm_set_operational_mode(hw_priv, &mode, 0); +-- +2.53.0 +