diff --git a/patches/pm-state-resync-danctnix/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch b/patches/pm-state-resync-danctnix/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch new file mode 100644 index 000000000..31f67dc92 --- /dev/null +++ b/patches/pm-state-resync-danctnix/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch @@ -0,0 +1,209 @@ +From 699e4b59a90c396dc95df8cf8a233824f9c0fedf Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 28 Apr 2026 15:05:27 +0200 +Subject: [PATCH] bes2600: gate PM indication completion on pending request and + track chip state + +When mac80211 toggles PSM on the BES2600, the host sends WSM set_pm +and waits up to 5 s on bes_power.pm_enter_cmpl for a firmware-side +PM-changed indication confirming the transition. Three sequenced +flaws make the wait-and-confirm racy and leave host/chip bookkeeping +desynced when anything misfires: + + 1) bes2600_pwr_notify_ps_changed() unconditionally fires + complete(pm_enter_cmpl) for any non-active psmode. It does not + check whether a host-initiated set_pm is actually pending. A + spontaneous indication (firmware-internal coex move, + idle-driven aging) primes the completion, and the next host- + driven enter_lp_mode sees a false success on its first + wait_for_completion_timeout. + + 2) The wait/reinit ordering in bes2600_pwr_enter_lp_mode is + + status = wait_for_completion_timeout(...); + atomic_set(pm_set_in_process, 0); + reinit_completion(...); + + If an indication arrives between wait_for_completion_timeout + returning with status==1 and reinit_completion, the next + enter_lp_mode iteration's wait can also see false success. The + reinit must happen *before* we start the new request, not + after handling the previous one. + + 3) On wait_pm_ind timeout, the driver returns -ETIMEDOUT and walks + away. It does not record that the firmware's actual PM state + is no longer known to the host. Subsequent wake paths + (gpio_wake / sbus_active) assume the chip is still active and + hit deterministic SDIO failures when the firmware has + transitioned anyway. + +This patch is the safe-prerequisite half of a wider fix: + + * bes_pwr.h gains enum bes2600_chip_pm_state {ACTIVE, LP, UNKNOWN} + and bes_power.chip_pm_state. Its job is to track what the host + has *seen the firmware confirm*, not what the host has + requested. Initialised to ACTIVE in bes2600_pwr_init(). + + * bes2600_pwr_notify_ps_changed() unconditionally updates + chip_pm_state on every indication, but only fires + complete(pm_enter_cmpl) when atomic_cmpxchg(pm_set_in_process, + 1, 0) succeeds. A spontaneous indication can no longer prime a + waiter that will only set up its request afterwards. + + * bes2600_pwr_enter_lp_mode() now reinit_completion()s before + setting pm_set_in_process and sending wsm_set_pm. After a + timeout, it cmpxchgs pm_set_in_process back to 0 (so a late + indication cannot prime the next iteration) and on the win- + cmpxchg branch records chip_pm_state=UNKNOWN. + +A follow-up patch consumes chip_pm_state on the wake side +(bes2600_pwr_device_exit_lp_mode + bes2600_gpio_wakeup_mcu) to fix +the deterministic "active mcu fail" cycle this state-record +enables a fix for. Splitting the work this way keeps the lock-free +race fix small and reviewable on its own. + +No new locks, no behaviour change on the success path. Only the +recovery path (timeout + spontaneous indication) gains correctness. + +Signed-off-by: Markus Fritsche +--- + drivers/staging/bes2600/bes_pwr.c | 66 +++++++++++++++++++++++++++---- + drivers/staging/bes2600/bes_pwr.h | 15 +++++++ + 2 files changed, 74 insertions(+), 7 deletions(-) + +diff --git a/drivers/staging/bes2600/bes_pwr.c b/drivers/staging/bes2600/bes_pwr.c +index f62ae226d295..0da1be272392 100644 +--- a/drivers/staging/bes2600/bes_pwr.c ++++ b/drivers/staging/bes2600/bes_pwr.c +@@ -524,7 +524,17 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + bes_devel("%s, psMode:%s, fastPsmIdlePeriod:%d apPsmChangePeriod:%d minAutoPsPollPeriod:%d\n", + __func__, bes2600_get_ps_mode_str(priv->powersave_mode.pmMode), priv->powersave_mode.fastPsmIdlePeriod, + priv->powersave_mode.apPsmChangePeriod, priv->powersave_mode.minAutoPsPollPeriod); ++ /* ++ * Reinit BEFORE the WSM goes out, so a stale ++ * indication from a previous cycle cannot have ++ * primed pm_enter_cmpl. From here until the ++ * indication callback's cmpxchg(1->0) on ++ * pm_set_in_process, only the indication for ++ * THIS request can complete the wait. ++ */ ++ reinit_completion(&hw_priv->bes_power.pm_enter_cmpl); + atomic_set(&hw_priv->bes_power.pm_set_in_process, 1); ++ + ret = bes2600_set_pm(priv, &priv->powersave_mode); + if (ret) { + atomic_set(&hw_priv->bes_power.pm_set_in_process, 0); +@@ -535,11 +545,33 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + + /* wait power save mode changed indication */ + status = wait_for_completion_timeout(&hw_priv->bes_power.pm_enter_cmpl, 5 * HZ); +- atomic_set(&hw_priv->bes_power.pm_set_in_process, 0); +- reinit_completion(&hw_priv->bes_power.pm_enter_cmpl); + if (!status) { +- bes_err("%s, wait pm ind timeout\n", __func__); +- timeouts++; ++ /* ++ * The indication callback only fires ++ * complete() when it observes ++ * pm_set_in_process == 1; cmpxchg it ++ * to 0 here so a late indication ++ * cannot prime the next wait. ++ * ++ * If we win the cmpxchg, this is a ++ * real timeout: the firmware's PS ++ * state is unknown to us. Mark it as ++ * such so the next wake path can ++ * probe before assuming the chip is ++ * still active. ++ * ++ * If we lose the cmpxchg, the ++ * indication arrived between the ++ * wait timing out and us getting ++ * here; treat as success. ++ */ ++ if (atomic_cmpxchg(&hw_priv->bes_power.pm_set_in_process, ++ 1, 0) == 1) { ++ bes_err("%s, wait pm ind timeout\n", __func__); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_UNKNOWN); ++ timeouts++; ++ } + } + } else { + bes_devel("skip enter lp mode\n"); +@@ -833,6 +865,7 @@ void bes2600_pwr_init(struct bes2600_common *hw_priv) + hw_priv->bes_power.power_up_task = NULL; + mutex_init(&hw_priv->bes_power.pwr_mutex); + atomic_set(&hw_priv->bes_power.dev_state, 0); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_ACTIVE); + init_completion(&hw_priv->bes_power.pm_enter_cmpl); + sema_init(&hw_priv->bes_power.sync_lock, 1); + device_set_wakeup_capable(hw_priv->pdev, true); +@@ -1213,9 +1246,28 @@ int bes2600_pwr_clear_busy_event(struct bes2600_common *hw_priv, u32 event) + + void bes2600_pwr_notify_ps_changed(struct bes2600_common *hw_priv, u8 psmode) + { +- if((psmode & 0x01) != WSM_PSM_ACTIVE) { +- bes_devel("complete pm_enter_cmpl\n"); +- complete(&hw_priv->bes_power.pm_enter_cmpl); ++ /* ++ * The firmware sends a PM-changed indication for every transition, ++ * including ones we didn't ask for (firmware-internal coex moves, ++ * idle-driven aging). Update chip_pm_state unconditionally so the ++ * wake path can use it, but only fire pm_enter_cmpl when a host- ++ * initiated set_pm is actually in flight - otherwise a stale ++ * indication can prime a future wait against a freshly ++ * reinit_completion()'ed state. ++ */ ++ if ((psmode & 0x01) != WSM_PSM_ACTIVE) { ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_LP); ++ if (atomic_cmpxchg(&hw_priv->bes_power.pm_set_in_process, ++ 1, 0) == 1) { ++ bes_devel("complete pm_enter_cmpl\n"); ++ complete(&hw_priv->bes_power.pm_enter_cmpl); ++ } else { ++ bes_devel("PM ind (LP) without pending wait; state recorded\n"); ++ } ++ } else { ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_ACTIVE); + } + } + +diff --git a/drivers/staging/bes2600/bes_pwr.h b/drivers/staging/bes2600/bes_pwr.h +index 1ba866c25c42..6bc44acd7501 100644 +--- a/drivers/staging/bes2600/bes_pwr.h ++++ b/drivers/staging/bes2600/bes_pwr.h +@@ -64,6 +64,20 @@ enum power_down_state + POWER_DOWN_STATE_UNLOCKED, + }; + ++/* ++ * Confirmed PM state of the firmware-side chip. Tracks what the host ++ * has *seen* the firmware acknowledge, not what the host has ++ * requested. UNKNOWN means a host-initiated transition timed out ++ * before the firmware indication arrived; the next wake path should ++ * treat it as "we don't know" and probe before issuing GPIO/SDIO ++ * wakeup ops. ++ */ ++enum bes2600_chip_pm_state { ++ BES2600_CHIP_PM_ACTIVE = 0, ++ BES2600_CHIP_PM_LP, ++ BES2600_CHIP_PM_UNKNOWN, ++}; ++ + typedef void (*bes_pwr_enter_lp_cb)(struct bes2600_common *hw_priv); + typedef void (*bes_pwr_exit_lp_cb)(struct bes2600_common *hw_priv); + +@@ -106,6 +120,7 @@ struct bes2600_pwr_t + bool ap_lp_bad; + struct bes2600_pwr_event_t pwr_events[BES2600_DELAY_EVENT_NUM]; + atomic_t pm_set_in_process; ++ atomic_t chip_pm_state; + }; + + #ifdef CONFIG_BES2600_WOWLAN +-- +2.53.0 + diff --git a/patches/pm-state-resync/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch b/patches/pm-state-resync/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch new file mode 100644 index 000000000..8cb12ca46 --- /dev/null +++ b/patches/pm-state-resync/0001-bes2600-gate-PM-indication-completion-on-pending-req.patch @@ -0,0 +1,209 @@ +From 3853aadb48ae5f328d583a0ff2349c20589dd80b Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 28 Apr 2026 15:05:27 +0200 +Subject: [PATCH] bes2600: gate PM indication completion on pending request and + track chip state + +When mac80211 toggles PSM on the BES2600, the host sends WSM set_pm +and waits up to 5 s on bes_power.pm_enter_cmpl for a firmware-side +PM-changed indication confirming the transition. Three sequenced +flaws make the wait-and-confirm racy and leave host/chip bookkeeping +desynced when anything misfires: + + 1) bes2600_pwr_notify_ps_changed() unconditionally fires + complete(pm_enter_cmpl) for any non-active psmode. It does not + check whether a host-initiated set_pm is actually pending. A + spontaneous indication (firmware-internal coex move, + idle-driven aging) primes the completion, and the next host- + driven enter_lp_mode sees a false success on its first + wait_for_completion_timeout. + + 2) The wait/reinit ordering in bes2600_pwr_enter_lp_mode is + + status = wait_for_completion_timeout(...); + atomic_set(pm_set_in_process, 0); + reinit_completion(...); + + If an indication arrives between wait_for_completion_timeout + returning with status==1 and reinit_completion, the next + enter_lp_mode iteration's wait can also see false success. The + reinit must happen *before* we start the new request, not + after handling the previous one. + + 3) On wait_pm_ind timeout, the driver returns -ETIMEDOUT and walks + away. It does not record that the firmware's actual PM state + is no longer known to the host. Subsequent wake paths + (gpio_wake / sbus_active) assume the chip is still active and + hit deterministic SDIO failures when the firmware has + transitioned anyway. + +This patch is the safe-prerequisite half of a wider fix: + + * bes_pwr.h gains enum bes2600_chip_pm_state {ACTIVE, LP, UNKNOWN} + and bes_power.chip_pm_state. Its job is to track what the host + has *seen the firmware confirm*, not what the host has + requested. Initialised to ACTIVE in bes2600_pwr_init(). + + * bes2600_pwr_notify_ps_changed() unconditionally updates + chip_pm_state on every indication, but only fires + complete(pm_enter_cmpl) when atomic_cmpxchg(pm_set_in_process, + 1, 0) succeeds. A spontaneous indication can no longer prime a + waiter that will only set up its request afterwards. + + * bes2600_pwr_enter_lp_mode() now reinit_completion()s before + setting pm_set_in_process and sending wsm_set_pm. After a + timeout, it cmpxchgs pm_set_in_process back to 0 (so a late + indication cannot prime the next iteration) and on the win- + cmpxchg branch records chip_pm_state=UNKNOWN. + +A follow-up patch consumes chip_pm_state on the wake side +(bes2600_pwr_device_exit_lp_mode + bes2600_gpio_wakeup_mcu) to fix +the deterministic "active mcu fail" cycle this state-record +enables a fix for. Splitting the work this way keeps the lock-free +race fix small and reviewable on its own. + +No new locks, no behaviour change on the success path. Only the +recovery path (timeout + spontaneous indication) gains correctness. + +Signed-off-by: Markus Fritsche +--- + bes2600/bes_pwr.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- + bes2600/bes_pwr.h | 15 +++++++++++ + 2 files changed, 74 insertions(+), 7 deletions(-) + +diff --git a/bes2600/bes_pwr.c b/bes2600/bes_pwr.c +index 474b6f1..cd1b8cd 100644 +--- a/bes2600/bes_pwr.c ++++ b/bes2600/bes_pwr.c +@@ -524,7 +524,17 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + bes_devel("%s, psMode:%s, fastPsmIdlePeriod:%d apPsmChangePeriod:%d minAutoPsPollPeriod:%d\n", + __func__, bes2600_get_ps_mode_str(priv->powersave_mode.pmMode), priv->powersave_mode.fastPsmIdlePeriod, + priv->powersave_mode.apPsmChangePeriod, priv->powersave_mode.minAutoPsPollPeriod); ++ /* ++ * Reinit BEFORE the WSM goes out, so a stale ++ * indication from a previous cycle cannot have ++ * primed pm_enter_cmpl. From here until the ++ * indication callback's cmpxchg(1->0) on ++ * pm_set_in_process, only the indication for ++ * THIS request can complete the wait. ++ */ ++ reinit_completion(&hw_priv->bes_power.pm_enter_cmpl); + atomic_set(&hw_priv->bes_power.pm_set_in_process, 1); ++ + ret = bes2600_set_pm(priv, &priv->powersave_mode); + if (ret) { + atomic_set(&hw_priv->bes_power.pm_set_in_process, 0); +@@ -535,11 +545,33 @@ static int bes2600_pwr_enter_lp_mode(struct bes2600_common *hw_priv) + + /* wait power save mode changed indication */ + status = wait_for_completion_timeout(&hw_priv->bes_power.pm_enter_cmpl, 5 * HZ); +- atomic_set(&hw_priv->bes_power.pm_set_in_process, 0); +- reinit_completion(&hw_priv->bes_power.pm_enter_cmpl); + if (!status) { +- bes_devel("%s, wait pm ind timeout\n", __func__); +- timeouts++; ++ /* ++ * The indication callback only fires ++ * complete() when it observes ++ * pm_set_in_process == 1; cmpxchg it ++ * to 0 here so a late indication ++ * cannot prime the next wait. ++ * ++ * If we win the cmpxchg, this is a ++ * real timeout: the firmware's PS ++ * state is unknown to us. Mark it as ++ * such so the next wake path can ++ * probe before assuming the chip is ++ * still active. ++ * ++ * If we lose the cmpxchg, the ++ * indication arrived between the ++ * wait timing out and us getting ++ * here; treat as success. ++ */ ++ if (atomic_cmpxchg(&hw_priv->bes_power.pm_set_in_process, ++ 1, 0) == 1) { ++ bes_devel("%s, wait pm ind timeout\n", __func__); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_UNKNOWN); ++ timeouts++; ++ } + } + } else { + bes_devel("skip enter lp mode\n"); +@@ -833,6 +865,7 @@ void bes2600_pwr_init(struct bes2600_common *hw_priv) + hw_priv->bes_power.power_up_task = NULL; + mutex_init(&hw_priv->bes_power.pwr_mutex); + atomic_set(&hw_priv->bes_power.dev_state, 0); ++ atomic_set(&hw_priv->bes_power.chip_pm_state, BES2600_CHIP_PM_ACTIVE); + init_completion(&hw_priv->bes_power.pm_enter_cmpl); + sema_init(&hw_priv->bes_power.sync_lock, 1); + device_set_wakeup_capable(hw_priv->pdev, true); +@@ -1213,9 +1246,28 @@ int bes2600_pwr_clear_busy_event(struct bes2600_common *hw_priv, u32 event) + + void bes2600_pwr_notify_ps_changed(struct bes2600_common *hw_priv, u8 psmode) + { +- if((psmode & 0x01) != WSM_PSM_ACTIVE) { +- bes_devel("complete pm_enter_cmpl\n"); +- complete(&hw_priv->bes_power.pm_enter_cmpl); ++ /* ++ * The firmware sends a PM-changed indication for every transition, ++ * including ones we didn't ask for (firmware-internal coex moves, ++ * idle-driven aging). Update chip_pm_state unconditionally so the ++ * wake path can use it, but only fire pm_enter_cmpl when a host- ++ * initiated set_pm is actually in flight - otherwise a stale ++ * indication can prime a future wait against a freshly ++ * reinit_completion()'ed state. ++ */ ++ if ((psmode & 0x01) != WSM_PSM_ACTIVE) { ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_LP); ++ if (atomic_cmpxchg(&hw_priv->bes_power.pm_set_in_process, ++ 1, 0) == 1) { ++ bes_devel("complete pm_enter_cmpl\n"); ++ complete(&hw_priv->bes_power.pm_enter_cmpl); ++ } else { ++ bes_devel("PM ind (LP) without pending wait; state recorded\n"); ++ } ++ } else { ++ atomic_set(&hw_priv->bes_power.chip_pm_state, ++ BES2600_CHIP_PM_ACTIVE); + } + } + +diff --git a/bes2600/bes_pwr.h b/bes2600/bes_pwr.h +index 1ba866c..6bc44ac 100644 +--- a/bes2600/bes_pwr.h ++++ b/bes2600/bes_pwr.h +@@ -64,6 +64,20 @@ enum power_down_state + POWER_DOWN_STATE_UNLOCKED, + }; + ++/* ++ * Confirmed PM state of the firmware-side chip. Tracks what the host ++ * has *seen* the firmware acknowledge, not what the host has ++ * requested. UNKNOWN means a host-initiated transition timed out ++ * before the firmware indication arrived; the next wake path should ++ * treat it as "we don't know" and probe before issuing GPIO/SDIO ++ * wakeup ops. ++ */ ++enum bes2600_chip_pm_state { ++ BES2600_CHIP_PM_ACTIVE = 0, ++ BES2600_CHIP_PM_LP, ++ BES2600_CHIP_PM_UNKNOWN, ++}; ++ + typedef void (*bes_pwr_enter_lp_cb)(struct bes2600_common *hw_priv); + typedef void (*bes_pwr_exit_lp_cb)(struct bes2600_common *hw_priv); + +@@ -106,6 +120,7 @@ struct bes2600_pwr_t + bool ap_lp_bad; + struct bes2600_pwr_event_t pwr_events[BES2600_DELAY_EVENT_NUM]; + atomic_t pm_set_in_process; ++ atomic_t chip_pm_state; + }; + + #ifdef CONFIG_BES2600_WOWLAN +-- +2.53.0 +