From 0dde479994a84f9e968b327d5409c3a926acb69c Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sun, 26 Apr 2026 22:31:58 +0200 Subject: [PATCH 15/29] bes2600: recover wedged firmware via mmc_hw_reset on link break When the LMAC active monitor detects 'link break between lmac and host' (the hw_buf_used==pending watchdog in bes2600_bh_lmac_active_monitor), bes2600_chrdev_wifi_force_close(hw_priv, true) is invoked to tear the device down and prepare for a fresh probe. On the wifi_force_close_work side this calls bes2600_chrdev_do_system_close() which dispatches sbus_ops->power_switch(0). On PineTab2 (RK3566 + BES2600WM over SDIO) this recovery path is a no-op: * bes2600_sdio_power_down() writes a SYSTEM_CLOSE host-int message, clears MMC_CAP_NONREMOVABLE, and schedules sdio_scan_work, which is the literal one-line stub bes_warn("...this function does nothing\n"). * bes2600_sdio_on() (the eventual power_switch(1) counterpart) toggles pdata->powerup, which is NULL on PineTab2 because the wifi-reset GPIO is owned by sdio_pwrseq, not the bes2600 device tree node (see arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi: 'The reset pin is claimed by sdio_mmcseq, It is better to move it to U-Boot so the OS can use it.'). Net result: the chip is never reset. The function drivers are not removed (the SDIO core has no signal that the card is gone), the firmware stays wedged, and a subsequent rmmod bes2600 leaves the SDIO function in a half-torn-down state. modprobe bes2600 then fails with 'probe with driver bes2600_wlan failed with error -123' (-ENOMEDIUM) on both functions (:1 wifi, :2 BT-companion) until a full system reboot. Observed on PineTab2 (linux-pinetab2 6.19.10-danctnix1-1) after ~150 minutes of background-scan rejects (wsm_generic_confirm 0x0007, [SCAN] Scan failed (-22)) accumulating until the LMAC stopped acknowledging TX buffers (hw_buf_used:24 pending:24). Reproducible under sustained scan pressure. 
Add a sbus operation bus_reset() that the recovery path can call when power_switch() has no effective chip-reset signal of its own. Provide an SDIO implementation that calls mmc_hw_reset(self->func->card), which on a multi-function SDIO card (PineTab2 binds func 1 for WLAN and func 2 for the BT-companion path) takes the remove-and-rescan path: mmc_sdio_hw_reset() marks the card removed and schedules mmc_rescan, which tears down the bound function drivers and re-detects the card on the next sweep, in turn reinvoking bes2600_sdio_probe(). With a single function probed it instead invokes mmc_power_cycle() directly, which on PineTab2 toggles the wifi-reset GPIO via sdio_pwrseq. Add bes2600_chrdev_do_bus_reset() as the chrdev-side helper. It invokes the bus op and then waits on probe_done_wq for the SDIO remove() callback to clear sbus_priv, mirroring the wait pattern already used by bes2600_chrdev_do_system_close() so that a subsequent bes2600_switch_wifi(true) sees a clean state and can wait on the fresh probe. Wire it into bes2600_chrdev_wifi_force_close_work(): when halt_dev is set (the hard-exception path used by both bes2600_bh_lmac_active_monitor and bes2600_bh_mcu_active_monitor) and the underlying bus implements bus_reset, take the new recovery path; otherwise fall back to the legacy power_switch(0) sequence so this patch is a no-op on USB or any other future bus that does not provide bus_reset. mmc_hw_reset() is exported by the MMC core and is the canonical recovery primitive; calling it without holding the SDIO host claim is correct because the multi-func remove-and-rescan path acquires the host claim via the mmc workqueue, and the single-func mmc_power_cycle path does not require the host claim. No DT change is required: this works against the existing PineTab2 DTS, where the wifi-reset GPIO and the optional sdio_pwrkey GPIO (on v2.0 boards) are both already configured as MMC pwrseq resets. 
Signed-off-by: Markus Fritsche --- bes2600/bes2600_sdio.c | 29 +++++++++++++++++++++ bes2600/bes_chardev.c | 59 ++++++++++++++++++++++++++++++++++++++++-- bes2600/bes_chardev.h | 1 + bes2600/sbus.h | 8 ++++++ 4 files changed, 95 insertions(+), 2 deletions(-) diff --git a/drivers/staging/bes2600/bes2600_sdio.c b/drivers/staging/bes2600/bes2600_sdio.c index 3e04e8c..e5840c8 100644 --- a/drivers/staging/bes2600/bes2600_sdio.c +++ b/drivers/staging/bes2600/bes2600_sdio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1777,6 +1778,33 @@ static void bes2600_sdio_halt_device(struct sbus_priv *self) sdio_work_debug(self); } +/* + * Trigger an SDIO bus reset via mmc_hw_reset(). + * + * With multiple SDIO functions probed (PineTab2 binds func 1 for WLAN and + * func 2 for the BT-companion path) mmc_sdio_hw_reset() takes the + * remove-and-rescan path: it marks the card removed and schedules + * mmc_rescan, which tears down the bound function drivers and re-detects + * the card on the next sweep, in turn reinvoking bes2600_sdio_probe(). + * + * With a single function probed it instead invokes mmc_power_cycle() + * directly, which on PineTab2 toggles the wifi-reset GPIO via sdio_pwrseq. + * + * In both cases the chip ends up in a freshly reset state, which is the + * goal of the recovery path. + * + * mmc_hw_reset() must be called without holding the SDIO host claim -- + * the multi-func remove-and-rescan path acquires the host claim via the + * mmc workqueue. 
+ */ +static int bes2600_sdio_bus_reset(struct sbus_priv *self) +{ + if (!self || !self->func || !self->func->card) + return -EINVAL; + + return mmc_hw_reset(self->func->card); +} + static bool bes2600_sdio_wakeup_source(struct sbus_priv *self) { struct bes2600_platform_data_sdio *pdata = bes2600_get_platform_data(); @@ -1815,6 +1843,7 @@ static struct sbus_ops bes2600_sdio_sbus_ops = { .gpio_sleep = bes2600_gpio_allow_mcu_sleep, .halt_device = bes2600_sdio_halt_device, .wakeup_source = bes2600_sdio_wakeup_source, + .bus_reset = bes2600_sdio_bus_reset, }; static void bes2600_sdio_en_lp_cb(struct bes2600_common *hw_priv) diff --git a/drivers/staging/bes2600/bes_chardev.c b/drivers/staging/bes2600/bes_chardev.c index a02d6d9..d1375bc 100644 --- a/drivers/staging/bes2600/bes_chardev.c +++ b/drivers/staging/bes2600/bes_chardev.c @@ -442,6 +442,48 @@ int bes2600_chrdev_do_system_close(const struct sbus_ops *sbus_ops, struct sbus_ return ret; } +/* + * Hard-reset the bus and wait for the bus core to remove the chip. + * + * Used by the firmware-wedge recovery path on platforms where the normal + * power_switch(0) sequence has no effective chip-reset signal. The bus + * implementation triggers an asynchronous re-detect; this helper waits for + * the resulting remove() callback to clear bes2600_cdev.sbus_priv so that a + * subsequent bes2600_switch_wifi(true) sees a clean state and can wait on + * the fresh probe. + */ +int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_priv *priv) +{ + int ret; + long status; + + if (!sbus_ops || !priv) + return -EINVAL; + + if (!sbus_ops->bus_reset) + return -EOPNOTSUPP; + + bes_info("trigger bus reset to recover wedged firmware.\n"); + + ret = sbus_ops->bus_reset(priv); + if (ret < 0) { + bes_err("bus_reset failed: %d\n", ret); + return ret; + } + + /* + * A positive return (mmc_hw_reset() yields 1) means the reset was + * scheduled asynchronously, not that it failed. The bus core then + * removes the bound function drivers and re-detects the chip. 
Wait for + * remove() to clear sbus_priv; 'priv' may be freed, do not touch it. + */ + status = wait_event_timeout(bes2600_cdev.probe_done_wq, + !bes2600_cdev.sbus_priv, HZ * 3); + WARN_ON(status <= 0); + + return 0; +} + bool bes2600_chrdev_is_wifi_opened(void) { bool wifi_opened = false; @@ -540,8 +582,21 @@ static void bes2600_chrdev_wifi_force_close_work(struct work_struct *work) /* unregister wifi */ bes2600_switch_wifi(0); - /* power down device if wifi is only opened */ - if (bes2600_chrdev_check_system_close()) { + /* + * Hard exception with a bus_reset implementation: tear the + * bus down via mmc_hw_reset() (or equivalent) so the next + * bringup probes a freshly reset chip. On PineTab2 this is + * the only effective recovery path -- the existing + * power_switch(0)/(1) sequence has no chip-reset signal of + * its own (sdio_pwrseq owns wifi_reset). + * + * Soft close, or hard close on a board without bus_reset: + * fall back to the legacy power_switch(0) sequence. 
+ */ + if (bes2600_cdev.halt_dev && bes2600_cdev.sbus_ops->bus_reset) { + bes2600_chrdev_do_bus_reset(bes2600_cdev.sbus_ops, + bes2600_cdev.sbus_priv); + } else if (bes2600_chrdev_check_system_close()) { bes2600_chrdev_do_system_close(bes2600_cdev.sbus_ops, bes2600_cdev.sbus_priv); } diff --git a/drivers/staging/bes2600/bes_chardev.h b/drivers/staging/bes2600/bes_chardev.h index 15602ba..3f0c59b 100644 --- a/drivers/staging/bes2600/bes_chardev.h +++ b/drivers/staging/bes2600/bes_chardev.h @@ -60,6 +60,7 @@ struct sbus_priv *bes2600_chrdev_get_sbus_priv_data(void); /* used to control device power down */ int bes2600_chrdev_check_system_close(void); int bes2600_chrdev_do_system_close(const struct sbus_ops *sbus_ops, struct sbus_priv *priv); +int bes2600_chrdev_do_bus_reset(const struct sbus_ops *sbus_ops, struct sbus_priv *priv); void bes2600_chrdev_wakeup_bt(void); void bes2600_chrdev_wifi_force_close(struct bes2600_common *hw_priv, bool halt_dev); void bes2600_chrdev_usb_remove(struct bes2600_common *hw_priv); diff --git a/drivers/staging/bes2600/sbus.h b/drivers/staging/bes2600/sbus.h index 1f2c0cd..cb90890 100644 --- a/drivers/staging/bes2600/sbus.h +++ b/drivers/staging/bes2600/sbus.h @@ -75,6 +75,14 @@ struct sbus_ops { void (*halt_device)(struct sbus_priv *self); bool (*wakeup_source)(struct sbus_priv *self); int (*reboot)(struct sbus_priv *self); + /* + * Force the host bus to re-detect and re-probe the chip. Called + * from the firmware-wedge recovery path when power_switch() has no + * effective chip-reset signal of its own (e.g. PineTab2, where the + * wifi-reset GPIO is owned by sdio_pwrseq, not the bes2600 node). + * Returns 0 on success or a negative errno. + */ + int (*bus_reset)(struct sbus_priv *self); }; void bes2600_irq_handler(struct bes2600_common *priv); -- 2.54.0