Skip to content

Commit 8cc5d64

Browse files
authored
Merge pull request #4853 from willmmiles/bootloop-platform-simplification
Bootloop detection platform factoring
2 parents 4c948cc + 6f914d7 commit 8cc5d64

File tree

1 file changed

+111
-97
lines changed

1 file changed

+111
-97
lines changed

wled00/util.cpp

Lines changed: 111 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -720,125 +720,139 @@ void *realloc_malloc(void *ptr, size_t size) {
720720
// checks if the ESP reboots multiple times due to a crash or watchdog timeout
721721
// if a bootloop is detected: restore settings from backup, then reset settings, then switch boot image (and repeat)
722722

723-
#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection
724-
#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /bak.cfg.json
725-
#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json)
726-
#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition
727-
#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset)
723+
#define BOOTLOOP_INTERVAL_MILLIS 120000 // time limit between crashes: 120 seconds (2 minutes)
724+
#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection
725+
#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /bkp.cfg.json
726+
#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json)
727+
#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition
728+
#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset)
729+
730+
// Platform-agnostic abstraction
731+
enum class ResetReason {
732+
Power,
733+
Software,
734+
Crash,
735+
Brownout
736+
};
737+
728738
#ifdef ESP8266
729-
#define BOOTLOOP_INTERVAL_TICKS (5 * 160000) // time limit between crashes: ~5 seconds in RTC ticks
730-
#define BOOT_TIME_IDX 0 // index in RTC memory for boot time
731-
#define CRASH_COUNTER_IDX 1 // index in RTC memory for crash counter
732-
#define ACTIONT_TRACKER_IDX 2 // index in RTC memory for boot action
739+
// Place variables in RTC memory via references, since RTC memory is not exposed via the linker in the Non-OS SDK
740+
// Use an offset of 32 as there are some hints that the first 128 bytes of "user" memory are used by the OTA system
741+
// Ref: https://github.com/esp8266/Arduino/blob/78d0d0aceacc1553f45ad8154592b0af22d1eede/cores/esp8266/Esp.cpp#L168
742+
static volatile uint32_t& bl_last_boottime = *(RTC_USER_MEM + 32);
743+
static volatile uint32_t& bl_crashcounter = *(RTC_USER_MEM + 33);
744+
static volatile uint32_t& bl_actiontracker = *(RTC_USER_MEM + 34);
745+
746+
static inline ResetReason rebootReason() {
747+
uint32_t resetReason = system_get_rst_info()->reason;
748+
if (resetReason == REASON_EXCEPTION_RST
749+
|| resetReason == REASON_WDT_RST
750+
|| resetReason == REASON_SOFT_WDT_RST)
751+
return ResetReason::Crash;
752+
if (resetReason == REASON_SOFT_RESTART)
753+
return ResetReason::Software;
754+
return ResetReason::Power;
755+
}
756+
757+
static inline uint32_t getRtcMillis() { return system_get_rtc_time() / 160; }; // rtc ticks ~160000Hz
758+
733759
#else
734-
#define BOOTLOOP_INTERVAL_TICKS 5000 // time limit between crashes: ~5 seconds in milliseconds
735760
// variables in RTC_NOINIT memory persist between reboots (but not on hardware reset)
736761
RTC_NOINIT_ATTR static uint32_t bl_last_boottime;
737762
RTC_NOINIT_ATTR static uint32_t bl_crashcounter;
738763
RTC_NOINIT_ATTR static uint32_t bl_actiontracker;
764+
765+
static inline ResetReason rebootReason() {
766+
esp_reset_reason_t reason = esp_reset_reason();
767+
if (reason == ESP_RST_BROWNOUT) return ResetReason::Brownout;
768+
if (reason == ESP_RST_SW) return ResetReason::Software;
769+
if (reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT) return ResetReason::Crash;
770+
return ResetReason::Power;
771+
}
772+
773+
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0)
774+
static inline uint32_t getRtcMillis() { return esp_rtc_get_time_us() / 1000; }
775+
#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0)
776+
static inline uint32_t getRtcMillis() { return rtc_time_slowclk_to_us(rtc_time_get(), rtc_clk_slow_freq_get_hz()) / 1000; }
777+
#endif
778+
739779
void bootloopCheckOTA() { bl_actiontracker = BOOTLOOP_ACTION_OTA; } // swap boot image if bootloop is detected instead of restoring config
780+
740781
#endif
741782

742783
// detect bootloop by checking the reset reason and the time since last boot
743784
static bool detectBootLoop() {
744-
#if !defined(ESP8266)
745-
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0)
746-
uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds
747-
#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0)
748-
uint64_t rtc_ticks = rtc_time_get();
749-
uint32_t rtctime = rtc_time_slowclk_to_us(rtc_ticks, rtc_clk_slow_freq_get_hz()) / 1000; // convert to milliseconds
750-
#endif
751-
752-
esp_reset_reason_t reason = esp_reset_reason();
785+
uint32_t rtctime = getRtcMillis();
786+
bool result = false;
753787

754-
if (!(reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT)) {
755-
// no crash detected, init variables
756-
bl_crashcounter = 0;
757-
bl_last_boottime = rtctime;
758-
if(reason != ESP_RST_SW)
759-
bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler)
760-
} else if (reason == ESP_RST_BROWNOUT) {
761-
// crash due to brownout can't be detected unless using flash memory to store bootloop variables
762-
// this is a simpler way to preemptively revert the config in case current brownout is caused by a bad choice of settings
763-
DEBUG_PRINTLN(F("brownout detected"));
764-
//restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all)
765-
} else {
766-
uint32_t rebootinterval = rtctime - bl_last_boottime;
767-
bl_last_boottime = rtctime; // store current runtime for next reboot
768-
if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) {
769-
bl_crashcounter++;
770-
if (bl_crashcounter >= BOOTLOOP_THRESHOLD) {
771-
DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!"));
772-
bl_crashcounter = 0;
773-
return true;
774-
}
775-
}
776-
}
777-
#else // ESP8266
778-
rst_info* resetreason = system_get_rst_info();
779-
uint32_t bl_last_boottime;
780-
uint32_t bl_crashcounter;
781-
uint32_t bl_actiontracker;
782-
uint32_t rtctime = system_get_rtc_time();
783-
784-
if (!(resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST)) {
785-
// no crash detected, init variables
786-
bl_crashcounter = 0;
787-
ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t));
788-
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
789-
if(resetreason->reason != REASON_SOFT_RESTART) {
788+
switch(rebootReason()) {
789+
case ResetReason::Power:
790790
bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler)
791-
ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t));
792-
}
793-
} else {
794-
// system has crashed
795-
ESP.rtcUserMemoryRead(BOOT_TIME_IDX, &bl_last_boottime, sizeof(uint32_t));
796-
ESP.rtcUserMemoryRead(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
797-
uint32_t rebootinterval = rtctime - bl_last_boottime;
798-
ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); // store current ticks for next reboot
799-
if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) {
800-
bl_crashcounter++;
801-
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
802-
if (bl_crashcounter >= BOOTLOOP_THRESHOLD) {
803-
DEBUG_PRINTLN(F("BOOTLOOP DETECTED"));
791+
// fall through
792+
case ResetReason::Software:
793+
// no crash detected, reset counter
794+
bl_crashcounter = 0;
795+
break;
796+
797+
case ResetReason::Crash:
798+
{
799+
uint32_t rebootinterval = rtctime - bl_last_boottime;
800+
if (rebootinterval < BOOTLOOP_INTERVAL_MILLIS) {
801+
bl_crashcounter++;
802+
if (bl_crashcounter >= BOOTLOOP_THRESHOLD) {
803+
DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!"));
804+
bl_crashcounter = 0;
805+
result = true;
806+
}
807+
} else {
808+
// Reset counter on long intervals to track only consecutive short-interval crashes
804809
bl_crashcounter = 0;
805-
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
806-
return true;
807-
}
810+
// TODO: crash reporting goes here
811+
}
812+
break;
808813
}
814+
815+
case ResetReason::Brownout:
816+
// crash due to brownout can't be detected unless using flash memory to store bootloop variables
817+
DEBUG_PRINTLN(F("brownout detected"));
818+
//restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all)
819+
break;
809820
}
810-
#endif
811-
return false; // no bootloop detected
821+
822+
bl_last_boottime = rtctime; // store current runtime for next reboot
823+
824+
return result;
812825
}
813826

814827
void handleBootLoop() {
815-
DEBUG_PRINTLN(F("checking for bootloop"));
828+
DEBUG_PRINTF_P(PSTR("checking for bootloop: time %d, counter %d, action %d\n"), bl_last_boottime, bl_crashcounter, bl_actiontracker);
816829
if (!detectBootLoop()) return; // no bootloop detected
817-
#ifdef ESP8266
818-
uint32_t bl_actiontracker;
819-
ESP.rtcUserMemoryRead(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t));
820-
#endif
821-
if (bl_actiontracker == BOOTLOOP_ACTION_RESTORE) {
822-
restoreConfig(); // note: if this fails, could reset immediately. instead just let things play out and save a few lines of code
823-
bl_actiontracker = BOOTLOOP_ACTION_RESET; // reset config if it keeps bootlooping
824-
} else if (bl_actiontracker == BOOTLOOP_ACTION_RESET) {
825-
resetConfig();
826-
bl_actiontracker = BOOTLOOP_ACTION_OTA; // swap boot partition if it keeps bootlooping. On ESP8266 this is the same as BOOTLOOP_ACTION_NONE
827-
}
830+
831+
switch(bl_actiontracker) {
832+
case BOOTLOOP_ACTION_RESTORE:
833+
restoreConfig();
834+
++bl_actiontracker;
835+
break;
836+
case BOOTLOOP_ACTION_RESET:
837+
resetConfig();
838+
++bl_actiontracker;
839+
break;
840+
case BOOTLOOP_ACTION_OTA:
828841
#ifndef ESP8266
829-
else if (bl_actiontracker == BOOTLOOP_ACTION_OTA) {
830-
if(Update.canRollBack()) {
831-
DEBUG_PRINTLN(F("Swapping boot partition..."));
832-
Update.rollBack(); // swap boot partition
833-
}
834-
bl_actiontracker = BOOTLOOP_ACTION_DUMP; // out of options
835-
}
836-
#endif
837-
else
838-
dumpFilesToSerial();
839-
#ifdef ESP8266
840-
ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t));
842+
if(Update.canRollBack()) {
843+
DEBUG_PRINTLN(F("Swapping boot partition..."));
844+
Update.rollBack(); // swap boot partition
845+
}
846+
++bl_actiontracker;
847+
break;
848+
#else
849+
// fall through
841850
#endif
851+
case BOOTLOOP_ACTION_DUMP:
852+
dumpFilesToSerial();
853+
break;
854+
}
855+
842856
ESP.restart(); // restart cleanly and don't wait for another crash
843857
}
844858

0 commit comments

Comments
 (0)